Import the dependencies

In [None]:
import numpy as np
import pandas as pd
import pandas_ta as ta # pandas technical analysis
import pandas_datareader.data as web
import matplotlib.pyplot as plt
plt.style.use("fivethirtyeight") # plt.style.available[:] gives list of all available stylesheets
%matplotlib inline
import seaborn as sns
import datetime as dt

import yfinance as yf # as a means to access yahoo finance data

from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

from sklearn.svm import SVC # svm
from sklearn.metrics import classification_report, confusion_matrix, mean_squared_error, accuracy_score
from sklearn.model_selection import GridSearchCV # c, gamma parameter optimisation

# Global matplotlib defaults ("rc" = runtime configuration): base font size and
# the figure background colour.
plt.rcParams.update({
    "font.size": 11,
    "figure.facecolor": "#000080",
})
# NOTE(review): seaborn styles appear to carry their own "figure.facecolor", so
# this call may override the colour set just above - confirm the intended order.
sns.set_style("darkgrid")

import warnings
# Silences every warning for the rest of the session, including pandas warnings
# about writing to slices of a DataFrame.
warnings.filterwarnings("ignore")

In [None]:
# Download window: from midnight one calendar year ago up to now.
end = dt.datetime.today()
# pd.DateOffset clamps 29 February to 28 February of the previous year, whereas
# dt.datetime(end.year - 1, end.month, end.day) raises ValueError on a leap day.
start = (pd.Timestamp(end).normalize() - pd.DateOffset(years=1)).to_pydatetime()

list_of_tickers = ["^GSPC", "^IXIC"] # add tickers here
data = web.DataReader(list_of_tickers, 'yahoo', start, end)
# stack() moves the innermost column level into the row index and reset_index()
# turns the index levels back into ordinary columns. The result is already a
# DataFrame, so no further wrapping is needed.
ticker = data.stack().reset_index()

In [None]:
# One frame per index. The frames get new indicator columns in the cells below,
# so take explicit copies: writing to a bare boolean-mask slice of `ticker`
# triggers pandas' SettingWithCopyWarning and leaves it ambiguous which object
# is being modified.
gspc = ticker[ticker["Symbols"] == "^GSPC"].copy()
ixic = ticker[ticker["Symbols"] == "^IXIC"].copy()

In [None]:
days = [5, 50, 100, 200]  # look-back windows, in rows
list_of_assets = [gspc, ixic]

# Simple moving averages of the adjusted close, one column per window length.
for asset in list_of_assets:
    adj_close = asset["Adj Close"]
    for n_days in days:
        # Rows without a full window stay NaN. Filling them with "-" would turn
        # the column into an object column of mixed strings and floats that can
        # no longer be plotted or used in arithmetic.
        asset[f"MA for {n_days} days"] = adj_close.rolling(window=n_days).mean()

In [None]:
# bollinger bands
def boll(closing, length, num_sd):
    """
    Bollinger bands of a price series.

    closing: pandas Series of prices
    length:  rolling window size, in rows
    num_sd:  band half-width, in rolling standard deviations

    returns (lower band, middle band, upper band), each rounded to 6 decimals;
    the middle band is the rolling mean
    """
    window = closing.rolling(window=length)
    middle = window.mean()
    half_width = num_sd * window.std()
    bands = (middle - half_width, middle, middle + half_width)
    return tuple(np.round(band, 6) for band in bands)

In [None]:
# Bollinger settings: rolling window (rows) and band width (standard deviations)
length, num_sd = 50, 2

# column labels for the three bands
down, av, up = "Lower band", "Middle band", "Upper band"

for asset in list_of_assets:
    lower, middle, upper = boll(asset["Adj Close"], length=length, num_sd=num_sd)
    asset[down] = lower
    asset[av] = middle
    asset[up] = upper

In [None]:
# rsi & momentum
def rsi(values):
    """
    Relative Strength Index of one window of price changes.

    values: 1-D window of period-over-period price changes (pandas Series or
            numpy array), e.g. as handed over by Rolling.apply

    returns 100 * gains / (gains + losses), where gains / losses are the summed
    up-moves / down-moves of the window. This equals 100 - 100 / (1 + RS) with
    RS = average gain / average loss taken over the whole window.
    The result is 100 when the window has no losses, 0 when it has no gains,
    and NaN when there was no movement at all.
    """
    # Sum over the whole window: averaging only the up rows and only the down
    # rows would divide by different counts and distort the ratio, and would
    # give NaN whenever one side is empty.
    gains = values[values > 0].sum()
    losses = -values[values < 0].sum()
    total = gains + losses
    if total == 0:
        return float("nan")
    return 100 * gains / total

window = 14  # RSI look-back, in rows

for asset in list_of_assets:
    # one-row change in adjusted close (same as x - x.shift(1))
    asset["Momentum_1D"] = asset["Adj Close"].diff()
    asset["RSI_14D"] = asset["Momentum_1D"].rolling(window=window).apply(rsi)
    # Leading rows without enough history stay NaN so both columns remain
    # numeric; a "-" placeholder would turn them into object columns that
    # cannot be plotted or used in arithmetic.

In [None]:
# macd
for asset in list_of_assets:
    adj_close = asset["Adj Close"]
    # exponentially weighted means with spans of 26 and 12 rows
    slow_ewma = adj_close.ewm(span=26).mean()
    fast_ewma = adj_close.ewm(span=12).mean()
    asset["EWMA_26D"] = slow_ewma
    asset["EWMA_12D"] = fast_ewma
    # MACD line: short-span average minus long-span average
    asset["MACD"] = fast_ewma - slow_ewma

In [None]:
# stochastic oscillator (%K and %D)
def stoch_osci(asset, n=10):
    """
    Add the stochastic oscillator %K to ``asset`` in place.

        %K = 100 * (Close - lowest Low over n rows)
                 / (highest High over n rows - lowest Low over n rows)

    asset: DataFrame with "Close", "High" and "Low" columns; the result is
           written to the column "Stoch Osci %K/%D"
    n:     look-back window, in rows

    Returns None. Rows without a full window are NaN. Only %K is computed;
    no smoothed %D line is produced despite the column name.
    """
    # The numerator must use the lowest low of the window (rolling min), the
    # same reference level as the denominator - not the rolling mean of the lows.
    lowest_low = asset["Low"].rolling(window=n).min()
    highest_high = asset["High"].rolling(window=n).max()
    # A window with zero price range has no defined %K; use NaN rather than
    # dividing by zero.
    price_range = (highest_high - lowest_low).replace(0, np.nan)
    asset["Stoch Osci %K/%D"] = (asset["Close"] - lowest_low) / price_range * 100

for asset in list_of_assets:
    # stoch_osci writes the "Stoch Osci %K/%D" column in place. Rows without a
    # full window stay NaN so the column remains numeric; a "-" placeholder
    # would turn it into an object column that cannot be plotted or used in
    # arithmetic.
    stoch_osci(asset)

In [None]:
# df = pd.DataFrame(jpm)
# df["Returns"] = (df["Adj Close"]/df["Adj Close"].shift(1))-1

# def direction(x):
#     if x>0:
#         return 1.0
#     else:
#         return 0.0
    
# df["Direction"] = df["Returns"].apply(direction)
# df = df.dropna()

# df = df[["High", "Low", "Open", "Close", "Volume", "Adj Close","Returns", "Direction"]]
# df

In [None]:
# l = [gspc, ixic]
# plt.figure(figsize=(15,6))

# for i, j in enumerate(l, 1):
#     plt.subplot(2,2,i)
#     j["Adj Close"].plot()
#     plt.xlabel("Date")
#     plt.ylabel("Adj Close")
#     plt.title(f"Closing price of {list_of_tickers[i-1]}")
# plt.tight_layout()

In [None]:
# # high c/high gamma overfitting
# # low c/low gamma underfitting

# # data pre-processing
# X = df.iloc[:,0:5]
# y = df.iloc[:,-1]

# # MinMaxScaler
# scaler = MinMaxScaler()
# df = scaler.fit_transform(df)

In [None]:
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# X_train = np.array(X_train)
# y_train = np.array(y_train)
# X_test = np.array(X_test)
# y_test = np.array(y_test)

# # train the model
# svm = SVC(C=1.0, kernel="linear", gamma="scale")
# svm.fit(X_train, y_train)

# # prediction
# pred = svm.predict(X_test)
# pred

# # classification report and confusion matrix

# print("Classification report:")
# print(classification_report(y_test, pred))

# print("Confusion Matrix:")
# print(confusion_matrix(y_test, pred))

# print("Accuracy score:")
# print(accuracy_score(y_test, pred).round(2))

In [None]:
# low gamma considers further away points to fit a hyperplane
# high gamma considers only close points to fit a hyperplane (prone to overfitting)
# too much emphasis on close points and not much on further away points - high gamma

# gridsearchcv
# param_grid = {"C":[0.1,1,10,100,1000], "gamma":[1,0.1,0.01,0.001], "kernel":["rbf", "poly", "sigmoid"]}
# svm_grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=2)
# # verbose controls how much progress text is printed during the search

# svm_grid.fit(X_train, y_train)
# svm_grid.best_params_

In [None]:
# # prediction with gridsearchcv (then classification report/confusion matrix)/accuracy score
# svm_grid_pred = svm_grid.predict(X_test)

# print("Classification report:")
# print(classification_report(y_test, svm_grid_pred))

# print("Confusion Matrix:")
# print(confusion_matrix(y_test, svm_grid_pred))

# print("Accuracy score:")
# print(accuracy_score(y_test, svm_grid_pred).round(2))