Import the dependencies

In [None]:
import numpy as np
import pandas as pd
import pandas_ta as ta # pandas technical analysis
import pandas_datareader.data as web
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from datetime import datetime

import yfinance as yf # as a means to access yahoo finance data

from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

from sklearn.svm import SVC # svm
from sklearn.metrics import classification_report, confusion_matrix, mean_squared_error, accuracy_score
from sklearn.model_selection import GridSearchCV # c, gamma parameter optimisation

# Customise matplotlib globally with a style sheet plus rcParams overrides.
# These settings apply to every later figure; for a temporary change use
# `with plt.style.context(...)` instead.
# ("rc" stands for "run commands".)
plt.style.use("classic") # plt.style.available lists every installed style sheet
plt.rcParams["font.size"] = 11
plt.rcParams["lines.linestyle"] = "-"
# plt.rcParams["figure.dpi"] = 500 # uncomment for sharper (higher-DPI) figures

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this would also hide any pandas warnings raised by later
# cells — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

from IPython.display import display
# None removes the column limit, so wide DataFrames are displayed in full.
pd.options.display.max_columns = None

In [None]:
# Download roughly two years of daily data ending today.
end = datetime.today()
# Step back with a calendar offset instead of rebuilding the date from
# end.year - 2: datetime(end.year-2, end.month, end.day) raises ValueError when
# today is 29 February. DateOffset clamps to 28 February in that case.
# normalize() keeps the start at midnight, as the original construction did.
start = (pd.Timestamp(end).normalize() - pd.DateOffset(years=2)).to_pydatetime()

list_of_tickers = ["^GSPC", "^IXIC", "^N225"] # add tickers here
# One request for all tickers; the result is reshaped per ticker further down.
d = web.DataReader(list_of_tickers, 'yahoo', start, end)

In [None]:
# Inspect the raw download before reshaping it.
d

In [None]:
# Reshape from wide to long format.
# .stack() moves the innermost column level into the innermost row level
# (https://medium.com/swlh/reshaping-in-pandas-with-stack-and-unstack-functions-bb169f64467d);
# .reset_index() then turns the row index levels into ordinary columns, which
# is what lets later cells filter on t["Symbols"] and read asset["Date"].
t = d.stack().reset_index()
t

In [None]:
# Split the long-format frame into one independent DataFrame per index.
# .copy() matters: later cells add indicator columns to these frames, and
# writing into a boolean-mask selection of `t` is exactly the pattern pandas
# flags with SettingWithCopyWarning. Explicit copies make the ownership clear.
gspc = t[t["Symbols"] == "^GSPC"].copy()
ixic = t[t["Symbols"] == "^IXIC"].copy()
n225 = t[t["Symbols"] == "^N225"].copy()
# Every feature-engineering cell below iterates over this list.
list_of_assets = [gspc, ixic, n225]

In [None]:
# Daily rate of change in percent: (today - yesterday) / yesterday * 100.
# The first row of each frame has no previous close, so its RoC is NaN.
for asset in list_of_assets:
    previous_close = asset["Adj Close"].shift(1)
    asset["RoC"] = ((asset["Adj Close"] - previous_close) / previous_close) * 100

In [None]:
days = [5,10,20] # last week, last two weeks, last month

# One simple-moving-average column per window length, added to every frame.
# The first i-1 rows of each column are NaN (window not yet full).
for i in days:
    column_name = f"MA for {i} days"
    for asset in list_of_assets:
        asset[column_name] = asset["Adj Close"].rolling(window=i).mean()

In [None]:
# bollinger bands
def boll(closing, length, num_sd):
    """
    Compute Bollinger Bands for a price series.

    closing : object exposing .rolling (e.g. a pandas Series of closing prices)
    length  : size of the rolling window, in rows
    num_sd  : number of rolling standard deviations between the middle band
              and each outer band

    returns (lower band, middle band, upper band) -- in that order -- each
    rounded to 6 decimals; the first length-1 entries are NaN
    """
    roller = closing.rolling(window=length)
    middle = roller.mean()
    spread = num_sd * roller.std()
    return tuple(np.round(band, 6) for band in (middle - spread, middle, middle + spread))

In [None]:
length = 50
num_sd = 2

# Column names for the three bands, in the order boll() returns them.
down, av, up = "Lower band", "Middle band", "Upper band"

for asset in list_of_assets:
    bands = boll(asset["Adj Close"], length=length, num_sd=num_sd)
    asset[down], asset[av], asset[up] = bands

In [None]:
# rsi & momentum
def rsi(values):
    """
    Relative Strength Index of one window of price changes.

    values : 1-D collection of period-to-period price changes (a pandas Series
             or a numpy array, as passed by Series.rolling(...).apply)

    returns 100 * gains / (gains + losses), where gains and losses are the
    summed magnitudes of the positive and negative changes in the window.
    Dividing both by the window length would cancel out, so this equals the
    simple-average RSI, 100 - 100 / (1 + avg_gain / avg_loss).

    Edge cases:
      * no losses in the window -> 100.0
      * no gains in the window  -> 0.0
      * no movement at all      -> NaN (the ratio is undefined)
    """
    changes = np.asarray(values, dtype=float)
    # Sum rather than average the non-zero moves: averaging only the positive
    # (or only the negative) entries divides gains and losses by different
    # counts, which distorts the ratio and yields NaN for one-sided windows.
    gains = changes[changes > 0].sum()
    losses = -changes[changes < 0].sum()
    total = gains + losses
    if total == 0:
        return np.nan
    return 100 * gains / total

window = 14

for asset in list_of_assets:
    # One-day momentum is the first difference of the adjusted close.
    asset["Momentum_1D"] = asset["Adj Close"].diff(1)
    # RSI is evaluated on each rolling window of momentum values.
    momentum_windows = asset["Momentum_1D"].rolling(window=window)
    asset["RSI_14D"] = momentum_windows.apply(rsi)

In [None]:
# macd: difference between the 12-span and 26-span exponential moving averages
for asset in list_of_assets:
    for span in (26, 12):
        asset[f"EWMA_{span}D"] = asset["Adj Close"].ewm(span=span).mean()
    asset["MACD"] = asset["EWMA_12D"] - asset["EWMA_26D"]

In [None]:
# stochastic oscillator (%K)
def stoch_osci(asset, n=10):
    """
    Add the stochastic oscillator %K to `asset` in place.

    %K = 100 * (Close - lowest Low of the last n rows)
             / (highest High of the last n rows - lowest Low of the last n rows)

    asset : DataFrame with "Close", "High" and "Low" columns
    n     : look-back window in rows (default 10)

    The values are written to the "Stoch Osci %K/%D" column (only %K is
    computed; no smoothed %D line is produced). The first n-1 rows are NaN.
    Nothing is returned.
    """
    lowest_low = asset["Low"].rolling(window=n).min()
    highest_high = asset["High"].rolling(window=n).max()
    # The numerator must use the window's lowest low. Subtracting the rolling
    # *mean* of Low (as this function used to) lets the result leave 0..100.
    asset["Stoch Osci %K/%D"] = ((asset["Close"] - lowest_low)
                                 / (highest_high - lowest_low)
                                ) * 100

# Add the oscillator column to every index frame (default 10-row window).
for asset in list_of_assets:
    stoch_osci(asset)

In [None]:
# Williams %R
def willR(asset, n=14):
    """
    Add the Williams %R indicator to `asset` in place.

    %R = -100 * (highest High of the last n rows - Close)
              / (highest High of the last n rows - lowest Low of the last n rows)

    asset : DataFrame with "High", "Low" and "Close" columns
    n     : look-back window in rows (default 14)

    The values are written to the "Williams %R" column; the first n-1 rows
    are NaN. Nothing is returned.
    """
    # The window is an explicit parameter so the function no longer depends on
    # a module-level `window` variable that happens to be set by another cell.
    highest_high = asset["High"].rolling(window=n, center=False).max()
    lowest_low = asset["Low"].rolling(window=n, center=False).min()
    # "Close" is used (not "Adj Close") so the close is on the same price
    # basis as the High/Low columns it is compared against.
    asset["Williams %R"] = -100*((highest_high - asset["Close"])/(highest_high - lowest_low))

# Add the "Williams %R" column to every index frame.
for asset in list_of_assets:
    willR(asset)

In [None]:
# aroon oscillator
def a(data, tp = 25):
    """
    Compute the Aroon-up and Aroon-down series for `data`.

    data : DataFrame with "High" and "Low" columns
    tp   : look-back window in rows (default 25)

    For every row position x >= tp, the window is the tp rows *before* x:
      up   = position of the highest High in the window / tp * 100
      down = position of the lowest Low in the window  / tp * 100
    Positions count from the oldest row of the window; ties resolve to the
    earliest occurrence.

    returns (up, down): two lists of floats, each len(data) - tp long
    (both empty when data has tp rows or fewer)
    """
    # Read from the `data` argument. The body used to reference a global named
    # `asset`, which only worked because the calling loop used that name.
    highs = data["High"].tolist()
    lows = data["Low"].tolist()
    u = []
    d = []
    for x in range(tp, len(highs)):
        window_highs = highs[x-tp:x]
        window_lows = lows[x-tp:x]
        u.append((window_highs.index(max(window_highs))/float(tp))*100)
        # Aroon-down tracks the window's LOWEST low, hence min() rather than max().
        d.append((window_lows.index(min(window_lows))/float(tp))*100)
    return u,d

In [None]:
# Aroon oscillator = Aroon-up minus Aroon-down, stored per frame.
for asset in list_of_assets:
    zeros = [0] * 25  # padding for the leading rows that a() produces no value for
    up, down = a(asset)
    a_list = [aroon_up - aroon_down for aroon_up, aroon_down in zip(up, down)]
    if a_list:
        asset["Aroon Oscillator"] = zeros + a_list
    else:
        # Too few rows for a single window: fill the whole column with zeros.
        a_list = [0] * asset.shape[0]
        asset["Aroon Oscillator"] = a_list

In [None]:
def direction(x):
    """Return 1.0 when x is strictly positive, otherwise 0.0 (zero, negatives and NaN included)."""
    return 1.0 if x > 0 else 0.0

# Label each row 1.0 when its rate of change is positive and 0.0 otherwise.
for asset in list_of_assets:
    asset["Direction"] = asset["RoC"].apply(direction)

In [None]:
# Check column dtypes before any NaN filling.
# NOTE(review): the computed indicator columns are expected to be float64, but
# "Symbols" holds ticker strings, so not every column is — confirm in the output.
gspc.dtypes # dtype of every column in the ^GSPC frame

In [None]:
# Inspect the ^IXIC frame with all indicator columns added.
ixic

In [None]:
# Build display-only copies with NaNs shown as "-".
# Filling the working frames in place would turn their numeric columns into
# object dtype, and the plotting cell below still needs gspc/ixic to be
# numeric — so the originals are deliberately left untouched.
gspc_display = gspc.fillna("-")
ixic_display = ixic.fillna("-")
gspc_display

In [None]:
# List the style sheets available in this matplotlib installation.
plt.style.available

In [None]:
def p_twinx():
    """
    Plot the adjusted close of ^GSPC and ^IXIC on one figure with two y-axes.

    Reads the notebook-level frames `gspc` (blue, left axis) and `ixic`
    (red, right axis); both need "Date" and "Adj Close" columns.
    Takes no arguments and returns nothing.
    """
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,5), constrained_layout=True)

    # Left axis: ^GSPC.
    ax.plot(gspc["Date"], gspc["Adj Close"], color="blue")
    ax.set_title("^GSPC")

    # Right axis shares the x-axis but carries its own scale for ^IXIC.
    ax2 = ax.twinx()
    ax2.plot(ixic["Date"], ixic["Adj Close"], color="red")

    # Colour each y-axis' ticks like the line it belongs to.
    for axis, colour in ((ax, "blue"), (ax2, "red")):
        axis.tick_params("y", colors=colour)

In [None]:
# def p():
#     fig, (ax1, ax2, ax3) = plt.subplots(3,1,figsize=(15,10), constrained_layout=True)
#     ax1.plot(gspc["Date"], gspc["Adj Close"], color="blue")
#     ax1.set(title="^GSPC")
    
#     ax2.plot(ixic["Date"], ixic["Adj Close"])
#     ax2.set(title="^IXIC")
    
#     ax3.plot(n225["Date"], n225["Adj Close"])
#     ax3.set(title="^N225")

#     fig.tight_layout(h_pad=2)

In [None]:
# Style context = local change for this figure only; rcParams change things globally.
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# later releases removed the old names, so use whichever spelling this
# installation provides and fall back to the default style if neither exists.
ticks_style = next(
    (name for name in ("seaborn-v0_8-ticks", "seaborn-ticks") if name in plt.style.available),
    "default",
)
with plt.style.context(ticks_style):
    p_twinx()

In [None]:
# # high c/high gamma overfitting
# # low c/low gamma underfitting

# # data pre-processing
# X = df.iloc[:,0:5]
# y = df.iloc[:,-1]

# # MinMaxScaler
# scaler = MinMaxScaler()
# df = scaler.fit_transform(df)

In [None]:
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# X_train = np.array(X_train)
# y_train = np.array(y_train)
# X_test = np.array(X_test)
# y_test = np.array(y_test)

# # train the model
# svm = SVC(C=1.0, kernel="linear", gamma="scale")
# svm.fit(X_train, y_train)

# # prediction
# pred = svm.predict(X_test)
# pred

# # classification report and confusion matrix

# print("Classification report:")
# print(classification_report(y_test, pred))

# print("Confusion Matrix:")
# print(confusion_matrix(y_test, pred))

# print("Accuracy score:")
# print(accuracy_score(y_test, pred).round(2))

In [None]:
# low gamma considers further away points to fit a hyperplane
# high gamma considers only close points to fit a hyperplane (prone to overfitting)
# too much emphasis on close points and not much on further away points - high gamma

# gridsearchcv
# param_grid = {"C":[0.1,1,10,100,1000], "gamma":[1,0.1,0.01,0.001], "kernel":["rbf", "poly", "sigmoid"]}
# svm_grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=2)
# # verbose just shows how much of text output during calculation to display

# svm_grid.fit(X_train, y_train)
# svm_grid.best_params_

In [None]:
# # prediction with gridsearchcv (then classification report/confusion matrix)/accuracy score
# svm_grid_pred = svm_grid.predict(X_test)

# print("Classification report:")
# print(classification_report(y_test, svm_grid_pred))

# print("Confusion Matrix:")
# print(confusion_matrix(y_test, svm_grid_pred))

# print("Accuracy score:")
# print(accuracy_score(y_test, svm_grid_pred).round(2))