Import the dependencies

In [1]:
import numpy as np
import pandas as pd
import pandas_ta as ta # pandas technical analysis
import pandas_datareader.data as web
import matplotlib.pyplot as plt
plt.style.use("ggplot") # plt.style.available[:] gives list of all available stylesheets
%matplotlib inline
import seaborn as sns
import datetime as dt

import yfinance as yf # as a means to access yahoo finance data

from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

from sklearn.svm import SVC # svm
from sklearn.metrics import classification_report, confusion_matrix, mean_squared_error, accuracy_score
from sklearn.model_selection import GridSearchCV # c, gamma parameter optimisation

# Apply the seaborn style first: sns.set_style() rewrites a group of rcParams
# (figure.facecolor among them), so an override made before it is silently lost.
sns.set_style("darkgrid")
plt.rcParams["font.size"] = 11 # rc refers to run commands
plt.rcParams["figure.facecolor"] = "#000080"

import warnings # ignores warnings
warnings.filterwarnings('ignore')

In [2]:
# Two-year look-back window ending today.
end = dt.datetime.today()
# Step back two years with a calendar-aware offset. Building
# dt.datetime(end.year-2, end.month, end.day) by hand raises ValueError when
# today is 29 February, because the target year has no such day; DateOffset
# falls back to 28 February instead. normalize() keeps the start at midnight.
start = (pd.Timestamp(end) - pd.DateOffset(years=2)).normalize().to_pydatetime()
spx = web.DataReader("^GSPC", "yahoo", start, end) #s&p500
ixic = web.DataReader("^IXIC", "yahoo", start, end) #nasdaq

In [3]:
# moving average
# One simple-moving-average column per look-back window (in trading days),
# computed on the adjusted close and stored on spx.
days = [10,20,50]
adj_close = spx["Adj Close"]
for window in days:
    spx[f"MA for {window} days"] = adj_close.rolling(window=window).mean()

In [4]:
# Re-wrap the price data as a DataFrame, then show its column labels as the
# cell's output (the base price columns plus the moving averages added above).
spx = pd.DataFrame(spx)
spx.columns

Index(['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close', 'MA for 10 days',
       'MA for 20 days', 'MA for 50 days'],
      dtype='object')

In [9]:
# bollinger bands
length = 50
num_sd = 2
def boll(closing, length, num_sd):
    """
    Compute Bollinger bands for a price series.

    closing: pandas Series of prices (any object exposing .rolling()).
    length:  number of observations in the rolling window.
    num_sd:  how many rolling standard deviations separate the middle band
             from each outer band.

    returns lower band, average (middle band), upper band -- in that order,
    each rounded to 6 decimal places. Entries before the window is full
    (the first length-1 rows) are NaN.
    """
    # Build the rolling window once and reuse it for both statistics.
    window = closing.rolling(window=length)
    av = window.mean()
    sd = window.std()
    up = av + (num_sd*sd)
    down = av - (num_sd*sd)
    return np.round(down,6), np.round(av,6), np.round(up,6)

In [10]:
# Column labels for the three Bollinger series, lowest to highest.
down, av, up = "Lower band", "Middle band", "Upper band"

# boll() hands back the bands in the same lowest-to-highest order.
lower_band, middle_band, upper_band = boll(spx["Adj Close"], length=length, num_sd=num_sd)
spx[down] = lower_band
spx[av] = middle_band
spx[up] = upper_band
spx

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,MA for 10 days,MA for 20 days,MA for 50 days,Lower band,Middle band,Upper band
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2019-08-19,2931.000000,2913.479980,2913.479980,2923.649902,3212880000,2923.649902,,,,,,
2019-08-20,2923.629883,2899.600098,2919.010010,2900.510010,3066300000,2900.510010,,,,,,
2019-08-21,2928.729980,2917.909912,2922.040039,2924.429932,3011190000,2924.429932,,,,,,
2019-08-22,2939.080078,2904.510010,2930.939941,2922.949951,2890880000,2922.949951,,,,,,
2019-08-23,2927.010010,2834.969971,2911.070068,2847.110107,3937300000,2847.110107,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
2021-08-11,4449.439941,4436.419922,4442.180176,4442.410156,2803060000,4442.410156,4420.451025,4391.780518,4321.455986,4160.936834,4321.455986,4481.975139
2021-08-12,4461.770020,4435.959961,4446.080078,4460.830078,2543860000,4460.830078,4424.619043,4396.820532,4326.510186,4164.648251,4326.510186,4488.372120
2021-08-13,4468.370117,4460.819824,4464.839844,4468.000000,2371630000,4468.000000,4431.893066,4403.862524,4332.013184,4169.989892,4332.013184,4494.036475
2021-08-16,4473.259766,4437.660156,4461.649902,4472.939941,2707170000,4472.939941,4440.471045,4414.585010,4336.874180,4172.785796,4336.874180,4500.962563


In [None]:
# df = pd.DataFrame(jpm)
# df["Returns"] = (df["Adj Close"]/df["Adj Close"].shift(1))-1

# def direction(x):
#     if x>0:
#         return 1.0
#     else:
#         return 0.0
    
# df["Direction"] = df["Returns"].apply(direction)
# df = df.dropna()

# df = df[["High", "Low", "Open", "Close", "Volume", "Adj Close","Returns", "Direction"]]
# df

In [None]:
# l = [jpm, gs, ms, bac]
# l_s = ["JPM", "GS", "MS", "BAC"]
# plt.figure(figsize=(15,6))

# for i, j in enumerate(l, 1):
#     plt.subplot(2,2,i)
#     j["Adj Close"].plot()
#     plt.xlabel("Date")
#     plt.ylabel("Adj Close")
#     plt.title(f"Closing price of {l_s[i-1]}")
# plt.tight_layout()

In [None]:
# data pre-processing
# Build the modelling frame from the S&P 500 prices so this cell runs on a
# fresh kernel (df was previously only defined in a commented-out cell).
df = spx[["High", "Low", "Open", "Close", "Volume", "Adj Close"]].copy()
# Daily simple return of the adjusted close; the first row has no previous
# day, so its return is NaN and the row is dropped.
df["Returns"] = (df["Adj Close"]/df["Adj Close"].shift(1))-1
df = df.dropna(subset=["Returns"])
# Target: 1.0 for an up day, 0.0 otherwise.
df["Direction"] = (df["Returns"] > 0).astype(float)

# Features are the first five columns (High, Low, Open, Close, Volume);
# the target is the last column (Direction).
X = df.iloc[:,0:5]
y = df.iloc[:,-1]

# MinMaxScaler
# Scale the features only, and keep the result in X so the model actually
# sees it. Fitting the scaler on the whole frame would also rescale the target,
# and overwriting df with the returned ndarray would discard the labels.
# NOTE(review): the scaler is fit on all rows, before any train/test split, so
# min/max of held-out rows influence the scaling -- confirm this is acceptable.
scaler = MinMaxScaler()
X = pd.DataFrame(scaler.fit_transform(X), index=X.index, columns=X.columns)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): train_test_split shuffles rows by default, so test days are
# interleaved with training days -- confirm that is intended for time-series data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Work with plain NumPy arrays from here on.
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)

# train the model
# gamma is not passed: SVC ignores it for the linear kernel.
svm = SVC(C=1.0, kernel="linear")
svm.fit(X_train, y_train)

# prediction
pred = svm.predict(X_test)

# classification report and confusion matrix

print("Classification report:")
print(classification_report(y_test, pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, pred))

print("Accuracy score:")
# Built-in round() works whether accuracy_score returns a NumPy scalar or a
# plain Python float (which has no .round() method).
print(round(accuracy_score(y_test, pred), 2))

In [None]:
# gridsearchcv
# Candidate values to cross-validate: C (regularisation strength) and gamma
# (kernel coefficient) for a default SVC.
param_grid = {
    "C": [0.1, 1, 10, 100, 1000],
    "gamma": [1, 0.1, 0.01, 0.001],
}
# GridSearchCV's verbose argument (left at its default here) only controls how
# much progress text is printed while the search runs.
svm_grid = GridSearchCV(estimator=SVC(), param_grid=param_grid)

svm_grid.fit(X_train, y_train)
svm_grid.best_params_

In [None]:
# prediction with gridsearchcv (then classification report/confusion matrix)/accuracy score
# predict() on the fitted search uses the best parameter combination it found.
svm_grid_pred = svm_grid.predict(X_test)

print("Classification report:")
print(classification_report(y_test, svm_grid_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, svm_grid_pred))

print("Accuracy score:")
# Built-in round() works whether accuracy_score returns a NumPy scalar or a
# plain Python float (which has no .round() method).
print(round(accuracy_score(y_test, svm_grid_pred), 2))