Import the dependencies

In [None]:
import numpy as np
import pandas as pd
import pandas_ta as ta # pandas technical analysis
import pandas_datareader.data as web
import matplotlib.pyplot as plt
plt.style.use("ggplot") # plt.style.available[:] gives list of all available stylesheets
%matplotlib inline
import seaborn as sns
import datetime as dt

import yfinance as yf # as a means to access yahoo finance data

from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

from sklearn.svm import SVC # svm
from sklearn.metrics import classification_report, confusion_matrix, mean_squared_error, accuracy_score
from sklearn.model_selection import GridSearchCV # c, gamma parameter optimisation

# Global matplotlib defaults ("rc" stands for "run commands").
plt.rcParams.update({
    "font.size": 11,
    "figure.facecolor": "#000080",
})
# NOTE(review): seaborn's set_style writes several rcParams of its own, which may
# include figure.facecolor -- confirm the colour set above actually survives.
sns.set_style("darkgrid")

import warnings
# Suppress every warning for the remainder of the session.
warnings.simplefilter("ignore")

In [2]:
# Download daily JPM prices for the twelve months ending today.
end = dt.datetime.today()
# Step back one calendar year from midnight today. DateOffset clamps 29 Feb to
# 28 Feb, whereas dt.datetime(end.year - 1, end.month, end.day) raises
# ValueError when the notebook is run on a leap day.
start = (pd.Timestamp(end).normalize() - pd.DateOffset(years=1)).to_pydatetime()
# NOTE(review): yfinance is imported above but unused; confirm the "yahoo"
# source of pandas_datareader still works in the target environment.
jpm = web.DataReader("JPM", "yahoo", start, end)

In [3]:
# Work on an explicit copy of the download. pd.DataFrame(jpm) does not copy
# DataFrame input by default, so columns added below could also show up in
# `jpm`; copying keeps the raw data untouched and the cell safe to re-run.
df = jpm.copy()
# Simple daily return: adjusted close divided by the previous row's adjusted
# close, minus one. The first row has no predecessor and is therefore NaN.
df["Returns"] = (df["Adj Close"] / df["Adj Close"].shift(1)) - 1

def direction(x):
    """Map a return to a binary label.

    Returns 1.0 when ``x`` is strictly greater than zero and 0.0 otherwise;
    zero, negative values and NaN (for which ``x > 0`` is False) all give 0.0.
    """
    return 1.0 if x > 0 else 0.0
    
# Label each row from its return: 1.0 for a positive return, 0.0 otherwise.
df["Direction"] = df["Returns"].apply(direction)

# Column order used for the rest of the notebook.
ordered_columns = [
    "High", "Low", "Open", "Close", "Volume",
    "Adj Close", "Returns", "Direction",
]
# Drop rows containing NaN (the first row has no return), then reorder columns.
df = df.dropna()[ordered_columns]
df

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,Returns,Direction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-08-18,99.910004,98.120003,99.910004,98.320000,13246500.0,95.602272,-0.013940,0.0
2020-08-19,100.589996,98.339996,98.919998,98.550003,13533000.0,95.825912,0.002339,1.0
2020-08-20,98.029999,96.800003,97.260002,97.370003,13672600.0,94.678528,-0.011974,0.0
2020-08-21,98.570000,96.940002,97.269997,97.320000,12228300.0,94.629913,-0.000513,0.0
2020-08-24,100.209999,97.419998,98.449997,100.059998,17898300.0,97.294174,0.028155,1.0
...,...,...,...,...,...,...,...,...
2021-08-10,159.979996,156.309998,156.740005,159.259995,8667000.0,159.259995,0.012267,1.0
2021-08-11,161.500000,159.080002,160.000000,161.160004,9156500.0,161.160004,0.011930,1.0
2021-08-12,162.369995,160.550003,161.880005,161.789993,8844200.0,161.789993,0.003909,1.0
2021-08-13,162.160004,159.570007,161.789993,159.979996,8980000.0,159.979996,-0.011187,0.0


In [4]:
# Disabled cell: 2x2 grid of "Adj Close" line plots, one subplot per ticker.
# NOTE(review): gs, ms and bac are not defined in the cells visible here (only
# jpm is downloaded) -- load them before re-enabling this cell.
# l = [jpm, gs, ms, bac]
# l_s = ["JPM", "GS", "MS", "BAC"]
# plt.figure(figsize=(15,6))

# for i, j in enumerate(l, 1):
#     plt.subplot(2,2,i)
#     j["Adj Close"].plot()
#     plt.xlabel("Date")
#     plt.ylabel("Adj Close")
#     plt.title(f"Closing price of {l_s[i-1]}")
# plt.tight_layout()

In [5]:
# Disabled cell: Bollinger-band helper (rolling mean +/- num_sd rolling standard
# deviations over `length` rows). Nothing in the visible cells calls it.
# # bollinger bands
# length = 30
# num_sd = 2
# def boll(closing, length, num_sd):
#     """
#     returns average, upper band, lower band
#     """
#     av = closing.rolling(window=length).mean()
#     sd = closing.rolling(window=length).std()
#     up = av + (num_sd*sd)
#     down = av - (num_sd*sd)
#     return np.round(av,4), np.round(up,4), np.round(down,4)

In [6]:
# data pre-processing
# Features are the first five columns (High, Low, Open, Close, Volume);
# the target is the last column (Direction).
features = df.iloc[:, 0:5]
y = df.iloc[:, -1]

# MinMaxScaler
# Scale the feature matrix itself. The earlier version fitted the scaler on the
# whole of `df` and overwrote `df` with the resulting ndarray, so X stayed
# unscaled and re-running this cell failed because an ndarray has no `.iloc`.
# `df` is now left intact, which makes the cell safe to re-run.
# NOTE(review): the scaler is fitted on all rows before the train/test split, so
# test-set minima and maxima influence the scaling; fit on the training rows only
# if strict separation is required.
scaler = MinMaxScaler()
X = pd.DataFrame(
    scaler.fit_transform(features),
    index=features.index,
    columns=features.columns,
)

Support vector machines

In [None]:
# Hold out 20% of the rows for evaluation; random_state fixes the shuffle.
# NOTE(review): the split shuffles rows, so test days are interleaved with
# training days -- confirm that is acceptable for this time series.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Plain ndarrays for the estimator.
X_train = np.asarray(X_train)
y_train = np.asarray(y_train)
X_test = np.asarray(X_test)
y_test = np.asarray(y_test)

# train the model
# degree and gamma are ignored by the linear kernel, so they are omitted.
svm = SVC(C=1.0, kernel="linear")
svm.fit(X_train, y_train)

# prediction
pred = svm.predict(X_test)

# classification report and confusion matrix

print("Classification report:")
print(classification_report(y_test, pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, pred))

print("Accuracy score:")
# Built-in round() works whether accuracy_score returns a NumPy scalar or a
# plain Python float; the `.round` method exists only on the former.
print(round(accuracy_score(y_test, pred), 2))

In [None]:
# gridsearchcv
# Candidate values for the SVC parameters C and gamma; every combination is tried.
param_grid = dict(
    C=[0.1, 1, 10, 100, 1000],
    gamma=[1, 0.1, 0.01, 0.001],
)
# `verbose` (not passed here, so left at its default) only controls how much
# progress text is printed while the search runs.
svm_grid = GridSearchCV(estimator=SVC(), param_grid=param_grid)

# Run the search on the training split, then show the winning combination.
svm_grid.fit(X_train, y_train)
svm_grid.best_params_

In [None]:
# # prediction with gridsearchcv (then classification report/confusion matrix)/accuracy score
# Predict the held-out rows with the fitted grid-search object.
svm_grid_pred = svm_grid.predict(X_test)

print("Classification report:")
print(classification_report(y_test, svm_grid_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, svm_grid_pred))

print("Accuracy score:")
# Built-in round() works whether accuracy_score returns a NumPy scalar or a
# plain Python float; the `.round` method exists only on the former.
print(round(accuracy_score(y_test, svm_grid_pred), 2))