In [None]:
# install the dependencies
import numpy as np
import pandas as pd
# pandas technical analysis
import pandas_ta as ta
import pandas_datareader.data as web
import matplotlib.pyplot as plt
plt.style.use("bmh")
# plt.style.available[:] gives list of all available stylesheets
%matplotlib inline
import seaborn as sns
import datetime as dt

import yfinance as yf
# yfinance is a popular open source library developed by Ran Aroussi 
# as a means to access the financial data available on Yahoo Finance

from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# svm
from sklearn.svm import SVR
from sklearn.metrics import classification_report, confusion_matrix, r2_score, mean_squared_error
from sklearn.grid_search import GridSearchCV

# rc refers to run commands
plt.rcParams["font.size"] = 11
plt.rcParams["figure.facecolor"] = "#000080"
sns.set_style("darkgrid")

In [None]:
# Download one year of daily history, ending today, for each bank.
end = dt.datetime.today()
try:
    start = dt.datetime(end.year - 1, end.month, end.day)
except ValueError:
    # Today is Feb 29 and the previous year has no such date: fall back to Feb 28.
    start = dt.datetime(end.year - 1, end.month, end.day - 1)

# NOTE(review): the "yahoo" source of pandas-datareader may fail with recent
# releases; yfinance is already imported above as a possible replacement — confirm.
# Fetch order (JPM, MS, GS, BAC) matches the order of the names being unpacked.
jpm, ms, gs, bac = (
    web.DataReader(ticker, "yahoo", start, end)
    for ticker in ("JPM", "MS", "GS", "BAC")
)

In [None]:
# Show the JPM frame returned by the data reader (rendered as the cell output).
jpm

In [None]:
# Frames and their ticker labels, kept in matching order.
l = [jpm, gs, ms, bac]
l_s = ["JPM", "GS", "MS", "BAC"]

plt.figure(figsize=(15,6))

# One panel per bank on a 2x2 grid; subplot positions are 1-based.
for position, (frame, ticker) in enumerate(zip(l, l_s), start=1):
    plt.subplot(2, 2, position)
    frame["Adj Close"].plot()
    plt.xlabel("Date")
    plt.ylabel("Adj Close")
    plt.title(f"Closing price of {ticker}")

plt.tight_layout()

In [None]:
# MinMaxScaler
# scaler = MinMaxScaler(feature_range=(0,1))
# df_scaled = scaler.fit_transform(df)
# df_scaled

In [None]:
# bollinger bands: default window size and band width (in standard deviations)
length = 30
num_sd = 2


def boll(closing, length, num_sd):
    """
    Compute the rolling mean of `closing` and the bands around it.

    closing : pandas object exposing `.rolling` (e.g. a price Series)
    length  : number of observations in the rolling window
    num_sd  : how many rolling standard deviations the bands sit from the mean

    Returns a tuple (average, upper band, lower band), each rounded to
    4 decimal places. The first `length - 1` entries are NaN because the
    window is not yet full.
    """
    window = closing.rolling(window=length)
    middle = window.mean()
    offset = num_sd * window.std()
    return tuple(np.round(band, 4) for band in (middle, middle + offset, middle - offset))

In [None]:
# data pre-processing
# NOTE(review): `df` is not defined in any visible cell; JPM is assumed here.
# Point it at gs / ms / bac to model a different ticker.
df = jpm

x = df[["High", "Low", "Open", "Close", "Volume"]]
y = df["Adj Close"]

# Keep X as (n_samples, n_features): reshape(-1, 1) would flatten the five
# feature columns into one and break the row alignment with y.
X = x.values
# SVR expects a 1-D target vector.
y = y.values.ravel()

# NOTE(review): the features are left unscaled (Volume is orders of magnitude
# larger than the prices); SVMs are not scale invariant, so consider applying
# the imported MinMaxScaler, fitted on the training split only.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# train the baseline model (default hyper-parameters)
SupportVectorRegModel = SVR()
SupportVectorRegModel.fit(X_train, y_train)

# prediction
y_pred = SupportVectorRegModel.predict(X_test)

# This is a regression task: confusion_matrix / classification_report raise
# on continuous targets, so report a regression score instead.
print(f"Baseline SVR R^2: {r2_score(y_test, y_pred):.4f}")

# gridsearchcv
# "C" is the regularisation trade-off: a large C penalises training errors
# heavily (low bias / high variance), a small C tolerates them
# (high bias / low variance).
#
# "gamma" sets how far the influence of one training sample reaches under the
# RBF kernel: a small gamma gives a wide Gaussian and a smoother model
# (higher bias), a large gamma gives a narrow Gaussian whose influence stays
# close to each support vector (higher variance).

param_grid = {"C":[0.1,1,10,100,1000], "gamma":[1,0.1,0.01,0.001]}
grid = GridSearchCV(SVR(), param_grid, refit=True, verbose=3)
# verbose just controls how much text output is shown during the search
grid.fit(X_train, y_train)
# Fitted attributes carry a trailing underscore (best_params_, best_estimator_).
print(grid.best_params_)
print(grid.best_estimator_)

# prediction with the refitted best estimator found by the grid search
grid_pred = grid.predict(X_test)
grid_rmse = np.sqrt(mean_squared_error(y_test, grid_pred))
print(f"Tuned SVR RMSE: {grid_rmse:.4f}")

# model evaluation (baseline model)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
rmse