In [1]:
import numpy as np
import pandas as pd
import ipywidgets as widgets
import io
from pandas.tseries.offsets import DateOffset
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.graph_objects as go
import plotly.express as px
import plotly.graph_objects as go
import plotly as py
import plotly.figure_factory as ff
import matplotlib.pyplot as plt
import copy
from IPython.display import display
from statsmodels.tools.eval_measures import rmse
from sklearn.preprocessing import MinMaxScaler
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import BatchNormalization
from keras.optimizers import Adam
import warnings
warnings.filterwarnings("ignore")

Using TensorFlow backend.


In [None]:
# Input files: one CSV per ticker, each expected to provide 'Date' and 'Close' columns.
sample_files = ['AAPL.csv', 'MSFT.csv', 'AMD.csv']
# File whose closing price is the prediction target; its column is labelled 'Target'.
target_file = 'AAPL.csv'

# Fail early: every later step indexes the 'Target' column.
if target_file not in sample_files:
    raise ValueError(f"target file {target_file!r} is not listed in sample_files")

stocks = []        # column labels of df in load order ('Target' stands in for the target ticker)
target_stock = ''  # ticker symbol of the target, used in the plot titles
close_series = []  # one date-indexed 'Close' series per file

for file_name in sample_files:
    ticker = file_name.split('.')[0]

    prices = pd.read_csv(file_name, usecols=['Date', 'Close'])
    prices['Date'] = pd.to_datetime(prices['Date'])
    closes = prices.set_index('Date')['Close']
    # Keep a single row per date so the date-based join below is well defined.
    closes = closes[~closes.index.duplicated(keep='last')]

    if file_name == target_file:
        target_stock = ticker
        label = 'Target'
    else:
        label = ticker

    stocks.append(label)
    close_series.append(closes.rename(label))

# Join on the Date index rather than on row position, so files covering different
# date ranges stay aligned; dates missing from a file become NaN for that column.
# The rows are put in chronological order because the model treats them as a sequence.
df = pd.concat(close_series, axis=1).sort_index()
df.index.name = 'Date'

# Pairwise correlation of the closing prices (NaNs are skipped pair by pair).
df_corr = df.corr()

dendro = ff.create_dendrogram(df_corr.values, labels=stocks)
dendro.update_layout(width=600, height=400)

heatmap = go.Figure(data=go.Heatmap(x=stocks, y=stocks,
                    z=df_corr.values))
heatmap.update_layout(width=500, height=500)

heatmap.show()
dendro.show()

def shape_data(data, input_size):
    """Cut a sequence into sliding input windows and their next-step targets.

    Window ``k`` is ``data[k:k + input_size]`` and its target is the element
    that immediately follows it, ``data[k + input_size]``.

    Parameters
    ----------
    data : sequence or numpy array
        Indexed along its first axis; must support ``len``, slicing and
        integer indexing.
    input_size : int
        Number of consecutive elements in each window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x, y)`` with ``len(data) - input_size`` entries each. Both arrays
        are empty when ``data`` has no more than ``input_size`` elements.
    """
    # Every start index below satisfies start + input_size < len(data), so no
    # extra bounds check is needed inside the loop.
    n_windows = len(data) - input_size

    x = [data[start:start + input_size] for start in range(n_windows)]
    y = [data[start + input_size] for start in range(n_windows)]

    return np.array(x), np.array(y)

# Settings shared by the training and forecasting steps
n_input = 10          # rows per input window fed to the network
n_feat = df.shape[1]  # one feature per column of df
epochs = 40           # passes over the training windows per fit call
n_predict = 5         # number of steps forecast beyond the last known row
BATCH_SIZE = 20       # windows per gradient update

#Create a scaler for each of the features
def scale_data(stocks, dataframe):
    """Min-max scale each listed column of ``dataframe`` with its own scaler.

    A separate ``MinMaxScaler`` is fitted on every column named in ``stocks``
    and that column is overwritten with its scaled values. The dataframe that
    was passed in is modified in place and is also the one returned.

    Parameters
    ----------
    stocks : iterable of str
        Names of the columns to scale.
    dataframe : pandas.DataFrame
        Frame holding those columns.

    Returns
    -------
    tuple
        ``(dataframe, scalers)`` where ``scalers`` maps each column name to
        the scaler fitted on it, for later ``inverse_transform`` calls.
    """
    fitted = {}
    for column in stocks:
        # The scaler expects a 2-D array, so use a single-column matrix.
        column_values = np.array(dataframe[column]).reshape((-1, 1))
        scaler = MinMaxScaler()
        dataframe[column] = scaler.fit_transform(column_values)
        fitted[column] = scaler

    return dataframe, fitted

#Drop any null values within the data
df.dropna(inplace=True)

#Create a train split -- we will train the model on 80% of the dataset
train_split = int(len(df)*.80)

#Split the data into the training set and the hold-out (validation) set
df_train = df.iloc[:train_split].copy()
df_val = df.iloc[train_split:].copy()

# The forecast below is dated with the first n_predict hold-out rows.
if len(df_val) < n_predict:
    raise ValueError(
        f"hold-out set has {len(df_val)} rows; at least {n_predict} are needed "
        "to date the forecast")

df_train, train_scalers = scale_data(stocks, df_train)

# Scale the hold-out rows with the scalers fitted on the training rows, so both
# sets share one scale and nothing is learned from the hold-out data.
for i in stocks:
    df_val[i] = train_scalers[i].transform(np.array(df_val[i]).reshape((-1, 1)))
val_scalers = train_scalers

#Format the data into tensors for the model
x_train, y_train = shape_data(df_train.values, n_input)
x_val, y_val = shape_data(df_val.values, n_input)

#Create the model: three stacked LSTM layers, each followed by dropout,
#and a Dense output layer with one unit per feature
model = Sequential()

model.add(LSTM(256, activation='relu', input_shape=x_train.shape[1:], return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(128, activation='relu', return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(64, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(n_feat))
model.compile(Adam(lr=.002), loss='mse')

# summary() prints the layer table itself; printing the attribute would only
# show the bound-method repr.
model.summary()
print("Training.. Please wait..")
model.fit(x_train, y_train, epochs=epochs, batch_size=BATCH_SIZE, verbose=0)
print("Training complete.. Loading results..")

# Roll the model forward n_predict steps, starting from the last n_input
# training rows and feeding each prediction back in as the newest row.
predictions = []
batch = np.array(df_train.values[-n_input:]).reshape(1,n_input,n_feat)

for i in range(n_predict):
    predictions.append(model.predict(batch)[0])
    batch = np.append(batch[:, 1: , :], [[predictions[i]]], axis=1)

# Anchor the forecast on the last training row so the plotted line starts where
# the known data ends.
predictions.insert(0, df_train.values[-1])

temp = pd.DataFrame(predictions, columns=stocks)

# Back to price units, using the same scalers the model was trained with.
for i in stocks:
    temp[i] = train_scalers[i].inverse_transform(np.array(temp[i]).reshape((-1, 1)))

# The anchor sits on the last training date; the n_predict forecast steps fall
# on the first n_predict hold-out dates.
prediction_dates = df_train.index[-1:].append(df_val.index[:n_predict])

df_predictions = pd.DataFrame(temp.values,
                              index=prediction_dates,
                              columns=['Target_Prediction' if i == 'Target' else i for i in stocks])

# Unscaled actual prices side by side with the (unscaled) forecast.
df_test = pd.concat([df, df_predictions], axis=1)

# Actual prices over the whole period, so the forecast can be compared with
# the hold-out values it is trying to predict.
trace1 = go.Scatter(x = df.index, y = df['Target'],
                    mode = 'lines', name = 'Data')

trace2 = go.Scatter(x = df_predictions.index,
                    y = df_predictions['Target_Prediction'],
                    mode = 'lines', name = 'Prediction')

layout = go.Layout(title = f'{target_stock} Stock', xaxis = {'title' : "Date"},
                   yaxis = {'title' : "Close"})

fig = go.Figure(data=[trace1, trace2], layout=layout)
fig.show()

# Rescale the complete data set (fresh scalers) and continue training the same
# model object on it; the weights from the earlier fit are not reset.
train_data = copy.deepcopy(df)
train_data, scalers = scale_data(stocks, train_data)

x_train, y_train = shape_data(train_data.values, n_input)

model.fit(x_train, y_train,
         epochs=epochs, batch_size=BATCH_SIZE, verbose=0)

# Roll the model forward n_predict steps from the last n_input known rows,
# feeding each prediction back in as the newest row.
predictions = []
batch = np.array(train_data.values[-n_input:]).reshape(1,n_input,n_feat)

for i in range(n_predict):
    predictions.append(model.predict(batch)[0])
    batch = np.append(batch[:, 1: , :], [[predictions[i]]], axis=1)

# Anchor the forecast on the last known row so the plotted line is continuous.
predictions.insert(0, train_data.values[-1])

# Each forecast step is one more row of daily closing prices, so date the steps
# with business-day offsets instead of calendar days (which would put forecasts
# on weekends). Exchange holidays are not taken into account.
last_date = df.index[-1]
add_dates = [last_date] + [last_date + pd.offsets.BDay(step)
                           for step in range(1, n_predict+1)]
future_dates = pd.DataFrame(index=add_dates, columns=df.columns)

# Back to price units.
temp = pd.DataFrame(predictions, columns=stocks)
for i in stocks:
    temp[i] = scalers[i].inverse_transform(np.array(temp[i]).reshape((-1, 1)))

df_predictions = pd.DataFrame(temp.values,
                              index=future_dates.index,
                              columns=['Target_Prediction' if i == 'Target' else i for i in stocks])

# Known prices plus the forecast, aligned on date; the anchor row shares the
# last known date.
df_proj = pd.concat([df,df_predictions], axis=1)

trace1 = go.Scatter(x = df_proj.index, y = df_proj['Target'], mode = 'lines',
                    name = 'Data')

trace2 = go.Scatter(x = df_proj.index, y = df_proj['Target_Prediction'],
                    mode = 'lines', name = 'Prediction')

layout = go.Layout(title = f'{target_stock} Stock', xaxis = {'title' : "Date"},
                   yaxis = {'title' : "Close"})

fig = go.Figure(data=[trace1, trace2], layout=layout)
fig.show()