# Stock Prediction Neural Network

This application is designed for *Good Small Firm* to help target correlated stocks and identify stock trends.

## Index

<ul>
  <li><a href='#UserOp'>How-To</a></li>
  <li><a href='#UploadData'>Upload & Process Data</a></li>
  <li><a href='#Train'>Train the Model</a></li>
</ul> 


In [69]:
import numpy as np
import pandas as pd
import ipywidgets as widgets
import io
from pandas.tseries.offsets import DateOffset
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.graph_objects as go
import plotly.express as px
import plotly.graph_objects as go
import plotly as py
import plotly.figure_factory as ff
import matplotlib.pyplot as plt
import copy
import math
from ipywidgets import Layout
from IPython.display import display
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import BatchNormalization
from keras.optimizers import Adam
import warnings
warnings.filterwarnings("ignore")

## Sample Run Results

The following charts contain the results loaded from a previous run of the neural network. The previous run used "Apple" as the target to predict, with "Microsoft" and "AMD" as the independent features.

In [70]:
### Load Previous Test Results
# Every CSV below is the saved output of an earlier run; the date column is
# parsed and promoted to the index so the charts can plot against it.
df_full = pd.read_csv("APPLE_Data.csv")
df_full['Date'] = pd.to_datetime(df_full['Date'])
df_full = df_full.set_index('Date')

# The column names double as axis labels for the correlation charts.
stocks = df_full.columns.values

df_train = pd.read_csv("APPLE_Train_Data.csv")
df_train['Date'] = pd.to_datetime(df_train['Date'])
df_train = df_train.set_index('Date')

# The correlation table stores its row labels in an unnamed first column:
# copy it to 'Stocks', index by it, then discard the original column.
df_corr = pd.read_csv("APPLE_Correlation_Data.csv")
df_corr['Stocks'] = df_corr['Unnamed: 0']
df_corr = df_corr.set_index('Stocks')
# Use the `columns=` keyword: passing the axis positionally (`drop(labels, 1)`)
# is rejected by pandas 2.0 and later.
df_corr = df_corr.drop(columns=['Unnamed: 0'])

df_predict_train = pd.read_csv("APPLE_Prediction_Data_Train.csv")
df_predict_train['Date'] = pd.to_datetime(df_predict_train['Date'])
df_predict_train = df_predict_train.set_index('Date')

# This file keeps its dates in the unnamed first column.
df_predict = pd.read_csv("Other_data_Predictions_APPLE.csv")
df_predict['Date'] = pd.to_datetime(df_predict['Unnamed: 0'])
df_predict = df_predict.set_index('Date')

#Declare constants
n_input = 10
n_feat = len(stocks)
epochs=40
n_predict=5
BATCH_SIZE = 20

dendro = ff.create_dendrogram(df_corr.values, labels=stocks)
dendro.update_layout(width=600, height=400, title="Correlation Dendrograph")

heatmap = go.Figure(data=go.Heatmap(x=stocks, y=stocks,
                    z=df_corr.values))
heatmap.update_layout(width=500, height=500, title="Correlation Heatmap")

heatmap.show()
dendro.show()

In [71]:
# Chart the saved in-sample prediction next to the recorded target series.
layout = go.Layout(
    title='Apple Stock Prediction Train',
    xaxis=dict(title="Date"),
    yaxis=dict(title="Close"),
    height=400,
    width=800,
)

# Target values for the last n_predict rows of the training data.
test = df_train['Target'].values[-n_predict:]

trace1 = go.Scatter(
    x=df_train.index,
    y=df_full['Target'],
    mode='lines',
    name='Data',
)

# The prediction trace is drawn over the last n_predict + 1 training dates.
trace2 = go.Scatter(
    x=df_train.index[-(n_predict + 1):],
    y=df_predict_train['Target_Prediction'],
    mode='lines',
    name='Prediction',
)

fig = go.Figure(data=[trace1, trace2], layout=layout)
fig.show()

In [72]:
# Chart the saved forecast: recorded values and predicted values share one
# date axis taken from the prediction file.
layout = go.Layout(
    title='Apple Stock Prediction',
    xaxis=dict(title="Date"),
    yaxis=dict(title="Close"),
    height=400,
    width=800,
)

trace1 = go.Scatter(
    x=df_predict.index,
    y=df_predict['Target'],
    mode='lines',
    name='Data',
)

trace2 = go.Scatter(
    x=df_predict.index,
    y=df_predict['Target_Prediction'],
    mode='lines',
    name='Prediction',
)

fig = go.Figure(data=[trace1, trace2], layout=layout)
fig.show()
    

<a id='UserOp'></a>

# User Operation

This section will walk through loading the data and starting the neural network processing on the dataset

<a id='UploadData'></a>
## Loading the data set(s)

Click on the **Select Files** button to select the .csv file(s) that contain the data to evaluate. Then click the **Upload Files** button to complete file upload. Make sure documentation is read and data is in correct format prior to proceeding.

In [73]:

########################
file_wdgt = widgets.FileUpload(description="Select Files", accept='.csv',
                               multiple=True, button_style="info")
upload_btn = widgets.Button(description="Upload Files")
select_target_drop = widgets.Dropdown()
# The widget trait is spelled `disabled`; assigning to `disable` only creates
# an unrelated attribute and leaves the dropdown enabled.
select_target_drop.disabled = True
display(file_wdgt)
display(upload_btn)


def on_upload_button_clicked(b):
    """Copy the uploaded file names into the target-selection dropdown.

    Args:
        b: The button that fired the event (unused).

    The dropdown is enabled only when at least one file name is available.
    """
    uploads = file_wdgt.value

    if isinstance(uploads, dict):
        # ipywidgets 7.x: a dict keyed by file name.
        file_names = list(uploads)
    else:
        # ipywidgets 8.x reports a sequence of per-file records, each
        # carrying the file name under 'name'.
        file_names = [record['name'] for record in uploads]

    # Reset the count shown on the upload button.
    file_wdgt._counter = 0

    select_target_drop.options = file_names
    select_target_drop.disabled = not file_names


upload_btn.on_click(on_upload_button_clicked)

FileUpload(value={}, accept='.csv', button_style='info', description='Select Files', multiple=True)

Button(description='Upload Files', style=ButtonStyle())

Use the dropdown to select the ***target dataset***.

In [32]:
# Render the target-selection dropdown; its options are filled in by
# on_upload_button_clicked once files have been uploaded.
display(select_target_drop)

Dropdown(options=(), value=None)

You can click on **View Descriptive Analytics** to see visualizations of the data correlations. This will display a heatmap and a dendrogram visualizing the correlation between the target and the independent features.

In [74]:
# Create global Variables used for the neural network
df = pd.DataFrame()
stocks = []
target_stock = ''


# Build the dataframe that will be used to train and test the neural network.
# Dataframes are built from parsing the CSV files.
def build_df(files):
    """Rebuild the global ``df``, ``stocks`` and ``target_stock`` from CSV files.

    Each file contributes its 'Close' column. The file currently selected in
    ``select_target_drop`` is stored under the column name 'Target'; every
    other file is stored under its name up to the first '.'. The 'Date'
    column of the first file becomes the datetime index.

    Args:
        files: File names (or paths) readable by ``pd.read_csv``; every file
            must provide 'Date' and 'Close' columns.

    Raises:
        ValueError: If ``files`` is empty.
    """
    global df
    global stocks
    global target_stock

    if not files:
        raise ValueError("build_df needs at least one CSV file")

    # Work on fresh objects so a repeated call replaces the previous result
    # instead of appending to the old stock list and the indexed dataframe.
    frame = pd.DataFrame()
    columns = []
    target_name = ''

    for position, file_name in enumerate(files):
        stock_name = file_name.split('.')[0]
        temp_df = pd.read_csv(file_name)

        if position == 0:
            frame['Date'] = temp_df['Date']

        if file_name == select_target_drop.value:
            target_name = stock_name
            column = 'Target'
        else:
            column = stock_name

        # NOTE(review): columns are aligned by row position, not by date, so
        # this assumes every CSV lists the same dates in the same order.
        frame[column] = temp_df['Close']
        columns.append(column)

    frame['Date'] = pd.to_datetime(frame['Date'])

    df = frame.set_index('Date')
    stocks = columns
    target_stock = target_name

In [75]:
# Render the correlation table as a heatmap followed by a dendrogram.
def display_visual(corr):
    """Show a heatmap and a dendrogram of the correlation table ``corr``.

    Both charts are labelled with the global ``stocks`` names.
    """
    matrix = corr.values

    dendro = ff.create_dendrogram(matrix, labels=stocks)
    dendro.update_layout(width=600, height=400, title="Correlation Dendrograph")

    hmap = go.Figure(data=go.Heatmap(z=matrix, x=stocks, y=stocks))
    hmap.update_layout(width=500, height=500, title="Correlation Heatmap")

    # Heatmap first, dendrogram second.
    hmap.show()
    dendro.show()

corr_btn = widgets.Button(description='View Descriptive Analytics',
                         layout=widgets.Layout(width='25%', height='60px'))
output = widgets.Output()


def on_corr_button_clicked(b):
    """Build the dataframe from the uploaded files and chart its correlations.

    Args:
        b: The button that fired the event (unused).

    The charts are rendered inside ``output``; the button is disabled
    afterwards so the dataframe is only built once.
    """
    build_df(select_target_drop.options)
    correlations = df.corr()

    with output:
        display_visual(correlations)

    corr_btn.disabled = True


display(corr_btn)
display(output)
corr_btn.on_click(on_corr_button_clicked)


Button(description='View Descriptive Analytics', layout=Layout(height='60px', width='25%'), style=ButtonStyle(…

Output()

In [76]:
#Method that will shape the data into appropriate 3D tensors for the model.
def shape_data(data, input_size):
    """Slice ``data`` into sliding windows and the row that follows each one.

    Args:
        data: Sliceable sequence of rows (for example a 2-D array).
        input_size: Number of consecutive rows in each window.

    Returns:
        A tuple ``(x, y)`` of numpy arrays. ``x[k]`` is
        ``data[k:k + input_size]`` and ``y[k]`` is ``data[k + input_size]``.
        Both arrays are empty when ``data`` has no more than ``input_size``
        rows.
    """
    # Every window needs one following row as its label, so the last usable
    # start position is len(data) - input_size - 1; a non-positive count
    # simply yields no windows.
    n_windows = len(data) - input_size

    x = [data[start:start + input_size] for start in range(n_windows)]
    y = [data[start + input_size] for start in range(n_windows)]

    return np.array(x), np.array(y)

In [82]:
# Settings shared by the training and prediction cells.
n_input = 10      # rows per input window handed to shape_data
n_predict = 5     # number of forecast steps generated after training
epochs = 20
BATCH_SIZE = 50

# One feature per column of the assembled dataframe.
n_feat = len(df.columns)
print(n_feat)

4


In [78]:
# Each stock gets its own scaler so that one price range cannot skew another,
# which keeps the application generic across differently scaled data.
def scale_data(stocks, dataframe):
    """Min-max scale the listed columns of ``dataframe`` in place.

    Args:
        stocks: Names of the columns to scale.
        dataframe: Frame holding those columns; it is modified in place.

    Returns:
        A tuple ``(dataframe, scalers)`` where ``scalers`` maps each column
        name to the ``MinMaxScaler`` fitted on that column.
    """
    scalers = {}

    for name in stocks:
        # The scaler works on 2-D input, so present the column as (n, 1).
        column = np.array(dataframe[name]).reshape((-1, 1))
        fitted = MinMaxScaler().fit(column)
        dataframe[name] = fitted.transform(column)
        scalers[name] = fitted

    return dataframe, scalers



In [79]:
# Module-level handle to the network shared by the train and predict cells.
# It starts out empty; the three LSTM layers (each followed by Dropout) and
# the final Dense layer are set up inside on_train_button_clicked.
model = Sequential()




<a id="Train"></a> 
## Train the Model

Data has been scaled and processed. Click on the button to begin training the model. This can take a while.

In [84]:
train_btn = widgets.Button(description='Train Model',
                           layout=widgets.Layout(width='30%', height='80px'))

train_out = widgets.Output()

display(train_btn)


def _build_model(input_shape, n_outputs):
    """Return a new compiled network: three LSTM+Dropout stages and a Dense head."""
    network = Sequential()

    network.add(LSTM(256, activation='relu', input_shape=input_shape, return_sequences=True))
    network.add(Dropout(0.2))

    network.add(LSTM(128, activation='relu', return_sequences=True))
    network.add(Dropout(0.2))

    network.add(LSTM(64, activation='relu'))
    network.add(Dropout(0.2))

    network.add(Dense(n_outputs))
    # NOTE(review): recent Keras releases name this argument `learning_rate`;
    # switch once the installed Keras version is confirmed.
    network.compile(Adam(lr=.002), loss='mse')
    return network


def on_train_button_clicked(b):
    """Train a fresh model on the first 80% of ``df`` and chart a hold-back forecast.

    Args:
        b: The button that fired the event (unused).

    Steps:
        1. Drop rows with nulls from the global ``df`` and split it 80/20.
        2. Scale the training part per column; scale the remaining part with
           the same scalers and use it as validation data while fitting.
        3. Forecast the last ``n_predict`` training rows from the ``n_input``
           rows that precede them and report the RMSE against the recorded
           'Target' values, both in original units.

    The global ``model`` is replaced on every call, so clicking the button
    again retrains from scratch instead of stacking more layers onto the
    previous network. The button is re-enabled even if a step fails.
    """
    global n_feat
    global model
    n_feat = len(df.columns.values)

    with train_out:
        print("Training.. Please wait, this can take a while..")
        print(n_feat)

    train_btn.disabled = True
    try:
        #Drop any null values within the data
        df.dropna(inplace=True)

        #Create a train split -- we will train the model on 80% of the dataset
        train_split = int(len(df) * .80)

        # The hold-back forecast needs n_input seed rows plus n_predict rows
        # to compare against.
        if train_split < n_input + n_predict:
            with train_out:
                print("Not enough data to train: need at least "
                      f"{n_input + n_predict} training rows, found {train_split}.")
            return

        df_train = copy.deepcopy(df[:train_split])
        df_val = copy.deepcopy(df[train_split:])

        df_train, train_scalers = scale_data(stocks, df_train)
        # Validation rows must go through the scalers fitted on the training
        # rows, otherwise the validation loss is measured on another scale.
        for name in stocks:
            df_val[name] = train_scalers[name].transform(
                np.array(df_val[name]).reshape((-1, 1)))

        #Format the data into tensors for the model
        x_train, y_train = shape_data(df_train.values, n_input)
        x_val, y_val = shape_data(df_val.values, n_input)

        model = _build_model(x_train.shape[1:], n_feat)

        #Fit and train the model
        with train_out:
            # summary() prints by itself; wrapping it in print() adds "None".
            model.summary()
            model.fit(x_train, y_train, epochs=epochs, batch_size=BATCH_SIZE,
                      validation_data=(x_val, y_val) if len(x_val) else None)
            print("Training complete.. Loading results..")

        # Forecast the last n_predict training rows, seeding the model with
        # the n_input rows right before them, so the predictions line up with
        # the dates they are plotted on and scored against.
        scaled = df_train.values
        holdout_start = len(scaled) - n_predict
        batch = np.array(scaled[holdout_start - n_input:holdout_start]).reshape(
            1, n_input, n_feat)

        predictions = []
        for _ in range(n_predict):
            step = model.predict(batch)[0]
            predictions.append(step)
            # Slide the window: drop the oldest row, append the new forecast.
            batch = np.append(batch[:, 1:, :], [[step]], axis=1)

        # Start the prediction trace from the last row that was actually seen.
        predictions.insert(0, scaled[holdout_start - 1])

        temp = pd.DataFrame(predictions, columns=stocks)
        for name in stocks:
            temp[name] = train_scalers[name].inverse_transform(
                np.array(temp[name]).reshape((-1, 1)))

        df_predictions = pd.DataFrame(
            temp.values,
            index=df_train.index[holdout_start - 1:],
            columns=['Target_Prediction' if i == 'Target' else i for i in stocks])

        #create our graph
        trace1 = go.Scatter(x = df_train.index, y = df['Target'][:train_split],
                            mode = 'lines', name = 'Data')

        trace2 = go.Scatter(x = df_predictions.index,
                            y = df_predictions['Target_Prediction'],
                            mode = 'lines', name = 'Prediction')

        layout = go.Layout(title = f'{target_stock} Stock', xaxis = {'title' : "Date"},
                           yaxis = {'title' : "Close"})

        fig = go.Figure(data=[trace1, trace2], layout=layout)

        # Score in original price units: the un-scaled targets from `df`
        # against the inverse-transformed predictions.
        actual = df['Target'].values[holdout_start:train_split]
        predicted = df_predictions['Target_Prediction'].values[-n_predict:]
        rms = math.sqrt(mean_squared_error(actual, predicted))

        with train_out:
            print(rms)
            fig.show()
    finally:
        train_btn.disabled = False


display(train_out)
train_btn.on_click(on_train_button_clicked)

Button(description='Train Model', layout=Layout(height='80px', width='30%'), style=ButtonStyle())

Output()

<a id="Predict"></a>
## Run Model Prediction

Click on the **Run Prediction** button to run the model prediction and forecast the next 5 days of stock prices.

In [81]:
predict_btn = widgets.Button(description='Run Prediction',
                            layout=widgets.Layout(width='30%', height='80px'))

display(predict_btn)
out_pred = widgets.Output()


def on_predict_button_clicked(b):
    """Continue training on the whole of ``df`` and chart an ``n_predict``-step forecast.

    Args:
        b: The button that fired the event (unused).

    The global ``model`` is fitted further on every row of ``df``. It is then
    fed its own output ``n_predict`` times, starting from the last
    ``n_input`` rows. The forecast is un-scaled and plotted on the dates that
    follow the last date in ``df``.
    """
    # Without layers there is nothing to fit; point the user at the training
    # step instead of failing inside the widget callback.
    if not model.layers:
        with out_pred:
            print('The model has not been trained yet. Click "Train Model" first.')
        return

    train_data = copy.deepcopy(df)
    train_data, scalers = scale_data(stocks, train_data)

    x_train, y_train = shape_data(train_data.values, n_input)

    with out_pred:
        print('Training in progress.. please wait')
        model.fit(x_train, y_train,
                  epochs=epochs, batch_size=BATCH_SIZE)
        print('Training complete, making predictions and loading data..')

    scaled = train_data.values
    # Take the feature count from the data so a stale global cannot break
    # the reshape.
    batch = np.array(scaled[-n_input:]).reshape(1, n_input, scaled.shape[1])

    predictions = []
    for _ in range(n_predict):
        step = model.predict(batch)[0]
        predictions.append(step)
        # Slide the window: drop the oldest row, append the new forecast.
        batch = np.append(batch[:, 1:, :], [[step]], axis=1)

    # Start the forecast trace from the last recorded row.
    predictions.insert(0, scaled[-1])

    # NOTE(review): these are calendar days, so forecast dates can fall on
    # weekends; confirm whether business days are wanted for stock data.
    add_dates = [df.index[-1] + DateOffset(days=x) for x in range(0, n_predict + 1)]

    temp = pd.DataFrame(predictions, columns=stocks)
    for name in stocks:
        temp[name] = scalers[name].inverse_transform(np.array(temp[name]).reshape((-1, 1)))

    df_predictions = pd.DataFrame(temp.values,
                                  index=add_dates,
                                  columns=['Target_Prediction' if i == 'Target' else i for i in stocks])

    df_proj = pd.concat([df, df_predictions], axis=1)

    trace1 = go.Scatter(x = df_proj.index, y = df_proj['Target'], mode = 'lines',
                        name = 'Data')

    trace2 = go.Scatter(x = df_proj.index, y = df_proj['Target_Prediction'],
                        mode = 'lines', name = 'Prediction')

    layout = go.Layout(title = f'{target_stock} Stock', xaxis = {'title' : "Date"},
                       yaxis = {'title' : "Close"})

    fig = go.Figure(data=[trace1, trace2], layout=layout)

    with out_pred:
        fig.show()


display(out_pred)
predict_btn.on_click(on_predict_button_clicked)

Button(description='Run Prediction', layout=Layout(height='80px', width='30%'), style=ButtonStyle())

Output()