## Import libraries and packages

In [1]:
import datetime as dt
import math
import os
from itertools import cycle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import tensorflow as tf
from plotly.subplots import make_subplots
from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score
from sklearn.metrics import mean_poisson_deviance, mean_gamma_deviance, accuracy_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM, GRU

## Import Dataset

In [2]:
# Read the raw TSLA price history and switch the column names to snake_case.
column_names = {
    'Date': 'date',
    'Open': 'open',
    'High': 'high',
    'Low': 'low',
    'Close': 'close',
    'Adj Close': 'adj_close',
    'Volume': 'volume',
}
main_df = pd.read_csv('./data/TSLA.csv').rename(columns=column_names)

main_df.head()

Unnamed: 0,date,open,high,low,close,adj_close,volume
0,2016-08-16,45.098,45.438,44.681999,44.722,44.722,11335500
1,2016-08-17,44.866001,44.966,44.560001,44.647999,44.647999,8935500
2,2016-08-18,44.764,45.132,44.458,44.702,44.702,8572500
3,2016-08-19,44.708,45.034,44.506001,45.0,45.0,8297500
4,2016-08-22,44.834,45.021999,44.535999,44.585999,44.585999,10327500


In [3]:
# One row per trading day, one column per field.
n_days, n_fields = main_df.shape
print('Total number of days:', n_days)
print('Total number of fields', n_fields)

Total number of days: 1258
Total number of fields 7


## Checking Null and NA value

In [4]:
# isnull() and isna() are aliases in pandas, so one boolean mask serves both reports.
missing_mask = main_df.isna().values
print('Null values:', missing_mask.sum())
print('NA values:', missing_mask.any())

Null values: 0
NA values: False


## Convert Date field into datetime format

In [5]:
# Parse the date strings so the `.dt` accessor and date comparisons work in later cells.
parsed_dates = pd.to_datetime(main_df['date'])
main_df['date'] = parsed_dates
main_df.head()

Unnamed: 0,date,open,high,low,close,adj_close,volume
0,2016-08-16,45.098,45.438,44.681999,44.722,44.722,11335500
1,2016-08-17,44.866001,44.966,44.560001,44.647999,44.647999,8935500
2,2016-08-18,44.764,45.132,44.458,44.702,44.702,8572500
3,2016-08-19,44.708,45.034,44.506001,45.0,45.0,8297500
4,2016-08-22,44.834,45.021999,44.535999,44.585999,44.585999,10327500


## EDA - Exploratory Data Analysis

In [8]:
# Report the covered period. The date column is selected by name and indexed
# with .iloc: the previous `main_df.iloc[0][0]` relied on an integer key being
# treated as a position on a label-indexed row Series, which pandas has
# deprecated, and it silently depended on 'date' being the first column.
first_day = main_df['date'].iloc[0]
last_day = main_df['date'].iloc[-1]

print('Starting date: ', first_day)
print('Ending date: ', last_day)
print('Duration: ', last_day - first_day)

Starting date:  2016-08-16 00:00:00
Ending date:  2021-08-13 00:00:00
Duration:  1823 days 00:00:00


Monthwise comparison between Stock open and close price

In [9]:
# Average open/close price per calendar month (all years pooled), in calendar order.
new_order = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
month_name = main_df['date'].dt.strftime('%B')
monthly_mean = main_df.groupby(month_name)[['open', 'close']].mean()
# groupby sorts the month names alphabetically; reindex restores calendar order.
monthvise = monthly_mean.reindex(new_order, axis=0)
monthvise

Unnamed: 0_level_0,open,close
date,Unnamed: 1_level_1,Unnamed: 2_level_1
January,211.862118,212.589568
February,227.558569,227.040441
March,194.185782,193.161236
April,205.59503,205.671611
May,181.159451,180.826095
June,204.369197,204.726543
July,226.404302,226.023074
August,173.212144,174.174936
September,129.064456,128.870416
October,129.46928,128.815189


In [10]:
# Grouped bar chart: mean open vs. mean close price for each calendar month.
fig = go.Figure(data=[
    go.Bar(
        x=monthvise.index,
        y=monthvise['open'],
        name='Stock Open Price',
        marker_color='crimson',
    ),
    go.Bar(
        x=monthvise.index,
        y=monthvise['close'],
        # Legend label corrected from 'Stock Closes Price' so it matches the
        # 'Stock Close Price' wording used by the trend chart further down.
        name='Stock Close Price',
        marker_color='lightsalmon',
    ),
])

fig.update_layout(barmode='group', xaxis_tickangle=-45, title='Monthwise comparison between Stock open and close price')

fig.show()

Monthwise High and Low stock price

In [11]:
# Highest high and lowest low per calendar month (all years pooled).
# The grouping is built once and reused; the original cell also started with a
# groupby whose result was computed and then thrown away.
by_month = main_df.groupby(main_df['date'].dt.strftime('%B'))

# `new_order` (calendar month names) is defined in the monthly-mean cell above.
monthvise_high = by_month['high'].max().reindex(new_order, axis=0)
monthvise_low = by_month['low'].min().reindex(new_order, axis=0)

fig = go.Figure()
fig.add_trace(go.Bar(
    x=monthvise_high.index,
    y=monthvise_high,
    name='Stock high Price',
    marker_color='rgb(0, 153, 204)'
))
fig.add_trace(go.Bar(
    x=monthvise_low.index,
    y=monthvise_low,
    name='Stock low Price',
    marker_color='rgb(255, 128, 0)'
))

fig.update_layout(barmode='group',
                  title=' Monthwise High and Low stock price')
fig.show()

Trend comparison between stock open price, close price, high price, low price

In [13]:
# Line chart of the four daily price series over the whole history.
# The legend names are consumed in the same order as the plotted columns.
price_columns = ['open', 'close', 'high', 'low']
names = cycle(['Stock Open Price', 'Stock Close Price', 'Stock High Price', 'Stock Low Price'])

fig = px.line(
    main_df,
    x='date',
    y=price_columns,
    labels={'date': 'Date', 'value': 'Stock value'},
)

fig.update_layout(
    title_text='Stock analysis chart',
    font_size=15,
    font_color='black',
    legend_title_text='Stock Parameters',
)
for trace in fig.data:
    trace.update(name=next(names))
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig.show()

# Prepare Stock Close price

### Make separate dataframe of close price

In [14]:
# Keep only the two columns the forecasting part of the notebook needs.
close_columns = ['date', 'close']
close_df = main_df[close_columns]
print('Shape of close dataframe: ', close_df.shape)

Shape of close dataframe:  (1258, 2)


### Plotting Stock Close price chart

In [15]:
# Close price over the full history, on a plain white background without grid lines.
fig = px.line(
    close_df,
    x='date',
    y='close',
    labels={'date': 'Date', 'close': 'Close Stock'},
)
fig.update_traces(marker_line_width=2, opacity=0.8)
fig.update_layout(
    title_text='Stock close price chart',
    plot_bgcolor='white',
    font_size=15,
    font_color='black',
)
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

# Consider only last 1 year data for prediction

In [16]:
# Restrict the series to rows dated after 2020-08-16. `close_stock` keeps an
# untouched copy (dates + prices) for the plots further down.
is_recent = close_df['date'] > '2020-08-16'
close_df = close_df[is_recent]
close_stock = close_df.copy()
print('Total data for prediction: ', close_df.shape[0])

Total data for prediction:  251


In [17]:
# Close price for the period that is actually used for modelling.
fig = px.line(
    close_df,
    x='date',
    y='close',
    labels={'date': 'Date', 'close': 'Close Stock'},
)
fig.update_traces(marker_line_width=2, opacity=0.8, marker_line_color='orange')
fig.update_layout(
    title_text='Considered period to predict Stock close price',
    plot_bgcolor='white',
    font_size=15,
    font_color='black',
)
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

In [18]:
# Normalizing close price
#
# Scale the close prices into [0, 1]. The original cell ran
# `del close_df['close']`, which removed the prices and left only the dates, so
# the scaler (and therefore the whole model) was fitted on timestamps instead
# of prices. The price column is now selected explicitly from `close_stock`
# (the copy of the filtered frame made above), which also avoids mutating a
# DataFrame slice in place and lets this cell be re-run safely.
#
# From here on `close_df` is a NumPy array of shape (n_days, 1), not a DataFrame.
#
# NOTE(review): the scaler is fitted on the full period before the train/test
# split below, so the test range influences the scaling - consider fitting on
# the training rows only.
scaler = MinMaxScaler(feature_range=(0, 1))
close_df = scaler.fit_transform(close_stock[['close']].to_numpy())
print(close_df.shape)

(251, 1)


Prepare Data for train and test

In [19]:
# Chronological 60/40 split: the first 60% of rows train the model, the rest test it.
n_rows = len(close_df)
training_size = int(n_rows * 0.60)
test_size = n_rows - training_size

train_data = close_df[:training_size, :]
test_data = close_df[training_size:n_rows, :1]

print('train_data: ', train_data.shape)
print('test_data: ', test_data.shape)

train_data:  (150, 1)
test_data:  (101, 1)


In [20]:
# convert an array of values into a dataset matrix

def create_dataset(dataset, time_step=1):
    """Slice a 2-D series into sliding windows and their next-step targets.

    Args:
        dataset: 2-D array; only column 0 is read.
        time_step: number of consecutive values in each input window.

    Returns:
        (X, y) as NumPy arrays. Row ``k`` of X holds ``dataset[k:k+time_step, 0]``
        and ``y[k]`` is the value that immediately follows that window.

    Note:
        ``len(dataset) - time_step - 1`` samples are produced, i.e. the last
        possible window is not emitted. The plotting offsets used later in the
        notebook are written against this sample count.
    """
    n_samples = len(dataset) - time_step - 1
    starts = range(n_samples)
    windows = [dataset[start:start + time_step, 0] for start in starts]
    targets = [dataset[start + time_step, 0] for start in starts]
    return np.array(windows), np.array(targets)

In [21]:
# Each sample is a window of `time_step` consecutive scaled closes; the target
# is the close on the following day.
time_step = 15

X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

for label, array in (
    ('X_train: ', X_train),
    ('y_train: ', y_train),
    ('X_test: ', X_test),
    ('y_test: ', y_test),
):
    print(label, array.shape)

X_train:  (134, 15)
y_train:  (134,)
X_test:  (85, 15)
y_test:  (85,)


In [22]:
# The recurrent (GRU) layers expect input shaped [samples, time steps, features];
# there is a single feature (the scaled close), so a trailing axis of size 1 is added.
X_train = X_train.reshape(X_train.shape[:2] + (1,))
X_test = X_test.reshape(X_test.shape[:2] + (1,))

for label, array in (('X_train: ', X_train), ('X_test: ', X_test)):
    print(label, array.shape)

X_train:  (134, 15, 1)
X_test:  (85, 15, 1)


# Model Building (GRU)

In [23]:
tf.keras.backend.clear_session()

# Seed NumPy and TensorFlow before any layer is created so weight
# initialisation and dropout are repeatable between runs; without a seed every
# run of the notebook reports different metrics and forecasts.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Three stacked GRU layers (the first two return the full sequence so the next
# recurrent layer has a sequence to consume), dropout, then one linear output
# unit for the next scaled close price.
#
# NOTE(review): the output recorded under this cell is a NotImplementedError
# ("Cannot convert a symbolic Tensor ... to a numpy array") raised while the
# GRU layers are built. That message is usually a NumPy/TensorFlow version
# mismatch rather than a fault in this code - confirm the environment.
model = Sequential([
    GRU(32, return_sequences=True, input_shape=(time_step, 1)),
    GRU(32, return_sequences=True),
    GRU(32),
    Dropout(0.20),
    Dense(1),
])
model.compile(loss='mean_squared_error', optimizer='adam')

NotImplementedError: Cannot convert a symbolic Tensor (gru/strided_slice:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported

In [None]:
# Display the Keras summary of the GRU model compiled in the previous cell.
model.summary()

In [None]:
# Train for 200 epochs in mini-batches of 32. The test windows are passed as
# validation data, so `history` records a test-set loss after every epoch.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=200,
    batch_size=32,
    verbose=1,
)

In [None]:
# Training vs. validation loss per epoch.
# (matplotlib is imported with the other libraries in the first cell.)
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(loss))

# The figure is created *before* plotting. The original cell called
# plt.figure() after the plot commands, which opened a second, empty figure
# that was rendered underneath the loss curves.
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(epochs, loss, 'r', label='Training loss')
loss_ax.plot(epochs, val_loss, 'b', label='Validation loss')
loss_ax.set_title('Training and validation loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend(loc=0)

plt.show()

In [None]:
# Run the trained model over both splits; the outputs are still in the scaled
# [0, 1] space at this point.
train_predict, test_predict = (
    model.predict(features) for features in (X_train, X_test)
)
train_predict.shape, test_predict.shape

# Model Evaluation

In [None]:
# Undo the min-max scaling so predictions and targets are back in the units the
# scaler was fitted on.
# Caution: this cell overwrites train_predict/test_predict, so running it twice
# applies the inverse transform twice.
to_original_scale = scaler.inverse_transform

train_predict = to_original_scale(train_predict)
test_predict = to_original_scale(test_predict)
# The targets are 1-D; the scaler needs a column vector.
original_ytrain = to_original_scale(y_train.reshape(-1, 1))
original_ytest = to_original_scale(y_test.reshape(-1, 1))

In [None]:
# Evaluation metrics: RMSE, MSE and MAE on both splits.
# MSE is computed once per split and reused for the RMSE.
train_mse = mean_squared_error(original_ytrain, train_predict)
test_mse = mean_squared_error(original_ytest, test_predict)
train_mae = mean_absolute_error(original_ytrain, train_predict)
test_mae = mean_absolute_error(original_ytest, test_predict)

print("Train data RMSE: ", math.sqrt(train_mse))
print("Train data MSE: ", train_mse)
print("Train data MAE: ", train_mae)
print("-------------------------------------------------------------------------------------")
print("Test data RMSE: ", math.sqrt(test_mse))
print("Test data MSE: ", test_mse)
print("Test data MAE: ", test_mae)

In [None]:
# Explained variance score of the predictions against the actual values, per split.
train_evs = explained_variance_score(original_ytrain, train_predict)
test_evs = explained_variance_score(original_ytest, test_predict)
print("Train data explained variance regression score:", train_evs)
print("Test data explained variance regression score:", test_evs)

In [None]:
# Coefficient of determination (R2) for each split.
train_r2 = r2_score(original_ytrain, train_predict)
test_r2 = r2_score(original_ytest, test_predict)
print("Train data R2 score:", train_r2)
print("Test data R2 score:", test_r2)

In [None]:
# Mean gamma deviance (MGD) and mean Poisson deviance (MPD) for each split.
train_mgd = mean_gamma_deviance(original_ytrain, train_predict)
test_mgd = mean_gamma_deviance(original_ytest, test_predict)
train_mpd = mean_poisson_deviance(original_ytrain, train_predict)
test_mpd = mean_poisson_deviance(original_ytest, test_predict)

print("Train data MGD: ", train_mgd)
print("Test data MGD: ", test_mgd)
print("----------------------------------------------------------------------")
print("Train data MPD: ", train_mpd)
print("Test data MPD: ", test_mpd)

# Comparison of original stock close price and predicted close price

In [None]:
# Overlay the train and test predictions on the original close price.
#
# Fix: this cell referred to `closedf`, a name that is never defined in the
# notebook (NameError). The scaled series is called `close_df`.

look_back = time_step
n_train_pred = len(train_predict)

# shift train predictions for plotting: the first prediction corresponds to the
# day right after the first `look_back`-day window.
trainPredictPlot = np.full_like(close_df, np.nan)
trainPredictPlot[look_back:n_train_pred + look_back, :] = train_predict
print("Train predicted data: ", trainPredictPlot.shape)

# shift test predictions for plotting: each split yields
# len(split) - look_back - 1 samples (see create_dataset), so the train rows
# span n_train_pred + look_back + 1 positions and the first test prediction
# sits another `look_back` rows after that.
test_start = n_train_pred + (look_back * 2) + 1
testPredictPlot = np.full_like(close_df, np.nan)
testPredictPlot[test_start:len(close_df) - 1, :] = test_predict
print("Test predicted data: ", testPredictPlot.shape)

names = cycle(['Original close price', 'Train predicted close price', 'Test predicted close price'])

plotdf = pd.DataFrame({'date': close_stock['date'],
                       'original_close': close_stock['close'],
                       'train_predicted_close': trainPredictPlot.reshape(1, -1)[0].tolist(),
                       'test_predicted_close': testPredictPlot.reshape(1, -1)[0].tolist()})

fig = px.line(plotdf, x=plotdf['date'], y=[plotdf['original_close'], plotdf['train_predicted_close'],
                                           plotdf['test_predicted_close']],
              labels={'value': 'Stock price', 'date': 'Date'})
# Title spelling corrected ("Comparision" -> "Comparison").
fig.update_layout(title_text='Comparison between original close price vs predicted close price',
                  plot_bgcolor='white', font_size=15, font_color='black', legend_title_text='Close Price')
fig.for_each_trace(lambda t: t.update(name=next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

# Predicting next 30 days

In [None]:
# Recursive multi-step forecast: seed a rolling window with the last
# `time_step` scaled values of the test split, predict one step ahead, append
# the prediction and slide forward. Predictions stay in scaled space.
from numpy import array

n_steps = time_step
pred_days = 30

# Rolling history of scaled values; only its last `n_steps` entries feed the model.
temp_input = test_data[len(test_data) - time_step:].reshape(1, -1)[0].tolist()

lst_output = []
for _ in range(pred_days):
    x_input = np.array(temp_input[-n_steps:]).reshape((1, n_steps, 1))
    yhat = model.predict(x_input, verbose=0)
    temp_input.extend(yhat[0].tolist())
    lst_output.extend(yhat.tolist())

print("Output of predicted next days: ", len(lst_output))

## Plotting last 15 days of dataset and next predicted 30 days

In [None]:
# 1-based day numbers: the observed window first, then the forecast horizon.
last_days = np.arange(1, time_step + 1)
day_pred = np.arange(pred_days) + (time_step + 1)
for day_numbers in (last_days, day_pred):
    print(day_numbers)

In [None]:
# Last `time_step` observed closes followed by the `pred_days` forecast values,
# drawn as two separate traces.
#
# Fixes:
#   * `closedf` was never defined (NameError); the scaled series is `close_df`.
#   * Both plot columns used to be assigned the *same* list object, so writing
#     the forecast into one also changed the other and the two traces were
#     identical. Each column now has its own NaN-filled array.
horizon = time_step + pred_days

last_original_days_value = np.full(horizon, np.nan)
next_predicted_days_value = np.full(horizon, np.nan)

# Observed part: the final `time_step` rows, converted back from scaled space.
last_original_days_value[:time_step] = scaler.inverse_transform(
    close_df[len(close_df) - time_step:]
).ravel()
# Forecast part: the recursive predictions, converted back the same way.
next_predicted_days_value[time_step:] = scaler.inverse_transform(
    np.array(lst_output).reshape(-1, 1)
).ravel()

new_pred_plot = pd.DataFrame({
    'last_original_days_value': last_original_days_value,
    'next_predicted_days_value': next_predicted_days_value
})

names = cycle(['Last 15 days close price', 'Predicted next 30 days close price'])

fig = px.line(new_pred_plot, x=new_pred_plot.index, y=[new_pred_plot['last_original_days_value'],
                                                       new_pred_plot['next_predicted_days_value']],
              labels={'value': 'Stock price', 'index': 'Timestamp'})
fig.update_layout(title_text='Compare last 15 days vs next 30 days',
                  plot_bgcolor='white', font_size=15, font_color='black', legend_title_text='Close Price')

fig.for_each_trace(lambda t: t.update(name=next(names)))
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

## Plotting entire Closing Stock Price with next 30 days period of prediction

In [None]:
# Whole modelled period followed by the forecast, as one continuous line.
#
# Fix: `closedf` was never defined (NameError); the scaled series is `close_df`.
# The observed and forecast values are stacked while still scaled and converted
# back in a single inverse_transform call.
scaled_with_forecast = np.vstack([close_df, np.array(lst_output).reshape(-1, 1)])
lstmdf = scaler.inverse_transform(scaled_with_forecast).reshape(1, -1).tolist()[0]

names = cycle(['Close price'])

fig = px.line(lstmdf, labels={'value': 'Stock price', 'index': 'Timestamp'})
fig.update_layout(title_text='Plotting whole closing stock price with prediction',
                  plot_bgcolor='white', font_size=15, font_color='black', legend_title_text='Stock')

fig.for_each_trace(lambda t: t.update(name=next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()