## Import Required Libraries

# LetsGrowMore (LGMVIP- APRIL2022)
Sumit Jangir

## Task 2 - Stock Market Prediction And Forecasting Using Stacked LSTM

### Data = https://raw.githubusercontent.com/mwitiderrick/stockprice/master/NSE-TATAGLOBAL.csv 

In this model I have used the **Stacked LSTM** (Long Short Term Memory), a Machine Learning Model for Stock Market Prediction. Stock market prediction is the act of trying to determine the future value of a company stock or other financial instrument traded on a financial exchange.

![0_dtiuqS8kNB66Mp5P.png](attachment:0_dtiuqS8kNB66Mp5P.png)

In [None]:
import pandas as pd
import numpy as np
import math
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.layers import LSTM
%matplotlib inline
from warnings import filterwarnings
filterwarnings("ignore")

## Import Data

In [None]:
# Load the TATAGLOBAL daily price history straight from GitHub.
df = pd.read_csv('https://raw.githubusercontent.com/mwitiderrick/stockprice/master/NSE-TATAGLOBAL.csv')
# Every later step (positional 75/25 split, sliding windows, "next 30 days" forecast)
# walks the rows top to bottom and treats that order as the passage of time, so the
# rows must be oldest-first.
# NOTE(review): the published CSV appears to be newest-first and to carry a 'Date'
# column - confirm. If the file is already chronological this sort is a no-op.
df = df.sort_values('Date', key=pd.to_datetime, ignore_index=True)
df.head()

## Data Exploration

In [None]:
# (rows, columns) of the loaded frame
df.shape

In [None]:
# Column names, non-null counts and dtypes
df.info()

In [None]:
# Summary statistics for the numeric columns
df.describe()

In [None]:
# Pull the 'Close' column out as a Series (reset_index() first gives a fresh 0..n-1 index)
df_close = df.reset_index()['Close']
df_close

In [None]:
# Count the missing values in each column
df.isnull().sum()

Here we can see that there are no null values in the dataset.

## Exploratory Data Analysis (EDA)
## Data visualization

In [None]:
# Pairwise scatter plots of the numeric columns, with each column's distribution on
# the diagonal.
# No `hue` here: seaborn treats the hue variable as categorical, and 'Turnover (Lacs)'
# is a continuous column, so colouring by it creates one colour group (and one legend
# entry) per distinct value, which is very slow and unreadable.
sns.pairplot(df)
plt.show()

In [None]:
# Closing-price Series used by the Close plot that follows
df_close = df.reset_index()['Close']
df_close

Let us plot the Close value graph using pyplot

* **Let us plot the Close value graph using pyplot**

In [None]:
# Line chart of the closing price against the row index.
plt.figure(figsize=(15, 6))
plt.title('Close value graph')
plt.ylabel("Close value")
plt.plot(df_close, color="b")
plt.show()

* **Let us plot the High value graph using pyplot**

In [None]:
# Extract the daily high as a Series; later cells scale and model this column.
df_high = df.reset_index()['High']

# Line chart of the daily high against the row index.
plt.figure(figsize=(15, 6))
plt.plot(df_high, color="g")
plt.title('High value graph')
plt.ylabel("High value")
plt.show()

* **Since LSTMs are sensitive to the scale of the data, we apply a MinMax scaler to transform the values into the range 0 to 1.**

In [None]:
from sklearn.preprocessing import MinMaxScaler

# The scaler expects a 2-D input, so the series is first turned into a single column.
high_column = np.asarray(df_high).reshape(-1, 1)

# Fit on the whole column and replace df_high with its 0-1 scaled version;
# `scaler` is kept so predictions can be mapped back to price units later.
scaler = MinMaxScaler(feature_range=(0, 1))
df_high = scaler.fit_transform(high_column)
df_high

In [None]:
# After scaling, df_high is a 2-D array with a single column: (n_rows, 1)
df_high.shape

## Train Test Split

* In time-series data each observation depends on the ones before it, so the data is split by position rather than at random. The training set is the first 75% of the rows; the test set is the remaining rows (the length of the dataset minus the training size).

In [None]:
# Positional split: the first 75% of the rows train the model, the rest test it.
training_size = int(0.75 * len(df_high))
test_size = len(df_high) - training_size
train_data = df_high[:training_size, :]
test_data = df_high[training_size:, :1]

In [None]:
# Element counts of the two splits (each row holds a single scaled value).
print('Training Data :',train_data.size)
print('Testing Data :',test_data.size)

## Data Preprocessing

In [None]:
def create_dataset(dataset, time_step = 1):
    """Slice a single-column series into sliding windows and their next values.

    Args:
        dataset: 2-D array; only column 0 is read.
        time_step: number of consecutive rows in each input window.

    Returns:
        A pair of arrays ``(windows, targets)``. ``windows[k]`` holds rows
        ``k .. k + time_step - 1`` of column 0 and ``targets[k]`` is the value
        at row ``k + time_step``. ``len(dataset) - time_step - 1`` pairs are
        produced (none when that count is not positive).
    """
    n_windows = len(dataset) - time_step - 1
    starts = range(n_windows)
    windows = [dataset[start:start + time_step, 0] for start in starts]
    targets = [dataset[start + time_step, 0] for start in starts]
    return np.array(windows), np.array(targets)

In [None]:
# Window length: each sample holds this many consecutive scaled values
time_step = 100
# Build (window, next value) pairs separately for each split
x_train, y_train = create_dataset(train_data, time_step)
x_test, y_test = create_dataset(test_data, time_step)

## LSTM
* Reshape the input to be [samples, time steps, features] which is the requirement of LSTM

In [None]:
# Add a trailing feature axis of size 1: (samples, time steps) -> (samples, time steps, 1)
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

In [None]:
# Shapes of the model inputs (3-D) and targets (1-D) for both splits.
print("X Training Data :",x_train.shape)
print("X testing Data :",x_test.shape)
print("Y Training Data :",y_train.shape)
print("Y Testing Data :",y_test.shape)

* **Import required modules for the stacked LSTM.**


In [None]:
import math
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.layers import LSTM

In [None]:
# Show the installed TensorFlow version
tf.__version__

## Creating model

In [None]:
# Create the stacked LSTM model: three 50-unit LSTM layers feeding one linear output unit.
model = Sequential()
# The input length is tied to time_step so the model always matches the windows
# built by create_dataset. The first two layers return the full sequence so that
# the next LSTM layer receives one vector per time step.
model.add(LSTM(50, return_sequences = True, input_shape = (time_step, 1)))
model.add(LSTM(50, return_sequences = True))
# The last LSTM layer returns only its final state, which feeds the regression head.
model.add(LSTM(50))
model.add(Dense(1))
model.compile(loss = 'mean_squared_error', optimizer = 'adam')

In [None]:
# Print the layer-by-layer architecture and parameter counts
model.summary()

In [None]:
# Train for 100 epochs in batches of 64. The test windows double as validation data,
# so the reported val_loss is not an independent hold-out score.
model.fit(x_train, y_train, validation_data = (x_test, y_test), epochs = 100, batch_size = 64, verbose = 1)

In [None]:
# Run the trained model on both splits; the outputs are still in the scaler's 0-1 range
train_predict = model.predict(x_train)
test_predict = model.predict(x_test)

In [None]:
# Map the predictions from the scaler's 0-1 range back to the original price units
train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)

## Calculating RMSE

In [None]:
# Training RMSE in price units.
# train_predict has already been mapped back to price units, while y_train is still
# in the scaler's 0-1 range, so the targets are inverse-transformed before comparing.
math.sqrt(mean_squared_error(scaler.inverse_transform(y_train.reshape(-1, 1)), train_predict))

In [None]:
# Test RMSE in price units.
# test_predict has already been mapped back to price units, while y_test is still
# in the scaler's 0-1 range, so the targets are inverse-transformed before comparing.
math.sqrt(mean_squared_error(scaler.inverse_transform(y_test.reshape(-1, 1)), test_predict))

## Plotting the graph according to train and test data

In [None]:
# Build two NaN-filled arrays, the same shape as the full series, that hold the
# predictions at the row positions of the values they predict.

# Shift train prediction for plotting: the first prediction is for the value that
# follows the first look_back-long window, i.e. row index look_back.
look_back = 100
trainPredictPlot = np.empty_like(df_high)
trainPredictPlot[:,:] = np.nan
trainPredictPlot[look_back:len(train_predict) + look_back, :] = train_predict

# Shift test prediction for plotting. create_dataset yields len(split) - look_back - 1
# samples, so the training split spans len(train_predict) + look_back + 1 rows; the
# first test prediction sits look_back rows after that, and the final row of the
# series has no prediction.
testPredictPlot = np.empty_like(df_high)
testPredictPlot[:,:] = np.nan
testPredictPlot[len(train_predict) + (look_back * 2)+1:len(df_high) - 1, :] = test_predict

In [None]:
# Plot the full series (in price units) with the train and test predictions on top.
# No colours are set explicitly, so the colours named in the prints below rely on
# matplotlib's default colour cycle and on the order of the three plot calls.
plt.figure(figsize=(10,6))

plt.plot(scaler.inverse_transform(df_high))
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()

print("Green indicates the Predicted Data")
print("Blue indicates the Complete Data")
print("Orange indicates the Train Data")

In [None]:
# Inspect the size of the test split before picking the seed window for the forecast
print("Length of Test Data : ",len(test_data))
print("Shape of x Test Data :",x_test.shape)

In [None]:
# Seed window for the forecast: the last time_step scaled values of the test split,
# laid out as a single row. Slicing relative to the end keeps this correct if the
# dataset length or the window size changes.
x_input = test_data[-time_step:].reshape(1, -1)
x_input.shape

## Predicting values for next 30 days

In [None]:
# Flatten the single seed row into a plain Python list that the forecast loop can grow.
temp_input = x_input[0].tolist()

In [None]:
# Recursive 30-step forecast: each prediction is appended to the rolling window and
# becomes part of the input for the next step.
lst_output = []
n_steps = 100  # window length; must match the length the model was built with
i = 0
while i < 30:
    if len(temp_input) > n_steps:
        # The window holds n_steps + 1 values: drop the oldest to get the model input.
        x_input = np.array(temp_input[1:])
        print("{} day input {}".format(i,x_input))
        x_input = x_input.reshape((1, n_steps, 1))

        yhat = model.predict(x_input, verbose=0)
        print("{} day output {}".format(i,yhat))
        temp_input.extend(yhat[0].tolist())
        # Trim back to n_steps + 1 values so the next pass takes this branch again.
        temp_input = temp_input[1:]
    else:
        # First pass: the seed window is exactly n_steps long and is used as-is.
        x_input = x_input.reshape((1, n_steps, 1))
        yhat = model.predict(x_input, verbose=0)
        print(yhat[0])
        temp_input.extend(yhat[0].tolist())
        print(len(temp_input))

    # Each entry is a one-element list holding a value in the scaler's 0-1 range.
    lst_output.extend(yhat.tolist())
    i = i + 1

print(lst_output)

In [None]:
# x-axis positions: 1..100 for the last 100 observed values, 101..130 for the 30 forecasts
day_new = np.arange(1,101)
day_pred = np.arange(101,131)

In [None]:
# Sanity-check the lengths of the two x-axis arrays
print(day_new.shape)
print(day_pred.shape)

In [None]:
# Append the forecasts to the scaled series (both are lists of one-element lists)
ds3 = df_high.tolist()
ds3.extend(lst_output)

# Length of the original series, without the forecasts
len(df_high)

* **Graph of actual values in last 100 days**

In [None]:
# Plot the most recent observed values followed by the forecast, both in price units.
plt.figure(figsize=(13,6))

# Take as many trailing observations as there are positions in day_new, so the x and y
# lengths always match regardless of the dataset length.
plt.plot(day_new, scaler.inverse_transform(df_high[-len(day_new):]))
plt.plot(day_pred, scaler.inverse_transform(lst_output))
plt.xlabel('Days')
plt.ylabel('values')

plt.show()

* **Graph of the last 65 points of the extended series: 35 actual values followed by the 30 predicted values (in scaled units)**

In [None]:
# Plot the tail of the series with the forecasts appended. The values are still in the
# scaler's 0-1 range, not price units.
plt.figure(figsize=(13,6))

ds3=df_high.tolist()
ds3.extend(lst_output)
# NOTE(review): 2000 is a hard-coded start row; how many observed points it leaves
# before the forecasts depends on the dataset length - confirm if the data changes.
plt.plot(ds3[2000:])
plt.xlabel("Days")
plt.ylabel("Predicted Value")
plt.show()

In [None]:
# Plot the whole series plus the forecasts, mapped back to price units.
plt.figure(figsize=(13,6))

# ds3 is overwritten with its inverse-transformed version, so re-running this cell
# without rebuilding ds3 first would apply the inverse transform twice.
ds3=scaler.inverse_transform(ds3).tolist()
plt.plot(ds3)

plt.show()

## Model Created Successfully !
# Thank You!