In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Importing dataset

In [None]:
# Load the stock CSV from the Kaggle input directory and preview the first rows
df=pd.read_csv("/kaggle/input/stock-dataset/Stock.csv")
df.head()

In [None]:
# Summary statistics of the numeric columns
df.describe()

In [None]:
# Column names, non-null counts and dtypes
df.info()

In [None]:
# Number of missing values in each column
df.isnull().sum()

In [None]:
# (rows, columns) of the raw frame
df.shape

In [None]:
# Keep only the "close" column (as a Series) — this is the single series the model is trained on
df1=df.reset_index()["close"]

In [None]:
# Display the selected "close" Series
df1

In [None]:
# Number of observations in the "close" Series
df1.shape

In [None]:
# Line plot of the raw (unscaled) "close" values against row position
import matplotlib.pyplot as plt
plt.plot(df1)

# Data Preprocessing

In [None]:
# LSTMs are sensitive to the scale of their inputs, so the "close" values are
# min-max scaled into the range [0, 1].
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler(feature_range=(0,1))
# Scale from the raw "close" column rather than from df1 itself: this cell rebinds
# df1, so reading df1 here would, on a re-run, refit the scaler on already-scaled
# values and make every later scaler.inverse_transform() return wrong prices.
# NOTE(review): the scaler is fitted on the full series, i.e. before the
# train/test split, so the test period's min/max leak into training — consider
# fitting on the training rows only.
df1=scaler.fit_transform(df["close"].to_numpy().reshape(-1,1))

In [None]:
# Shape of the scaled array; the input was reshaped with reshape(-1,1), so it has one column
df1.shape

# splitting the dataset into training and testing set

In [None]:
# Chronological split (no shuffling, because this is a time series):
# the first 65% of the rows form the training set, the remaining rows the test set.
training_size = int(len(df1) * 0.65)
test_size = len(df1) - training_size
train_data = df1[:training_size, :]
test_data = df1[training_size:, :1]

In [None]:
# Row counts of the training and test partitions
training_size,test_size

In [None]:
# Preview only the first rows; the full array is too long to be a useful cell output
train_data[:5]

In [None]:
# Preview only the first rows; the full array is too long to be a useful cell output
test_data[:5]

In [None]:
def create_dataset(dataset,time_step=1):
    """Turn a 2-D array into sliding-window samples for supervised learning.

    Sample ``i`` uses the ``time_step`` consecutive values of column 0 starting
    at row ``i`` as its input, and the value at row ``i + time_step`` as its target.

    Exactly ``len(dataset) - time_step - 1`` samples are produced, so the final
    possible window (the one whose target is the last row) is not emitted.

    Parameters:
        dataset: 2-D array-like indexed as ``dataset[row, 0]``.
        time_step: number of past values in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: ``X`` holds one window per row and
        ``y`` the matching targets. Both are empty when there are too few rows.
    """
    n_samples = len(dataset) - time_step - 1
    windows = [dataset[start:start + time_step, 0] for start in range(n_samples)]
    targets = [dataset[start + time_step, 0] for start in range(n_samples)]
    return np.array(windows), np.array(targets)

In [None]:
# Build supervised samples: each X row holds time_step consecutive values and y is the
# value that immediately follows them (e.g. with time_step=4: X=t,t+1,t+2,t+3 and y=t+4)
time_step=100
X_train,y_train = create_dataset(train_data,time_step)
X_test,y_test = create_dataset(test_data,time_step)

In [None]:
# Display the training windows (one window of time_step values per row)
X_train

In [None]:
# Shapes of the training inputs and targets
X_train.shape, y_train.shape

In [None]:
# Shapes of the test inputs and targets
X_test.shape , y_test.shape

In [None]:
# Reshape the inputs to [samples, time_step, features], as required by the LSTM layers.
# There is a single feature (the scaled "close" value), hence the trailing 1.
X_train = X_train.reshape(X_train.shape[0] , X_train.shape[1] , 1)
X_test = X_test.reshape(X_test.shape[0] , X_test.shape[1] , 1)

# Model Creation

In [None]:
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import LSTM , Dense

In [None]:
# Stacked LSTM: three LSTM layers of 50 units, one after another, followed by a
# single-unit Dense layer that outputs the next value.
model=Sequential()
# The window length is taken from the training tensor rather than hard-coded,
# so changing time_step does not require editing the model definition.
model.add(LSTM(50,return_sequences=True,input_shape=(X_train.shape[1],1)))
# return_sequences=True passes the full sequence on to the next LSTM layer
model.add(LSTM(50,return_sequences=True))
model.add(LSTM(50))
model.add(Dense(1))
model.compile(loss="mean_squared_error" , optimizer="adam")

In [None]:
# Print the layer-by-layer architecture and parameter counts
model.summary()

In [None]:
# Train for 100 epochs with batch size 64. The test windows are passed as validation
# data, so the loss on them is reported after every epoch.
model.fit(X_train,y_train,validation_data=(X_test,y_test),epochs=100,batch_size=64,verbose=1)

# Predicting the values

In [None]:
# Predict on the training and test windows (outputs are still in the scaled 0-1 range);
# performance metrics are computed in the following cells
train_predict=model.predict(X_train)
test_predict=model.predict(X_test)

In [None]:
# Convert the scaled predictions back to the original price scale.
# NOTE: this cell overwrites its own inputs, so running it a second time without
# re-running the prediction cell would apply the inverse transform twice.
train_predict=scaler.inverse_transform(train_predict)
test_predict=scaler.inverse_transform(test_predict)

In [None]:
# RMSE on the training data, in original price units.
import math
from sklearn.metrics import mean_squared_error
# train_predict has already been inverse-transformed to prices while y_train is still
# in the scaled 0-1 range, so the targets are converted to the same scale before comparing.
math.sqrt(mean_squared_error(scaler.inverse_transform(y_train.reshape(-1,1)),train_predict))

In [None]:
# RMSE on the test data, in original price units.
# test_predict has already been inverse-transformed to prices while y_test is still
# in the scaled 0-1 range, so the targets are converted to the same scale before comparing.
math.sqrt(mean_squared_error(scaler.inverse_transform(y_test.reshape(-1,1)),test_predict))

In [None]:
### Plotting
# Overlay the train and test predictions on the actual series. Each prediction array
# is written into a NaN-filled array of the same shape as df1 so that it is drawn at
# the row positions it refers to.
look_back=100
# shift train predictions for plotting: the first prediction refers to row look_back
trainPredictPlot = np.empty_like(df1)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[look_back:len(train_predict)+look_back, :] = train_predict
# shift test predictions for plotting
testPredictPlot = np.empty_like(df1)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(train_predict)+(look_back*2)+1:len(df1)-1, :] = test_predict
# plot baseline and predictions, labelled so the figure can be read on its own
plt.plot(scaler.inverse_transform(df1), label="Actual close")
plt.plot(trainPredictPlot, label="Prediction (train)")
plt.plot(testPredictPlot, label="Prediction (test)")
plt.xlabel("Row index")
plt.ylabel("Close")
plt.legend()
plt.show()

Here the blue line is the actual closing price over the whole dataset, the orange line is the model's predictions on the training data, and the green line is the model's predictions on the test data.

# Predicting the values for next 30 days

In [None]:
# Number of rows in the test partition (used to locate its last time_step values)
len(test_data)

In [None]:
# Each forecast uses the previous time_step values as input, so the seed window is
# the last time_step rows of the test set. Slicing from the end avoids hard-coding
# a start index that is only valid for one particular dataset length.
x_input=test_data[-time_step:].reshape(1,-1)
x_input.shape

In [None]:
# Flatten the single-row seed window into a plain Python list of floats
temp_input = x_input[0].tolist()

In [None]:
# Display the seed window values (scaled)
temp_input

In [None]:
# Recursive forecast for the next 30 days: each prediction is appended to the input
# window and then used as an input when predicting the following day.
from numpy import array

lst_output=[]
n_steps=100
n_days=30
# Work on a copy of the seed window so temp_input is left untouched and re-running
# this cell restarts the forecast from the same starting point.
window=list(temp_input)[-n_steps:]
for i in range(n_days):
    # the model expects input shaped (samples, time steps, features)
    x_input=np.array(window).reshape((1, n_steps, 1))
    yhat = model.predict(x_input, verbose=0)
    print("{} day output {}".format(i,yhat))
    lst_output.extend(yhat.tolist())
    # slide the window one step: drop the oldest value, append the new prediction
    window=window[1:]+yhat[0].tolist()

print(lst_output)

In [None]:
# x-axis positions for the last n_steps observed days ...
day_new=np.arange(1,n_steps+1)
# ... followed by one position per forecast value
day_pred=np.arange(n_steps+1,n_steps+1+len(lst_output))

In [None]:
# Total number of rows in the scaled series
len(df1)

In [None]:
# Plot the most recent observed values, taking exactly as many rows from the end of
# the series as there are positions in day_new (converted back to prices)
plt.plot(day_new,scaler.inverse_transform(df1[-len(day_new):]) , color="red", label="Observed")
# Plot the forecast values (converted back to prices)
plt.plot(day_pred,scaler.inverse_transform(lst_output) , color="green", label="Forecast")
plt.legend()

In [None]:
# Append the forecast values to the scaled series and plot the tail from row 1200 onward
# (values are still in the scaled 0-1 range here)
df3=df1.tolist()
df3.extend(lst_output)
plt.plot(df3[1200:] )

In [None]:
# Convert the scaled series plus forecast back to prices. The result is rebuilt from
# df1 and lst_output rather than from df3 itself, so re-running this cell cannot
# apply the inverse transform twice.
df3=scaler.inverse_transform(df1.tolist()+lst_output).tolist()

In [None]:
# Plot the full series, forecast included, in original price units
plt.plot(df3)