<a href="https://colab.research.google.com/github/ranshu1601/stock_market_Prediction/blob/main/Stock_Market_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install yfinance

In [2]:
import yfinance as yf # for fetching the historical stock market data
import numpy as np 
import pandas as pd
import tensorflow as tf # used for creating deep learning piplines 



In [3]:
data = yf.download('GOOGL',start= '2018-01-01', interval = '1d')

[*********************100%***********************]  1 of 1 completed


In [4]:
data.shape

(1022, 6)

In [5]:
data.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,1053.02002,1075.97998,1053.02002,1073.209961,1073.209961,1588300
2018-01-03,1073.930054,1096.099976,1073.430054,1091.52002,1091.52002,1565900
2018-01-04,1097.089966,1104.079956,1094.26001,1095.76001,1095.76001,1302600


In [6]:
# Sort the rows by their date index so they are in chronological order
data.sort_index(inplace=True)

In [7]:
# Remove any duplicate index 
# This makes sure that there is only one record for every date
data = data.loc[~data.index.duplicated(keep='first')]

In [8]:
data.tail(3)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-01-19,2730.469971,2759.189941,2700.310059,2702.330078,2702.330078,1432400
2022-01-20,2725.0,2752.52002,2659.290039,2666.149902,2666.149902,1495400
2022-01-21,2651.870117,2697.310059,2601.72998,2607.030029,2607.030029,2774100


In [9]:
data.isnull().sum()

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [10]:
# The null counts above show that there are no missing values
data.describe()
#Get the statistics of the data

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,1022.0,1022.0,1022.0,1022.0,1022.0,1022.0
mean,1585.245205,1601.27262,1569.307195,1585.900636,1585.900636,1770152.0
std,596.859979,600.801706,592.205803,596.611791,596.611791,825334.7
min,984.320007,1012.119995,977.659973,984.669983,984.669983,465600.0
25%,1144.744965,1154.447479,1132.934998,1146.417542,1146.417542,1241050.0
50%,1300.38501,1310.765015,1293.844971,1302.974976,1302.974976,1556900.0
75%,1820.460022,1841.057465,1802.670044,1823.279999,1823.279999,2017975.0
max,2999.51001,3019.330078,2977.97998,2996.77002,2996.77002,6658900.0


In [11]:
import plotly.graph_objects as go

#Check the trend in Closing Values
fig = go.Figure()

fig.add_trace(go.Scatter(x=data.index , y = data['Close'], mode = 'lines'))
fig.update_layout(height = 500 , width = 900 , 
                                   xaxis_title = 'Date', yaxis_title = 'Close')
fig.show()

In [12]:
# Check the trend in traded Volume.
# Start a fresh figure: adding the trace to the `fig` built for the Close plot
# would draw Volume and Close on the same axis under a "Volume" label.
fig = go.Figure()

fig.add_trace(go.Scatter(x=data.index, y=data['Volume'], mode='lines'))
fig.update_layout(height=500, width=900,
                  xaxis_title='Date', yaxis_title='Volume')
fig.show()

In [13]:
from sklearn.preprocessing import MinMaxScaler
import pickle
from tqdm.notebook import tnrange

In [14]:
#Filter only required data
data = data[['Close' , 'Volume']]
data.head(3)

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-01-02,1073.209961,1588300
2018-01-03,1091.52002,1565900
2018-01-04,1095.76001,1302600


In [15]:
# Confirm the Testing Set length
test_length = data[(data.index >= '2021-09-01')].shape[0]

In [16]:
def CreateFeatures_and_Targets(data , feature_length):
    """Build sliding-window features and next-step Close targets.

    Window ``i`` holds rows ``i .. i + feature_length - 1`` of every column in
    ``data``; its target is the ``Close`` value of row ``i + feature_length``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a ``Close`` column; all columns are used as features.
    feature_length : int
        Number of consecutive rows in each window.

    Returns
    -------
    X : numpy.ndarray, shape (len(data) - feature_length, feature_length, n_columns)
    Y : numpy.ndarray, shape (len(data) - feature_length,)
        When ``data`` has no more than ``feature_length`` rows both arrays are
        empty, but ``X`` keeps its three dimensions so that downstream code
        indexing ``X.shape[2]`` still works.
    """
    # Pull the underlying arrays once instead of going through pandas
    # indexing on every iteration.
    values = data.values
    close = data['Close'].values
    n_windows = len(data) - feature_length

    if n_windows <= 0:
        return np.empty((0, feature_length, values.shape[1])), np.empty((0,))

    X = []
    Y = []
    for i in tnrange(n_windows):
        X.append(values[i:i + feature_length])
        Y.append(close[i + feature_length])

    return np.array(X), np.array(Y)

In [17]:
X , Y = CreateFeatures_and_Targets(data, 32)

  0%|          | 0/990 [00:00<?, ?it/s]

In [18]:
# Check the shapes 
X.shape , Y.shape

((990, 32, 2), (990,))

In [19]:
Xtrain , Xtest , Ytrain , Ytest = X[:-test_length] , X[-test_length:] , Y[:-test_length], Y[-test_length:]

In [20]:
#checking training dataset shape
Xtrain.shape , Ytrain.shape

((891, 32, 2), (891,))

In [21]:
# check testing dataset shape
Xtest.shape , Ytest.shape

((99, 32, 2), (99,))

In [22]:
# create a Scaler to scale Vectors with Multiple Dimensions 
class MultiDimensionScaler():
    """Min-max scaler for 3-D arrays indexed as ``X[sample, step, feature]``.

    One ``MinMaxScaler`` is kept per index ``i`` of the last axis and is fitted
    on the 2-D slice ``X[:, :, i]``, so every step position of a feature is
    scaled as its own column.
    """

    def __init__(self):
        # scalers[i] is the scaler fitted on X[:, :, i]
        self.scalers = []

    @staticmethod
    def _as_float_copy(X):
        """Return a floating-point copy of ``X`` so the caller's array is untouched."""
        X = np.asarray(X)
        # Writing scaled values back into an integer array would truncate them.
        dtype = X.dtype if np.issubdtype(X.dtype, np.floating) else np.float64
        return np.array(X, dtype=dtype)

    def fit_transform(self, X):
        """Fit one scaler per feature on ``X`` and return the scaled copy.

        Scalers from a previous fit are replaced, so the instance can be
        refitted without stale scalers being used by ``transform``.
        """
        scaled = self._as_float_copy(X)
        scalers = []
        for i in range(scaled.shape[2]):
            scaler = MinMaxScaler()
            scaled[:, :, i] = scaler.fit_transform(scaled[:, :, i])
            scalers.append(scaler)
        self.scalers = scalers
        return scaled

    def transform(self, X):
        """Scale ``X`` with the fitted scalers and return the scaled copy.

        Raises
        ------
        ValueError
            If the last axis of ``X`` does not match the number of fitted scalers.
        """
        scaled = self._as_float_copy(X)
        if scaled.shape[2] != len(self.scalers):
            raise ValueError(
                f"expected {len(self.scalers)} feature dimensions, got {scaled.shape[2]}"
            )
        for i, scaler in enumerate(self.scalers):
            scaled[:, :, i] = scaler.transform(scaled[:, :, i])
        return scaled

In [23]:
Feature_Scaler = MultiDimensionScaler()
Xtrain = Feature_Scaler.fit_transform(Xtrain)
Xtest = Feature_Scaler.transform(Xtest)

In [24]:
Target_Scaler = MinMaxScaler()
Ytrain = Target_Scaler.fit_transform(Ytrain.reshape(-1,1))
Ytest = Target_Scaler.transform(Ytest.reshape(-1,1))

In [25]:
def save_object(obj , name : str ):
    """Pickle ``obj`` into the file ``<name>.pck``.

    The file is opened in a ``with`` block so the handle is closed even when
    pickling raises.
    """
    with open(f"{name}.pck", "wb") as pickle_out:
        pickle.dump(obj, pickle_out)

def load_object(name:str):
    """Load and return the object pickled in ``<name>.pck``.

    The file is opened in a ``with`` block so the handle is closed after
    reading.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # that were written by this notebook or another trusted source.
    with open(f"{name}.pck", "rb") as pickle_in:
        return pickle.load(pickle_in)

In [26]:
# Save your objects for future purposes
#save_object(Feature_Scaler , "Feature_Scaler")
#save_object(Target_Scaler , "Target_Scaler")


Model Building

In [27]:
from tensorflow.keras.callbacks import ModelCheckpoint , ReduceLROnPlateau

save_best = ModelCheckpoint("best_weight.h5",monitor = 'val_loss' , save_best_only=True , save_weights_only = True)
reduce_lr = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.25 , patience=5 ,  min_lr = 0.00001,verbose = 1)


In [28]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , Dropout , LSTM , Bidirectional

model = Sequential()

# Declare the input shape on the model itself and derive it from the training
# data. An `input_shape` kwarg on the LSTM wrapped inside `Bidirectional`
# does not define the model's input.
model.add(tf.keras.Input(shape=Xtrain.shape[1:]))
model.add(Bidirectional(LSTM(512, return_sequences=True, recurrent_dropout=0.1)))
model.add(LSTM(256, recurrent_dropout=0.1))
model.add(Dropout(0.3))
model.add(Dense(64, activation='elu'))
model.add(Dropout(0.3))
model.add(Dense(32, activation='elu'))
model.add(Dense(1, activation='linear'))  # Final layer: one regression output

In [29]:
# Alternative (disabled): optimizer = tf.keras.optimizers.Adam(learning_rate=0.002)
# Plain SGD with an initial learning rate of 0.002; the model is compiled with
# mean-squared-error loss.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.002)
model.compile(loss='mse',optimizer=optimizer)

In [30]:
# Validate on the tail of the *training* windows rather than on the test set.
# The callbacks monitor `val_loss` to pick the saved weights and to lower the
# learning rate, so validating on (Xtest, Ytest) would tune the model on the
# data that is later used to evaluate it.
# With `validation_split`, Keras holds out the last fraction of the arrays
# before any shuffling, which keeps the split chronological.
history = model.fit(Xtrain, Ytrain,
                    epochs=10,
                    batch_size=1,
                    verbose=1,
                    shuffle=False,
                    validation_split=0.1,
                    callbacks=[reduce_lr, save_best])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [31]:
#load the best weights
model.load_weights("best_weight.h5")

In [32]:
Predictions = model.predict(Xtest)

In [34]:
Predictions = Target_Scaler.inverse_transform(Predictions)
Actual = Target_Scaler.inverse_transform(Ytest)

In [35]:
Predictions.shape

(99, 1)

In [36]:
Predictions = np.squeeze(Predictions , axis =1 )
Actual = np.squeeze(Actual , axis = 1)

In [37]:
# check the prediction vs Actual 
fig =  go.Figure()

fig.add_trace(go.Scatter(x = data.index[-test_length:] , y= Actual , mode = 'lines' , name='Actual'))
fig.add_trace(go.Scatter(x=data.index[-test_length:] , y =Predictions , mode = 'lines', name = 'Predicted' ))
fig.show()

In [38]:
Total_features = np.concatenate((Xtrain , Xtest) , axis=0)

In [39]:
Total_Targets = np.concatenate((Ytrain , Ytest) , axis=0) 

In [40]:
Predictions = model.predict(Total_features)

In [41]:
Predictions = Target_Scaler.inverse_transform(Predictions)
Actual = Target_Scaler.inverse_transform(Total_Targets)

In [42]:
Predictions = np.squeeze(Predictions , axis =1)
Actual = np.squeeze(Actual , axis = 1)

In [43]:
# Compare predicted vs. actual Close over the whole (train + test) range.
# Each target is the Close that follows a complete input window, so the first
# rows of `data` have no target. Use the last len(Actual) dates so that the
# x and y values line up.
target_dates = data.index[-len(Actual):]

fig = go.Figure()

fig.add_trace(go.Scatter(x=target_dates, y=Actual, mode='lines', name='Actual'))
fig.add_trace(go.Scatter(x=target_dates, y=Predictions, mode='lines', name='Predicted'))
fig.show()

In [44]:
import os
from getpass import getpass

import requests

# Read the Alpha Vantage key from the environment (or prompt for it) instead
# of embedding it in the notebook.
# NOTE(review): the key that was previously committed in this cell is public
# and should be rotated.
api_key = os.environ.get('ALPHAVANTAGE_API_KEY') or getpass('Alpha Vantage API key: ')

rsi_request = requests.get(
    'https://www.alphavantage.co/query',
    params={
        'function': 'RSI',
        'symbol': 'GOOGL',
        'interval': 'daily',
        'time_period': 5,
        'series_type': 'close',
        'apikey': api_key,
    },
    timeout=30,  # do not hang the notebook on a stalled connection
)
rsi_request.raise_for_status()
response = rsi_request.json()

# Fail here with the server's message if the payload lacks the RSI series,
# rather than with a KeyError in a later cell.
if 'Technical Analysis: RSI' not in response:
    raise RuntimeError(f"Alpha Vantage did not return RSI data: {response}")

In [45]:
response.keys()

dict_keys(['Meta Data', 'Technical Analysis: RSI'])

In [46]:
rsi_data  = pd.DataFrame.from_dict(response['Technical Analysis: RSI'], orient='index')


In [47]:
rsi_data.head()

Unnamed: 0,RSI
2004-08-26,72.7683
2004-08-27,64.2686
2004-08-30,47.8388
2004-08-31,49.249
2004-09-01,41.0745


In [48]:
rsi_data = rsi_data[rsi_data.index >= '2018-01-01']

In [49]:
# Build a new frame instead of assigning a column into the filtered frame,
# which triggers pandas' SettingWithCopyWarning.
rsi_data = rsi_data.assign(RSI=rsi_data['RSI'].astype(np.float64))

In [50]:
rsi_data.head()

Unnamed: 0,RSI
2018-01-02,65.431
2018-01-03,78.3686
2018-01-04,80.483
2018-01-05,86.2429
2018-01-08,87.4882


In [51]:
# The RSI frame is indexed by date *strings*, while the price frame shown
# above is indexed by dates. Convert explicitly so the join does not depend
# on pandas coercing one index type into the other.
# NOTE(review): assumes `data.index` is a timezone-naive DatetimeIndex — confirm.
rsi_aligned = rsi_data.copy()
rsi_aligned.index = pd.to_datetime(rsi_aligned.index)

# Drop any RSI column left by an earlier run of this cell so that re-running
# it does not produce RSI_x / RSI_y duplicates.
data = data.drop(columns=list(rsi_aligned.columns), errors='ignore').merge(
    rsi_aligned, left_index=True, right_index=True, how='inner')

In [52]:
data.head()

Unnamed: 0_level_0,Close,Volume,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-02,1073.209961,1588300,65.431
2018-01-03,1091.52002,1565900,78.3686
2018-01-04,1095.76001,1302600,80.483
2018-01-05,1110.290039,1512500,86.2429
2018-01-08,1114.209961,1232200,87.4882


Retraining Model

In [53]:
# Confirm the Testing Set length
test_length = data[(data.index >= '2021-09-01')].shape[0]

In [54]:
def CreateFeatures_and_Targets(data , feature_length):
    """Build sliding-window features and next-step Close targets.

    Window ``i`` holds rows ``i .. i + feature_length - 1`` of every column in
    ``data``; its target is the ``Close`` value of row ``i + feature_length``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a ``Close`` column; all columns are used as features.
    feature_length : int
        Number of consecutive rows in each window.

    Returns
    -------
    X : numpy.ndarray, shape (len(data) - feature_length, feature_length, n_columns)
    Y : numpy.ndarray, shape (len(data) - feature_length,)
        When ``data`` has no more than ``feature_length`` rows both arrays are
        empty, but ``X`` keeps its three dimensions so that downstream code
        indexing ``X.shape[2]`` still works.
    """
    # Pull the underlying arrays once instead of going through pandas
    # indexing on every iteration.
    values = data.values
    close = data['Close'].values
    n_windows = len(data) - feature_length

    if n_windows <= 0:
        return np.empty((0, feature_length, values.shape[1])), np.empty((0,))

    X = []
    Y = []
    for i in tnrange(n_windows):
        X.append(values[i:i + feature_length])
        Y.append(close[i + feature_length])

    return np.array(X), np.array(Y)

In [55]:
X , Y = CreateFeatures_and_Targets(data, 32)

  0%|          | 0/990 [00:00<?, ?it/s]

In [56]:
# Check the shapes 
X.shape , Y.shape

((990, 32, 3), (990,))

In [57]:
Xtrain , Xtest , Ytrain , Ytest = X[:-test_length] , X[-test_length:] , Y[:-test_length], Y[-test_length:]

In [58]:
#checking training dataset shape
Xtrain.shape , Ytrain.shape

((891, 32, 3), (891,))

In [59]:
Xtest.shape , Ytest.shape 

((99, 32, 3), (99,))

In [60]:
# create a Scaler to scale Vectors with Multiple Dimensions 
class MultiDimensionScaler():
    """Min-max scaler for 3-D arrays indexed as ``X[sample, step, feature]``.

    One ``MinMaxScaler`` is kept per index ``i`` of the last axis and is fitted
    on the 2-D slice ``X[:, :, i]``, so every step position of a feature is
    scaled as its own column.
    """

    def __init__(self):
        # scalers[i] is the scaler fitted on X[:, :, i]
        self.scalers = []

    @staticmethod
    def _as_float_copy(X):
        """Return a floating-point copy of ``X`` so the caller's array is untouched."""
        X = np.asarray(X)
        # Writing scaled values back into an integer array would truncate them.
        dtype = X.dtype if np.issubdtype(X.dtype, np.floating) else np.float64
        return np.array(X, dtype=dtype)

    def fit_transform(self, X):
        """Fit one scaler per feature on ``X`` and return the scaled copy.

        Scalers from a previous fit are replaced, so the instance can be
        refitted without stale scalers being used by ``transform``.
        """
        scaled = self._as_float_copy(X)
        scalers = []
        for i in range(scaled.shape[2]):
            scaler = MinMaxScaler()
            scaled[:, :, i] = scaler.fit_transform(scaled[:, :, i])
            scalers.append(scaler)
        self.scalers = scalers
        return scaled

    def transform(self, X):
        """Scale ``X`` with the fitted scalers and return the scaled copy.

        Raises
        ------
        ValueError
            If the last axis of ``X`` does not match the number of fitted scalers.
        """
        scaled = self._as_float_copy(X)
        if scaled.shape[2] != len(self.scalers):
            raise ValueError(
                f"expected {len(self.scalers)} feature dimensions, got {scaled.shape[2]}"
            )
        for i, scaler in enumerate(self.scalers):
            scaled[:, :, i] = scaler.transform(scaled[:, :, i])
        return scaled

In [61]:
Feature_Scaler = MultiDimensionScaler()
Xtrain = Feature_Scaler.fit_transform(Xtrain)
Xtest = Feature_Scaler.transform(Xtest)

In [62]:
Target_Scaler = MinMaxScaler()
Ytrain = Target_Scaler.fit_transform(Ytrain.reshape(-1,1))
Ytest = Target_Scaler.transform(Ytest.reshape(-1,1))

In [63]:
def save_object(obj , name : str ):
    """Pickle ``obj`` into the file ``<name>.pck``.

    The file is opened in a ``with`` block so the handle is closed even when
    pickling raises.
    """
    with open(f"{name}.pck", "wb") as pickle_out:
        pickle.dump(obj, pickle_out)

def load_object(name:str):
    """Load and return the object pickled in ``<name>.pck``.

    The file is opened in a ``with`` block so the handle is closed after
    reading.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # that were written by this notebook or another trusted source.
    with open(f"{name}.pck", "rb") as pickle_in:
        return pickle.load(pickle_in)

In [64]:
# Save your objects for future purposes
#save_object(Feature_Scaler , "Feature_Scaler")
#save_object(Target_Scaler , "Target_Scaler")


In [65]:
from tensorflow.keras.callbacks import ModelCheckpoint , ReduceLROnPlateau

save_best = ModelCheckpoint("best_weight.h5",monitor = 'val_loss' , save_best_only=True , save_weights_only = True)
reduce_lr = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.25 , patience=5 ,  min_lr = 0.00001,verbose = 1)


In [66]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , Dropout , LSTM , Bidirectional

model = Sequential()

# Declare the input shape on the model itself and derive it from the training
# data. The retraining set has three features (Close, Volume, RSI), so the
# previously hard-coded (32, 2) did not describe it; that value sat on the
# LSTM wrapped inside `Bidirectional`, where it did not define the model's
# input, and the mismatch went unnoticed.
model.add(tf.keras.Input(shape=Xtrain.shape[1:]))
model.add(Bidirectional(LSTM(512, return_sequences=True, recurrent_dropout=0.1)))
model.add(LSTM(256, recurrent_dropout=0.1))
model.add(Dropout(0.3))
model.add(Dense(64, activation='elu'))
model.add(Dropout(0.3))
model.add(Dense(32, activation='elu'))
model.add(Dense(1, activation='linear'))  # Final layer: one regression output

In [67]:
# Alternative (disabled): optimizer = tf.keras.optimizers.Adam(learning_rate=0.002)
# Plain SGD with an initial learning rate of 0.002; the model is compiled with
# mean-squared-error loss.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.002)
model.compile(loss='mse',optimizer=optimizer)

In [68]:
# Validate on the tail of the *training* windows rather than on the test set.
# The callbacks monitor `val_loss` to pick the saved weights and to lower the
# learning rate, so validating on (Xtest, Ytest) would tune the model on the
# data that is later used to evaluate it.
# With `validation_split`, Keras holds out the last fraction of the arrays
# before any shuffling, which keeps the split chronological.
history = model.fit(Xtrain, Ytrain,
                    epochs=10,
                    batch_size=1,
                    verbose=1,
                    shuffle=False,
                    validation_split=0.1,
                    callbacks=[reduce_lr, save_best])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [69]:
#load the best weights
model.load_weights("best_weight.h5")

In [70]:
Predictions = model.predict(Xtest)

In [71]:
Predictions = Target_Scaler.inverse_transform(Predictions)
Actual = Target_Scaler.inverse_transform(Ytest)

In [72]:
Predictions.shape

(99, 1)

In [73]:
Predictions = np.squeeze(Predictions , axis =1 )
Actual = np.squeeze(Actual , axis = 1)

In [74]:
# check the prediction vs Actual 
fig =  go.Figure()

fig.add_trace(go.Scatter(x = data.index[-test_length:] , y= Actual , mode = 'lines' , name='Actual'))
fig.add_trace(go.Scatter(x=data.index[-test_length:] , y =Predictions , mode = 'lines', name = 'Predicted' ))
fig.show()

In [75]:
Total_features = np.concatenate((Xtrain , Xtest) , axis=0)

In [76]:
Total_Targets = np.concatenate((Ytrain , Ytest) , axis=0) 

In [77]:
Predictions = model.predict(Total_features)

In [78]:
Predictions = Target_Scaler.inverse_transform(Predictions)
Actual = Target_Scaler.inverse_transform(Total_Targets)

In [79]:
Predictions = np.squeeze(Predictions , axis =1)
Actual = np.squeeze(Actual , axis = 1)

In [80]:
# Compare predicted vs. actual Close over the whole (train + test) range.
# Each target is the Close that follows a complete input window, so the first
# rows of `data` have no target. Use the last len(Actual) dates so that the
# x and y values line up.
target_dates = data.index[-len(Actual):]

fig = go.Figure()

fig.add_trace(go.Scatter(x=target_dates, y=Actual, mode='lines', name='Actual'))
fig.add_trace(go.Scatter(x=target_dates, y=Predictions, mode='lines', name='Predicted'))
fig.show()