In [53]:
#Importing the libraries
from nsepy import get_history as stock_hist
import datetime as dt
from matplotlib import pyplot as plt
from sklearn import model_selection
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
import json
import plotly.express as px

In [54]:
def read_json(file_path):
    with open(file_path, "r") as f:
        return json.load(f)

In [55]:
def date_parser(dt):
    dt = dt.split(',')
    year = int(dt[0])
    month = int(dt[1])
    day = int(dt[2])
    return year, month, day

In [56]:
config_file = read_json('config.json')
# print (config_file['symbol'])
# print (config_file['train_start_date'])
# print (config_file['train_end_date'])
# print (config_file['test_start_date'])
# print (config_file['test_end_date'])

In [57]:
symbol = config_file['symbol']

y, m, d = date_parser(config_file['train_start_date'])
train_start_date = dt.datetime(y,m,d)

y, m, d = date_parser(config_file['train_end_date'])
train_end_date = dt.datetime(y,m,d)

y, m, d = date_parser(config_file['test_start_date'])
test_start_date = dt.datetime(y,m,d)

y, m, d = date_parser(config_file['test_end_date'])
test_end_date = dt.datetime(y,m,d)

n_step = int(config_file['n_step']) #number of step or values for feature

date_col = config_file['date_col'] # date column

feature_col_1 = config_file['feature_col_1'] # Open column

optimizer = config_file['optimizer'] #adam

loss = config_file['loss'] #mean_squared_error

epochs = int(config_file['epochs']) #15

batch_size = int(config_file['batch_size']) #32

In [58]:
#Setting start and end dates and fetching the historical data

# stk_data = stock_hist(symbol=symbol,start=train_start_date,end=train_end_date)
stk_data = pd.read_csv('SBIN_dataset.csv')

In [59]:
stk_data.head()

Unnamed: 0,Date,Symbol,Series,Prev Close,Open,High,Low,Last,Close,VWAP,Volume,Turnover,Trades,Deliverable Volume,%Deliverble
0,2015-01-01,SBIN,EQ,311.85,312.45,315.0,310.7,314.0,314.0,313.67,6138488,192548900000000.0,58688,1877677,0.3059
1,2015-01-02,SBIN,EQ,314.0,314.35,318.3,314.35,315.6,315.25,316.8,9935094,314738900000000.0,79553,4221685,0.4249
2,2015-01-05,SBIN,EQ,315.25,316.25,316.8,312.1,312.8,312.75,313.84,9136716,286743200000000.0,88236,3845173,0.4208
3,2015-01-06,SBIN,EQ,312.75,310.0,311.1,298.7,299.9,299.9,305.14,15329257,467760100000000.0,169268,7424847,0.4844
4,2015-01-07,SBIN,EQ,299.9,300.0,302.55,295.15,301.4,300.15,299.95,15046745,451324300000000.0,147185,5631400,0.3743


In [60]:
stk_data.tail()

Unnamed: 0,Date,Symbol,Series,Prev Close,Open,High,Low,Last,Close,VWAP,Volume,Turnover,Trades,Deliverable Volume,%Deliverble
1732,2021-12-30,SBIN,EQ,454.4,452.75,454.9,448.5,453.05,451.7,451.71,28879190,1304510000000000.0,198099,14825094,0.5133
1733,2021-12-31,SBIN,EQ,451.7,454.25,461.4,453.1,460.2,460.45,459.07,10005842,459337200000000.0,128788,3150588,0.3149
1734,2022-01-03,SBIN,EQ,460.45,462.0,472.0,460.1,471.5,470.8,467.17,13124509,613138800000000.0,168001,4183151,0.3187
1735,2022-01-04,SBIN,EQ,470.8,472.5,484.7,471.05,483.75,483.5,479.89,23296671,1117985000000000.0,246431,7667089,0.3291
1736,2022-01-05,SBIN,EQ,483.5,481.9,495.0,479.15,491.7,492.4,489.5,24694169,1208792000000000.0,240463,8282621,0.3354


In [61]:
#Visualizing the fetched data
fig = px.line(stk_data, x='Date', y=["Open"])
fig.show()

In [62]:
#Data Preprocessing
stk_data['Date'] = stk_data.index
data2 = pd.DataFrame(columns = ['Date', 'Open'])
data2['Date'] = stk_data['Date']
data2['Open'] = stk_data['Open']


#Handling null values
data2.fillna(method='ffill', inplace=True)

train_set = data2.iloc[:, 1:2].values

sc = MinMaxScaler(feature_range = (0, 1))
training_set_scaled = sc.fit_transform(train_set)
X_train = []
y_train = []
for i in range(n_step, len(train_set)):
    X_train.append(training_set_scaled[i-n_step:i, 0])
    y_train.append(training_set_scaled[i, 0]) 
X_train, y_train = np.array(X_train), np.array(y_train)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

In [63]:
X_train.shape

(1677, 60, 1)

In [64]:
model = Sequential()
model.add(LSTM(units = 50, return_sequences = True, input_shape = (X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units = 50, return_sequences = True))
model.add(Dropout(0.2))
model.add(LSTM(units = 50, return_sequences = True))
model.add(Dropout(0.2))
model.add(LSTM(units = 50))
model.add(Dropout(0.2))
model.add(Dense(units = 1))

In [65]:
model.compile(optimizer = optimizer, loss = loss)
model.fit(X_train, y_train, epochs = epochs, batch_size = batch_size)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7fc7635e4e90>

In [66]:
testdataframe= stock_hist(symbol=symbol,start=test_start_date,end=test_end_date)
testdataframe['Date'] = testdataframe.index
testdata = pd.DataFrame(columns = ['Date', 'Open'])
testdata['Date'] = testdataframe['Date']
testdata['Open'] = testdataframe['Open']

#Handling null values
testdata.fillna(method='ffill', inplace=True)
testdata.shape

(251, 2)

In [67]:
real_stock_price = testdata.iloc[:, 1:2].values
dataset_total = pd.concat((data2['Open'], testdata['Open']), axis = 0)
inputs = dataset_total[len(dataset_total) - len(testdata) - 60:].values
inputs = inputs.reshape(-1,1)
inputs = sc.transform(inputs)
X_test = []
for i in range(n_step, len(inputs)):
    X_test.append(inputs[i-n_step:i, 0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [68]:
X_test.shape

(251, 60, 1)

In [69]:
predicted_stock_price = model.predict(X_test)
print (predicted_stock_price.shape)

(251, 1)


In [70]:
predicted_stock_price = sc.inverse_transform(predicted_stock_price)

In [71]:
concat_df = pd.DataFrame(columns = ['Date', 'Predicted','Open'])
concat_df['Date'] = testdata.index
#unwrapping list data to numbers
predicted_numbers =[item[0] for item in predicted_stock_price.tolist()]

concat_df['Predicted'] = predicted_numbers

concat_df['Open'] = real_stock_price

concat_df.head()

Unnamed: 0,Date,Predicted,Open
0,2021-01-01,449.224579,274.9
1,2021-01-04,445.848541,281.85
2,2021-01-05,432.303558,278.05
3,2021-01-06,409.30835,283.0
4,2021-01-07,380.282257,289.0


In [72]:
concat_df.tail()
# concat_df.to_csv('predicted_dataset.csv')


Unnamed: 0,Date,Predicted,Open
246,2021-12-30,444.962891,452.75
247,2021-12-31,443.897888,454.25
248,2022-01-03,443.718353,462.0
249,2022-01-04,444.346832,472.5
250,2022-01-05,446.065033,481.9


In [73]:
fig = px.line(concat_df, x='Date', y=["Open",'Predicted'], template = 'plotly_dark')
# add a vertical rectange for test-set separation
# fig.add_vrect(x0="2020-01-01", x1="2022-01-05", fillcolor="grey", opacity=0.25, line_width=0)
fig.show()