In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### **Train an LSTM model for stock recommendation**

In [6]:
# Importing neccessary libraries.

import pandas as pd
import numpy as np
from collections import deque
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import random
import warnings
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,LSTM,Dropout,BatchNormalization,Flatten

warnings.filterwarnings("ignore")

In [7]:
# Initialize constants

# Timestep is the number of previous days to consider for predicting next days price.
# Essentially Timestep = No. of features for the algorithm
timestep = 30

# future_predict is the value to be predicted, eg: value after 1 day, after 2 days,etc.

future_predict = 1

epochs = 10
batch_size = 128

##### **Data preparation**

In [8]:
dataset = pd.read_csv("/content/drive/MyDrive/RELIANCE.csv")
dataset.head()

Unnamed: 0,Date,Symbol,Series,Prev Close,Open,High,Low,Last,Close,VWAP,Volume,Turnover,Trades,Deliverable Volume,%Deliverble
0,2000-01-03,RELIANCE,EQ,233.05,237.5,251.7,237.5,251.7,251.7,249.37,4456424,111131900000000.0,,,
1,2000-01-04,RELIANCE,EQ,251.7,258.4,271.85,251.3,271.85,271.85,263.52,9487878,250022200000000.0,,,
2,2000-01-05,RELIANCE,EQ,271.85,256.65,287.9,256.65,286.75,282.5,274.79,26833684,737369700000000.0,,,
3,2000-01-06,RELIANCE,EQ,282.5,289.0,300.7,289.0,293.5,294.35,295.45,15682286,463325400000000.0,,,
4,2000-01-07,RELIANCE,EQ,294.35,295.0,317.9,293.0,314.5,314.55,308.91,19870977,613838800000000.0,,,


In [9]:
# Will predict only the "Close" price.

data = dataset.filter(["Close"], axis = 1)

data.shape

(5075, 1)

In [10]:
# Taking only the last 30 days.

data = data[-30:]
data.shape

(30, 1)

In [11]:
def predict(current, future):
    if float(future) > float(current):
        return 1
    else:
        return 0

def preprocess(df):
    df.drop("Future",inplace = True,axis = 1)
    df["Close"] = df["Close"].pct_change()
    df.dropna(inplace = True)
    df["Close"] = preprocessing.scale(df["Close"])
    seq_data = []
    prev_days = deque(maxlen = timestep)
    for i in tqdm(df.values):
        prev_days.append([n for n in i[:-1]])
        if len(prev_days) == timestep:
            seq_data.append([np.array(prev_days),i[-1]])
        random.shuffle(seq_data)
        X =[]
        y = []
        for seq,target in seq_data:
            X.append(seq)
            y.append(target)
    return np.array(X),np.array(y)

In [12]:
# Creating a column with future day value.
data["Future"] = data["Close"].shift(-future_predict)

data["Target"] = list(map(predict, data["Close"], data["Future"]))

data.head()

Unnamed: 0,Close,Future,Target
5045,1168.05,1224.0,1
5046,1224.0,1243.8,1
5047,1243.8,1237.35,0
5048,1237.35,1363.6,1
5049,1363.6,1370.9,1


In [13]:
data, data_test = train_test_split(data,test_size = 0.10,shuffle = False)

data

Unnamed: 0,Close,Future,Target
5045,1168.05,1224.0,1
5046,1224.0,1243.8,1
5047,1243.8,1237.35,0
5048,1237.35,1363.6,1
5049,1363.6,1370.9,1
5050,1370.9,1417.0,1
5051,1417.0,1429.75,1
5052,1429.75,1428.15,0
5053,1428.15,1426.95,0
5054,1426.95,1466.0,1


In [14]:
X_train,y_train = preprocess(data)
X_test,y_test = preprocess(data_test)

100%|██████████| 26/26 [00:00<00:00, 32446.27it/s]
100%|██████████| 2/2 [00:00<00:00, 4253.86it/s]


##### **Model architecture and training**

In [None]:
model = Sequential()

model.add(LSTM(128,input_shape = (X_train.shape[1:]),return_sequences = True,activation = "tanh")) 
model.add(Dropout(0.2))
model.add(BatchNormalization())


model.add(LSTM(128,activation = "tanh",return_sequences = True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128,activation = "tanh"))
model.add(Dropout(0.2))
model.add(BatchNormalization())


model.add(Dense(32,activation = "relu"))
model.add(Dropout(0.2))

model.add(Dense(2,activation = "softmax"))

opt = tf.keras.optimizers.Adam(lr=0.001,decay = 1e-6)

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 30, 128)           66560     
_________________________________________________________________
dropout (Dropout)            (None, 30, 128)           0         
_________________________________________________________________
batch_normalization (BatchNo (None, 30, 128)           512       
_________________________________________________________________
lstm_1 (LSTM)                (None, 30, 128)           131584    
_________________________________________________________________
dropout_1 (Dropout)          (None, 30, 128)           0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 30, 128)           512       
_________________________________________________________________
lstm_2 (LSTM)                (None, 128)               1

In [None]:
model.compile(loss = "sparse_categorical_crossentropy",optimizer= opt ,metrics = "accuracy")


In [None]:
model.fit(X_train,y_train,validation_data = (X_test,y_test),epochs = epochs,batch_size = batch_size)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x239bbc51850>

In [None]:
model.save("LSTM_stock")



INFO:tensorflow:Assets written to: LSTM_stock\assets


INFO:tensorflow:Assets written to: LSTM_stock\assets
