In [1]:
!pip install yfinance

Collecting yfinance
  Downloading yfinance-0.1.63.tar.gz (26 kB)
Collecting lxml>=4.5.1
  Downloading lxml-4.6.3-cp37-cp37m-manylinux2014_x86_64.whl (6.3 MB)
[K     |████████████████████████████████| 6.3 MB 10.1 MB/s 
Building wheels for collected packages: yfinance
  Building wheel for yfinance (setup.py) ... [?25l[?25hdone
  Created wheel for yfinance: filename=yfinance-0.1.63-py2.py3-none-any.whl size=23919 sha256=8aebea4b8969a34d3a72623cb6d3ec03a1db8f90274e6209bfb2f867d2e1f7dd
  Stored in directory: /root/.cache/pip/wheels/fe/87/8b/7ec24486e001d3926537f5f7801f57a74d181be25b11157983
Successfully built yfinance
Installing collected packages: lxml, yfinance
  Attempting uninstall: lxml
    Found existing installation: lxml 4.2.6
    Uninstalling lxml-4.2.6:
      Successfully uninstalled lxml-4.2.6
Successfully installed lxml-4.6.3 yfinance-0.1.63


Loading main data

In [2]:
import yfinance as yf
import numpy as np
import pandas as pd
import tensorflow as tf

In [3]:
data = yf.download("AMZN" , start = "2019-01-01" , interval = '1d')

# interval = '1d' will each(1) day data per row

[*********************100%***********************]  1 of 1 completed


In [4]:
data.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-01-02,1465.199951,1553.359985,1460.930054,1539.130005,1539.130005,7983100
2019-01-03,1520.01001,1538.0,1497.109985,1500.280029,1500.280029,6975600
2019-01-04,1530.0,1594.0,1518.310059,1575.390015,1575.390015,9182600


In [5]:
# Sort the data points based on indexes just for confirmation 
data.sort_index(inplace = True)

In [6]:
# Remove any duplicate index 
data = data.loc[~data.index.duplicated(keep='first')]

In [7]:
data.tail(3)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-07-21,3576.379883,3586.449951,3543.639893,3585.199951,3585.199951,2305400
2021-07-22,3587.22998,3640.02002,3582.27002,3638.030029,3638.030029,3259600
2021-07-23,3640.0,3665.98999,3622.040039,3656.639893,3656.639893,2436292


In [8]:
data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-01-02,1465.199951,1553.359985,1460.930054,1539.130005,1539.130005,7983100
2019-01-03,1520.01001,1538.0,1497.109985,1500.280029,1500.280029,6975600
2019-01-04,1530.0,1594.0,1518.310059,1575.390015,1575.390015,9182600
2019-01-07,1602.310059,1634.560059,1589.189941,1629.51001,1629.51001,7993200
2019-01-08,1664.689941,1676.609985,1616.609985,1656.579956,1656.579956,8881400


In [9]:
# Check for missing values 
data.isnull().sum()

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [10]:
# Get the statistics of the data
data.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,645.0,645.0,645.0,645.0,645.0,645.0
mean,2465.29901,2491.890958,2436.675255,2465.084678,2465.084678,4218961.0
std,689.78885,698.284233,678.67883,687.539185,687.539185,1777865.0
min,1465.199951,1538.0,1460.930054,1500.280029,1500.280029,881300.0
25%,1814.630005,1829.469971,1800.790039,1817.459961,1817.459961,2974100.0
50%,2200.469971,2292.0,2186.209961,2283.320068,2283.320068,3759100.0
75%,3181.01001,3208.540039,3135.26001,3175.110107,3175.110107,5056200.0
max,3744.0,3773.080078,3696.790039,3731.409912,3731.409912,15567300.0


Understanding Trends with in the Data

In [11]:
import plotly.graph_objects as go

In [12]:
# Check the trend in Closing Values 
fig = go.Figure()

fig.add_trace(go.Scatter(x = data.index , y = data['Close'] , mode = 'lines'))
fig.update_layout(height = 500 , width = 900, 
                  xaxis_title='Date' , yaxis_title='Close')
fig.show()

In [13]:
# Check the trend in Volume Traded
fig = go.Figure()

fig.add_trace(go.Scatter(x = data.index , y = data['Volume'] , mode = 'lines'))
fig.update_layout(height = 500 , width = 900, 
                  xaxis_title='Date' , yaxis_title='Volume')
fig.show()

Data Preparation

In [14]:
from sklearn.preprocessing import MinMaxScaler 
import pickle 
from tqdm.notebook import tnrange

In [15]:
# Filter only required data 
data = data[['Close' , 'Volume']]
data.head(3)

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-01-02,1539.130005,7983100
2019-01-03,1500.280029,6975600
2019-01-04,1575.390015,9182600


Scrapping extra information

In [16]:
import requests 

# RSI value for AMZN stock in daily interval
response = requests.get('https://www.alphavantage.co/query?function=RSI&symbol=AMZN&interval=daily&time_period=5&series_type=close&apikey=43T9T17VCV2ME4SM') 
response = response.json()  # converting data format in JSON

In [17]:
response.keys()

dict_keys(['Meta Data', 'Technical Analysis: RSI'])

In [18]:
rsi_data = pd.DataFrame.from_dict(response['Technical Analysis: RSI'] , orient='index')

In [19]:
rsi_data.head()

Unnamed: 0,RSI
1999-11-08,71.109
1999-11-09,49.8042
1999-11-10,52.7339
1999-11-11,55.4644
1999-11-12,60.9366


In [20]:
rsi_data = rsi_data[rsi_data.index >= '2018-01-01']

In [21]:
rsi_data['RSI'] = rsi_data['RSI'].astype(np.float64)  # ESI indicator should be a floating type value so .astype(np.float64) used here

In [22]:
rsi_data.head()

Unnamed: 0,RSI
2018-01-02,62.1117
2018-01-03,72.182
2018-01-04,75.1156
2018-01-05,83.165
2018-01-08,87.682


In [23]:
data = data.merge(rsi_data, left_index=True, right_index=True, how='inner')

In [24]:
data.head()

Unnamed: 0,Close,Volume,RSI
2019-01-02,1539.130005,7983100,63.9016
2019-01-03,1500.280029,6975600,51.4199
2019-01-04,1575.390015,9182600,66.9981
2019-01-07,1629.51001,7993200,74.3938
2019-01-08,1656.579956,8881400,77.5406


In [25]:
# Confirm the Testing Set length 
test_length = data[(data.index >= '2021-03-01')].shape[0]

In [26]:
def CreateFeatures_and_Targets(data, feature_length):
    X = []
    Y = []

    for i in tnrange(len(data) - feature_length): 
        X.append(data.iloc[i : i + feature_length,:].values)
        Y.append(data["Close"].values[i+feature_length])

    X = np.array(X)
    Y = np.array(Y)

    return X , Y

In [27]:
X , Y = CreateFeatures_and_Targets(data , 32)

HBox(children=(FloatProgress(value=0.0, max=613.0), HTML(value='')))




In [28]:
# Check the shapes
X.shape , Y.shape

((613, 32, 3), (613,))

In [29]:
Xtrain , Xtest , Ytrain , Ytest = X[:-test_length] , X[-test_length:] , Y[:-test_length] , Y[-test_length:]

In [30]:
# Check Training Dataset Shape 
Xtrain.shape , Ytrain.shape

((511, 32, 3), (511,))

In [31]:
# Check Testing Dataset Shape
Xtest.shape , Ytest.shape

((102, 32, 3), (102,))

In [32]:
# Create a Scaler to Scale Vectors with Multiple Dimensions 
class MultiDimensionScaler():
    def __init__(self):
        self.scalers = []

    def fit_transform(self , X):
        total_dims = X.shape[2]
        for i in range(total_dims):
            Scaler = MinMaxScaler()
            X[:, :, i] = Scaler.fit_transform(X[:,:,i])
            self.scalers.append(Scaler)
        return X

    def transform(self , X):
        for i in range(X.shape[2]):
            X[:, :, i] = self.scalers[i].transform(X[:,:,i])
        return X 

In [33]:
Feature_Scaler = MultiDimensionScaler()
Xtrain = Feature_Scaler.fit_transform(Xtrain)
Xtest = Feature_Scaler.transform(Xtest)

In [34]:
Target_Scaler = MinMaxScaler()
Ytrain = Target_Scaler.fit_transform(Ytrain.reshape(-1,1))
Ytest = Target_Scaler.transform(Ytest.reshape(-1,1))

In [35]:
def save_object(obj , name : str):
    pickle_out = open(f"{name}.pck","wb")
    pickle.dump(obj, pickle_out)
    pickle_out.close()

def load_object(name : str):
    pickle_in = open(f"{name}.pck","rb")
    data = pickle.load(pickle_in)
    return data

In [36]:
# Save your objects for future purposes 
save_object(Feature_Scaler , "Feature_Scaler")
save_object(Target_Scaler , "Target_Scaler")

Model Building

In [37]:
from tensorflow.keras.callbacks import ModelCheckpoint , ReduceLROnPlateau

save_best = ModelCheckpoint("best_weights.h5", monitor='val_loss', save_best_only=True, save_weights_only=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.25,patience=5, min_lr=0.00001,verbose = 1)

In [38]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , Dropout , LSTM , Bidirectional , BatchNormalization

model = Sequential()

model.add(Bidirectional(LSTM(512 ,return_sequences=True , recurrent_dropout=0.1, input_shape=(32, 3))))
model.add(LSTM(256 ,recurrent_dropout=0.1))
model.add(Dropout(0.3))
model.add(Dense(64 , activation='elu'))
model.add(Dropout(0.3))
model.add(Dense(32 , activation='elu'))
model.add(Dense(1 , activation='linear'))



In [39]:
optimizer = tf.keras.optimizers.SGD(learning_rate = 0.002)
model.compile(loss='mse', optimizer=optimizer)

In [40]:
history = model.fit(Xtrain, Ytrain,
            epochs=10,
            batch_size = 1,
            verbose=1,
            shuffle=False ,
            validation_data=(Xtest , Ytest),
            callbacks=[reduce_lr , save_best])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [41]:
# Load the best weights
model.load_weights("best_weights.h5")

Visualize prediction on Test Set

In [42]:
Predictions = model.predict(Xtest)

In [43]:
Predictions = Target_Scaler.inverse_transform(Predictions)
Actual = Target_Scaler.inverse_transform(Ytest)

In [44]:
Predictions.shape

(102, 1)

In [45]:
Predictions = np.squeeze(Predictions , axis = 1)
Actual = np.squeeze(Actual , axis = 1)

In [46]:
# Check the Predictions vs Actual
fig = go.Figure()

fig.add_trace(go.Scatter(x = data.index[-test_length:] , y = Actual , mode = 'lines' , name='Actual'))
fig.add_trace(go.Scatter(x = data.index[-test_length:] , y = Predictions , mode = 'lines' , name='Predicted'))
fig.show()

Visualize Prediction on whole data

In [47]:
Total_features = np.concatenate((Xtrain , Xtest) , axis = 0)

In [48]:
Total_Targets = np.concatenate((Ytrain , Ytest) , axis = 0)

In [49]:
Predictions = model.predict(Total_features)

In [50]:
Predictions = Target_Scaler.inverse_transform(Predictions)
Actual = Target_Scaler.inverse_transform(Total_Targets)

In [51]:
Predictions = np.squeeze(Predictions , axis = 1)
Actual = np.squeeze(Actual , axis = 1)

In [52]:
# Check the trend in Volume Traded
fig = go.Figure()

fig.add_trace(go.Scatter(x = data.index , y = Actual , mode = 'lines' , name='Actual'))
fig.add_trace(go.Scatter(x = data.index , y = Predictions , mode = 'lines' , name='Predicted'))
fig.show()

In [53]:
# Save and Load the whole model
model.save("Model.h5")
loaded_model = tf.keras.models.load_model("Model.h5")



Realtime Prediction

In [54]:
def PredictStockPrice(Model , DataFrame , PreviousDate , feature_length = 32):
    idx_location = DataFrame.index.get_loc(PreviousDate)
    Features = DataFrame.iloc[idx_location - feature_length : idx_location,:].values
    Features = np.expand_dims(Features , axis = 0)
    Features = Feature_Scaler.transform(Features)
    Prediction = Model.predict(Features)
    Prediction = Target_Scaler.inverse_transform(Prediction)
    return Prediction[0][0]

In [55]:
PredictStockPrice(loaded_model , data , '2021-08-1')

3211.026