Stock price prediction using LSTM

Import the libraries used to collect and transform the data

In [1]:
import pandas as pd
import yfinance as yf
import datetime
from datetime import date, timedelta
# Snapshot of the current date; the download window in the next cell is derived from it.
today = date.today()


Download and Transform the data

In [2]:
# Build the download window: the last 5000 days (roughly 13.7 years) up to today.
# Both bounds are derived from the same `today` snapshot so they cannot drift
# apart, and both are formatted as YYYY-MM-DD strings.
d1 = today.strftime("%Y-%m-%d")
end_date = d1
d2 = (today - timedelta(days=5000)).strftime("%Y-%m-%d")
start_date = d2

In [13]:
# Download the ETH-USD history for the window defined above.
# The window has to run from start_date to end_date; using end_date for both
# bounds asks for a zero-length range.
df = yf.download(tickers='ETH-USD', start=start_date, end=end_date, progress=False)
# Expose the index as a regular 'Date' column as well.
df['Date'] = df.index

In [16]:
# Sanity check: confirm what kind of object the download returned.
type(df)

pandas.core.frame.DataFrame

In [14]:
# Symbols to model; a symbol's position here is also its position in `datasets`.
tickers = ['BTC-USD', 'ETH-USD', 'USDT-USD', 'BNB-USD', 'USDC-USD']
price_columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
datasets = []

# Build one frame per ticker, in the same order as `tickers`.
for ticker in tickers:
    raw = yf.download(tickers=ticker, start=start_date, end=end_date, progress=False)
    # Copy the index into a 'Date' column, keep only the price columns in a
    # fixed order, then replace the index with a plain 0..n-1 range.
    df = raw.assign(Date=raw.index)[price_columns].reset_index(drop=True)
    datasets.append(df)

In [21]:

# Peek at the most recent rows of the second dataset (index 1 is 'ETH-USD' in `tickers`).
datasets[1].tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
1611,2022-04-08,3233.272461,3301.607422,3179.142334,3192.073975,3192.073975,17557050669
1612,2022-04-09,3191.976074,3261.963135,3187.469238,3261.91626,3261.91626,9908112156
1613,2022-04-10,3261.291504,3303.003174,3211.866943,3211.866943,3211.866943,10427054790
1614,2022-04-11,3209.576904,3214.461914,2962.756592,2981.052246,2981.052246,21891804831
1615,2022-04-12,2981.420654,3077.452637,2957.872314,3030.376465,3030.376465,20235707410


Create a Candlestick chart to see the increase and decrease of the stock price

In [None]:
# Candlestick chart to get a better view of the rises and falls of the price.
import plotly.graph_objects as go

# Pick the ticker to plot explicitly instead of relying on whatever `df` was
# left behind by an earlier cell, and title the chart after that ticker.
chart_ticker = 'ETH-USD'
chart_df = datasets[tickers.index(chart_ticker)]

figure = go.Figure(
    data=[
        go.Candlestick(
            x=chart_df['Date'],
            open=chart_df['Open'],
            high=chart_df['High'],
            low=chart_df['Low'],
            close=chart_df['Close'],
        )
    ]
)
figure.update_layout(
    title=f'{chart_ticker} Price Analysis',
    xaxis_title='Date',
    yaxis_title='Price',
    xaxis_rangeslider_visible=False,
)
figure.show()


Check the correlation of each variable with Close

In [22]:
# Correlation of every numeric column with Close, one ticker at a time.
# The datetime 'Date' column is excluded explicitly: recent pandas versions no
# longer drop non-numeric columns silently inside DataFrame.corr().
for symbol, dataset in zip(tickers, datasets):
    correl = dataset.select_dtypes(include='number').corr()
    # Label each block so the five result series can be told apart.
    print(symbol)
    print(correl['Close'].sort_values(ascending=False))


Adj Close    1.000000
Close        1.000000
High         0.999498
Low          0.999393
Open         0.998838
Volume       0.722272
Name: Close, dtype: float64
Adj Close    1.000000
Close        1.000000
Low          0.998984
High         0.998957
Open         0.997937
Volume       0.549925
Name: Close, dtype: float64
Adj Close    1.000000
Close        1.000000
Open         0.677443
High         0.621918
Low          0.456403
Volume      -0.130490
Name: Close, dtype: float64
Adj Close    1.000000
Close        1.000000
High         0.998892
Low          0.998796
Open         0.997642
Volume       0.736578
Name: Close, dtype: float64
Adj Close    1.000000
Close        1.000000
Open         0.778438
Low          0.553664
High         0.190261
Volume      -0.017885
Name: Close, dtype: float64


Training an LSTM for stock price prediction

In [24]:
# Split every ticker frame into a feature matrix and a label column.
# Features are Open/High/Low/Volume; the label is Close as an (n, 1) array.
feature_columns = ['Open', 'High', 'Low', 'Volume']

x_data = [frame[feature_columns].to_numpy() for frame in datasets]
y_data = [frame['Close'].to_numpy().reshape(-1, 1) for frame in datasets]



Training and the testing of the entire dataset of all tickers will be done in batches

In [30]:
#import necessary libraries for training
from keras.models import Sequential, load_model
from keras.layers import Dense, LSTM, Dropout
from keras.metrics import MeanAbsoluteError
from keras.callbacks import EarlyStopping

from sklearn.metrics import mean_absolute_error as MAE

from sklearn.model_selection import train_test_split

Prepare a neural network architecture for LSTM

In [31]:
def train_model(split_data, epochs=100):
    """Build, compile and fit a two-layer LSTM regressor.

    Args:
        split_data: Sequence indexed as [x_train, x_val, y_train, y_val]
            (the order produced by ``train_test_split(x, y)``). Items 0 and 2
            are used for fitting, items 1 and 3 as validation data.
        epochs: Maximum number of training epochs. Early stopping may end
            training sooner.

    Returns:
        The fitted Keras model, with the best weights (by validation loss)
        restored by the early-stopping callback.
    """
    # Each feature column is presented to the LSTM as one timestep holding a
    # single value, hence the (n_features, 1) input shape.
    # NOTE(review): callers pass 2-D arrays; confirm Keras expands them to
    # this 3-D shape on every version this notebook targets.
    n_features = split_data[0].shape[1]

    model = Sequential()
    model.add(LSTM(128, return_sequences=True, input_shape=(n_features, 1)))
    model.add(LSTM(64, return_sequences=False))
    model.add(Dropout(0.5))
    model.add(Dense(25))
    model.add(Dense(1))
    model.summary()

    # Compile the model
    model.compile(optimizer="adam",
                  loss="mean_absolute_error",
                  metrics=['mean_absolute_error'])

    # Stop once validation loss has not improved for 10 epochs and roll back
    # to the best weights seen.
    early_stopping = EarlyStopping(
        monitor='val_loss', patience=10, restore_best_weights=True, verbose=1
    )

    # Train the model. `epochs` is forwarded so the caller's value is honoured
    # instead of a hard-coded 100.
    model.fit(
        split_data[0],
        split_data[2],
        batch_size=1,
        epochs=epochs,
        callbacks=[early_stopping],
        validation_data=(split_data[1], split_data[3]),
    )

    return model

Train our neural network model for stock price prediction

In [32]:
models, eval_score = [], []

# Train and score one model per ticker, in the same order as x_data / y_data.
for ticker_x, ticker_y in zip(x_data, y_data):
    # 80/20 random split, returned as [x_train, x_test, y_train, y_test].
    split_data = train_test_split(ticker_x, ticker_y, random_state=42, test_size=0.2)

    model = train_model(split_data)
    models.append(model)

    # Mean absolute error on the held-out 20%.
    # NOTE(review): train_model also uses this same held-out portion as
    # validation data for early stopping, so the score is not fully independent.
    held_out_x, held_out_y = split_data[1], split_data[3]
    preds = model.predict(held_out_x)
    eval_score.append(MAE(held_out_y, preds))


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 4, 128)            66560     
                                                                 
 lstm_3 (LSTM)               (None, 64)                49408     
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 25)                1625      
                                                                 
 dense_3 (Dense)             (None, 1)                 26        
                                                                 
Total params: 117,619
Trainable params: 117,619
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100


Test the model by supplying input values for the features used in training and predicting the resulting **Close** value


In [33]:
# Mean absolute error per model, in the same order as `tickers`.
eval_score

[7784.548754479958,
 136.31814167823322,
 0.004122312790081825,
 7.789193747956076,
 0.003766464815993253]

In [42]:
# Pair every ticker symbol with the error of its model.
scores = [(symbol, error) for symbol, error in zip(tickers, eval_score)]
print(scores)

[('BTC-USD', 7784.548754479958), ('ETH-USD', 136.31814167823322), ('USDT-USD', 0.004122312790081825), ('BNB-USD', 7.789193747956076), ('USDC-USD', 0.003766464815993253)]


In [46]:
import numpy as np

# A single sample in the training column order: Open, High, Low, Volume.
features = np.array([[413.93, 421.69, 411.08, 1924450176]])

bnb_model = models[3]  # index 3 is 'BNB-USD' in `tickers`
bnb_model.predict(features)  # predicted Close value

array([[432.2462]], dtype=float32)

In [35]:
# Persist each trained model for later inference, one directory per ticker,
# named "<ticker>_no_adjustments".
for ticker, model in zip(tickers, models):
    model.save(ticker + "_no_adjustments")



INFO:tensorflow:Assets written to: BTC-USD_no_adjustments\assets


INFO:tensorflow:Assets written to: BTC-USD_no_adjustments\assets


INFO:tensorflow:Assets written to: ETH-USD_no_adjustments\assets


INFO:tensorflow:Assets written to: ETH-USD_no_adjustments\assets


INFO:tensorflow:Assets written to: USDT-USD_no_adjustments\assets


INFO:tensorflow:Assets written to: USDT-USD_no_adjustments\assets


INFO:tensorflow:Assets written to: BNB-USD_no_adjustments\assets


INFO:tensorflow:Assets written to: BNB-USD_no_adjustments\assets


INFO:tensorflow:Assets written to: USDC-USD_no_adjustments\assets


INFO:tensorflow:Assets written to: USDC-USD_no_adjustments\assets


In [None]:
import tensorflow as tf


In [None]:
# Reload one of the models written by the save cell. Those are stored as
# "<ticker>_no_adjustments"; no 'saved_model' directory is ever created.
reload_ticker = 'BNB-USD'
new_model = tf.keras.models.load_model(reload_ticker + "_no_adjustments")

In [None]:
# Recreate the held-out split used during training (same test_size and seed)
# so the reloaded model is scored on the same rows, and score it with the
# MAE function imported earlier in the notebook.
eval_ticker = 'BNB-USD'  # must be the ticker whose saved model is in `new_model`
eval_idx = tickers.index(eval_ticker)
_, x_test, _, y_test = train_test_split(
    x_data[eval_idx], y_data[eval_idx], test_size=0.2, random_state=42
)

new_preds = new_model.predict(x_test)
print(MAE(y_test, new_preds))