In [16]:
import pandas as pd
import numpy as np
import yfinance as yf
import json
import ta
import matplotlib.pyplot as pl
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [2]:
# Ticker symbols whose price history is downloaded below.
all_stocks = [
    'NVR',
    'BKNG',
    'CMG',
    'AZO',
]

In [3]:
# Date window for the price history. yfinance documents `end` as exclusive, so
# the last session returned is the one before 2023-03-31.
start_date = '2013-01-01'
end_date = '2023-03-31'

# Download the adjusted closing prices for every ticker in `all_stocks`.
# `auto_adjust=False` is passed explicitly: recent yfinance releases default to
# auto_adjust=True, which drops the 'Adj Close' column and would make the
# column lookup below raise KeyError.
df = yf.download(
    all_stocks,
    start=start_date,
    end=end_date,
    auto_adjust=False,
)['Adj Close']

# Turn the Date index into a regular column.
df = df.reset_index()

# Save the data to a CSV file
df.to_csv('stocks_data.csv')

# Display the first 5 records
df.head(5)

[*********************100%%**********************]  4 of 4 completed


Ticker,Date,AZO,BKNG,CMG,NVR
0,2013-01-02,356.880005,635.308105,6.0212,930.130005
1,2013-01-03,356.140015,645.588013,6.019,949.0
2,2013-01-04,360.850006,645.269592,6.0036,954.48999
3,2013-01-07,356.170013,654.196167,5.9918,966.890015
4,2013-01-08,348.25,654.235962,5.9552,974.5


In [4]:
# Show the price table (pandas abbreviates the middle rows in the display).
df

Ticker,Date,AZO,BKNG,CMG,NVR
0,2013-01-02,356.880005,635.308105,6.021200,930.130005
1,2013-01-03,356.140015,645.588013,6.019000,949.000000
2,2013-01-04,360.850006,645.269592,6.003600,954.489990
3,2013-01-07,356.170013,654.196167,5.991800,966.890015
4,2013-01-08,348.250000,654.235962,5.955200,974.500000
...,...,...,...,...,...
2574,2023-03-24,2329.399902,2487.225098,32.485001,5413.000000
2575,2023-03-27,2368.550049,2496.161865,32.956402,5375.450195
2576,2023-03-28,2405.219971,2532.793701,33.057999,5413.560059
2577,2023-03-29,2395.370117,2559.314453,33.134399,5433.990234


In [5]:
# Plot the 4 stocks together.
# Reshape to long format (one row per date/ticker pair) so plotly can draw one
# line per ticker. `var_name` is given explicitly: without it pd.melt names the
# column after `df.columns.name`, which is only 'Ticker' when the frame's column
# axis happens to carry that name.
df1 = pd.melt(df, id_vars='Date', var_name='Ticker', value_name='value')
px.line(
    df1,
    x='Date',
    y='value',
    color='Ticker',
    title='Adjusted close by ticker',
    labels={'value': 'Adjusted close'},
)

**The four stocks trade at very different price levels, so we will model each stock separately, one at a time.**

In [6]:
# Display the price table again (unchanged since the download cell).
df

Ticker,Date,AZO,BKNG,CMG,NVR
0,2013-01-02,356.880005,635.308105,6.021200,930.130005
1,2013-01-03,356.140015,645.588013,6.019000,949.000000
2,2013-01-04,360.850006,645.269592,6.003600,954.489990
3,2013-01-07,356.170013,654.196167,5.991800,966.890015
4,2013-01-08,348.250000,654.235962,5.955200,974.500000
...,...,...,...,...,...
2574,2023-03-24,2329.399902,2487.225098,32.485001,5413.000000
2575,2023-03-27,2368.550049,2496.161865,32.956402,5375.450195
2576,2023-03-28,2405.219971,2532.793701,33.057999,5413.560059
2577,2023-03-29,2395.370117,2559.314453,33.134399,5433.990234


**Install Optuna, Jupyterlab-Optuna, Optuna-Dashboard**

Optuna is an open-source hyperparameter optimization framework that automates the hyperparameter search.



In [11]:
# Peek at the first two rows to recall the column layout.
df.iloc[:2]

Ticker,Date,AZO,BKNG,CMG,NVR
0,2013-01-02,356.880005,635.308105,6.0212,930.130005
1,2013-01-03,356.140015,645.588013,6.019,949.0


In [12]:
# Normalize the data
# Work on a single ticker; the double brackets keep a 2-D (n_rows, 1) frame,
# which is the layout MinMaxScaler works on.
data = df[['AZO']]

# Fit the scaler on the leading part of the series only. Fitting on every row
# would let the min/max of the held-out period leak into the training inputs.
# The fraction mirrors the chronological 80/20 split applied to the sequences
# further down; keep the two in sync.
train_fraction = 0.8
n_fit_rows = max(1, int(len(data) * train_fraction))

scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data.iloc[:n_fit_rows])

# Values from the held-out period may fall outside [0, 1]; that is expected
# because the scaler never saw them.
scaled_data = scaler.transform(data)

# Create the time series sequences
def create_sequences(data, seq_length):
    """Slice a series into overlapping windows and their next-step labels.

    Window ``i`` is ``data[i : i + seq_length]`` and its label is
    ``data[i + seq_length]``, so ``len(data) - seq_length`` samples are
    produced.

    Parameters
    ----------
    data : array-like
        Observations ordered along the first axis. Anything accepted by
        ``np.asarray`` works (ndarray, nested list, DataFrame).
    seq_length : int
        Number of consecutive observations per window; must be at least 1.

    Returns
    -------
    sequences : np.ndarray
        Shape ``(n_samples, seq_length, *data.shape[1:])``.
    labels : np.ndarray
        Shape ``(n_samples, *data.shape[1:])``.

    Raises
    ------
    ValueError
        If ``seq_length`` is smaller than 1.
    """
    # Coerce once so positional indexing behaves the same for every input type.
    values = np.asarray(data)
    if seq_length < 1:
        raise ValueError(
            f"seq_length must be a positive integer, got {seq_length!r}"
        )

    n_samples = len(values) - seq_length
    trailing_shape = values.shape[1:]
    if n_samples <= 0:
        # Series too short for even one window: return empty arrays that still
        # carry the window/feature dimensions so downstream shape checks work.
        return (
            np.empty((0, seq_length) + trailing_shape, dtype=values.dtype),
            np.empty((0,) + trailing_shape, dtype=values.dtype),
        )

    sequences = np.stack([values[i:i + seq_length] for i in range(n_samples)])
    # The label for window i is the observation right after it; copy so the
    # result does not alias the caller's array.
    labels = values[seq_length:].copy()
    return sequences, labels

# Look-back window: each sample holds 60 consecutive scaled values and is
# labelled with the value that immediately follows them.
seq_length = 60
X, y = create_sequences(scaled_data, seq_length)

# Chronological 80/20 hold-out. shuffle=False keeps the original order, so the
# test samples all come after the training samples.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)

In [14]:
# Number of windows produced by create_sequences.
X.shape[0]

2519

In [15]:
# Length of a single window (should equal seq_length).
X.shape[1]

60

**Build the Model**

In [None]:


def create_model(units=50, dropout_rate=0.2, input_shape=None):
    """Build and compile a two-layer stacked LSTM regressor.

    Parameters
    ----------
    units : int, default 50
        Number of units in each of the two LSTM layers.
    dropout_rate : float, default 0.2
        Rate passed to the Dropout layer that follows each LSTM layer.
    input_shape : tuple of int, optional
        ``(timesteps, features)`` of one input sample. When omitted, it
        falls back to ``(seq_length, 1)``, reading the notebook-level
        ``seq_length`` at call time (the original behaviour).

    Returns
    -------
    Sequential
        Model ending in a single-unit Dense layer, compiled with the
        ``'adam'`` optimizer and ``'mean_squared_error'`` loss.
    """
    if input_shape is None:
        # Backward-compatible default: one feature per timestep, with the
        # window length taken from the notebook's global.
        input_shape = (seq_length, 1)

    model = Sequential()
    # The first LSTM returns the full sequence so the second LSTM receives one
    # vector per timestep.
    model.add(LSTM(units, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(dropout_rate))
    # The second LSTM returns only its final output, which feeds the Dense head.
    model.add(LSTM(units, return_sequences=False))
    model.add(Dropout(dropout_rate))
    # Single output unit: one predicted value per input window.
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model