In [10]:
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MaxAbsScaler,RobustScaler,MinMaxScaler

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import tensorflow as tf

import plotly.graph_objects as go

yfinance: This library is used to download historical market data from Yahoo Finance.
numpy: Provides support for large multi-dimensional arrays and matrices, along with mathematical functions to operate on these arrays.
pandas: A powerful data manipulation tool for data analysis, providing data structures like DataFrames.
matplotlib.pyplot: A plotting library for creating static, interactive, and animated visualizations in Python.
sklearn.preprocessing: Includes different scalers (MaxAbsScaler, RobustScaler, MinMaxScaler) for normalizing data before feeding it into the model.
tensorflow:A linear stack of layers for building a neural network model in Keras AND Includes LSTM, Dense, and Dropout layers which are used to construct the neural network.
plotly.graph_objects: A library for creating interactive and dynamic plots, useful for more complex visualizations.

In [11]:
ticker = 'AAPL'
train_str_date = "2017-01-05"
train_end_date = "2024-06-23"
data = yf.download(ticker, start=train_str_date, end=train_end_date)

[*********************100%%**********************]  1 of 1 completed


In [14]:
data = data[['Adj Close']]
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data)

In [15]:
# Split the data into training and testing sets


time_step = 50


train_data = scaled_data[:int(len(scaled_data) * 0.6)]
test_data = scaled_data[int(len(scaled_data) * 0.6):-time_step-23-30]
final_data = scaled_data[-time_step-23-30:]




def create_dataset(data, time_step=1):
    X, y = [], []
    for i in range(len(data) - time_step - 1):
        X.append(data[i:(i + time_step), 0])
        y.append(data[i + time_step, 0])
    return np.array(X), np.array(y)


X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)


In [28]:

model = Sequential()
model.add(LSTM(units=196, return_sequences=True, input_shape=(time_step, 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=196, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=96, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=96, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=1))

model.summary()

# Compile the model
model.compile(optimizer='adam', loss=tf.keras.losses.Huber(delta=0.9))


# Train the model
history = model.fit(X_train, y_train, epochs=12, batch_size=8, validation_data=(X_test, y_test), verbose=1)


Epoch 1/12
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 71ms/step - loss: 0.0016 - val_loss: 0.0024
Epoch 2/12
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 69ms/step - loss: 5.2771e-04 - val_loss: 0.0010
Epoch 3/12
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 70ms/step - loss: 6.6320e-04 - val_loss: 5.4922e-04
Epoch 4/12
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 70ms/step - loss: 3.7437e-04 - val_loss: 2.8555e-04
Epoch 5/12
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 70ms/step - loss: 4.0980e-04 - val_loss: 0.0174
Epoch 6/12
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 70ms/step - loss: 7.6500e-04 - val_loss: 2.3632e-04
Epoch 7/12
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 70ms/step - loss: 3.4677e-04 - val_loss: 3.1031e-04
Epoch 8/12
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 70ms/step - loss: 3.0944e-04 - val_

In [29]:
# Plot training & validation loss values using plotly
fig_loss = go.Figure()
fig_loss.add_trace(go.Scatter(y=history.history['loss'], mode='lines', name='Train Loss'))
fig_loss.add_trace(go.Scatter(y=history.history['val_loss'], mode='lines', name='Validation Loss'))
fig_loss.update_layout(title='Model Loss', xaxis_title='Epoch', yaxis_title='Loss')
fig_loss.show()

In [30]:
# Make predictions
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)


train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)

#helps in ploting
y_train_actual = scaler.inverse_transform(y_train.reshape(-1, 1))
y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 1))

train_size = len(train_data)

# Plot actual vs predicted values
fig_prices = go.Figure()
fig_prices.add_trace(go.Scatter(x=data.index[time_step:train_size], y=y_train_actual.flatten(), mode='lines', name='Train Actual'))
fig_prices.add_trace(go.Scatter(x=data.index[train_size+time_step:], y=y_test_actual.flatten(), mode='lines', name='Test Actual'))
fig_prices.add_trace(go.Scatter(x=data.index[time_step:train_size], y=train_predict.flatten(), mode='lines', name='Train Predict'))
fig_prices.add_trace(go.Scatter(x=data.index[train_size+time_step:], y=test_predict.flatten(), mode='lines', name='Test Predict'))
fig_prices.update_layout(title='Stock Price Prediction', xaxis_title='Date', yaxis_title='Price')
fig_prices.show()

[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 38ms/step
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step


Predictions for tomorrow are made using only today's available data, ensuring the model relies solely on present-day information

In [31]:
# Download new data after 2021

new_data = yf.download(ticker, start='2024-03-11', end='2024-06-23')
new_data = new_data[['Adj Close']]
new_scaled_data = scaler.transform(new_data)

# Create dataset for new data
X_new, y_new = create_dataset(new_scaled_data, time_step)
X_new = X_new.reshape(X_new.shape[0], X_new.shape[1], 1)

# Predict on new data
new_predict = model.predict(X_new)
new_predict = scaler.inverse_transform(new_predict)
y_new_actual = scaler.inverse_transform(y_new.reshape(-1, 1))

# Plot actual vs predicted values for new data using plotly
fig_new = go.Figure()
fig_new.add_trace(go.Scatter(x=new_data.index[time_step:], y=y_new_actual.flatten(), mode='lines', name='Actual'))
fig_new.add_trace(go.Scatter(x=new_data.index[time_step:], y=new_predict.flatten(), mode='lines', name='Predicted'))
fig_new.update_layout(title='Stock Price Prediction for New Data', xaxis_title='Date', yaxis_title='Price')
fig_new.show()

# Calculate loss on new data
new_loss = model.evaluate(X_new, y_new)
print(f"Loss on new data: {new_loss}")

[*********************100%%**********************]  1 of 1 completed

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step





[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - loss: 0.0080
Loss on new data: 0.008010901510715485


The model is designed to predict stock prices for the next n days (Type 2 forecasting), focusing on short- to mid-term trends

In [32]:

# Predict the next n days
n_days = 7
x_input = test_data[-time_step:].reshape(1, -1)
temp_input = list(x_input[0])
lst_output = []

for i in range(n_days):
    if len(temp_input) > time_step:
        x_input = np.array(temp_input[1:])
        x_input = x_input.reshape((1, time_step, 1))
        yhat = model.predict(x_input, verbose=0)
        temp_input.append(yhat[0][0])
        temp_input = temp_input[1:]
        lst_output.append(yhat[0][0])
    else:
        x_input = x_input.reshape((1, time_step, 1))
        yhat = model.predict(x_input, verbose=0)
        temp_input.append(yhat[0][0])
        lst_output.append(yhat[0][0])


lst_output = scaler.inverse_transform(np.array(lst_output).reshape(-1, 1))

# Plot predictions for the next n days
fig_future = go.Figure()
fig_future.add_trace(go.Scatter(y=lst_output.flatten(), mode='lines', name='Predicted'))
fig_future.update_layout(title=f'Stock Price Prediction for Next {n_days} Days', xaxis_title='Day', yaxis_title='Price')
fig_future.show()