In [42]:
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout
from tensorflow.keras.losses import Huber


In [None]:

import plotly.graph_objects as go

 fetching the historical stock data for Apple Inc. from January 5, 2017, to June 23, 2024. This data will be used to train the GRU model.

In [33]:
ticker = 'AAPL'
train_str_date = "2017-01-05"
train_end_date = "2024-06-23"
data = yf.download(ticker, start=train_str_date, end=train_end_date)
data = data[['Adj Close']]

[*********************100%%**********************]  1 of 1 completed


Although I tried using log returns, the model predicted almost constant values.

so,back to normal price 

In [34]:
# Calculate log returns
#log_returns = np.log(data['Adj Close'] / data['Adj Close'].shift(1)).dropna()


scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data)


In [35]:
time_step = 50

train_data = scaled_data[:int(len(scaled_data) * 0.6)]
test_data = scaled_data[int(len(scaled_data) * 0.6):-time_step-23-30]
final_data = scaled_data[-time_step-23-30:]

def create_dataset(data, time_step=1):
    X, y = [], []
    for i in range(len(data) - time_step - 1):
        X.append(data[i:(i + time_step), 0])
        y.append(data[i + time_step, 0])
    return np.array(X), np.array(y)


X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)


**This architecture is adapted from the work "Stock Market Prediction Using LSTM Recurrent Neural Network" by Adil MOGHAR and Mhamed HAMICHE, with modifications to the layer type for enhanced performance**

1.**batch_size** = **8**

2.**loss** used : **Huber**

3.**Epochs** = 12(optimal performance observed between 12, 25, and 50 epochs)




In [37]:
model = Sequential()
model.add(GRU(units=196, return_sequences=True, input_shape=(time_step, 1),activation='tanh'))
model.add(Dropout(0.2))
model.add(GRU(units=196, return_sequences=True,activation='tanh'))
model.add(Dropout(0.2))
model.add(GRU(units=96, return_sequences=True))
model.add(Dropout(0.2))
model.add(GRU(units=96, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=1))

model.summary()

# Compile the model
model.compile(optimizer='adam', loss=Huber(delta=1.9))


# Train the model
history = model.fit(X_train, y_train, epochs=12, batch_size=8, validation_data=(X_test, y_test), verbose=1)


Epoch 1/12
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 85ms/step - loss: 0.0038 - val_loss: 0.0020
Epoch 2/12
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 86ms/step - loss: 4.4118e-04 - val_loss: 0.0020
Epoch 3/12
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 84ms/step - loss: 7.7276e-04 - val_loss: 3.5074e-04
Epoch 4/12
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 84ms/step - loss: 4.5459e-04 - val_loss: 3.6568e-04
Epoch 5/12
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 83ms/step - loss: 3.9702e-04 - val_loss: 4.9262e-04
Epoch 6/12
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 82ms/step - loss: 3.7865e-04 - val_loss: 2.2876e-04
Epoch 7/12
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 82ms/step - loss: 5.9648e-04 - val_loss: 0.0018
Epoch 8/12
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 82ms/step - loss: 4.4010e-04

In [38]:
# Plot training & validation loss values using plotly
fig_loss = go.Figure()
fig_loss.add_trace(go.Scatter(y=history.history['loss'], mode='lines', name='Train Loss'))
fig_loss.add_trace(go.Scatter(y=history.history['val_loss'], mode='lines', name='Validation Loss'))
fig_loss.update_layout(title='Model Loss', xaxis_title='Epoch', yaxis_title='Loss')
fig_loss.show()


In [39]:
# Make predictions
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)


train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)

#helps in ploting
y_train_actual = scaler.inverse_transform(y_train.reshape(-1, 1))
y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 1))

train_size = len(train_data)

# Plot actual vs predicted values
fig_prices = go.Figure()
fig_prices.add_trace(go.Scatter(x=data.index[time_step:train_size], y=y_train_actual.flatten(), mode='lines', name='Train Actual'))
fig_prices.add_trace(go.Scatter(x=data.index[train_size+time_step:], y=y_test_actual.flatten(), mode='lines', name='Test Actual'))
fig_prices.add_trace(go.Scatter(x=data.index[time_step:train_size], y=train_predict.flatten(), mode='lines', name='Train Predict'))
fig_prices.add_trace(go.Scatter(x=data.index[train_size+time_step:], y=test_predict.flatten(), mode='lines', name='Test Predict'))
fig_prices.update_layout(title='Stock Price Prediction', xaxis_title='Date', yaxis_title='Price')
fig_prices.show()

[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 45ms/step
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step


Predictions for tomorrow are made using only today's available data, ensuring the model relies solely on present-day information

In [40]:
# Download new data after 2021

new_data = yf.download(ticker, start='2024-03-11', end='2024-06-23')
new_data = new_data[['Adj Close']]
new_scaled_data = scaler.transform(new_data)

# Create dataset for new data
X_new, y_new = create_dataset(new_scaled_data, time_step)
X_new = X_new.reshape(X_new.shape[0], X_new.shape[1], 1)

# Predict on new data
new_predict = model.predict(X_new)
new_predict = scaler.inverse_transform(new_predict)
y_new_actual = scaler.inverse_transform(y_new.reshape(-1, 1))

# Plot actual vs predicted values for new data using plotly
fig_new = go.Figure()
fig_new.add_trace(go.Scatter(x=new_data.index[time_step:], y=y_new_actual.flatten(), mode='lines', name='Actual'))
fig_new.add_trace(go.Scatter(x=new_data.index[time_step:], y=new_predict.flatten(), mode='lines', name='Predicted'))
fig_new.update_layout(title='Stock Price Prediction for New Data', xaxis_title='Date', yaxis_title='Price')
fig_new.show()

# Calculate loss on new data
new_loss = model.evaluate(X_new, y_new)
print(f"Loss on new data: {new_loss}")

[*********************100%%**********************]  1 of 1 completed

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step





[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 7.4896e-04
Loss on new data: 0.0007489630370400846


The model is designed to predict stock prices for the next n days (Type 2 forecasting), focusing on short- to mid-term trends.

In [41]:

# Predict the next n days
n_days = 7
x_input = test_data[-time_step:].reshape(1, -1)
temp_input = list(x_input[0])
lst_output = []

for i in range(n_days):
    if len(temp_input) > time_step:
        x_input = np.array(temp_input[1:])
        x_input = x_input.reshape((1, time_step, 1))
        yhat = model.predict(x_input, verbose=0)
        temp_input.append(yhat[0][0])
        temp_input = temp_input[1:]
        lst_output.append(yhat[0][0])
    else:
        x_input = x_input.reshape((1, time_step, 1))
        yhat = model.predict(x_input, verbose=0)
        temp_input.append(yhat[0][0])
        lst_output.append(yhat[0][0])


lst_output = scaler.inverse_transform(np.array(lst_output).reshape(-1, 1))

# Plot predictions for the next n days
fig_future = go.Figure()
fig_future.add_trace(go.Scatter(y=lst_output.flatten(), mode='lines', name='Predicted'))
fig_future.update_layout(title=f'Stock Price Prediction for Next {n_days} Days', xaxis_title='Day', yaxis_title='Price')
fig_future.show()