In [34]:
# Importing Pandas Library
import pandas as pd

In [35]:
# Loading the Dataset
data = pd.read_csv('apple_stock_data.csv')

In [36]:
# Checking the Top 5 rows of a Dataset
data.head()

Unnamed: 0,Date,Adj Close,Close,High,Low,Open,Volume
0,2023-11-02 00:00:00+00:00,176.665985,177.570007,177.779999,175.460007,175.520004,77334800
1,2023-11-03 00:00:00+00:00,175.750671,176.649994,176.820007,173.350006,174.240005,79763700
2,2023-11-06 00:00:00+00:00,178.31752,179.229996,179.429993,176.210007,176.380005,63841300
3,2023-11-07 00:00:00+00:00,180.894333,181.820007,182.440002,178.970001,179.179993,70530000
4,2023-11-08 00:00:00+00:00,181.958893,182.889999,183.449997,181.589996,182.350006,49340300


In [37]:
# Checking the Dataset Information
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 252 entries, 0 to 251
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       252 non-null    object 
 1   Adj Close  252 non-null    float64
 2   Close      252 non-null    float64
 3   High       252 non-null    float64
 4   Low        252 non-null    float64
 5   Open       252 non-null    float64
 6   Volume     252 non-null    int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 13.9+ KB


In [38]:
# Converting Date Column of the Dataset from Object to Date
data['Date']= pd.to_datetime(data['Date'])

In [39]:
# Checking in the column is transformed correctly
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 252 entries, 0 to 251
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype              
---  ------     --------------  -----              
 0   Date       252 non-null    datetime64[ns, UTC]
 1   Adj Close  252 non-null    float64            
 2   Close      252 non-null    float64            
 3   High       252 non-null    float64            
 4   Low        252 non-null    float64            
 5   Open       252 non-null    float64            
 6   Volume     252 non-null    int64              
dtypes: datetime64[ns, UTC](1), float64(5), int64(1)
memory usage: 13.9 KB


In [40]:
# Set Date as Index Column
data.set_index('Date', inplace = True)

In [41]:
data = data[['Close']]

In [42]:
data

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2023-11-02 00:00:00+00:00,177.570007
2023-11-03 00:00:00+00:00,176.649994
2023-11-06 00:00:00+00:00,179.229996
2023-11-07 00:00:00+00:00,181.820007
2023-11-08 00:00:00+00:00,182.889999
...,...
2024-10-28 00:00:00+00:00,233.399994
2024-10-29 00:00:00+00:00,233.669998
2024-10-30 00:00:00+00:00,230.100006
2024-10-31 00:00:00+00:00,225.910004


### Choosing the Hybrid Models

In [43]:
# Scaing the CLose price using MinMaxScaler
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range= (0 , 1))
data['Close'] = scaler.fit_transform(data[['Close']])

In [44]:
data

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2023-11-02 00:00:00+00:00,0.175853
2023-11-03 00:00:00+00:00,0.162983
2023-11-06 00:00:00+00:00,0.199077
2023-11-07 00:00:00+00:00,0.235311
2023-11-08 00:00:00+00:00,0.250280
...,...
2024-10-28 00:00:00+00:00,0.956911
2024-10-29 00:00:00+00:00,0.960688
2024-10-30 00:00:00+00:00,0.910744
2024-10-31 00:00:00+00:00,0.852127


In [45]:
# Prepare the Data for LSTM by creating sequences of a defined length (e.g., 60 Days)

import numpy as np
def create_sequences(data, seq_length = 60):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i + seq_length])
        y.append(data[i + seq_length])
    return np.array(X), np.array(y)

seq_length = 60
X, y = create_sequences(data['Close'].values, seq_length)

In [47]:
# Now we will split the sequences into training and test sets (e.g., 80% for training and 20% for testing)

train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

In [55]:
# Lets build LSTM model with layers to capture the temporal dependencies in the data

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

lstm_model = Sequential()
lstm_model.add(LSTM(units = 50, return_sequences = True, input_shape = (X_train.shape[1], 1)))
lstm_model.add(LSTM(units = 50))
lstm_model.add(Dense(1))

In [57]:
# Now lets compile the model using an appropriate optimizer and loss function and fit it into the training Data

lstm_model.compile(optimizer = 'adam', loss = 'mean_squared_error')
lstm_model.fit(X_train, y_train, epochs = 20, batch_size = 32)

Epoch 1/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 38ms/step - loss: 0.2049
Epoch 2/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - loss: 0.0342
Epoch 3/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 0.0300
Epoch 4/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 0.0140
Epoch 5/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0162
Epoch 6/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - loss: 0.0127
Epoch 7/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 0.0132
Epoch 8/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - loss: 0.0098
Epoch 9/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0093
Epoch 10/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0092
Epoch 11/20
[1m5/5

<keras.src.callbacks.history.History at 0x14073416390>

In [58]:
# Now let's train the second model. We will start by generating lagged features for Linear Regression 
# (e.g., using the past 3 days as predictors )


data['Lag_1'] = data['Close'].shift(1)
data['Lag_2'] = data['Close'].shift(2)
data['Lag_3'] = data['Close'].shift(3)
data = data.dropna()

In [59]:
# We will split the data accordingly for training and testing

X_lin = data[['Lag_1', 'Lag_2', 'Lag_3']]
y_lin = data['Close']
X_train_lin, X_test_lin, = X_lin[:train_size], X_lin[train_size:]
y_train_lin, y_test_lin = y_lin[:train_size], y_lin[train_size:]

In [60]:
# Now lets train the Linear Regression Model

from sklearn.linear_model import LinearRegression
lin_model = LinearRegression()
lin_model.fit(X_train_lin, y_train_lin)

In [61]:
# Now lets make predictions using LSTM on the test set and inverse transform the scaled predictions

X_test_lstm = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
lstm_predictions = lstm_model.predict(X_test_lstm)
lstm_predictions = scaler.inverse_transform(lstm_predictions)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 451ms/step


In [62]:
# Here's how to generate predictions using Linear Regression and inverse transfrom them

lin_predictions = lin_model.predict(X_test_lin)
lin_predictions = scaler.inverse_transform(lin_predictions.reshape(-1,1))

In [65]:
# Here's how to use a weighted average to create hybrid predictions:

min_len = min(len(lstm_predictions), len(lin_predictions))

hybrid_predictions = (
    0.7 * lstm_predictions[:min_len] + 
    0.3 * lin_predictions[:min_len]
)


### Predicting Using the Hybrid Model

In [66]:
# Now lest make predictions for the next 10 days using our hybrid model

lstm_future_predictions = []
last_sequence = X[-1].reshape(1 , seq_length, 1)
for _ in range(10):
    lstm_pred = lstm_model.predict(last_sequence)[0,0]
    lstm_future_predictions.append(lstm_pred)
    lstm_pred_reshaped = np.array([[lstm_pred]]).reshape(1 , 1 , 1)
    last_sequence = np.append(last_sequence[:,1:,:], lstm_pred_reshaped, axis = 1)

lstm_future_predictions = scaler.inverse_transform(np.array(lstm_future_predictions).reshape(-1,1))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 218ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step


In [67]:
# Here's how to predict the next 10 days using linear regression

recent_data = data['Close'].values[-3:]
lin_future_predictions = []
for _ in range(10):
    lin_pred = lin_model.predict(recent_data.reshape(1 , -1))[0]
    lin_future_predictions.append(lin_pred)
    recent_data = np.append(recent_data[1:], lin_pred)

lin_future_predictions = scaler.inverse_transform(np.array(lin_future_predictions).reshape(-1, 1))



In [68]:
hybrid_future_predictions = (0.7 * lstm_future_predictions) + (0.3 * lin_future_predictions)

In [69]:
# Here's how to create the final DataFrame to look at the predictions:

future_dates = pd.date_range(start= data.index[-1] + pd.Timedelta(days = 1), periods= 10)
predictions_df = pd.DataFrame({
    'Date': future_dates,
    'LSTM Predictions': lstm_future_predictions.flatten(),
    'Linear Regression Predictions': lin_future_predictions.flatten(),
    'Hybrid Model Predictions': hybrid_future_predictions.flatten()
})

In [70]:
predictions_df

Unnamed: 0,Date,LSTM Predictions,Linear Regression Predictions,Hybrid Model Predictions
0,2024-11-02 00:00:00+00:00,231.617172,230.355192,231.238577
1,2024-11-03 00:00:00+00:00,231.292221,225.707291,229.616744
2,2024-11-04 00:00:00+00:00,231.017746,222.703426,228.523446
3,2024-11-05 00:00:00+00:00,230.780548,230.631535,230.735847
4,2024-11-06 00:00:00+00:00,230.570969,225.48638,229.045588
5,2024-11-07 00:00:00+00:00,230.381485,222.494588,228.01542
6,2024-11-08 00:00:00+00:00,230.206436,230.930195,230.423559
7,2024-11-09 00:00:00+00:00,230.041824,225.245599,228.602961
8,2024-11-10 00:00:00+00:00,229.884735,222.284007,227.604514
9,2024-11-11 00:00:00+00:00,229.733261,231.252375,230.188991
