# **Import Modules**

In [15]:
import math
import random
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Dense, LSTM, Dropout, Bidirectional, GRU, Attention
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.optimizers import Adam

# Suppress all library warnings for the rest of the notebook.
warnings.filterwarnings('ignore')

# Use one seed value for every random number generator the notebook touches
# (Python's `random`, NumPy, TensorFlow).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# **Download Data**

In [9]:
# Download the Apple stock CSV to /content. TLS certificate verification is
# left enabled (wget's default) so the file is only accepted from the genuine host.
!wget https://raw.githubusercontent.com/rioooranteai/data-analytics---modeling/main/Apple%20Stock/Dataset/apple_stock_data.csv -O /content/apple_stock_data.csv

--2025-03-17 15:24:30--  https://raw.githubusercontent.com/rioooranteai/data-analytics---modeling/main/Apple%20Stock/Dataset/apple_stock_data.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 31516 (31K) [text/plain]
Saving to: ‘/content/apple_stock_data.csv’


2025-03-17 15:24:30 (19.5 MB/s) - ‘/content/apple_stock_data.csv’ saved [31516/31516]



# **Read Data**

In [10]:
# Load the downloaded CSV into a DataFrame and preview its first rows.
df = pd.read_csv('/content/apple_stock_data.csv')
df.head()

Unnamed: 0,Date,Adj Close,Close,High,Low,Open,Volume
0,2023-11-02 00:00:00+00:00,176.665985,177.570007,177.779999,175.460007,175.520004,77334800
1,2023-11-03 00:00:00+00:00,175.750671,176.649994,176.820007,173.350006,174.240005,79763700
2,2023-11-06 00:00:00+00:00,178.31752,179.229996,179.429993,176.210007,176.380005,63841300
3,2023-11-07 00:00:00+00:00,180.894333,181.820007,182.440002,178.970001,179.179993,70530000
4,2023-11-08 00:00:00+00:00,181.958893,182.889999,183.449997,181.589996,182.350006,49340300


In [11]:
# Show column dtypes and non-null counts to check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 252 entries, 0 to 251
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       252 non-null    object 
 1   Adj Close  252 non-null    float64
 2   Close      252 non-null    float64
 3   High       252 non-null    float64
 4   Low        252 non-null    float64
 5   Open       252 non-null    float64
 6   Volume     252 non-null    int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 13.9+ KB


In [12]:
# Summary statistics for the numeric columns.
df.describe()

Unnamed: 0,Adj Close,Close,High,Low,Open,Volume
count,252.0,252.0,252.0,252.0,252.0,252.0
mean,199.088202,199.454286,201.093056,197.608373,199.316032,58322140.0
std,21.511933,21.321567,21.571427,20.963034,21.365476,30257710.0
min,164.585999,165.0,166.399994,164.080002,165.350006,24048300.0
25%,182.254063,182.852501,184.617504,181.487499,182.777504,42788550.0
50%,192.370026,193.084999,194.399994,191.724998,192.989998,51748650.0
75%,221.697502,221.697502,224.149994,219.717499,221.635002,64974600.0
max,236.479996,236.479996,237.490005,234.449997,236.479996,318679900.0


# **Data Preprocessing**

In [None]:
def preprocess_data(df, feature_columns=None, target_column='Adj Close', split_ratio=0.8):
    """Sort the price data chronologically and min-max scale it to [0, 1].

    Both scalers are fitted only on the leading ``split_ratio`` share of the
    rows, so the minimum/maximum of the later (test) rows never influences the
    scaling. Later rows may therefore scale slightly outside [0, 1].

    Args:
        df: DataFrame with a 'Date' column plus the requested feature/target
            columns. It is not modified.
        feature_columns: Columns to scale and return. Defaults to ['Adj Close'].
        target_column: Column the dedicated target scaler is fitted on.
        split_ratio: Share of the chronologically first rows used to fit the
            scalers; must satisfy 0 < split_ratio <= 1 (1 fits on all rows).

    Returns:
        Tuple ``(scaled_data, scaler, target_scaler, dates)``: the scaled
        feature array, the fitted feature scaler, the fitted target scaler,
        and the sorted dates as a NumPy array.

    Raises:
        ValueError: If ``split_ratio`` is outside (0, 1].
    """
    if feature_columns is None:
        feature_columns = ['Adj Close']
    if not 0 < split_ratio <= 1:
        raise ValueError(f"split_ratio must be in (0, 1], got {split_ratio!r}")

    # Build a new, date-sorted frame instead of writing into the caller's df.
    df = df.assign(Date=pd.to_datetime(df['Date'])).sort_values('Date')

    data = df[list(feature_columns)].values
    target = df[[target_column]].values

    # Number of leading rows that define the scaling range. This never exceeds
    # the training rows produced by a sequence-level split with the same ratio.
    n_fit = max(1, int(len(df) * split_ratio))

    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(data[:n_fit])
    scaled_data = scaler.transform(data)

    # Only the fitted scaler is needed for the target, not transformed values.
    target_scaler = MinMaxScaler(feature_range=(0, 1))
    target_scaler.fit(target[:n_fit])

    return scaled_data, scaler, target_scaler, df['Date'].values


# **Create Sequences**

In [None]:
def create_sequences(data, seq_length):
    """Slice ``data`` into overlapping windows and their next-step targets.

    Window ``k`` holds ``data[k : k + seq_length]`` and its target is the
    element immediately after it, ``data[k + seq_length]``.

    Args:
        data: Indexable, sliceable sequence (e.g. array of scaled rows).
        seq_length: Number of consecutive elements per window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays with ``len(data) - seq_length``
        entries each (empty arrays when ``data`` is not longer than
        ``seq_length``).
    """
    window_starts = range(len(data) - seq_length)
    windows = [data[start:start + seq_length] for start in window_starts]
    targets = [data[start + seq_length] for start in window_starts]
    return np.array(windows), np.array(targets)

# **Train Test Split**

In [None]:
def train_test_split(X, y, dates, split_ratio=0.8, seq_length=None):
    """Chronologically split windows, targets and their dates into train/test.

    No shuffling is done: the first ``split_ratio`` share of the samples is
    the training set and the remainder is the test set.

    Args:
        X: Sequence of input windows.
        y: Targets aligned with ``X``.
        dates: Dates of the underlying rows. The date of sample ``k`` is taken
            to be ``dates[k + seq_length]``, i.e. the date of its target row.
        split_ratio: Share of samples assigned to the training set.
        seq_length: Window length used to build ``X``. When omitted it is
            inferred as ``len(dates) - len(X)``, which holds when ``dates``
            covers every row the windows were cut from.

    Returns:
        ``(X_train, X_test, y_train, y_test, train_dates, test_dates)``.
    """
    if seq_length is None:
        # Windowing n rows with length L yields n - L samples, so the offset
        # between the row dates and the samples is the window length.
        seq_length = len(dates) - len(X)

    train_size = int(len(X) * split_ratio)
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    # Shift by seq_length so each date lines up with the target of its sample.
    first_test_row = train_size + seq_length
    train_dates = dates[seq_length:first_test_row]
    test_dates = dates[first_test_row:]
    return X_train, X_test, y_train, y_test, train_dates, test_dates


# **LSTM Model**

In [None]:
def lstm_1(X_train, y_train):
    """Build, compile and train a two-layer recurrent regressor.

    The network is a bidirectional LSTM (returning the full sequence) followed
    by a plain LSTM and a single-unit dense output. It is trained for 100
    epochs with batch size 32 using Adam (lr 0.001, gradient norm clipped at
    1.0) and mean squared error.

    Args:
        X_train: Training windows shaped (samples, timesteps, features). A 2-D
            (samples, timesteps) array is treated as univariate and given a
            trailing feature axis.
        y_train: Training targets aligned with ``X_train``.

    Returns:
        Tuple ``(model, history)``: the fitted model and the ``History``
        object returned by ``model.fit``.
    """
    X_train = np.asarray(X_train)
    if X_train.ndim == 2:
        # LSTM layers need an explicit feature axis.
        X_train = X_train[..., np.newaxis]

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, clipnorm=1.0)

    model = Sequential([
        # Take (timesteps, features) from the data rather than a fixed feature
        # count, and declare it on the wrapper, which is the model's first layer.
        Bidirectional(
            LSTM(units=50, return_sequences=True),
            input_shape=X_train.shape[1:],
        ),
        LSTM(units=50),
        Dense(1),
    ])

    model.compile(
        optimizer=optimizer,
        loss='mean_squared_error',
    )

    history = model.fit(
        X_train, y_train,
        epochs=100,
        batch_size=32,
        verbose=1
    )

    return model, history

In [None]:
def lstm_2(input_shape, lstm_units=50):
    """Build and compile a deeper bidirectional-LSTM regressor (untrained).

    Architecture: BiLSTM(lstm_units, full sequence) -> Dropout(0.3) ->
    BiLSTM(2 * lstm_units, last state only) -> Dropout(0.3) -> Dense(32, relu)
    -> Dropout(0.2) -> Dense(16, relu) -> Dense(1).

    Args:
        input_shape: ``(timesteps, features)`` of one input window.
        lstm_units: Units of the first recurrent layer; the second uses twice
            as many.

    Returns:
        A compiled Keras model (Adam, lr 0.001, clipnorm 1.0, MSE loss) that
        outputs one value per input window.
    """
    model = Sequential([
        Bidirectional(LSTM(lstm_units, return_sequences=True), input_shape=input_shape),
        Dropout(0.3),
        # The last recurrent layer returns only its final state, so the dense
        # head produces (batch, 1) instead of one prediction per timestep.
        Bidirectional(LSTM(lstm_units * 2)),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dropout(0.2),
        Dense(16, activation='relu'),
        Dense(1)
    ])

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, clipnorm=1.0)

    model.compile(
        optimizer=optimizer,
        loss='mean_squared_error',
    )

    return model

In [None]:
def lstm_attention_model(input_shape, lstm_units=50):
    """Build and compile an LSTM regressor with dot-product self-attention.

    Architecture: Input -> LSTM(lstm_units, full sequence) -> Attention over
    the LSTM states (used as both query and value) -> average over time ->
    Dense(1).

    Args:
        input_shape: ``(timesteps, features)`` of one input window.
        lstm_units: Number of units in the LSTM layer.

    Returns:
        A compiled Keras model (Adam, lr 0.001, clipnorm 1.0, MSE loss) that
        outputs one value per input window.
    """
    # The Input tensor is used whole; slicing it would cut along the batch axis.
    inputs = layers.Input(shape=input_shape)

    # Attention needs every timestep's hidden state, not just the last one.
    lstm_out = LSTM(lstm_units, return_sequences=True)(inputs)

    # Self-attention: each timestep attends over all LSTM states.
    context = Attention()([lstm_out, lstm_out])

    # Collapse the time axis so the head emits a single value per window.
    pooled = layers.GlobalAveragePooling1D()(context)
    outputs = Dense(1)(pooled)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, clipnorm=1.0)

    model.compile(
        optimizer=optimizer,
        loss='mean_squared_error',
    )

    return model


# **Regression Model**

In [None]:
# Add the previous 1, 2 and 3 'Close' values as lag features, then drop every
# row containing NaN (the first three rows have no complete lag history).
# NOTE(review): `data` is not assigned at notebook level in any cell visible
# above; this cell expects a DataFrame with a 'Close' column — confirm its origin.
# NOTE(review): re-running this cell recomputes the lags on the already-trimmed
# frame and drops further rows, so it is not idempotent.
data['Lag_1'] = data['Close'].shift(1)
data['Lag_2'] = data['Close'].shift(2)
data['Lag_3'] = data['Close'].shift(3)
data = data.dropna()

In [None]:
# Features are the three lag columns; the target is the same-row 'Close'.
X_reg = data[['Lag_1', 'Lag_2', 'Lag_3']]
y_reg = data['Close']
# Positional (chronological) split at `train_size` rows.
# NOTE(review): `train_size` is only a local inside train_test_split in the
# cells visible above — confirm where the notebook-level value is set.
X_train_reg, X_test_reg = X_reg[:train_size], X_reg[train_size:]
y_train_reg, y_test_reg = y_reg[:train_size], y_reg[train_size:]

In [None]:
# Fit an ordinary linear regression of 'Close' on its three lags.
linear_model = LinearRegression()
linear_model.fit(X_train_reg, y_train_reg)

# **Prediction**

In [None]:
# Check the test-window array shape before reshaping it for the LSTM.
# NOTE(review): `X_test` is not assigned at notebook level in any cell visible above.
print("X_test shape:", X_test.shape)


X_test shape: (21, 30)


In [None]:
# Add a trailing axis of size 1 so the input is 3-D, predict, then map the
# predictions back through `scaler`.
# NOTE(review): `lstm_model` and `scaler` are not assigned at notebook level in
# any cell visible above — confirm which model/scaler are intended here.
X_test_lstm = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
lstm_predictions = lstm_model.predict(X_test_lstm)
lstm_predictions = scaler.inverse_transform(lstm_predictions)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 333ms/step


In [None]:
# Predict with the lag-based linear model and reshape to a column before
# passing the result through `scaler.inverse_transform`.
# NOTE(review): the regression is fitted on data['Close'] directly; if that
# column holds unscaled prices, inverse-transforming these predictions would
# distort them — confirm the units of `data['Close']`.
lin_predictions = linear_model.predict(X_test_reg)
lin_predictions = scaler.inverse_transform(lin_predictions.reshape(-1, 1))

In [None]:
# Truncate both prediction arrays to the shorter length so they can be combined.
# NOTE(review): this keeps the leading entries of each array; it presumably
# assumes both start on the same date — verify the two test sets are aligned.
min_length = min(len(lstm_predictions), len(lin_predictions))
lstm_predictions = lstm_predictions[:min_length]
lin_predictions = lin_predictions[:min_length]

In [None]:
# Blend the two forecasts as a fixed weighted average: 70% LSTM, 30% linear regression.
hybrid_predictions = (0.7 * lstm_predictions) + (0.3 * lin_predictions)