# **Import Module**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import tensorflow as tf
import math
import random

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM, Dropout, Bidirectional, GRU, Attention, Reshape
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf
from tensorflow.keras import layers

# Suppress all Python warnings for the rest of the session.
warnings.filterwarnings('ignore')

# Seed the Python, NumPy and TensorFlow random generators with one shared value.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# **Download Data**

In [None]:
!wget --no-check-certificate https://raw.githubusercontent.com/rioooranteai/data-analytics---modeling/main/Apple%20Stock/Dataset/apple_stock_data.csv -O /content/apple_stock_data.csv

--2025-03-19 17:50:38--  https://raw.githubusercontent.com/rioooranteai/data-analytics---modeling/main/Apple%20Stock/Dataset/apple_stock_data.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.108.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 31516 (31K) [text/plain]
Saving to: ‘/content/apple_stock_data.csv’


2025-03-19 17:50:41 (2.87 MB/s) - ‘/content/apple_stock_data.csv’ saved [31516/31516]



# **Read Data**

In [None]:
# Load the downloaded CSV into a DataFrame and display its first rows.
df = pd.read_csv('/content/apple_stock_data.csv')
df.head()

Unnamed: 0,Date,Adj Close,Close,High,Low,Open,Volume
0,2023-11-02 00:00:00+00:00,176.665985,177.570007,177.779999,175.460007,175.520004,77334800
1,2023-11-03 00:00:00+00:00,175.750671,176.649994,176.820007,173.350006,174.240005,79763700
2,2023-11-06 00:00:00+00:00,178.31752,179.229996,179.429993,176.210007,176.380005,63841300
3,2023-11-07 00:00:00+00:00,180.894333,181.820007,182.440002,178.970001,179.179993,70530000
4,2023-11-08 00:00:00+00:00,181.958893,182.889999,183.449997,181.589996,182.350006,49340300


In [None]:
# Display the last rows of the frame.
df.tail()

Unnamed: 0,Date,Adj Close,Close,High,Low,Open,Volume
247,2024-10-28 00:00:00+00:00,233.399994,233.399994,234.729996,232.550003,233.320007,36087100
248,2024-10-29 00:00:00+00:00,233.669998,233.669998,234.330002,232.320007,233.100006,35417200
249,2024-10-30 00:00:00+00:00,230.100006,230.100006,233.470001,229.550003,232.610001,47070900
250,2024-10-31 00:00:00+00:00,225.910004,225.910004,229.830002,225.369995,229.339996,64370100
251,2024-11-01 00:00:00+00:00,222.910004,222.910004,225.350006,220.270004,220.970001,65242200


In [None]:
# Print column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 252 entries, 0 to 251
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       252 non-null    object 
 1   Adj Close  252 non-null    float64
 2   Close      252 non-null    float64
 3   High       252 non-null    float64
 4   Low        252 non-null    float64
 5   Open       252 non-null    float64
 6   Volume     252 non-null    int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 13.9+ KB


In [None]:
# Display summary statistics for the numeric columns.
df.describe()

Unnamed: 0,Adj Close,Close,High,Low,Open,Volume
count,252.0,252.0,252.0,252.0,252.0,252.0
mean,199.088202,199.454286,201.093056,197.608373,199.316032,58322140.0
std,21.511933,21.321567,21.571427,20.963034,21.365476,30257710.0
min,164.585999,165.0,166.399994,164.080002,165.350006,24048300.0
25%,182.254063,182.852501,184.617504,181.487499,182.777504,42788550.0
50%,192.370026,193.084999,194.399994,191.724998,192.989998,51748650.0
75%,221.697502,221.697502,224.149994,219.717499,221.635002,64974600.0
max,236.479996,236.479996,237.490005,234.449997,236.479996,318679900.0


# **Data Preprocessing**

In [None]:
def preprocess_data(df, feature_columns=['Close'], target_column='Close'):
    """Sort the frame by date and min-max scale the selected columns to [0, 1].

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Date' column plus ``feature_columns`` and
        ``target_column``. The caller's frame is not modified.
    feature_columns : list of str
        Columns scaled together into the returned feature array.
    target_column : str
        Column used to fit the separate target scaler.

    Returns
    -------
    scaled_data : numpy.ndarray
        Scaled feature values, one row per input row, in date order.
    scaler : MinMaxScaler
        Scaler fitted on ``feature_columns``.
    target_scaler : MinMaxScaler
        Scaler fitted on ``target_column`` only, for inverse-transforming
        predictions of that column.
    dates : numpy.ndarray
        The parsed 'Date' values in sorted order.

    Notes
    -----
    NOTE(review): both scalers are fitted on every row passed in, so rows later
    held out for testing also influence the scaling range. Fit on the training
    rows only if a strict evaluation is required.
    """
    # Build a sorted copy with parsed dates instead of writing into the caller's frame.
    ordered = df.assign(Date=pd.to_datetime(df['Date'])).sort_values('Date')

    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(ordered[feature_columns].values)

    # Only the fitted state is needed here, so fit() replaces fit_transform().
    target_scaler = MinMaxScaler(feature_range=(0, 1))
    target_scaler.fit(ordered[[target_column]].values)

    return scaled_data, scaler, target_scaler, ordered['Date'].values


In [None]:
# Scale the 'Close' column (the function defaults) and keep both fitted scalers and the dates.
scaled_data, scaler, target_scaler, dates = preprocess_data(df)

# **Create Sequence: Method Recursive Forecasting**

In [None]:
def create_sequence(data, n_lookback=60, n_forecast=30):
    """Slice ``data`` into paired (history, future) windows.

    For each split point ``t`` the input window is ``data[t - n_lookback:t]``
    and the target window is ``data[t:t + n_forecast]``. Split points run from
    ``n_lookback`` up to the last position that still leaves a full target.

    Returns
    -------
    (X, Y) : tuple of numpy.ndarray
        Stacked input windows and stacked target windows.
    """
    split_points = range(n_lookback, len(data) - n_forecast + 1)

    history = [data[t - n_lookback:t] for t in split_points]
    future = [data[t:t + n_forecast] for t in split_points]

    return np.array(history), np.array(future)

In [None]:
n_lookback_multi = 60
n_forecast = 30

X_seq, Y_seq = create_sequence(scaled_data, n_lookback=n_lookback_multi, n_forecast=n_forecast)
print("Bentuk sequence multistep:", X_seq.shape, Y_seq.shape)

# The final window is the test sample; its target is the last n_forecast rows.
X_seq_test = X_seq[-1:]
Y_seq_test = Y_seq[-1:]

# Consecutive windows are shifted by one row, so the n_forecast - 1 windows just
# before the test window have targets that overlap the test target. Dropping the
# last n_forecast windows keeps every training target strictly before the test
# target and avoids leaking test values into training.
X_seq_train = X_seq[:-n_forecast]
Y_seq_train = Y_seq[:-n_forecast]
assert len(X_seq_train) > 0, "Not enough windows left for training after removing overlap with the test target"

print("Data training multistep:", X_seq_train.shape, Y_seq_train.shape)
print("Data test multistep:", X_seq_test.shape, Y_seq_test.shape)

Bentuk sequence multistep: (163, 60, 1) (163, 30, 1)
Data training multistep: (162, 60, 1) (162, 30, 1)
Data test multistep: (1, 60, 1) (1, 30, 1)


# **Create Sequence: Lag Features**

In [None]:
def lag_features(df, n_lookback):
    """Turn a one-column frame into a table of lagged inputs and a next-step target.

    Each output row holds ``n_lookback`` consecutive values (columns
    'Yt-<n_lookback>' ... 'Yt-1', oldest first) followed by 'Y', the value of
    the first column at the row immediately after that window.
    """
    n_samples = len(df) - n_lookback

    # One flattened window per sample, and the value that follows each window.
    windows = [df[start:start + n_lookback].values.flatten() for start in range(n_samples)]
    next_values = [df.iloc[start + n_lookback, 0] for start in range(n_samples)]

    lag_names = [f'Yt-{lag}' for lag in range(n_lookback, 0, -1)]
    inputs = pd.DataFrame(windows, columns=lag_names)
    target = pd.DataFrame(next_values, columns=['Y'])

    # Both frames are freshly built with a default index, so they align row by row.
    return pd.concat([inputs, target], axis=1)


In [None]:
n_lookback_lag = 60

# Wrap the scaled values in a one-column frame and build the lagged table from it.
df_close = pd.DataFrame(scaled_data, columns=['Close'])
df_lag = lag_features(df_close, n_lookback=n_lookback_lag)
print("Bentuk data lag features:", df_lag.shape)

# Chronological split: the final 30 rows are held out for testing.
df_lag_train, df_lag_test = df_lag[:-30], df_lag[-30:]
print("Data training lag features:", df_lag_train.shape)
print("Data test lag features:", df_lag_test.shape)

# Separate the lag columns (inputs) from the 'Y' column (target).
X_lag_train = df_lag_train.drop(columns='Y').to_numpy()
y_lag_train = df_lag_train['Y'].to_numpy()
X_lag_test = df_lag_test.drop(columns='Y').to_numpy()
y_lag_test = df_lag_test['Y'].to_numpy()

# Append a trailing axis of size 1: (samples, lags) -> (samples, lags, 1).
X_lag_train = X_lag_train[..., np.newaxis]
X_lag_test = X_lag_test[..., np.newaxis]
print("Bentuk input lag features:", X_lag_train.shape, X_lag_test.shape)

Bentuk data lag features: (192, 61)
Data training lag features: (162, 61)
Data test lag features: (30, 61)
Bentuk input lag features: (162, 60, 1) (30, 60, 1)


# **LSTM Model**

In [None]:
def lstm_multistep(X_train, y_train, n_forecast, n_lookback):
    """Build and fit a stacked LSTM that outputs all ``n_forecast`` steps at once.

    Parameters
    ----------
    X_train : array of shape (samples, n_lookback, 1)
        Input windows.
    y_train : array of shape (samples, n_forecast, 1)
        Target windows; matches the model's reshaped output.
    n_forecast : int
        Number of future steps produced per sample.
    n_lookback : int
        Length of each input window.

    Returns
    -------
    (model, history)
        The fitted Keras model and the History object returned by ``fit``.
    """
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, clipnorm=1.0)
    model = Sequential([
        # Declare the input shape explicitly rather than via `input_shape` on the
        # LSTM nested inside Bidirectional.
        tf.keras.Input(shape=(n_lookback, 1)),
        Bidirectional(LSTM(units=50, return_sequences=True)),
        Dropout(0.2),
        # Collapse the time axis here. With return_sequences=True the Dense layer
        # emits (n_lookback, n_forecast) per sample and Reshape((n_forecast, 1))
        # raises "total size of the tensor must be unchanged".
        LSTM(units=25, return_sequences=False),
        Dropout(0.2),
        Dense(n_forecast),
        Reshape((n_forecast, 1))
    ])
    model.compile(optimizer=optimizer, loss='mean_squared_error')

    # Stop when validation loss stalls and restore the best weights seen.
    early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=10,
        min_lr=1e-6,
        mode='min',
        verbose=1
    )

    history = model.fit(
        X_train, y_train,
        epochs=50,
        batch_size=32,
        validation_split=0.1,
        verbose=1,
        callbacks=[early_stop, reduce_lr]
    )
    return model, history

In [None]:
def lstm_lag_features(X_train, y_train):
    """Build and fit a stacked LSTM that predicts a single next value per sample.

    Parameters
    ----------
    X_train : array of shape (samples, n_lags, 1)
        Lagged input windows; ``X_train.shape[1]`` sets the model's input length.
    y_train : array
        One target value per sample.

    Returns
    -------
    (model, history)
        The fitted Keras model and the History object returned by ``fit``.
    """
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, clipnorm=1.0)
    model = Sequential([
        # Declare the input shape explicitly rather than via `input_shape` on the
        # LSTM nested inside Bidirectional.
        tf.keras.Input(shape=(X_train.shape[1], 1)),
        Bidirectional(LSTM(units=50, return_sequences=True)),
        Dropout(0.2),
        LSTM(units=25, return_sequences=False),
        Dropout(0.2),
        Dense(1)
    ])
    model.compile(optimizer=optimizer, loss='mean_squared_error')

    # Stop when validation loss stalls and restore the best weights seen.
    early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=10,
        min_lr=1e-6,
        mode='min',
        verbose=1
    )

    history = model.fit(
        X_train, y_train,
        epochs=70,
        batch_size=32,
        validation_split=0.1,
        verbose=1,
        callbacks=[early_stop, reduce_lr]
    )
    return model, history

# **Train Model**

In [None]:
# Fit the multistep model on the training windows built earlier.
model_multi, history_multi = lstm_multistep(X_seq_train, Y_seq_train, n_forecast=n_forecast, n_lookback=n_lookback_multi)

Epoch 1/50


ValueError: The total size of the tensor must be unchanged. Received: input_shape=(60, 30), target_shape=(30, 1)

In [None]:
# Fit the one-step model on the lagged training inputs.
model_lag, history_lag = lstm_lag_features(X_lag_train, y_lag_train)

# **Evaluasi Model 1**

In [None]:
# Predict the held-out window and lay both series out as (n_forecast, 1) columns.
pred_multi = model_multi.predict(X_seq_test).reshape(n_forecast, 1)
actual_multi = Y_seq_test.reshape(n_forecast, 1)

# Undo the min-max scaling so the metrics are in the target column's original units.
pred_multi_inv, actual_multi_inv = (
    target_scaler.inverse_transform(series) for series in (pred_multi, actual_multi)
)

# Compute MSE, MAE and R2 with the same (actual, predicted) argument order.
mse_multi, mae_multi, r2_multi = (
    metric(actual_multi_inv, pred_multi_inv)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

print("Evaluasi Model Multistep Recursive:")
print("MSE   :", mse_multi)
print("MAE   :", mae_multi)
print("R2    :", r2_multi)

In [None]:
# Draw actual vs. predicted values over the forecast horizon on one axes.
fig, ax = plt.subplots(figsize=(10, 5))
forecast_days = range(1, n_forecast + 1)
ax.plot(forecast_days, actual_multi_inv, label='Aktual')
ax.plot(forecast_days, pred_multi_inv, label='Prediksi')
ax.set_title("Hasil Prediksi Multistep Recursive (Test 30 Hari)")
ax.set_xlabel("Hari")
ax.set_ylabel("Harga Penutupan (Asli)")
ax.legend()
plt.show()

# **Evaluasi Model 2**

In [None]:
# One-step predictions for every held-out row; targets become a single column.
pred_lag = model_lag.predict(X_lag_test)
actual_lag = y_lag_test[:, np.newaxis]

# Undo the min-max scaling so the metrics are in the target column's original units.
pred_lag_inv, actual_lag_inv = (
    target_scaler.inverse_transform(series) for series in (pred_lag, actual_lag)
)

# Compute MSE, MAE and R2 with the same (actual, predicted) argument order.
mse_lag, mae_lag, r2_lag = (
    metric(actual_lag_inv, pred_lag_inv)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

print("\nEvaluasi Model Lag Features:")
print("MSE   :", mse_lag)
print("MAE   :", mae_lag)
print("R2    :", r2_lag)

In [None]:
# Draw actual vs. predicted values for the held-out rows on one axes.
fig, ax = plt.subplots(figsize=(10, 5))
# Derive the x-axis from the data length so the plot stays valid if the test size changes.
test_days = range(1, len(actual_lag_inv) + 1)
ax.plot(test_days, actual_lag_inv, label='Aktual')
ax.plot(test_days, pred_lag_inv, label='Prediksi')
ax.set_title("Hasil Prediksi Lag Features (Test 30 Hari)")
ax.set_xlabel("Hari")
ax.set_ylabel("Harga Penutupan (Asli)")
ax.legend()
plt.show()