In [15]:
import os
import math
import random
import statistics
import numpy as np
import pandas as pd
from google.colab import drive

# Lib TF
import tensorflow as tf
from tensorflow import keras
from keras import Input
from keras.layers import GRU, Dense, Dropout, Input
from keras.models import Sequential
from keras.optimizers import Adam, Adamax
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import load_model
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Lib for plot
import plotly.express as px
import matplotlib.pyplot as plt

# Lib for learn
import sklearn
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

# Random seed : to set random value
random.seed(123)
np.random.seed(123)
tf.random.set_seed(123)

color_pal = ["#F8766D", "#D39200", "#93AA00", "#00BA38", "#00C19F", "#00B9E3", "#619CFF", "#DB72FB"]

# Pre - processing

In [5]:
raw_data = pd.read_csv('/content/data_skripsi_fix.csv', delimiter=',')
raw_data['Tanggal'] = pd.to_datetime(raw_data['Tanggal'])
date_range = pd.date_range(start=raw_data['Tanggal'].min(), end=raw_data['Tanggal'].max())
complete_data = pd.DataFrame({'Tanggal': date_range})
merged_data = pd.merge(complete_data, raw_data, on='Tanggal', how='left')

raw_data = merged_data

In [6]:
raw_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2311 entries, 0 to 2310
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Tanggal    2311 non-null   datetime64[ns]
 1   Penutupan  1534 non-null   object        
dtypes: datetime64[ns](1), object(1)
memory usage: 36.2+ KB


In [7]:
# Total missing value
missing_percentage = raw_data.isna().sum()

print("Percentage of missing values in each column:")
print(missing_percentage)

Percentage of missing values in each column:
Tanggal        0
Penutupan    777
dtype: int64


In [8]:
#Percentage of missing value
missing_percentage = raw_data.isna().mean() * 100

print("Percentage of missing values in each column:")
print(missing_percentage)

Percentage of missing values in each column:
Tanggal       0.000000
Penutupan    33.621809
dtype: float64


In [9]:
raw_data.dropna(inplace=True)

In [10]:
raw_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1534 entries, 0 to 2310
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Tanggal    1534 non-null   datetime64[ns]
 1   Penutupan  1534 non-null   object        
dtypes: datetime64[ns](1), object(1)
memory usage: 36.0+ KB


In [11]:
raw_data['Penutupan'] = raw_data['Penutupan'].str.replace('.', '')
raw_data['Penutupan'] = raw_data['Penutupan'].str.replace(',', '.')
raw_data['Penutupan'] = raw_data['Penutupan'].astype(float)

# EDA

In [12]:
raw_data['Penutupan'].describe().round(3).T

Unnamed: 0,Penutupan
count,1534.0
mean,6328.078
std,652.602
min,3937.63
25%,6012.105
50%,6369.165
75%,6839.448
max,7433.31


In [13]:
index_min = raw_data['Penutupan'].idxmin()  # Mengambil index dari nilai terkecil
min_ihsg = raw_data.loc[index_min]
min_ihsg

Unnamed: 0,812
Tanggal,2020-03-24 00:00:00
Penutupan,3937.63


In [14]:
index_max = raw_data['Penutupan'].idxmax()  # Mengambil index dari nilai terkecil
max_ihsg = raw_data.loc[index_max]
max_ihsg

Unnamed: 0,2263
Tanggal,2024-03-14 00:00:00
Penutupan,7433.31


In [16]:
fig = px.line(raw_data, x='Tanggal', y='Penutupan', hover_data=['Tanggal'])
fig.update_traces(line_color="#F8766D")
fig.update_layout(title='<b>Indeks Harga Saham Gabungan</b>', title_x=0.5, xaxis_title='Tanggal', yaxis_title='Penutupan')
fig.show()

In [17]:
raw_df_year = raw_data

# Filter data untuk tahun tertentu
year_to_plot = 2021

# Assuming 'Tanggal' is a string, convert it to datetime
raw_df_year['Tanggal'] = pd.to_datetime(raw_df_year['Tanggal'])

# Extract the year from the 'Tanggal' column
raw_df_year['year'] = raw_df_year['Tanggal'].dt.year

# Now you can filter
df_year = raw_df_year[raw_df_year['year'] == year_to_plot]

In [18]:
fig = px.line(df_year, x='Tanggal', y='Penutupan', hover_data=['Tanggal'])
fig.update_traces(line=dict(color=color_pal[0])) # Change color of the line trace
fig.update_layout(title='<b>Indeks Harga Saham Gabungan</b>', title_x=0.5, xaxis_title='Tanggal', yaxis_title='Penutupan')
fig.show()

# Training Model

In [20]:
# Dataset for training

dataset = raw_data
scaler = MinMaxScaler()
data_test = scaler.fit_transform(dataset['Penutupan'].values.reshape(-1, 1))
dataset['Normalisasi'] = scaler.fit_transform(dataset['Penutupan'].values.reshape(-1, 1))

In [None]:
dataset_fix = dataset['Penutupan'].values

In [21]:
# Fungsi windowing

def create_windows(data, window_size):
  X, y = [], []
  for i in range(len(data) - window_size):
      a = data[i:i + window_size]
      X.append(a)  # Input window
      y.append(data[i + window_size])  # Target
  return np.array(X), np.array(y)

# Fungsi asrsitektur GRU
def create_model(units, lr, batch_size, optimizer):
    model = Sequential([
        Input(shape=[X_train.shape[1], X_train.shape[2]]),
        GRU(units, return_sequences=True),
        Dropout(0.4, seed=123),
        GRU(5),
        Dense(1)
    ])
    if optimizer == 'Adam':
        model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=lr),
                      metrics=['mean_absolute_percentage_error'])
    else:
        model.compile(loss='mean_squared_error', optimizer=Adamax(learning_rate=lr),
                      metrics=['mean_absolute_percentage_error'])
    return model

In [22]:
# Hyperparameter
param_grid = {
    'test_size': [0.1, 0.2, 0.3],
    'batch_size': [16, 32, 64],
    'lr': [0.01, 0.001],
    'units': [8, 16, 32],
    'optimizer': ['Adam', 'Adamax']
}

In [None]:
# Kombinasi hyperparameter
window_size = 30
param_combinations = list(ParameterGrid(param_grid))
result = []
losses = []

# Training model dengan seluruh kombinasi hyperparameter
for params in param_combinations:
    print(f"Training with params: {params}")
    test_size = params['test_size']
    train, test = train_test_split(dataset_fix, test_size=test_size, shuffle=False)

    train_scaled = scaler.fit_transform(train.reshape(-1, 1)).reshape(train.shape)
    test_scaled = scaler.fit_transform(test.reshape(-1, 1)).reshape(test.shape)

    X_train, y_train = create_windows(train_scaled, window_size)
    X_test, y_test = create_windows(test_scaled, window_size)

    X_train = np.reshape(X_train, (X_train.shape[0],X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0],X_test.shape[1], 1))

    model = create_model(
        params['units'], params['lr'],
        params['batch_size'], params['optimizer'])

    early_stopping = EarlyStopping(patience=8, monitor='val_loss', verbose=0)

    history = model.fit(X_train,
                        y_train,
                        validation_data=(X_test, y_test),
                        epochs=100,
                        batch_size=params['batch_size'],
                        shuffle=False,
                        callbacks=[early_stopping],
                        verbose=0)

    epoch_stopped = early_stopping.stopped_epoch

    model_path = os.path.join('/content/gdrive/MyDrive/Bismillah Skripsi/Model', f'best_model_{params["test_size"]}_{params["batch_size"]}_{params["lr"]}_{params["units"]}_{params["optimizer"]}.keras')
    model.save(model_path)

    mse, mape = model.evaluate(X_test, y_test, verbose=0)

    train_loss = history.history['loss']
    val_loss = history.history['val_loss']

    losses.append({'train_loss': train_loss, 'val_loss': val_loss})

    result.append({**params, "Epoch Stopped": epoch_stopped,
                   "Training Loss": train_loss[-1],
                   "Validation Loss": val_loss[-1], 'val_mape': mape})
    del model

In [None]:
# Menyimpan metrics hasil training dalam csv
result_df = pd.DataFrame(result)
result_df.to_csv('RESULT_GRU.csv', index=False)

# Model Evaluation

In [28]:
result = pd.read_csv('/content/RESULT_GRU.csv', delimiter=';')
result_df = pd.DataFrame(result)

In [29]:
result_df

Unnamed: 0,batch_size,lr,optimizer,test_size,units,Epoch Stopped,Training Loss,Validation Loss,val_mape
0,16,0.010,Adam,0.1,8,10,0.001275,0.005643,7.613397
1,16,0.010,Adam,0.1,16,3,0.004993,0.005017,7.213566
2,16,0.010,Adam,0.1,32,3,0.002325,0.001186,3.325508
3,16,0.010,Adam,0.2,8,6,0.002243,0.000699,2.710842
4,16,0.010,Adam,0.2,16,10,0.003743,0.000608,2.462563
...,...,...,...,...,...,...,...,...,...
103,64,0.001,Adamax,0.2,16,0,0.000499,0.000180,1.234196
104,64,0.001,Adamax,0.2,32,5,0.008773,0.010669,11.177448
105,64,0.001,Adamax,0.3,8,0,0.001630,0.002506,5.239655
106,64,0.001,Adamax,0.3,16,87,0.000612,0.000276,1.470263


In [30]:
adam_results = result_df[(result_df['optimizer'] == 'Adam') & (result_df['Epoch Stopped'] != 0)]
adamax_results = result_df[(result_df['optimizer'] == 'Adamax') & (result_df['Epoch Stopped'] != 0)]

In [31]:
best_adam_params = adam_results.loc[adam_results['Validation Loss'].idxmin()]
best_adamax_params = adamax_results.loc[adamax_results['Validation Loss'].idxmin()]

In [32]:
best_model_params = pd.concat([best_adam_params, best_adamax_params], axis=1)
best_model_params = best_model_params.T
best_model_params

Unnamed: 0,batch_size,lr,optimizer,test_size,units,Epoch Stopped,Training Loss,Validation Loss,val_mape
25,16,0.001,Adam,0.3,16,92,0.000472,0.000177,1.201517
50,32,0.01,Adamax,0.2,32,21,0.000583,0.000189,1.264686


In [33]:
adam = best_model_params[(best_model_params['optimizer'] == 'Adam')]
adamax = best_model_params[(best_model_params['optimizer'] == 'Adamax')]

In [34]:
adam_test_size = adam['test_size'].values
adamax_test_size = adamax['test_size'].values

adam_test_size = adam_test_size[0]
adamax_test_size = adamax_test_size[0]

In [None]:
# Format penamaan model 'best_model_(test_size)_(batxh_size)_(lr)_(unit)_(optimizer).keras'
# Load model Adam
model_adam = load_model('/content/gdrive/MyDrive/Bismillah Skripsi/Model/best_model_0.3_16_0.001_16_Adam.keras')

# Load model Adamax
model_adamax = load_model('/content/gdrive/MyDrive/Bismillah Skripsi/Model/best_model_0.2_32_0.01_32_Adamax.keras')

In [None]:
window_size = 5

train_adam, test_adam = train_test_split(dataset_fix, test_size=adam_test_size, shuffle=False)

train_scaled_adam = scaler.fit_transform(train_adam.reshape(-1, 1)).reshape(train_adam.shape)
test_scaled_adam = scaler.fit_transform(test_adam.reshape(-1, 1)).reshape(test_adam.shape)

X_train_adam, y_train_adam = create_windows(train_scaled_adam, window_size)
X_test_adam, y_test_adam = create_windows(test_scaled_adam, window_size)

X_train_adam = np.reshape(X_train_adam, (X_train_adam.shape[0],X_train_adam.shape[1], 1))
X_test_adam = np.reshape(X_test_adam, (X_test_adam.shape[0],X_test_adam.shape[1], 1))

# Cetak panjang masing-masing data
print("Panjang data X_train:", len(X_train_adam))
print("Panjang data X_test:", len(X_test_adam))
print("Panjang data y_train:", len(y_train_adam))
print("Panjang data y_test:", len(y_test_adam))

In [None]:
train_adamax, test_adamax = train_test_split(dataset_fix, test_size=adamax_test_size, shuffle=False)

train_scaled_adamax = scaler.fit_transform(train_adamax.reshape(-1, 1)).reshape(train_adamax.shape)
test_scaled_adamax = scaler.fit_transform(test_adamax.reshape(-1, 1)).reshape(test_adamax.shape)

X_train_adamax, y_train_adamax = create_windows(train_scaled_adamax, window_size)
X_test_adamax, y_test_adamax = create_windows(test_scaled_adamax, window_size)

X_train_adamax = np.reshape(X_train_adamax, (X_train_adamax.shape[0],X_train_adamax.shape[1], 1))
X_test_adamax = np.reshape(X_test_adamax, (X_test_adamax.shape[0],X_test_adamax.shape[1], 1))

# Cetak panjang masing-masing data
print("Panjang data X_train:", len(X_train_adamax))
print("Panjang data X_test:", len(X_test_adamax))
print("Panjang data y_train:", len(y_train_adamax))
print("Panjang data y_test:", len(y_test_adamax))

In [None]:
y_pred_adam = model_adam.predict(X_test_adam)
y_pred_adamax = model_adamax.predict(X_test_adamax)

In [None]:
# Cek mape denorm
# Kemablikan semua nilai yang dinormalisasi
y_adam_inverse = scaler.inverse_transform(y_test_adam.reshape(-1, 1))
y_adamax_inverse = scaler.inverse_transform(y_test_adamax.reshape(-1, 1))
y_pred_adam_inverse = scaler.inverse_transform(y_pred_adam)
y_pred_adamax_inverse = scaler.inverse_transform(y_pred_adamax)

mape_adam_denorm = mean_absolute_percentage_error(y_adam_inverse,y_pred_adam_inverse)
mape_adamax_denorm = mean_absolute_percentage_error(y_adamax_inverse,y_pred_adamax_inverse)

In [None]:
df_pred_adam = pd.DataFrame({'Tanggal': dataset.index[2*window_size + len(X_train_adam):]})
df_pred_adam = df_pred_adam.set_index('Tanggal')
df_pred_adam['Tanggal'] = dataset['Tanggal'].iloc[df_pred_adam.index]

# Adjust the index of df_pred_adam to match the length of y_test_adam
df_pred_adam = df_pred_adam.iloc[:len(y_test_adam)]

df_pred_adam['Y Norm'] = y_test_adam.flatten()
df_pred_adam['Y Denorm'] = y_adam_inverse.flatten()
df_pred_adam['Adam Norm'] = y_pred_adam.flatten()
df_pred_adam['Adam Denorm'] = y_pred_adam_inverse.flatten()
df_pred_adam.to_csv('df_pred_adam.csv', index=False)

In [None]:
fig = px.line(df_pred_adam, x=df_pred_adam['Tanggal'], y=['Y Denorm', 'Adam Denorm'])
fig.update_layout(title='<b>Perbandingan Nilai Aktual dan Prediksi Adam</b>', title_x=0.5,
                  xaxis_title='Tanggal', yaxis_title='Nilai Aktual')
fig.update_traces(
    line=dict(color=color_pal[0]),
    name='Nilai Aktual',
    selector=dict(name='Normalisasi')
)
fig.update_traces(
    line=dict(color=color_pal[1]),
    name='Prediksi Adam',
    selector=dict(name='Adam Denorm')
)
fig.show()

In [None]:
df_pred_adamax = pd.DataFrame({'Tanggal': dataset.index[2*window_size + len(X_train_adamax):]})
df_pred_adamax = df_pred_adamax.set_index('Tanggal')
df_pred_adamax['Tanggal'] = dataset['Tanggal'].iloc[df_pred_adamax.index]

# Adjust the index of df_pred_adamax to match the length of y_test_adamax
df_pred_adamax = df_pred_adamax.iloc[:len(y_test_adamax)]

df_pred_adamax['Y Norm'] = y_test_adamax.flatten()
df_pred_adamax['Y Denorm'] = y_adamax_inverse.flatten()
df_pred_adamax['Adamax Norm'] = y_pred_adamax.flatten()
df_pred_adamax['Adamax Denorm'] = y_pred_adamax_inverse.flatten()
df_pred_adamax.to_csv('df_pred_adamax.csv', index=False)

In [None]:
fig = px.line(df_pred_adamax, x=df_pred_adamax['Tanggal'], y=['Y Denorm', 'Adamax Denorm'])
fig.update_layout(title='<b>Perbandingan Nilai Aktual dan Prediksi Adamax</b>', title_x=0.5,
                  xaxis_title='Tanggal', yaxis_title='Nilai Aktual')
fig.update_traces(
    line=dict(color=color_pal[0]),
    name='Nilai Aktual',
    selector=dict(name='Normalisasi')
)
fig.update_traces(
    line=dict(color=color_pal[1]),
    name='Prediksi Adamax',
    selector=dict(name='Adamax Denorm')
)
fig.show()

In [None]:
print('MAPE Adam Denorm:', mape_adam_denorm)
print('MAPE Adamax Denorm:', mape_adamax_denorm)

# Predict

In [None]:
num_predictions = 30
last_timestep_data = X_test_adam[-1]
predictions = []

In [None]:
for _ in range(num_predictions):
  input_data = np.reshape(last_timestep_data, (1, last_timestep_data.shape[0], 1))

  predicted_value = model_adamax.predict(input_data)

  predictions.append(predicted_value[0][0])

  #last_timestep_data = np.append(last_timestep_data[1:], predicted_value)
  last_timestep_data = np.append(last_timestep_data[1:], predicted_value)

predictions = np.array(predictions)
predictions = scaler.inverse_transform(predictions.reshape(-1, 1))
forecast_df = pd.DataFrame({'Prediksi': predictions.flatten()})

In [None]:
predictions

In [None]:
forecast_df = pd.DataFrame({'Prediksi': predictions.flatten()},index=range(1, 31))

In [None]:
forecast_df

In [None]:
plt.plot(forecast_df['Prediksi'])

plt.xlabel("Index")
plt.ylabel("Prediksi")
plt.title("Line Plot of Prediksi")

plt.show()

In [None]:
forecast_df.to_csv('forecast_df.csv', index=False)

In [None]:
fig = px.line(forecast_df, x=forecast_df.index, y='Prediksi')
fig.update_layout(title='<b>Prediksi IHSG Selama 30 Periode</b>', title_x=0.5, xaxis_title='Periode', yaxis_title='Nilai Prediksi')
fig.show()