## M3 - Hyperparameter GridSearch

In [None]:
# pustaka manipulasi data array
import numpy as np

# pustaka manipulasi data frame
import pandas as pd
from pandas import concat
from pandas import DataFrame
from pandas import read_csv
from pandas import read_excel

import random as rm
import time as tm

# pustaka untuk visualisasi data
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter

# library normalize data with max-min algorithm
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# library algorithm lstm-rnn with keras
import tensorflow as tf
from keras.models import Sequential
from keras.layers import RNN
from keras.layers import LSTM
from keras.layers import GRU
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import TimeDistributed
from keras.layers import Bidirectional
from keras.optimizers import Adam, Adamax, RMSprop, SGD
from keras.layers import LeakyReLU

# pustaka untuk visualisasi acf dan pacf
import scipy.stats as sc
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.graphics.tsaplots import plot_acf

# pustaka untuk  evaluasi model prediksi
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error

### Config Models

In [None]:
# Seed the three random-number sources used by this notebook (Python's
# `random`, NumPy and TensorFlow) with the same fixed value.
rm.seed(1234)
np.random.seed(1234)
tf.random.set_seed(1234)

In [None]:
# Label of the algorithm under study.
# NOTE(review): `algorithm` is not read anywhere else in the visible notebook;
# get_model() always builds Bidirectional LSTM layers regardless of this value.
algorithm = "LSTM-RNN"

In [None]:
# # Setting Algorithm
# algorithm = "GRU-RNN"

### 1. Akuisisi Data

In [None]:
# Start of the wall-clock measurement; the elapsed time is reported at the end
# of the notebook.
start = tm.time()

In [None]:
# Load the dataset from CSV, parsing the `acq_date` column as dates.
dataset = pd.read_csv("../source-code/dataset/dataset_enso.csv", parse_dates=["acq_date"])

In [None]:
# show the dataset metadata (columns, dtypes, non-null counts)
dataset.info()

In [None]:
print(dataset.head())

### 2. Praproses Data

- Seleksi Fitur

In [None]:
# Feature selection when hotspot is modelled as influenced by ENSO:
# keep `hotspot` (first column) plus the std/anom columns of SST, ONI and SOI,
# then convert the selection to a NumPy array.
data = dataset[["hotspot", "sst_std", "sst_anom", "oni_std", "oni_anom", "soi_std", "soi_anom"]]
data = data.values

# # Alternative: keep only the anomaly columns alongside hotspot
# data = dataset[["hotspot", "sst_anom", "oni_anom", "soi_anom"]]
# data = data.values

In [None]:
# preview the first five rows of the selected features, rounded to 7 decimals
np.round(data[:5],7)

In [None]:
# check the dimensions of the selected-feature array (not yet normalized here)
data.shape

- Normalisasi Data

In [None]:
# Min-max scale every feature column into the range [-1, 1].
# NOTE(review): the scaler is fitted on the full `data` array, i.e. before the
# train/test split performed later in the notebook, so the test period
# contributes to the min/max used for scaling the training rows — confirm this
# is intended.
scaler = MinMaxScaler(feature_range=(-1, 1))
scaled = scaler.fit_transform(np.array(data))

In [None]:
# preview the first five rows of the scaled array (the whole dataset, not only
# the training split), rounded to 4 decimals
np.round(scaled[:5],4)

In [None]:
# check the dimensions of the normalized array
scaled.shape

In [None]:
# create the figure and axes
fig, ax = plt.subplots(figsize = (10,5))

# time-series plot of the first column of `scaled` (hotspot, per the feature
# order chosen above) against the acquisition date
ax.plot(dataset["acq_date"], np.array(scaled[:,:1]), color="tab:blue", label="Hotspot di Sumatera Selatan 2001 - 2023", linewidth=2.5)

# title and axis labels are left empty; only the legend and the grid are shown
ax.set_title("", fontsize=14)
ax.set_xlabel("", fontsize=12)
ax.set_ylabel("", fontsize=12)
ax.legend(loc="upper left")
ax.grid(True)

# render the plot
plt.show()

### 3. Pembagian Data

In [None]:
# number of leading rows used for training
train_size = 216

# training split: the first `train_size` rows of the scaled array, all columns
train_data = scaled[0:train_size,:]

# preview the first five training rows, rounded to 4 decimals
np.round(train_data[:5],4)

In [None]:
train_data.shape

In [None]:
# intended number of test rows
# NOTE(review): `test_size` is not used by the slice below (nor anywhere else
# in the visible notebook); the test split is simply every row after the
# training rows.
test_size = 60

# test split: all rows from index `train_size` to the end, all columns
test_data = scaled[train_size:len(scaled),:]

# preview the first five test rows, rounded to 4 decimals
np.round(test_data[:5],4)

In [None]:
test_data.shape

In [None]:
# create the figure and axes
fig, ax = plt.subplots(figsize = (10,5))

# plot the first scaled column of the train and test splits.
# The x values come from column 0 of `dataset`, selected by position —
# presumably `acq_date`; verify the CSV column order.
ax.plot(dataset.iloc[0:len(train_data),0], train_data[:,:1], color="tab:blue", label="Data Train", linewidth=2.5)
ax.plot(dataset.iloc[len(train_data):len(dataset),0], test_data[:,:1], color="tab:red", label="Data Test", linewidth=2.5)

# title and axis labels are left empty; only the legend and the grid are shown
ax.set_title("", fontsize=14)
ax.set_xlabel("", fontsize=12)
ax.set_ylabel("", fontsize=12)
ax.legend(loc="best")
ax.grid(True)

# render the time-series plot
plt.show()

### 4. Supervised Learning

In [None]:
# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Reframe a time series as a supervised-learning table of shifted columns.

    Each variable of the input is repeated once per lag (``t-n_in`` ... ``t-1``)
    and once per forecast step (``t`` ... ``t+n_out-1``), and the shifted copies
    are placed side by side.

    Parameters
    ----------
    data : array-like
        Anything ``pandas.DataFrame`` accepts: a 2-D array (rows = time steps,
        columns = variables), a list of rows, a flat list, a 1-D array or a
        Series. One-dimensional input is treated as a single variable.
    n_in : int, default 1
        Number of lagged observations used as input columns.
    n_out : int, default 1
        Number of observations (starting at ``t``) used as output columns.
    dropnan : bool, default True
        Drop the rows that contain NaN introduced by the shifting.

    Returns
    -------
    pandas.DataFrame
        Columns are named ``var<j>(t-<i>)``, ``var<j>(t)`` and ``var<j>(t+<i>)``
        with ``j`` counting variables from 1.
    """
    # Build the frame first and take the variable count from it, so the number
    # of generated names always matches the number of columns (a list of rows
    # or a 1-D array previously produced a mismatch or an IndexError).
    df = DataFrame(data)
    n_vars = df.shape[1]
    cols, names = [], []

    # input sequence (t-n, ... t-1)
    for lag in range(n_in, 0, -1):
        cols.append(df.shift(lag))
        names += [('var%d(t-%d)' % (j + 1, lag)) for j in range(n_vars)]

    # forecast sequence (t, t+1, ... t+n)
    for step in range(n_out):
        cols.append(df.shift(-step))
        if step == 0:
            names += [('var%d(t)' % (j + 1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j + 1, step)) for j in range(n_vars)]

    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names

    # drop rows with NaN values created at the edges by the shifts
    if dropnan:
        agg.dropna(inplace=True)

    return agg

- hasil supervised learning data train

In [None]:
# reframe the training split with one lag step as input and the current step
# as output (n_in=1, n_out=1)
reframed_train = series_to_supervised(train_data, 1, 1)

In [None]:
reframed_train.head()

In [None]:
# When only the SST, ONI and SOI anomaly columns are used:
# # drop columns we don't want to predict
# reframed_train.drop(reframed_train.columns[[5,6,7]], axis=1, inplace=True)

# When all parameters are used (7 variables -> columns 0-6 are the lagged
# inputs, column 7 is var1(t)): drop the remaining (t) columns 8-13 so that
# var1(t) is the only output column left.
reframed_train.drop(reframed_train.columns[[8,9,10,11,12,13]], axis=1, inplace=True)

In [None]:
reframed_train.head(5)

In [None]:
# split into inputs (every column except the last) and output (the last column)
trainX, trainY = reframed_train.iloc[:, :-1], reframed_train.iloc[:, -1]

In [None]:
# view a dimension dataset after supervised learning
print(trainX.shape, trainY.shape)

- hasil supervised learning data test

In [None]:
# reframe the test split with one lag step as input and the current step as
# output (n_in=1, n_out=1). The frame is built from `test_data` alone, so its
# first row has no lag value and is removed by the default dropnan=True.
reframed_test = series_to_supervised(test_data, 1, 1)

In [None]:
reframed_test.head(5)

In [None]:
# When all parameters are used (7 variables -> columns 0-6 are the lagged
# inputs, column 7 is var1(t)): drop the remaining (t) columns 8-13 so that
# var1(t) is the only output column left.
reframed_test.drop(reframed_test.columns[[8,9,10,11,12,13]], axis=1, inplace=True)

In [None]:
reframed_test.head(5)

In [None]:
# split into inputs (every column except the last) and output (the last column)
testX, testY = reframed_test.iloc[:, :-1], reframed_test.iloc[:, -1]

In [None]:
# view a dimension dataset after supervised learning
print(testX.shape, testY.shape)

- Check the training data produced by the supervised-learning step

In [None]:
# wrap the training inputs and output in DataFrames for inspection
temp_trainX = pd.DataFrame(trainX)
temp_trainY = pd.DataFrame(trainY)

In [None]:
# put inputs and output side by side again and show the frame's metadata
hasil_train = pd.concat([temp_trainX, temp_trainY], axis=1)
hasil_train.info()

In [None]:
hasil_train.head()

- Check the test data produced by the supervised-learning step

In [None]:
# wrap the test inputs and output in DataFrames for inspection
temp_testX = pd.DataFrame(testX)
temp_testY = pd.DataFrame(testY)

In [None]:
# put inputs and output side by side again and show the frame's metadata
hasil_test = pd.concat([temp_testX, temp_testY], axis=1)
hasil_test.info()

In [None]:
hasil_test.head()

- Reshape the input to (samples, time steps, features)

In [None]:
# reshape the training inputs to 3-D: (samples, 1 time step, features)
trainX = np.reshape(np.array(trainX), (trainX.shape[0], 1, trainX.shape[1]))

In [None]:
print(trainX.shape, trainY.shape)

In [None]:
# reshape the test inputs to 3-D: (samples, 1 time step, features)
testX = np.reshape(np.array(testX), (testX.shape[0], 1, testX.shape[1]))

In [None]:
print(testX.shape, testY.shape)

### 5. Hyperparameter GridSearchCV LSTM-RNN

In [None]:
from itertools import product
from scikeras.wrappers import KerasRegressor
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV

In [None]:
# function models
def get_model(activation='selu', optimizer='sgd', dropout_rate=0.15):
    """Build and compile the stacked bidirectional LSTM network.

    The network is three Bidirectional LSTM layers of 10 units each, a Dropout
    layer and a single-unit Dense output. The input shape is read from the
    global ``trainX`` (its second and third dimensions).

    Parameters
    ----------
    activation : str
        Activation passed to every LSTM layer.
    optimizer : str
        Optimizer passed to ``compile``.
    dropout_rate : float
        Rate of the Dropout layer placed before the output layer.

    Returns
    -------
    The compiled Sequential model (loss "mae"; MAE, MSE and MAPE as metrics).
    """
    # start every build from a fresh Keras session
    tf.keras.backend.clear_session()

    # (time steps, features) of one input sample
    window_shape = (trainX.shape[1], trainX.shape[2])

    network = Sequential()

    # stacked bidirectional LSTM layers; only the last one collapses the sequence
    network.add(Bidirectional(LSTM(10, activation=activation, return_sequences=True), input_shape=window_shape))
    network.add(Bidirectional(LSTM(10, activation=activation, return_sequences=True)))
    network.add(Bidirectional(LSTM(10, activation=activation, return_sequences=False)))
    network.add(Dropout(dropout_rate))

    # single-value regression output
    network.add(Dense(1))

    # error metrics tracked in addition to the loss
    tracked_metrics = [
        tf.keras.metrics.MeanAbsoluteError(),
        tf.keras.metrics.MeanSquaredError(),
        tf.keras.metrics.MeanAbsolutePercentageError(),
    ]
    network.compile(optimizer=optimizer, loss="mae", metrics=tracked_metrics)

    return network

In [None]:
# Wrap the model-building function in a SciKeras KerasRegressor so it can be
# used as the estimator of GridSearchCV.
grid_model = KerasRegressor(model=get_model, verbose=1)

In [None]:
# Search space for the grid search: 4 x 4 x 5 x 5 x 1 = 400 combinations.
# The `model__` keys carry the same names as get_model()'s parameters
# (presumably routed to it by SciKeras — see the SciKeras docs); `batch_size`
# and `epochs` have no such prefix.
param_grid = {
    'model__activation': ['relu', 'selu', 'elu', 'softplus'],
    'model__optimizer': ['adam', 'adamax', 'rmsprop', 'sgd'],
    'model__dropout_rate': [0.05, 0.1, 0.15, 0.2, 0.25],
    'batch_size': [2, 4, 8, 16, 32],
    'epochs': [1500],
}

In [None]:
param_grid

In [None]:
# count the hyperparameter combinations (Cartesian product of all value lists)
total_combinations = len(list(product(*param_grid.values())))
print("Total kombinasi:", total_combinations)

In [None]:
# Configure the grid search: 2-fold cross-validation, scored by negated RMSE
# (higher is better), with n_jobs=-1.
# NOTE(review): an integer `cv` presumably yields scikit-learn's default
# unshuffled K-fold for a regressor rather than a time-series-aware splitter —
# confirm this is acceptable for temporally ordered data.
# NOTE(review): n_jobs=-1 presumably builds and trains several Keras models in
# parallel worker processes — confirm memory/GPU usage is acceptable.
grid_search = GridSearchCV(estimator=grid_model, param_grid=param_grid, n_jobs=-1, cv=2, verbose=10, scoring='neg_root_mean_squared_error')

In [51]:
# Disabled alternative: run the fit inside an explicit joblib "loky" backend.
# # from joblib import Parallel, delayed
# from joblib import parallel_backend

# with parallel_backend("loky"):
#   grid_result = grid_search.fit(trainX, trainY)

# run the grid search on the training inputs and target
grid_result = grid_search.fit(trainX, trainY)

- cek hasil gridsearchCV

In [None]:
# Summarize results: the first value is the best mean cross-validated score
# (negated RMSE, so closer to zero is better), followed by the hyperparameters
# that achieved it.
print("Best score: %f using %s\n" % (grid_search.best_score_, grid_search.best_params_))

In [None]:
# build a table with one row per hyperparameter combination and its mean
# cross-validated test score in the `score` column
result = pd.concat([pd.DataFrame(grid_search.cv_results_["params"]),pd.DataFrame(grid_search.cv_results_["mean_test_score"], columns=["score"])],axis=1)

In [None]:
# sort from highest to lowest score (scores are negated RMSE, so the best
# combination comes first) and display the table
result = result.sort_values(by="score", ascending=False)
result

In [None]:
# write the sorted grid-search table to CSV (without the index column)
# csv_file = '/content/drive/MyDrive/BOPTN_2024/best_params_best_model_gru.csv'
csv_file = "M3_hasil_gridsearch.csv"
result.to_csv(csv_file, index=False)

In [None]:
# Destination for the single best hyperparameter combination. It needs its own
# file name: reusing the previous `csv_file` value would overwrite the full
# grid-search table saved to "M3_hasil_gridsearch.csv".
# csv_file = '/content/drive/MyDrive/BOPTN_2024/best_params_best_model_gru.csv'
csv_file = "M3_best_param.csv"

# one-row dataframe holding the best hyperparameters
df = pd.DataFrame([grid_search.best_params_])

# write the dataframe to CSV (without the index column)
df.to_csv(csv_file, index=False)

### 6. Evaluasi Model

In [None]:
# end of the wall-clock measurement
end = tm.time()

In [None]:
# split the elapsed seconds into whole hours, whole minutes and remaining seconds
hours, rem = divmod(end-start, 3600)
minutes, seconds = divmod(rem, 60)

In [None]:
# print the computation time as HH:MM:SS.ss
print("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))