## SB-GRU-RNN + Hyperparameter GridSearch - Base Model Titik Panas

In [None]:
# pustaka manipulasi data array
import numpy as np

# pustaka manipulasi data frame
import pandas as pd
import random as rm
import time as tm

# pustaka untuk visualisasi data
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter

# library normalize data with max-min algorithm
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# library algorithm lstm-rnn with keras
import tensorflow as tf
from keras.models import Sequential
from keras.layers import RNN
from keras.layers import LSTM
from keras.layers import GRU
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import TimeDistributed
from keras.layers import Bidirectional
from keras.optimizers import Adam, Adamax, RMSprop, SGD
from keras.layers import LeakyReLU

# pustaka untuk visualisasi acf dan pacf
import scipy.stats as sc
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.graphics.tsaplots import plot_acf

# pustaka untuk  evaluasi model prediksi
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error

In [None]:
# Seed Python's random module, NumPy and TensorFlow with the same fixed value
# so that random draws are repeatable between runs.
rm.seed(1234)
np.random.seed(1234)
tf.random.set_seed(1234)

In [None]:
# # Setting Algorithm
# algorithm = "LSTM-RNN"

In [None]:
# Select the algorithm label for this run.
algorithm = "GRU-RNN"

### 1. Akuisisi Data

In [None]:
# Record the wall-clock start time for the computation-time measurement.
start = tm.time()

In [None]:
# Load the hotspot dataset from a local CSV file, parsing the acq_date column
# as datetimes.
dataset = pd.read_csv("../source-code/dataset/dataset_hotspot.csv", parse_dates=["acq_date"])

In [None]:
# Print a summary of the loaded frame (columns, dtypes, non-null counts).
dataset.info()

In [None]:
# Print the loaded dataset.
print(dataset)

In [None]:
# Create the figure and its single axes.
fig, ax = plt.subplots(figsize=(10, 5))

# Draw the raw hotspot counts against the acquisition date.
ax.plot(
    dataset["acq_date"],
    dataset["hotspot"],
    color="tab:blue",
    label="Hotspot di Sumatera Selatan 2001 - 2023",
    linewidth=2.5,
)

# Title and axis labels are set to empty strings; only the legend and grid
# are shown.
ax.set_title("", fontsize=14)
ax.set_xlabel("", fontsize=12)
ax.set_ylabel("", fontsize=12)
ax.grid(True)
ax.legend(loc="upper left")

# Render the figure.
plt.show()

### 2. PraProses Data

- seleksi fitur

In [None]:
# Keep only the "hotspot" column and convert it to a 2-D NumPy array
# (one row per observation, one column).
data = dataset.filter(items=["hotspot"]).to_numpy()

In [None]:
# Preview the first five selected values, rounded to 7 decimals.
np.round(data[:5],7)

In [None]:
# Check the shape of the selected feature array (this runs before normalization).
data.shape

- Normalisasi Data

In [None]:
# Min-max scale the hotspot values into the range [-1, 1].
# NOTE(review): the scaler is fitted on the whole series, including the rows
# that are later sliced off as test data — confirm this is intended.
scaler = MinMaxScaler(feature_range=(-1, 1))
scaled = scaler.fit(data).transform(data)

In [None]:
# Check the shape of the normalized array.
scaled.shape

- Hasil PraProses Data

In [None]:
# Create the figure and its single axes.
fig, ax = plt.subplots(figsize=(10, 5))

# Draw the normalized hotspot series against the acquisition date.
ax.plot(
    dataset["acq_date"],
    scaled,
    color="tab:blue",
    label="Hotspot di Sumatera Selatan 2001 - 2023",
    linewidth=2.5,
)

# Title and axis labels are set to empty strings; only the legend and grid
# are shown.
ax.set_title("", fontsize=14)
ax.set_xlabel("", fontsize=12)
ax.set_ylabel("", fontsize=12)
ax.grid(True)
ax.legend(loc="upper left")

# Render the figure.
plt.show()

### 3. Pembagian Data

In [None]:
# training data = years 2001 - 2018 = 216 observations
# test data     = years 2019 - 2023 = 60 observations

In [None]:
# Number of leading observations used for training.
train_size = 216

# The first train_size rows of the scaled series form the training split.
train_data = scaled[:train_size]

# Preview the first five training values, rounded to 7 decimals.
train_data[:5].round(7)

In [None]:
# Check the shape of the training split.
train_data.shape

In [None]:
# Nominal number of test observations (the slice below does not read it).
test_size = 60

# Every row after the training rows forms the test split.
test_data = scaled[train_size:]

# Preview the first five test values, rounded to 7 decimals.
test_data[:5].round(7)


In [None]:
# Check the shape of the test split.
test_data.shape

- cek hasil pembagian data

In [None]:
# Create the figure and its single axes.
fig, ax = plt.subplots(figsize=(10, 5))

# Plot both splits on one axis; the x values come from column 0 of `dataset`
# (presumably acq_date — confirm the CSV column order).
ax.plot(
    dataset.iloc[:len(train_data), 0],
    train_data,
    color="tab:blue",
    label="Data Train",
    linewidth=2.5,
)
ax.plot(
    dataset.iloc[len(train_data):, 0],
    test_data,
    color="tab:red",
    label="Data Test",
    linewidth=2.5,
)

# Title and axis labels are currently disabled:
# ax.set_title("Hotspot Sumsel Sensor MODIS 2001-2020", fontsize=14)
# ax.set_xlabel("Years", fontsize=12)
# ax.set_ylabel("Sum of hotspot", fontsize=12)
ax.grid(True)
ax.legend(loc="best")

# Render the figure.
plt.show()

### 5. Supervised Learning

In [None]:
# function for supervised learning
def create_dataset(look_back, dataset):
    """Turn a series into lagged (X, y) pairs for supervised learning.

    For every position ``i >= look_back`` the ``look_back`` values of column 0
    that precede ``i`` become one input window and the value at ``i`` becomes
    its target.

    Args:
        look_back: Number of past time steps per input window; must be >= 1.
        dataset: 2-D array-like of shape (n_rows, n_columns). Only column 0
            is read.

    Returns:
        Tuple ``(dataX, dataY)``. ``dataX`` has shape
        ``(n_samples, look_back)`` and ``dataY`` has shape ``(n_samples,)``,
        where ``n_samples = max(n_rows - look_back, 0)``.

    Raises:
        ValueError: If ``look_back`` is smaller than 1.
    """
    if look_back < 1:
        raise ValueError("look_back must be a positive integer")

    values = np.asarray(dataset)
    n_samples = max(len(values) - look_back, 0)

    # Too few rows for a single window: keep X two-dimensional so callers can
    # still read its second axis (e.g. when reshaping for the network).
    if n_samples == 0:
        return np.empty((0, look_back)), np.empty((0,))

    series = values[:, 0]

    # One window per target position; window i holds series[i-look_back:i].
    dataX = np.stack([series[i - look_back:i] for i in range(look_back, len(series))])
    dataY = series[look_back:].copy()

    return dataX, dataY

In [None]:
# Number of past time steps (lag) used as input for each prediction.
look_back = 1

In [None]:
# Build lagged input/target pairs from the training split and print their shapes.
x_train, y_train = create_dataset(look_back, train_data)
print(x_train.shape, y_train.shape)

In [None]:
# Build lagged input/target pairs from the test split and print their shapes.
x_test, y_test = create_dataset(look_back, test_data)
print(x_test.shape, y_test.shape)

In [None]:
# Append a trailing feature axis so the inputs are laid out as
# [samples, time steps, features].
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

- hasil supervised learning

In [None]:
# Print the final shapes of the training and test inputs.
print(x_train.shape, x_test.shape)

### 6. Hyperparameter GridSearchCV GRU-RNN

In [None]:
from itertools import product
from scikeras.wrappers import KerasRegressor
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV

In [None]:
# function models
def get_model(activation='selu', optimizer='sgd', dropout_rate=0.15):
    """Build and compile a stacked bidirectional recurrent regressor.

    The network has three bidirectional recurrent layers of 10 units each,
    a dropout layer and a single-unit dense output. The recurrent cell follows
    the notebook-level ``algorithm`` setting: GRU when that string contains
    "GRU", LSTM otherwise. Previously LSTM was hard-coded even though the
    notebook is configured for GRU.

    Args:
        activation: Activation used inside every recurrent layer.
        optimizer: Optimizer name or instance passed to ``model.compile``.
        dropout_rate: Dropout fraction applied after the last recurrent layer.

    Returns:
        The compiled Keras ``Sequential`` model (loss: MAE; tracked metrics:
        MAE, MSE and MAPE).
    """
    # Reset Keras global state before building a new model.
    # NOTE(review): when fits run concurrently in threads this may affect
    # models that are still training — confirm.
    tf.keras.backend.clear_session()

    # Pick the recurrent cell that matches the configured algorithm label.
    cell = GRU if "GRU" in str(algorithm).upper() else LSTM

    # The input window length is taken from the prepared training inputs.
    time_steps = x_train.shape[1]

    model = Sequential()

    # Stacked bidirectional layers; only the last one collapses the sequence.
    model.add(Bidirectional(cell(10, activation=activation, return_sequences=True), input_shape=(time_steps, 1)))
    model.add(Bidirectional(cell(10, activation=activation, return_sequences=True)))
    model.add(Bidirectional(cell(10, activation=activation, return_sequences=False)))
    model.add(Dropout(dropout_rate))

    # Single-value regression output.
    model.add(Dense(1))

    model.compile(
        optimizer=optimizer,
        loss="mae",
        metrics=[
            tf.keras.metrics.MeanAbsoluteError(),
            tf.keras.metrics.MeanSquaredError(),
            tf.keras.metrics.MeanAbsolutePercentageError(),
        ]
    )

    return model

In [None]:
# Wrap get_model in a SciKeras KerasRegressor so that GridSearchCV can treat
# it as a scikit-learn estimator.
grid_model = KerasRegressor(model=get_model, verbose=1)

In [None]:
# Search space for the grid search: 4 activations x 4 optimizers with a single
# dropout rate, batch size and epoch count, i.e. 16 combinations.
# Keys prefixed with "model__" name get_model() arguments; the others are
# presumably fit settings of the KerasRegressor wrapper — confirm against SciKeras.
param_grid = {
    'model__activation': ['relu', 'selu', 'elu', 'softplus'],
    'model__optimizer': ['adam', 'adamax', 'rmsprop', 'sgd'],
    'model__dropout_rate': [0.05],
    'batch_size': [2],
    'epochs': [1500],
}

In [None]:
# Display the search grid.
param_grid

In [None]:
# Count the hyperparameter combinations by walking the Cartesian product of
# all value lists.
total_combinations = sum(1 for _ in product(*param_grid.values()))
print("Total kombinasi:", total_combinations)

In [None]:
# Configure the exhaustive grid search: 2-fold cross-validation, n_jobs=-1,
# scored by negative RMSE (so a higher score is better).
# NOTE(review): an integer cv is presumably unshuffled K-fold here, so one fold
# would train on later observations and validate on earlier ones — confirm this
# is acceptable for a time series, or consider TimeSeriesSplit.
grid_search = GridSearchCV(estimator=grid_model, param_grid=param_grid, n_jobs=-1, cv=2, verbose=10, scoring='neg_root_mean_squared_error')

In [None]:
# Run the grid search on the training inputs using joblib's thread-based backend.
# NOTE(review): get_model() calls tf.keras.backend.clear_session(); with several
# fits sharing one process through threads this may interfere with models that
# are still training — confirm, or run with n_jobs=1.
from joblib import parallel_backend

with parallel_backend('threading'):
    grid_result = grid_search.fit(x_train, y_train)

In [None]:
# Report the best cross-validated score and the parameter combination that
# achieved it. The label previously read "Best parameters" although the value
# printed right after it is the score.
print("Best score: %f using %s\n" % (grid_search.best_score_, grid_search.best_params_))

In [None]:
# Pull the mean test score, its standard deviation and the parameter set of
# every evaluated combination.
means, stds, params = (
    grid_search.cv_results_[key]
    for key in ('mean_test_score', 'std_test_score', 'params')
)

# One line per combination: mean score, (std), parameters.
for row in zip(means, stds, params):
    print("%f (%f) with: %r" % row)

In [None]:
# Tabulate every parameter combination together with its mean test score.
result = pd.DataFrame(grid_search.cv_results_["params"]).assign(
    score=grid_search.cv_results_["mean_test_score"]
)

In [None]:
# Display the combinations ordered from highest to lowest score.
result.sort_values(by="score", ascending=False)

### 7. Evaluasi Model

In [None]:
# Record the wall-clock end time for the computation-time measurement.
end = tm.time()

In [None]:
# Split the elapsed seconds into hours, minutes and remaining seconds.
hours, rem = divmod(end-start, 3600)
minutes, seconds = divmod(rem, 60)

In [None]:
# Print the elapsed time as HH:MM:SS.ss.
print("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))