### Hyperparameter GridSearchCV LSTM-RNN Percobaan 1

### 1. Declaration Library

In [1]:
# import warnings
# warnings.filterwarnings("ignore")

# library manipulation dataset
import pandas as pd
from pandas import concat
from pandas import DataFrame
from pandas import read_csv
from pandas import read_excel

# library manipulation array
import numpy as np
from numpy import concatenate
from numpy import array

# library configuration date and time
import time
from datetime import datetime

# library data visualization
import seaborn as sns
from matplotlib import pyplot
from matplotlib import pyplot as plt

# library analysis acf and pacf
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.graphics.tsaplots import plot_acf

# library normalize data with max-min algorithm
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

# library algorithm lstm-rnn with keras
import tensorflow as tf
from tensorflow.keras import models
from keras.models import Sequential
from keras.layers import RNN
from keras.layers import LSTM
from keras.layers import GRU
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import TimeDistributed
from keras.layers import Bidirectional
from keras.optimizers import Adam, Adamax, RMSprop, SGD

# early stopping and model checkpoint callbacks
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

# library evaluation model
from math import sqrt
from sklearn.metrics import make_scorer
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

### 2. Akuisisi Data

In [None]:
# Set waktu komputasi
start = time.time()

In [None]:
# fix random seeds for reproducibility: seeding NumPy alone does not
# control TensorFlow/Keras weight initialisation or dropout masks
np.random.seed(1234)
tf.random.set_seed(1234)

In [None]:
# membaca dataset
dataset = read_excel("dataset/dataset.xlsx")

In [None]:
# set index tanggal
# dataset = dataset.set_index("tanggal")

In [None]:
dataset.info()

In [None]:
print(dataset.head())

### 3. Exploration Data Analysis

- Data Visualization

In [None]:
# create the figure and its single axes
fig, ax = plt.subplots(figsize=(20, 6))

# draw the hotspot count against the date column
ax.plot(
    dataset["tanggal"],
    dataset["hotspot_sumsel"],
    color="tab:blue",
    label="hotspot sumsel 2001-2020",
    linewidth=2.5,
)

# title, axis labels, legend and grid
ax.set_title("Hotspot Sumsel 2001-2020", fontsize=14)
ax.set_xlabel("Date", fontsize=12)
ax.set_ylabel("Sum of hotspot", fontsize=12)  # label typo fixed (was "hostpot")
ax.legend(loc='best')
ax.grid(True)

# render the time series plot
plt.show()

- Analysis ACF and PACF

In [None]:
# one row, two panels on a light-grey figure
fig, ax = plt.subplots(nrows=1, ncols=2, facecolor="#F0F0F0", figsize=(20, 5))
acf_panel, pacf_panel = ax

# autocorrelation of the hotspot series up to lag 24 (left panel)
plot_acf(dataset["hotspot_sumsel"], lags=24, ax=acf_panel)

# partial autocorrelation up to lag 24 with method "yw" (right panel)
plot_pacf(dataset["hotspot_sumsel"], lags=24, ax=pacf_panel, method="yw")

# switch the grid on for both panels
for panel in ax:
    panel.grid(True)

# render the ACF and PACF plots
plt.show()

### 4. Praproses Data

- feature selection (studi kasus sumatera selatan)

In [None]:
# memilih area studi
df_sumsel = dataset[["hotspot_sumsel"]]

In [None]:
# convert the selected column to a NumPy array of float64 values
df_sumsel = df_sumsel.values.astype('float64')

In [None]:
# show a dataset
np.round(df_sumsel[:5],7)

In [None]:
# view a dimension dataset
df_sumsel.shape

In [None]:
# create the figure and its single axes
fig, ax = plt.subplots(figsize=(20, 6))

# draw the selected hotspot values against the date column
ax.plot(
    dataset["tanggal"],
    df_sumsel,
    color="tab:blue",
    label="data aktual",
    linewidth=2.5,
)

# title, axis labels, legend and grid
ax.set_title("Hotspot Sumsel Sensor MODIS 2001-2020", fontsize=14)
ax.set_xlabel("Tanggal", fontsize=12)
ax.set_ylabel("Jumlah Hotspot", fontsize=12)  # label typo fixed (was "Hostpot")
ax.legend(loc='upper right')
ax.grid(True)

# render the plot
plt.show()

- normalization data

In [None]:
# normalize features to the range (-1, 1)
# Fit the scaler on the training portion only (the first 80 % of the rows,
# the same ratio the train/test split below uses) so that the min/max of the
# held-out test period does not leak into preprocessing. Values in the test
# period may therefore fall slightly outside [-1, 1].
n_fit_rows = int(len(df_sumsel) * 0.8)
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(df_sumsel[:n_fit_rows])
df_sumsel = scaler.transform(df_sumsel)

In [None]:
# show a dataset after normalize
np.round(df_sumsel[:5],7)

In [None]:
# view a dimension dataset after normalize
df_sumsel.shape

In [None]:
# create the figure and its single axes
fig, ax = plt.subplots(figsize=(20, 6))

# draw the normalized hotspot values against the date column
ax.plot(
    dataset["tanggal"],
    df_sumsel,
    color="tab:blue",
    label="data aktual",
    linewidth=2.5,
)

# title, axis labels, legend and grid
ax.set_title("Hotspot Sumsel Sensor MODIS 2001-2020", fontsize=14)
ax.set_xlabel("Tanggal", fontsize=12)
ax.set_ylabel("Jumlah Hotspot", fontsize=12)  # label typo fixed (was "Hostpot")
ax.legend(loc='upper right')
ax.grid(True)

# render the plot
plt.show()

- set data train and data test

In [None]:
# set data train
train_size = int(len(df_sumsel) * 0.8)

In [None]:
# set loc data train
train= df_sumsel[0:train_size,:]

In [None]:
# show data train
np.round(train[:5],7)

In [None]:
# view dimension of data train
train.shape

In [None]:
# set data test
test_size = len(df_sumsel) - train_size

In [None]:
# set loc data test
test = df_sumsel[train_size:len(df_sumsel),:]

In [None]:
# view dimension of data test
test.shape

In [None]:
# create the figure and its single axes
fig, ax = plt.subplots(figsize=(20, 7))

# draw the train and test segments against the first column of `dataset`
# (used here as the x axis), in different colours
ax.plot(dataset.iloc[0:train_size, 0], train, color="tab:blue", label="Data Latih", linewidth=2.5)
ax.plot(dataset.iloc[train_size:len(dataset), 0], test, color="tab:red", label="Data Uji", linewidth=2.5)

# title, axis labels, legend and grid
ax.set_title("Hotspot Sumsel Sensor MODIS 2001-2020", fontsize=14)
ax.set_xlabel("Tahun", fontsize=12)
ax.set_ylabel("Jumlah Hotspot", fontsize=12)  # label typo fixed (was "Hostpot")
ax.legend(loc="best")
ax.grid(True)

# render the time series plot
plt.show()

### 5. Supervised Learning

In [None]:
# function for supervised learning
def create_dataset(dataset, look_back=1):
    """Turn a series into supervised-learning (window, next value) pairs.

    Parameters
    ----------
    dataset : array indexed as ``dataset[row, col]``
        Only column 0 is read.
    look_back : int, default 1
        Number of consecutive rows that form one input window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, Y)`` where ``X[i]`` holds column 0 of rows
        ``i .. i + look_back - 1`` and ``Y[i]`` is column 0 of row
        ``i + look_back``.
    """
    # number of (window, target) pairs that fit inside the series
    n_samples = len(dataset) - look_back

    # input windows and the value that immediately follows each of them
    windows = [dataset[start:start + look_back, 0] for start in range(n_samples)]
    targets = [dataset[start + look_back, 0] for start in range(n_samples)]

    return np.array(windows), np.array(targets)

- Build the supervised-learning pairs with look_back = 1: the input X is the value at time t and the target Y is the value at time t+1

In [None]:
# set look back to 1 (one previous time step per input window)
look_back = 1

In [None]:
# set supervised learning for data train
trainX, trainY = create_dataset(train, look_back)

In [None]:
# view a dimension dataset after supervised learning
print(trainX.shape, trainY.shape)

In [None]:
# set supervised learning for data test
testX, testY = create_dataset(test, look_back)

In [None]:
# view a dimension dataset after supervised learning
print(testX.shape, testY.shape)

- Check data train, for result supervised learning

In [None]:
temp_trainX = pd.DataFrame(trainX, columns=['x train'])
temp_trainY = pd.DataFrame(trainY, columns=['y train'])

In [None]:
hasil = pd.concat([temp_trainX, temp_trainY], axis=1)
hasil.info()

In [None]:
hasil.head()

- Check data test, for result supervised learning

In [None]:
temp_testX = pd.DataFrame(testX, columns=['x test'])
temp_testY = pd.DataFrame(testY, columns=['y test'])

In [None]:
hasil = pd.concat([temp_testX, temp_testY], axis=1)
hasil.info()

In [None]:
hasil.head()

- reshape the input to (samples, time steps, features)

In [None]:
# reshape data train
trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))

In [None]:
print(trainX.shape, trainY.shape)

In [None]:
# reshape data test
testX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1))

In [None]:
print(testX.shape, testY.shape)

### 6. Hyperparameter GridSearchCV LSTM-RNN

- library for hyperparameter

In [None]:
from scikeras.wrappers import KerasRegressor
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV

- set models lstm-rnn with sliding windows and timestep

In [None]:
# function models lstm-rnn
def get_model(activation='tanh', optimizer='adam', dropout_rate=0.0):
    """Build and compile a single-layer LSTM regression network.

    Parameters
    ----------
    activation : str, default 'tanh'
        Activation passed to the LSTM layer.
    optimizer : str, default 'adam'
        Optimizer passed to ``model.compile``.
    dropout_rate : float, default 0.0
        Rate of the Dropout layer placed after the LSTM layer.

    Returns
    -------
    The compiled ``Sequential`` model (LSTM with 10 units -> Dropout ->
    Dense(1)), using "mae" as loss and tracking MAE, MSE and RMSE metrics.
    """
    # reset the Keras session before building a new model
    tf.keras.backend.clear_session()

    # each sample is (time steps, 1 feature); the number of time steps is
    # read from the module-level trainX array
    window_shape = (trainX.shape[1], 1)

    # LSTM layer, dropout regularisation, then a single-unit output layer
    network = Sequential([
        LSTM(units=10, activation=activation, input_shape=window_shape),
        Dropout(dropout_rate),
        Dense(1),
    ])

    # metrics reported during training in addition to the loss
    tracked_metrics = [
        tf.keras.metrics.MeanAbsoluteError(),
        tf.keras.metrics.MeanSquaredError(),
        tf.keras.metrics.RootMeanSquaredError(),
    ]
    network.compile(optimizer=optimizer, loss="mae", metrics=tracked_metrics)

    return network

Set parameter
- activation function = tanh, sigmoid, relu, selu, softplus, elu.
- optimizers = adam, adamax, rmsprop, sgd.
- dropout = 0.00, 0.05, 0.1, 0.15, 0.2, 0.25
- batch size = 2, 4, 8, 16, 32, 64 (this run searches 2, 4, 8, 16, 32 only)
- epoch = 500, 1000, 1500, 2000 (this run uses 2000 only)

In [None]:
# set parameter activation
activation = ['tanh', 'sigmoid', 'relu', 'selu', 'elu', 'softplus']

In [None]:
# set parameter optimizer
optimizer = ['adam', 'adamax', 'rmsprop', 'sgd']

In [None]:
# set parameter dropout_rate
dropout_rate = [0.00, 0.05, 0.1, 0.15, 0.2, 0.25]

In [None]:
# set parameters batch sizes
batch_size = [2, 4, 8, 16, 32]

In [None]:
# set parameter epochs
epochs = [2000]

In [None]:
# set all parameter to dictionary
param_grid = dict(
    model__activation=activation,
    model__optimizer=optimizer,
    model__dropout_rate=dropout_rate,
    batch_size=batch_size,
    epochs=epochs
)

In [None]:
# show parameter a gridsearch
param_grid

In [None]:
# set grid model with KerasRegressor
grid_model = KerasRegressor(model=get_model, verbose=1)

In [None]:
# configure the exhaustive grid search over param_grid, scored by negative RMSE
# NOTE(review): an integer cv with a regressor presumably means unshuffled
# K-fold, so one fold would validate on data older than its training data;
# confirm this is intended for a time series.
grid_search = GridSearchCV(
    estimator=grid_model,
    param_grid=param_grid,
    scoring='neg_root_mean_squared_error',
    cv=2,
    n_jobs=-1,
)

In [None]:
# run the grid search on the training data
grid_result = grid_search.fit(trainX, trainY)

In [None]:
# summarize results: the best score found by the search and the parameter
# set that achieved it (the old message labelled the score as "parameters")
print("Best score: %f using %s\n" % (grid_search.best_score_, grid_search.best_params_))

In [None]:
# pull the per-candidate mean score, score spread and parameter set
# out of the search results
results = grid_search.cv_results_
means, stds, params = (
    results[key] for key in ('mean_test_score', 'std_test_score', 'params')
)

# one line per candidate: mean score, standard deviation, parameters
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

In [None]:
# result = pd.concat([pd.DataFrame(grid_search.cv_results_["params"]),pd.DataFrame(grid_search.cv_results_["mean_test_score"], columns=["score"])],axis=1)

In [None]:
# result.sort_values(by="score", ascending=False)

In [None]:
# result.sort_values(by="score", ascending=False).to_csv('gridsearch_percobaan1.csv',index=False)

### Evaluasi Model LSTM-RNN

In [None]:
# Set akhir waktu komputasi 
end = time.time()

In [None]:
# Proses menghitung waktu komputasi
hours, rem = divmod(end-start, 3600)
minutes, seconds = divmod(rem, 60)

In [None]:
# Hasil waktu komputasi
print("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))