### Hyperparameter GridSearchCV LSTM-RNN Percobaan 1

- Hanya memperhatikan Titik Panas

In [None]:
# pustaka untuk manipulasi data
import pandas as pd
from pandas import concat
from pandas import DataFrame
from pandas import read_csv
from pandas import read_excel
import numpy as np
from numpy import concatenate

# pustaka untuk waktu komputasi
import time
from datetime import datetime

# Pustaka untuk visualisasi data
import seaborn as sns # Visualization
from matplotlib import pyplot
from matplotlib import pyplot as plt

# Pustaka untuk membuat data latih dan data uji.
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder

# Pustaka untuk membuat model prediksi LSTM-RNN
import itertools
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.optimizers import Adam, Adamax, RMSprop, SGD

# Early stopping
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

# Pustaka untuk  evaluasi model prediksi
from math import sqrt
from sklearn.metrics import mean_squared_error

In [None]:
# Record the wall-clock start time so the total runtime can be reported at the end.
start = time.time()

In [None]:
# Fix the random seeds for reproducibility.
# Seeding NumPy alone does not control TensorFlow's own random ops (used by
# Keras for e.g. weight initialisation and dropout), so TensorFlow's global
# seed is set as well.
# NOTE(review): fits dispatched to separate worker processes (GridSearchCV
# with n_jobs != 1) may not inherit these seeds — confirm if exact
# run-to-run reproducibility is required.
np.random.seed(1234)
tf.random.set_seed(1234)

In [None]:
# Load the dataset from the Excel workbook (path is relative to the working directory).
dataset = read_excel("dataset/dataset.xlsx")

In [None]:
# set index tanggal
# dataset = dataset.set_index("tanggal")

In [None]:
# Summarise the columns, dtypes and non-null counts of the loaded data.
dataset.info()

In [None]:
# Preview the first rows of the dataset as plain text.
print(dataset.head())

### Studi Kasus Sumatera Selatan

In [None]:
# Select the study area: keep only the South Sumatra hotspot column
# (double brackets keep the result two-dimensional, as a DataFrame).
df_sumsel = dataset[["hotspot_sumsel"]]
df_sumsel.info()

In [None]:
# Preview the first rows of the selected column as plain text.
print(df_sumsel.head())

In [None]:
# Convert the selected column to a float32 NumPy array for scaling and modelling.
df_sumsel = df_sumsel.values.astype('float32')

In [None]:
# Build a monthly (month-start) date axis with one entry per observation.
# NOTE(review): the start date is hard-coded — confirm it matches the first
# row of the dataset.
x = pd.date_range(start="2001-01-01", periods=len(df_sumsel), freq='MS')

# Create the figure and axes.
fig, ax = plt.subplots(figsize=(10, 5))

# Draw the time-series line.
ax.plot(x, df_sumsel, color="tab:blue", label="data aktual", linewidth=2.5)

# Title and axis labels. The year range in the title is taken from the date
# axis so that the title cannot disagree with what is actually plotted.
ax.set_title("Hotspot Sumsel Sensor MODIS {}-{}".format(x[0].year, x[-1].year), fontsize=14)
ax.set_xlabel("Tanggal", fontsize=12)
ax.set_ylabel("Jumlah Hotspot", fontsize=12)
ax.legend(loc='upper right')
ax.grid(True)

# Render the figure.
plt.show()

### Normalisasi Data

In [None]:
# Normalise the series to the range [-1, 1].
# The scaler is fitted on the training portion only (the first 80 % of the
# rows, the same ratio used by the train/test split) so that the minimum and
# maximum of the held-out test period do not leak into training. Test values
# may therefore fall outside [-1, 1].
scaler = MinMaxScaler(feature_range=(-1, 1))
n_fit = int(len(df_sumsel) * 0.8)
scaler.fit(df_sumsel[:n_fit])
df_sumsel = scaler.transform(df_sumsel)

In [None]:
# Show the first five scaled values, rounded to six decimals.
np.round(df_sumsel[:5],6)

### Data Latih dan Data Uji

In [None]:
# Chronological 80/20 split: the first 80 % of the rows form the training set,
# the remaining rows form the test set.
train_size = int(len(df_sumsel) * 0.8)
test_size = len(df_sumsel) - train_size
train = df_sumsel[:train_size, :]
test = df_sumsel[train_size:, :]

In [None]:
# Report the shapes of the training and test arrays.
print(train.shape, test.shape)

### Supervised Learning

In [None]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    """Convert a series into sliding-window samples for supervised learning.

    Each sample pairs ``look_back`` consecutive values from column 0 (the
    input window) with the value that immediately follows them (the target).

    Parameters
    ----------
    dataset : numpy.ndarray
        Two-dimensional array; only column 0 is read.
    look_back : int, default 1
        Number of consecutive time steps in each input window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(dataX, dataY)`` where ``dataX`` has shape
        ``(len(dataset) - look_back, look_back)`` and ``dataY`` has shape
        ``(len(dataset) - look_back,)``. Both are empty (but correctly
        shaped) when the series is too short for a single window.
    """
    # Every start index i with i + look_back <= len(dataset) - 1 yields a valid
    # (window, target) pair, so there are len(dataset) - look_back samples.
    # (Subtracting an extra 1 here would silently drop the last valid sample.)
    n_samples = len(dataset) - look_back
    if n_samples <= 0:
        # Too short for one window: keep the 2-D / 1-D shapes so downstream
        # reshaping based on dataX.shape[1] still works.
        return (np.empty((0, look_back), dtype=dataset.dtype),
                np.empty((0,), dtype=dataset.dtype))

    dataX = [dataset[i:i + look_back, 0] for i in range(n_samples)]
    dataY = [dataset[i + look_back, 0] for i in range(n_samples)]
    return np.array(dataX), np.array(dataY)

In [None]:
# Frame the problem as X = value at t, Y = value at t+1 (window length 1),
# separately for the training and the test series.
look_back = 1
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

- Check the training data produced by the supervised-learning transformation

In [None]:
# Wrap the training inputs and targets in DataFrames for inspection.
temp_trainX = pd.DataFrame(trainX)
temp_trainY = pd.DataFrame(trainY)

In [None]:
# Place the training inputs and targets side by side and preview the first rows.
hasil = pd.concat([temp_trainX, temp_trainY], axis=1)
hasil.head()

- Check the test data produced by the supervised-learning transformation

In [None]:
# Wrap the test inputs and targets in DataFrames for inspection.
temp_testX = pd.DataFrame(testX)
temp_testY = pd.DataFrame(testY)

In [None]:
# Place the test inputs and targets side by side and preview the first rows.
# Note: this reuses (overwrites) the name `hasil` from the training-data check.
hasil = pd.concat([temp_testX, temp_testY], axis=1)
hasil.head()

- Reshape the input to [samples, time steps, features]

In [None]:
# Append a trailing feature axis of size 1 so the inputs are laid out as
# [samples, time steps, features].
trainX = trainX.reshape((trainX.shape[0], trainX.shape[1], 1))
testX = testX.reshape((testX.shape[0], testX.shape[1], 1))

In [None]:
# Report the shapes of the training inputs and targets.
print(trainX.shape, trainY.shape)

In [None]:
# Report the shapes of the test inputs and targets.
print(testX.shape, testY.shape)

### Hyperparameter GridSearchCV LSTM-RNN

In [None]:
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV

In [None]:
# Hyperparameter search space for the grid search.
# 'neurons', 'activation', 'optimizer' and 'dropout_rate' match the arguments
# of the model-building function; 'epochs', 'batch_size' and 'verbose' are
# presumably forwarded to the Keras training call by the scikit-learn wrapper.
parameters = dict(
    neurons=[10],
    activation=['sigmoid', 'tanh', 'relu', 'selu', 'elu', 'softplus'],
    optimizer=['adam', 'adamax', 'rmsprop', 'sgd'],
    dropout_rate=[0.05, 0.1, 0.15, 0.2, 0.25],
    epochs=[2000],
    batch_size=[4, 8, 16, 32, 64],
    verbose=[0],
)

In [None]:
# Enumerate every hyperparameter combination (Cartesian product of all value
# lists) as a list of {name: value} dicts and report how many there are.
keys = parameters.keys()
values = (parameters[name] for name in keys)
combinations = [dict(zip(keys, combo)) for combo in itertools.product(*values)]
print(len(combinations), 'kombinasi hyperparameter')

In [None]:
# Inspect the generated combinations.
# NOTE(review): this echoes the entire list (one dict per combination);
# consider showing a slice such as combinations[:5] instead.
combinations

- LSTM model with sliding window and time steps

In [None]:
def build_classifier(neurons='', activation='', optimizer='', dropout_rate=''):
    """Build and compile a single-layer LSTM network for one hyperparameter set.

    Despite the name, the network ends in a single-unit Dense layer and is
    compiled with the mean-absolute-error loss.

    Parameters
    ----------
    neurons
        Number of units in the LSTM layer.
    activation
        Activation function of the LSTM layer.
    optimizer
        Optimizer passed to ``compile``.
    dropout_rate
        Rate of the Dropout layer placed after the LSTM layer.

    Returns
    -------
    The compiled Keras ``Sequential`` model.

    Notes
    -----
    The input shape is taken from the global ``trainX`` (its time-step axis),
    with one feature per step. The empty-string defaults are placeholders;
    every argument must be supplied for the model to build.
    """
    # Reset Keras' global state before building a new model.
    tf.keras.backend.clear_session()

    # Network layout: LSTM -> Dropout -> single-unit output.
    layers = [
        LSTM(units=neurons, activation=activation, input_shape=(trainX.shape[1], 1)),
        Dropout(dropout_rate),
        Dense(1),
    ]
    grid_model = Sequential(layers)

    grid_model.compile(loss='mae', optimizer=optimizer)
    return grid_model

In [None]:
# Wrap the model-building function in the scikit-learn style Keras regressor
# so it can be used as the estimator of a grid search.
grid_model = KerasRegressor(build_fn=build_classifier)

In [None]:
# Exhaustive grid search over `parameters` with 2-fold cross-validation,
# run in parallel (n_jobs=-1) and ranked by negative RMSE.
# NOTE(review): an integer `cv` presumably means unshuffled K-fold here, so one
# fold trains on later observations and validates on earlier ones; for time
# series consider a forward-chaining splitter such as TimeSeriesSplit.
grid_search = GridSearchCV(estimator=grid_model, param_grid=parameters, n_jobs=-1, cv=2, scoring='neg_root_mean_squared_error')

In [None]:
# Run the grid search on the training samples.
grid_result = grid_search.fit(trainX, trainY)

In [None]:
# Summarise the results: the best score with its hyperparameters, followed by
# the mean and standard deviation of the cross-validation score for every
# combination. With a negated-error scoring, values closer to zero are better.
# The first value printed is the best *score*, so it is labelled as such.
print("Best score: %f using %s\n" % (grid_search.best_score_, grid_search.best_params_))

means = grid_search.cv_results_['mean_test_score']
stds = grid_search.cv_results_['std_test_score']
params = grid_search.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

In [None]:
# Tabulate the search: one row per hyperparameter combination, plus a "score"
# column holding its mean cross-validation score.
cv_results = grid_search.cv_results_
result = pd.DataFrame(cv_results["params"]).assign(score=cv_results["mean_test_score"])

In [None]:
# Show all combinations ordered by score, highest first.
result.sort_values(by="score", ascending=False)

In [None]:
# Save the ranked results to a CSV file (without the index column).
result.sort_values(by="score", ascending=False).to_csv('gridsearch_percobaan1.csv',index=False)

### Evaluasi Model LSTM-RNN

In [None]:
# Record the wall-clock end time of the computation.
end = time.time()

In [None]:
# Break the elapsed seconds down into hours, minutes and remaining seconds.
hours, rem = divmod(end-start, 3600)
minutes, seconds = divmod(rem, 60)

In [None]:
# Print the elapsed time as HH:MM:SS.ss.
print("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))