## Univariate LSTM-RNN Sumatera Selatan Percobaan 1

### 1. Declaration Library

In [None]:
# library manipulation dataset
import pandas as pd
from pandas import concat
from pandas import DataFrame
from pandas import read_csv
from pandas import read_excel

# library manipulation array
import numpy as np
from numpy import concatenate
from numpy import array

# library configuration date and time
import time
from datetime import datetime

# library data visualization
import seaborn as sns
from matplotlib import pyplot
from matplotlib import pyplot as plt

# library analysis acf and pacf
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.graphics.tsaplots import plot_acf

# library normalize data with max-min algorithm
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder

# library algorithm lstm-rnn with keras
import tensorflow as tf
from keras.models import Sequential
from keras.layers import RNN
from keras.layers import LSTM
from keras.layers import GRU
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import TimeDistributed
from keras.layers import Bidirectional
from keras.optimizers import Adam, Adamax, RMSprop, SGD

# Early stopping
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

# library evaluation model
from math import sqrt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

### 2. Akuisisi Data

In [None]:
# set time for complexity computation
start = time.time()

In [None]:
# fix random seeds for reproducibility
# Seeding NumPy alone does not control the Keras/TensorFlow weight
# initialisation or the dropout masks, so the TensorFlow seed is fixed too.
np.random.seed(1234)
tf.random.set_seed(1234)

In [None]:
# load the monthly hotspot dataset from CSV;
# the "acq_date" column is parsed into datetime values while reading
dataset = pd.read_csv("dataset/modis_sumsel_bulanan.csv", parse_dates=['acq_date'], engine="python")

In [None]:
# # set index
# dataset = dataset.set_index("Date");

In [None]:
# show metadata dataset
dataset.info()

In [None]:
# show dataset
dataset

### 3. Exploration Data Analysis

- Data Visualization

In [None]:
# create the figure with a single wide axes
fig, ax = plt.subplots(figsize=(20, 6))

# draw the hotspot counts against their acquisition dates
ax.plot(
    dataset["acq_date"],
    dataset["hotspot"],
    label="hotspot sumsel 2001-2020",
    color="tab:blue",
    linewidth=3,
)

# title, axis labels, legend and grid
ax.set_title("Hotspot Sumsel 2001-2020", fontsize=14)
ax.set_xlabel("Date", fontsize=12)
ax.set_ylabel("Sum of hostpot", fontsize=12)
ax.legend(loc='best')
ax.grid(True)

# render the time series plot
plt.show()

- Analysis ACF and PACF

In [None]:
# one row with two panels: ACF on the left, PACF on the right
fig, ax = plt.subplots(nrows=1, ncols=2, facecolor="#F0F0F0", figsize=(20, 5))

# autocorrelation and partial autocorrelation (Yule-Walker) up to lag 24
plot_acf(dataset["hotspot"], ax=ax[0], lags=24)
plot_pacf(dataset["hotspot"], ax=ax[1], lags=24, method="yw")

# switch the grid on for both panels
for panel in ax:
    panel.grid(True)

# render both correlograms
plt.show()

### 4. Praproses Data

- feature selection

In [None]:
# set a feature selection
df_sumsel = dataset.filter(["hotspot"])

In [None]:
# replace the single-column DataFrame by its underlying NumPy array
# (the result is an array, not a pandas Series)
df_sumsel = df_sumsel.values

In [None]:
# show a dataset
np.round(df_sumsel[:5],7)

In [None]:
# view a dimension dataset
df_sumsel.shape

- normalization data

In [None]:
# normalize data with min-max scaling to the range [-1, 1]
# The scaler is fitted on the training portion only (the first 80 % of the
# rows, the same ratio that defines train_size for the train/test split) so
# that the minimum and maximum of the test period do not leak into the
# preprocessing. The fitted scaler is then applied to the whole series, which
# means test values may fall slightly outside [-1, 1].
scaler = MinMaxScaler(feature_range=(-1, 1))
hotspot_values = np.array(df_sumsel).reshape(-1, 1)
scaler.fit(hotspot_values[:int(len(hotspot_values) * 0.8)])
scaled_data = scaler.transform(hotspot_values)

In [None]:
# show a dataset after normalize
np.round(scaled_data[:5],7)

In [None]:
# view a dimension dataset after normalize
scaled_data.shape

In [None]:
# create the figure with a single wide axes
fig, ax = plt.subplots(figsize=(20, 6))

# draw the normalized hotspot series against the acquisition dates
ax.plot(
    dataset["acq_date"],
    scaled_data,
    label="hotspot sumsel 2001-2020",
    color="tab:blue",
    linewidth=2.5,
)

# title, axis labels, legend and grid
ax.set_title("Hotspot Sumsel Sensor MODIS 2001-2020", fontsize=14)
ax.set_xlabel("Tahun", fontsize=12)
ax.set_ylabel("Jumlah Hostpot", fontsize=12)
ax.legend(loc='best')
ax.grid(True)

# render the time series plot
plt.show()

- set data train and data test

In [None]:
# number of rows in the training split: the first 80 % of the series
# (rounded down)
train_size = int(len(df_sumsel) * 0.8)

In [None]:
# training split: the first train_size rows of the scaled series
train_data = scaled_data[0:train_size,:]

In [None]:
# show data train
np.round(train_data[:5],7)

In [None]:
# view dimension of data train
train_data.shape

In [None]:
# number of rows left for the test split (everything after the training rows)
test_size = len(df_sumsel) - train_size

In [None]:
# test split: the rows of the scaled series that follow the training rows
test_data = scaled_data[train_size:len(df_sumsel),:]

In [None]:
# show the first five rows of the test split, rounded to 7 decimals
np.round(test_data[:5],7)

In [None]:
# view the dimensions of the test split
test_data.shape

In [None]:
# create the figure with a single wide axes
fig, ax = plt.subplots(figsize = (20,6))

# plot the training and test splits against their acquisition dates.
# The dates are taken from the "acq_date" column by name, as in the other
# plots, instead of relying on it being the first column of the file.
ax.plot(dataset["acq_date"].iloc[0:train_size], train_data, color="tab:blue", label="Data Latih", linewidth=2.5)
ax.plot(dataset["acq_date"].iloc[train_size:len(scaled_data)], test_data, color="tab:red", label="Data Uji", linewidth=2.5)

# title, axis labels, legend and grid
ax.set_title("Hotspot Sumsel Sensor MODIS 2001-2020", fontsize=14)
ax.set_xlabel("Tahun", fontsize=12)
ax.set_ylabel("Jumlah Hostpot", fontsize=12)
ax.legend(loc="best")
ax.grid(True)

# render the time series plot
plt.show()

### 5. Supervised Learning

In [None]:
# function for supervised learning
def create_dataset(dataset, look_back):
    """Turn a series into supervised-learning samples with a sliding window.

    Each sample uses ``look_back`` consecutive values of the series as input
    and the value that immediately follows them as target.

    Parameters
    ----------
    dataset : array-like
        Series values, either 1-D or 2-D with the series in column 0.
    look_back : int
        Number of past values in every input window; must not be negative.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, Y)`` where ``X`` has shape ``(n_samples, look_back)`` and ``Y``
        has shape ``(n_samples,)``, with
        ``n_samples = max(len(dataset) - look_back, 0)``.

    Raises
    ------
    ValueError
        If ``look_back`` is negative.
    """
    if look_back < 0:
        raise ValueError("look_back must not be negative")

    # work on the first column only; a 1-D input is already that column
    values = np.asarray(dataset)
    if values.ndim > 1:
        values = values[:, 0]

    n_samples = max(len(values) - look_back, 0)

    # Row k of the index grid selects positions k .. k + look_back - 1.
    # Building the grid by broadcasting keeps X two-dimensional even when no
    # sample fits, so callers can always rely on X.shape[1].
    window_index = np.arange(n_samples)[:, None] + np.arange(look_back)[None, :]
    dataX = values[window_index]

    # the target of window k is the value right after it
    dataY = values[look_back:look_back + n_samples].copy()

    return dataX, dataY

- process supervised learning, with look back t-1 into X=t and Y=t+1

In [None]:
# number of previous time steps used as input for each target value
look_back = 1

In [None]:
# set supervised learning for data train
x_train, y_train = create_dataset(train_data, look_back)

In [None]:
# view a dimension dataset after supervised learning
print(x_train.shape, y_train.shape)

In [None]:
# set supervised learning for data test
x_test, y_test = create_dataset(test_data, look_back)

In [None]:
# view a dimension dataset after supervised learning
print(x_test.shape, y_test.shape)

- Check data train, for result supervised learning

In [None]:
# wrap the supervised-learning arrays in DataFrames for inspection;
# x_train holds the inputs and y_train the targets, so each frame is
# labelled after the array it actually contains
temp_trainX = pd.DataFrame(x_train, columns=['x train'])
temp_trainY = pd.DataFrame(y_train, columns=['y train'])

In [None]:
hasil_train = pd.concat([temp_trainX, temp_trainY], axis=1)
hasil_train.head()

In [None]:
hasil_train.info()

- Check data test, for result supervised learning

In [None]:
# wrap the supervised-learning arrays in DataFrames for inspection;
# x_test holds the inputs and y_test the targets, so each frame is
# labelled after the array it actually contains
temp_testX = pd.DataFrame(x_test, columns=['x test'])
temp_testY = pd.DataFrame(y_test, columns=['y test'])

In [None]:
hasil_test = pd.concat([temp_testX, temp_testY], axis=1)
hasil_test.head()

In [None]:
hasil_test.info()

- reshape input to be [samples, time steps, features]

In [None]:
# add a trailing feature axis to the training inputs:
# (samples, time steps) -> (samples, time steps, 1)
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)

In [None]:
print(x_train.shape, y_train.shape)

In [None]:
# add a trailing feature axis to the test inputs:
# (samples, time steps) -> (samples, time steps, 1)
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

In [None]:
print(x_test.shape, y_test.shape)

### 6. Modeling LSTM-RNN

In [None]:
# Build the network as an ordered stack of layers:
#   1. one LSTM layer (10 units, SELU activation) reading windows of
#      shape (time steps, 1 feature)
#   2. dropout of 20 % for regularisation
#   3. a single-unit dense layer that outputs the forecast
model = Sequential([
    LSTM(units=10, activation='selu', input_shape=(x_train.shape[1], 1)),
    Dropout(0.2),
    Dense(1),
])

# Compile with the default SGD optimizer and MAE as loss;
# MAE, MSE and RMSE are additionally tracked as metrics.
# (note kept from the original cell: lr=0.0075, decay=0.00001)
tracked_metrics = [
    tf.keras.metrics.MeanAbsoluteError(),
    tf.keras.metrics.MeanSquaredError(),
    tf.keras.metrics.RootMeanSquaredError(),
]
model.compile(optimizer='sgd', loss='mae', metrics=tracked_metrics)

In [None]:
# fit network
# The test split is passed as validation data, so the returned history also
# records the validation loss and metrics for every epoch; shuffle=False
# keeps the samples in their original order within each epoch.
# NOTE(review): no callbacks are passed, so all 2000 epochs always run even
# though EarlyStopping and ModelCheckpoint are imported - confirm intended.
history = model.fit(x_train, y_train, epochs=2000, batch_size=8, validation_data=(x_test, y_test), verbose=1, shuffle=False)

In [None]:
model.summary()

In [None]:
# create the figure with a single wide axes
fig, ax = plt.subplots(figsize=(20, 5))

# draw the per-epoch loss recorded during training for both splits
loss_curves = (
    ('loss', "tab:blue", "train"),
    ('val_loss', "tab:orange", "test"),
)
for history_key, curve_color, curve_label in loss_curves:
    ax.plot(history.history[history_key], color=curve_color, label=curve_label, linewidth=2)

# title, legend and grid
ax.set_title("Grafik Loss Function", fontsize=14)
ax.legend(loc='upper right')
ax.grid(True)

# render the plot
plt.show()

In [None]:
# make predictions for the test inputs and print them as a flat vector
# (the values are in the scaled units the model was trained on)
predictions = model.predict(x_test, verbose=0)
print(predictions[:, 0])

### 7. Evaluasi Model LSTM-RNN

In [None]:
# loss and compiled metrics of the fitted model on the training data
scores = model.evaluate(x_train, y_train)
scores

In [None]:
# loss and compiled metrics of the fitted model on the test data
# (overwrites the training scores stored in the same variable)
scores = model.evaluate(x_test, y_test)
scores

- MAE (Mean Absolute Error)

In [None]:
# mean absolute error between test targets and predictions;
# both are in scaled units, not in hotspot counts
mae = mean_absolute_error(y_test, predictions)
print('Test MAE : %.4f' % mae)

- MSE (Mean Squared Error)

In [None]:
# mean squared error between test targets and predictions;
# both are in scaled units, not in hotspot counts
mse = mean_squared_error(y_test, predictions)
print('Test MSE: %.4f' % mse)

- RMSE (Root Mean Squared Error)

In [None]:
# calculate RMSE as the square root of the test MSE computed above
rmse = sqrt(mse)
print('Test RMSE: %.4f' % rmse)

- Pearson correlation

In [None]:
# put actual targets and predictions side by side as two flat columns
paired_values = np.column_stack((y_test.ravel(), predictions.ravel()))
hasil = pd.DataFrame(paired_values, columns=['data_aktual', 'prediksi'])

In [None]:
hasil.head()

In [None]:
import scipy.stats as sc

# Pearson correlation coefficient and its p-value between the actual
# test targets and the predictions
r, p = sc.pearsonr(hasil["data_aktual"], hasil["prediksi"])
report_template = "korelasi data akual dengan hasil prediksi {:.4f} dengan signifikansi {:.4f} "
print(report_template.format(r, p))

- Computation time

In [None]:
# record the end of the computation time
end = time.time()

In [None]:
# split the elapsed seconds into whole hours, whole minutes and
# the remaining seconds
hours, rem = divmod(end-start, 3600)
minutes, seconds = divmod(rem, 60)

In [None]:
# print the computation time as HH:MM:SS.ss
print("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

### 8. Visualisasi Data

In [None]:
# take the real acquisition dates of the forecast targets.
# The test split starts at row train_size and its first look_back rows only
# serve as input windows, so the first target is row train_size + look_back.
# A hard-coded start date would label every point look_back rows too early.
first_target = train_size + look_back
x = dataset["acq_date"].iloc[first_target:first_target + len(y_test)].values

# create the figure
fig, ax = plt.subplots(figsize = (10,5))

# plot actual targets and predictions over the same dates
ax.plot(x, y_test, color="tab:blue", label="data aktual", linewidth=2.5)
ax.plot(x, predictions, color="tab:red", label="hasil prediksi", linewidth=2.5)

# axis labels, legend and grid
# ax.set_title("Hotspot Sumsel Sensor MODIS 2018-2020", fontsize=14)
ax.set_xlabel("Tanggal", fontsize=12)
ax.set_ylabel("Jumlah Hostpot", fontsize=12)
ax.legend(loc='upper right')
ax.grid(True)

# render the plot
plt.show()