## M3 dan M6 - Model Prediksi Titik Panas di Sumatera Selatan 2001 - 2023

- Skenario: titik panas dipengaruhi oleh ENSO

In [None]:
# pustaka manipulasi data array
import numpy as np

# pustaka manipulasi data frame
import pandas as pd
from pandas import concat
from pandas import DataFrame
from pandas import read_csv
from pandas import read_excel

import random as rm
import time as tm

# pustaka untuk visualisasi data
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter

# library normalize data with max-min algorithm
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# library algorithm lstm-rnn with keras
import tensorflow as tf
from keras.models import Sequential
from keras.layers import RNN
from keras.layers import LSTM
from keras.layers import GRU
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import TimeDistributed
from keras.layers import Bidirectional
from keras.optimizers import Adam, Adamax, RMSprop, SGD
from keras.layers import LeakyReLU

# pustaka untuk visualisasi acf dan pacf
import scipy.stats as sc
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.graphics.tsaplots import plot_acf

# pustaka untuk  evaluasi model prediksi
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error

### Config Models

In [None]:
# Seed every random number generator in use (Python, NumPy, TensorFlow)
# from one constant so the three can never drift out of sync.
SEED = 1234
rm.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# # Setting Algorithm
# algorithm = "LSTM-RNN"

In [None]:
# Select the recurrent architecture. Later cells compare this value against
# "LSTM-RNN"; any other value takes the GRU branch.
algorithm = "GRU-RNN"

In [None]:
# Training hyperparameters.
# activation, dropout_rate and optimizer are handed to the model builder;
# batch_size and epochs are handed to model.fit.
activation = "selu"
dropout_rate = 0.25
optimizer = "sgd"
batch_size = 2
epochs=1500

### 1. Akuisisi Data

In [None]:
# Start the wall-clock timer; the elapsed time is reported after evaluation.
start = tm.time()

In [None]:
# Load the dataset; the "acq_date" column is parsed as datetime.
dataset = pd.read_csv("dataset/dataset_enso.csv", parse_dates=["acq_date"])

In [None]:
# Show dataset metadata (columns, dtypes, non-null counts)
dataset.info()

In [None]:
# Preview the first rows; a bare expression uses the notebook's rich table
# rendering instead of the plain-text output of print().
dataset.head()

### 2. Praproses Data

- Seleksi Fitur

In [None]:
# Feature selection for the scenario where hotspot is influenced by ENSO:
# the target (hotspot) first, then the std/anom columns of SST, ONI and SOI.
data = dataset[
    ["hotspot", "sst_std", "sst_anom", "oni_std", "oni_anom", "soi_std", "soi_anom"]
].values

# Alternative feature set using only the anomaly columns:
# data = dataset[["hotspot", "sst_anom", "oni_anom", "soi_anom"]].values

In [None]:
# Preview the first five rows of the selected features
np.round(data[:5],7)

In [None]:
# Check the dimensions of the selected feature matrix (not yet normalized)
data.shape

- Normalisasi Data

In [None]:
# Scale every feature to the range [-1, 1].
# NOTE(review): the scaler is fitted on the full dataset, before the
# train/test split further down, so test-period values influence the
# scaling of the training rows — confirm this is intended.
scaler = MinMaxScaler(feature_range=(-1, 1))
scaled = scaler.fit_transform(np.array(data))

In [None]:
# Preview the first five rows of the scaled data (the whole dataset, not
# only the training part)
np.round(scaled[:5],4)

In [None]:
# Check the dimensions of the normalized data
scaled.shape

In [None]:
# Time-series plot of the first scaled column (hotspot) against acq_date
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(
    dataset["acq_date"],
    scaled[:, :1],
    color="tab:blue",
    label="Hotspot di Sumatera Selatan 2001 - 2023",
    linewidth=2.5,
)

# Title and axis labels are intentionally left empty
ax.set_title("", fontsize=14)
ax.set_xlabel("", fontsize=12)
ax.set_ylabel("", fontsize=12)

# Legend, grid, render
ax.legend(loc="upper left")
ax.grid(True)
plt.show()

### 3. Pembagian Data

In [None]:
# Number of leading rows used for training
train_size = 216

# Training set: the first train_size rows, all columns
train_data = scaled[0:train_size,:]

# Preview the first five training rows
np.round(train_data[:5],4)

In [None]:
# Dimensions of the training set
train_data.shape

In [None]:
# Declared test-set size.
# NOTE(review): test_size is not used by the slice below, which simply
# takes every row after the training block.
test_size = 60

# Test set: all rows from train_size to the end, all columns
test_data = scaled[train_size:len(scaled),:]

# Preview the first five test rows
np.round(test_data[:5],4)

In [None]:
# Dimensions of the test set
test_data.shape

In [None]:
# Create the figure
fig, ax = plt.subplots(figsize = (10,5))

# Plot the first scaled column of the train and test ranges against the
# dataset's first column (presumably acq_date — TODO confirm column order)
ax.plot(dataset.iloc[0:len(train_data),0], train_data[:,:1], color="tab:blue", label="Data Train", linewidth=2.5)
ax.plot(dataset.iloc[len(train_data):len(dataset),0], test_data[:,:1], color="tab:red", label="Data Test", linewidth=2.5)

# Labels (title and axis labels are left empty), legend and grid
ax.set_title("", fontsize=14)
ax.set_xlabel("", fontsize=12)
ax.set_ylabel("", fontsize=12)
ax.legend(loc="best")
ax.grid(True)

# Show the time-series plot
plt.show()

### 4. Supervised Learning

In [None]:
# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table of lagged columns.

    Parameters
    ----------
    data : array-like
        Observations ordered in time: a 2-D array / DataFrame / list of rows
        (one column per variable), or a 1-D sequence for a single variable.
    n_in : int, default 1
        Number of lagged steps (t-n_in ... t-1) used as input columns.
    n_out : int, default 1
        Number of forecast steps (t ... t+n_out-1) used as output columns.
    dropnan : bool, default True
        Drop the rows that contain NaN introduced by shifting.

    Returns
    -------
    pandas.DataFrame
        Columns are named ``var<k>(t-i)``, ``var<k>(t)`` and ``var<k>(t+i)``,
        ordered from the oldest lag to the furthest forecast step.
    """
    df = DataFrame(data)
    # Take the variable count from the frame itself: this is correct for
    # 1-D sequences, Series and nested lists, where inspecting the raw
    # input's type/shape either raised or produced the wrong number of names.
    n_vars = df.shape[1]
    cols, names = [], []

    # input sequence (t-n, ... t-1)
    for lag in range(n_in, 0, -1):
        cols.append(df.shift(lag))
        names += [f"var{j + 1}(t-{lag})" for j in range(n_vars)]

    # forecast sequence (t, t+1, ... t+n)
    for step in range(n_out):
        cols.append(df.shift(-step))
        if step == 0:
            names += [f"var{j + 1}(t)" for j in range(n_vars)]
        else:
            names += [f"var{j + 1}(t+{step})" for j in range(n_vars)]

    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names

    # drop rows with NaN values (the shifted edges of the series)
    if dropnan:
        agg = agg.dropna()

    return agg

- hasil supervised learning data train

In [None]:
# Frame the training data as supervised learning: one lag step in, one step out
reframed_train = series_to_supervised(train_data, 1, 1)

In [None]:
# Preview the framed training table
reframed_train.head()

In [None]:
# Keep only the lagged inputs var1(t-1)..varN(t-1) plus the prediction
# target var1(t); the current-step columns var2(t)..varN(t) are discarded.
# The cut-off is derived from the number of features in train_data, so the
# same cell serves both the full feature set and the anomaly-only subset,
# and it can be re-run safely (the previous in-place drop by hard-coded
# positions raised on a second run). Valid for the single-lag framing
# (n_in=1, n_out=1) used above.
reframed_train = reframed_train.iloc[:, : train_data.shape[1] + 1]

In [None]:
# Preview the training table after removing the unused columns
reframed_train.head(5)

In [None]:
# Inputs: every column except the last; target: the last column
trainX = reframed_train.iloc[:, :-1]
trainY = reframed_train.iloc[:, -1]

In [None]:
# Show the dimensions of the training inputs and target after framing
print(trainX.shape, trainY.shape)

- hasil supervised learning data test

In [None]:
# Frame the test data as supervised learning: one lag step in, one step out
reframed_test = series_to_supervised(test_data, 1, 1)

In [None]:
# Preview the framed test table
reframed_test.head(5)

In [None]:
# Keep only the lagged inputs var1(t-1)..varN(t-1) plus the prediction
# target var1(t); the current-step columns var2(t)..varN(t) are discarded.
# The cut-off follows the number of features in test_data, so the cell does
# not depend on a fixed feature count and can be re-run safely (the previous
# in-place drop by hard-coded positions raised on a second run). Valid for
# the single-lag framing (n_in=1, n_out=1) used above.
reframed_test = reframed_test.iloc[:, : test_data.shape[1] + 1]

In [None]:
# Preview the test table after removing the unused columns
reframed_test.head(5)

In [None]:
# Inputs: every column except the last; target: the last column
testX = reframed_test.iloc[:, :-1]
testY = reframed_test.iloc[:, -1]

In [None]:
# Show the dimensions of the test inputs and target after framing
print(testX.shape, testY.shape)

- Check the training data produced by the supervised-learning framing

In [None]:
# Wrap the training inputs and target in DataFrames for inspection
temp_trainX = pd.DataFrame(trainX)
temp_trainY = pd.DataFrame(trainY)

In [None]:
# Join inputs and target column-wise and show the resulting metadata
hasil_train = pd.concat([temp_trainX, temp_trainY], axis=1)
hasil_train.info()

In [None]:
# Preview the combined training table
hasil_train.head()

- Check the test data produced by the supervised-learning framing

In [None]:
# Wrap the test inputs and target in DataFrames for inspection
temp_testX = pd.DataFrame(testX)
temp_testY = pd.DataFrame(testY)

In [None]:
# Join inputs and target column-wise and show the resulting metadata
hasil_test = pd.concat([temp_testX, temp_testY], axis=1)
hasil_test.info()

In [None]:
# Preview the combined test table
hasil_test.head()

- reshape input for samples, time steps, features

In [None]:
# Reshape the training inputs to (samples, 1 time step, features)
trainX = trainX.to_numpy().reshape(trainX.shape[0], 1, trainX.shape[1])

In [None]:
# Dimensions of the reshaped training inputs and the training target
print(trainX.shape, trainY.shape)

In [None]:
# Reshape the test inputs to (samples, 1 time step, features)
testX = testX.to_numpy().reshape(testX.shape[0], 1, testX.shape[1])

In [None]:
# Dimensions of the reshaped test inputs and the test target
print(testX.shape, testY.shape)

### 5. Model Prediksi Titik Panas

In [None]:
# Model builders come from the project-local module class_base_model
from class_base_model import lstm_algorithm
from class_base_model import gru_algorithm

# Build the network chosen in the configuration cell.
# The comparison must use the same "LSTM-RNN" label as the configuration
# and plot-title cells; the previous literal "algorithm" could never match,
# so the LSTM builder was unreachable and GRU was always trained.
if algorithm == "LSTM-RNN":
    # call model LSTM-RNN
    model = lstm_algorithm(trainX, activation, dropout_rate, optimizer)
else:
    # call model GRU-RNN
    model = gru_algorithm(trainX, activation, dropout_rate, optimizer)

# Fit the network; shuffle=False keeps the samples in temporal order.
# NOTE(review): the test set doubles as validation data here.
# use_multiprocessing is not passed: it only applies to generator/Sequence
# inputs (these are in-memory arrays) and newer Keras releases reject it.
history = model.fit(
    trainX, trainY, batch_size=batch_size, epochs=epochs,
    validation_data=(testX, testY), verbose=1, shuffle=False,
)

In [None]:
# Print the layer-by-layer summary of the fitted model
model.summary()

In [None]:
# Create the figure
fig, ax = plt.subplots(figsize = (10,5))

# Plot training loss and validation loss per epoch (validation uses the test set)
ax.plot(history.history['loss'], color="tab:blue", label="data train", linewidth=1.5)
ax.plot(history.history['val_loss'], color="tab:orange", label="data test", linewidth=1.5)

# Title, legend and grid
ax.set_title("Grafik Loss Function")
ax.legend(loc='upper right')
ax.grid(True)

# Show the plot
plt.show()

In [None]:
# Predict on the test inputs (values are still on the scaled range)
predictions = model.predict(testX, verbose=0)
print(predictions[:, 0])

### 7. Evaluasi Model Prediksi

In [None]:
# Evaluate the fitted model on the training data
scores = model.evaluate(trainX, trainY)
scores

In [None]:
# Evaluate the fitted model on the test data (overwrites the training scores)
scores = model.evaluate(testX, testY)
scores

- MAE

In [None]:
# Mean absolute error on the test set, computed on the scaled values
mae = mean_absolute_error(testY, predictions)
print('Test MAE: %.4f' % mae)

- MSE

In [None]:
# Mean squared error on the test set, computed on the scaled values
mse = mean_squared_error(testY, predictions)
print('Test MSE: %.4f' % mse)

- MAPE

In [None]:
# Mean absolute percentage error on the test set, expressed in percent.
# NOTE(review): this is computed on values scaled to [-1, 1]; targets close
# to zero can inflate the percentage — confirm the scaled range is intended.
mape = mean_absolute_percentage_error(testY, predictions) * 100
print('Test MAPE: %.4f' % mape)

- korelasi dan signifikansi

In [None]:
# Two-column table: actual target next to the flattened prediction
hasil = pd.DataFrame(
    np.column_stack((testY, predictions.reshape(-1))),
    columns=['data_aktual', 'prediksi'],
)
hasil.head()

In [None]:
# Pearson correlation between actual and predicted values, with its p-value.
# scipy.stats is already imported as `sc` in the imports cell at the top,
# so it is not re-imported here.
r, p = sc.pearsonr(hasil["data_aktual"], hasil["prediksi"])
print("korelasi data aktual dengan hasil prediksi {:.4f} dengan signifikansi {:.4f} ".format(r, p))

- Waktu komputasi

In [None]:
# Stop the wall-clock timer
end = tm.time()

In [None]:
# Break the elapsed seconds down into hours, minutes and seconds
hours, rem = divmod(end-start, 3600)
minutes, seconds = divmod(rem, 60)

In [None]:
# Print the elapsed time as HH:MM:SS.ss
print("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

### 8. Visualisasi Hasil Prediksi

In [None]:
# Plot title matching the configured algorithm
title = (
    "Model Stacked-Bidirectional LSTM-RNN"
    if algorithm == "LSTM-RNN"
    else "Model Stacked-Bidirectional GRU-RNN"
)

In [None]:
# Create the figure
fig, ax = plt.subplots(figsize = (10,5))

# Plot actual vs. predicted (scaled) values. The x values start one row
# after the training block because the one-step lag framing drops the
# first test row.
ax.plot(dataset.iloc[len(train_data)+1:len(dataset),0], testY, color="tab:blue", label="data aktual", linewidth=2)
ax.plot(dataset.iloc[len(train_data)+1:len(dataset),0], predictions, color="tab:red", label="hasil prediksi", linewidth=2)

# Title, (empty) axis labels, legend and grid
ax.set_title(title, fontsize=14)
ax.set_xlabel("", fontsize=12)
ax.set_ylabel("", fontsize=12)
ax.legend(loc="upper right")
ax.grid(True)
plt.show()

### 9. Hasil Prediksi Titik Panas

In [None]:
# Fit a scaler on the hotspot column alone so that the single-column
# predictions can be mapped back to hotspot counts. Min-max scaling is
# computed per feature, so this reproduces the scaling applied to hotspot
# in the multi-feature normalization.
# Only the fitted scaler is kept: the transformed values are not needed, and
# assigning them to `scaled` would overwrite the multi-feature matrix that
# the train/test split cells slice, breaking them on re-run.
scaler = MinMaxScaler(feature_range=(-1, 1)).fit(np.array(dataset[["hotspot"]]))

In [None]:
# Map the scaled predictions and the scaled test target back through the
# scaler's inverse transform
testPredictions = scaler.inverse_transform(predictions)
testActual = scaler.inverse_transform(np.array(testY).reshape(-1, 1))

In [None]:
# Build the three result columns: the dataset's first column for the rows
# after train_size + 1 (presumably acq_date — TODO confirm column order),
# the actual values, and the predicted values
x = pd.DataFrame(np.array(dataset.iloc[train_size+1:len(dataset),0]), columns=['Date'])
y = pd.DataFrame(testActual, columns=['Data Aktual'])
z = pd.DataFrame(testPredictions, columns=['Data Prediksi'])

In [None]:
# Join date, actual and predicted columns side by side
hasil_prediksi = pd.concat([x, y, z], axis=1)

In [None]:
# Round the numeric columns to whole numbers
hasil_prediksi = hasil_prediksi.round(0)

In [None]:
# Display the prediction table
hasil_prediksi

In [None]:
# if algorithm == "LSTM-RNN":
#     hasil_prediksi.to_csv("hasil_prediksi_m1.csv", index=False)
# else:
#     hasil_prediksi.to_csv("hasil_prediksi_m4.csv", index=False)

In [None]:
# Plot title matching the configured algorithm
title = (
    "Model Stacked-Bidirectional LSTM-RNN"
    if algorithm == "LSTM-RNN"
    else "Model Stacked-Bidirectional GRU-RNN"
)

In [None]:
# Create the figure (the handle was previously bound to the misspelled name
# `fix`, leaving `fig` pointing at the figure from an earlier cell)
fig, ax = plt.subplots(figsize=(10,5))

# Plot actual vs. predicted values after inverse scaling and rounding
ax.plot(hasil_prediksi["Date"], hasil_prediksi["Data Aktual"], color="tab:blue", label="Data aktual", linewidth=2.5)
ax.plot(hasil_prediksi["Date"], hasil_prediksi["Data Prediksi"], color="tab:red", label="Hasil prediksi", linewidth=2.5)

# One major tick per year, labelled with the four-digit year
ax.xaxis.set_major_locator(mdates.YearLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))

# Title, (empty) axis labels, legend and grid
ax.set_title(title, fontsize=14)
ax.set_xlabel("", fontsize=12)
ax.set_ylabel("", fontsize=12)
ax.legend(loc="upper right")
ax.grid(True)
plt.show()