# Analisis Statistik - Uji Stasioneritas

In [None]:
# pustaka untuk manipulasi data
import pandas as pd
from pandas import concat
from pandas import DataFrame
from pandas import read_csv
from pandas import read_excel
import numpy as np
from numpy import concatenate

# pustaka untuk waktu komputasi
import time
from datetime import datetime

# Pustaka untuk visualisasi data
import seaborn as sns
from matplotlib import pyplot
from matplotlib import pyplot as plt

# Pustaka untuk visualisasi acf dan pacf
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.graphics.tsaplots import plot_acf

# pustaka normalisasi data / membuat data latih dan data uji.
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from mlxtend.preprocessing import minmax_scaling

# pustaka uji stasioneritas statsmodel
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller

# pustaka uji stasioneritas arch
from arch.unitroot import *
from arch.unitroot import ADF
from arch.unitroot import PhillipsPerron
from arch.unitroot import KPSS

### 1. Akuisisi Data

In [None]:
# Record the wall-clock start time so the computation time can be measured
start = time.time()

In [None]:
# Fix NumPy's global random seed for reproducibility
np.random.seed(1234)

In [None]:
# Load the dataset from CSV, parsing the "acq_date" column as datetimes
dataset = pd.read_csv("dataset/dataset_boptn.csv", parse_dates=["acq_date"])

In [None]:
# # set index tanggal
# dataset = dataset.set_index("tanggal")

In [None]:
# Print a summary of the loaded frame (columns, dtypes, non-null counts)
dataset.info()

In [None]:
# Show the loaded data as the cell output
dataset

### 2. Normalisasi Data (Max-Min)

In [None]:
# Create a min-max scaler that rescales each feature to the [0, 1] range
scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))

In [None]:
# Fit the min-max scaler on the hotspot and climate-index columns and rescale
# them in a single step; the result holds one column per selected feature.
scaled = scaler.fit_transform(
    dataset.loc[
        :,
        [
            "hotspot",
            "soi_anom",
            "soi_std",
            "sst_anom",
            "sst_std",
            "oni_anom",
            "oni_std",
        ],
    ]
)

In [None]:
# Wrap the scaled values in a labelled DataFrame.
# The source frame's index is reused so that rows stay aligned with `dataset`
# when the two are combined column-wise (pd.concat aligns on index labels).
scaled = pd.DataFrame(
    scaled,
    columns=[
        "hotspot",
        "soi_anom",
        "soi_std",
        "sst_anom",
        "sst_std",
        "oni_anom",
        "oni_std",
    ],
    index=dataset.index,
)

In [None]:
# Put the acquisition date next to the scaled features, side by side.
df_normalisasi = pd.concat(
    objs=[dataset.loc[:, ["acq_date"]], scaled],
    axis="columns",
)


In [None]:
# Show the normalised frame (date column plus scaled features) as the cell output
df_normalisasi

In [None]:
# Create the figure and axes
fig, ax = plt.subplots(figsize=(10, 5))

# Draw the min-max scaled hotspot series against the acquisition date
ax.plot(
    df_normalisasi["acq_date"],
    df_normalisasi["hotspot"],
    color="tab:blue",
    label="Data Hotspot 2001 - 2023",  # legend text previously misspelled "Hostpot"
    linewidth=2.5,
)

# Title, (intentionally blank) axis labels, legend and grid
ax.set_title("Hotspot Sumsel Sensor MODIS 2001-2023", fontsize=14)
ax.set_xlabel("", fontsize=12)
ax.set_ylabel("", fontsize=12)
ax.legend(loc='best')
ax.grid(True)

# Render the plot
plt.show()

### 3. Plot ACF dan PACF

In [None]:
# Create the figure and axes
fig, ax = plt.subplots(figsize=(10, 5))

# Draw the autocorrelation function of the scaled hotspot series up to lag 24
plot_acf(df_normalisasi["hotspot"], lags=24, ax=ax)

# Title, axis labels and grid; the x-axis of an ACF plot is the lag
# (it previously carried the placeholder text "test")
ax.set_title("Plot ACF Hotspot Sumsel", fontsize=14)
ax.set_xlabel("Lag", fontsize=12)
ax.set_ylabel("", fontsize=12)
ax.grid(True)

# Render the plot
plt.show()

In [None]:
# Create the figure and axes
fig, ax = plt.subplots(figsize=(10, 5))

# Draw the partial autocorrelation function of the scaled hotspot series up to lag 24
plot_pacf(df_normalisasi["hotspot"], lags=24, ax=ax)

# Title, axis labels and grid; the x-axis of a PACF plot is the lag
# (it previously carried the placeholder text "test")
ax.set_title("Plot PACF Hotspot Sumsel", fontsize=14)
ax.set_xlabel("Lag", fontsize=12)
ax.set_ylabel("", fontsize=12)
ax.grid(True)

# Render the plot
plt.show()

### Uji-Stasioneritas (Statsmodels)

**ADF Test with statsmodels**
- https://machinelearningmastery.com/time-series-data-stationary-python/
- https://docs.w3cub.com/statsmodels/generated/statsmodels.tsa.stattools.adfuller
- https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.adfuller.html

In [None]:
# Augmented Dickey-Fuller test (statsmodels) on the scaled hotspot series.
# autolag=None turns off automatic lag selection, so the number of lags is
# taken from maxlag (left at the statsmodels default here). To select the lag
# length by information criterion instead, pass e.g. maxlag=24, autolag='AIC'.
result = adfuller(df_normalisasi["hotspot"], autolag=None)

# Report the test statistic, p-value, lags used and observations used.
# Fields are read by position so the cell keeps working if autolag is enabled
# (which appends one extra element to the returned tuple).
print("----------------------------------")
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')
print(f'n-lags: {result[2]}')
print(f'n-obs: {result[3]}')

# Report the critical values (a mapping of significance level -> value)
print("----------------------------------")
print('Critical Values:')
for key, value in result[4].items():
    print(f'   {key}, {value}')

In [None]:
# Interpret the ADF p-value at the 5% significance level and print the verdict
print(
    'Strong evidence against the null hypothesis (Ho), Reject the null hypothesis, Data has no unit root and is stationary'
    if result[1] <= 0.05
    else 'Weak evidence against the null hypothesis (Ho), time series has a unit root, indicating it is non stationary.'
)

### Uji-Stasioneritas (ARCH toolbox)

**ADF Test with ARCH toolbox**
- https://arch.readthedocs.io/en/latest/unitroot/generated/arch.unitroot.ADF.html

Hipotesis 0 (H0): data memiliki akar unit. Artinya data bersifat non-stasioner\
Hipotesis 1 (H1): data tidak memiliki akar unit. Artinya data bersifat stasioner

Jika p-value > 0.05 => Terima H0\
Jika p-value <= 0.05 => Terima H1 

In [None]:
# Run the arch ADF unit-root test on the scaled hotspot series with lags=15
adf = ADF(df_normalisasi["hotspot"], lags=15)
# Print the test summary as plain text
print(adf.summary().as_text())

In [None]:
# Inspect the regression behind the ADF test: fetch the regression results
# object exposed by the test and print its summary as plain text
# (the original note describes it as an OLS linear regression)
reg_res = adf.regression
print(reg_res.summary().as_text())

**PP Test with ARCH toolbox**
- https://arch.readthedocs.io/en/latest/unitroot/generated/arch.unitroot.PhillipsPerron.html

Hipotesis 0 (H0): data memiliki akar unit. Artinya data bersifat non-stasioner\
Hipotesis 1 (H1): data tidak memiliki akar unit. Artinya data bersifat stasioner

Jika p-value > 0.05 => Terima H0\
Jika p-value <= 0.05 => Terima H1 

In [None]:
# Run the arch Phillips-Perron unit-root test on the scaled hotspot series with lags=15
pp = PhillipsPerron(df_normalisasi["hotspot"], lags=15)
# Print the test summary as plain text
print(pp.summary().as_text())

In [None]:
# Inspect the regression behind the Phillips-Perron test and print its summary
# (the original note describes it as an OLS linear regression).
# This rebinds `reg_res`, replacing any value assigned to it earlier.
# NOTE(review): assumes the installed arch version exposes `regression` on
# PhillipsPerron - confirm against the arch documentation.
reg_res = pp.regression
print(reg_res.summary().as_text())

**KPSS Test with ARCH toolbox**
- https://arch.readthedocs.io/en/latest/unitroot/generated/arch.unitroot.KPSS.html

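Hipotesis 0 (H0): data bersifat stasioner\
Hipotesis 1 (H1): data bersifat non-stasioner

Jika p-value > 0.05 => Terima H0 (data stasioner)\
Jika p-value <= 0.05 => Terima H1 (data non-stasioner)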

In [None]:
# Run the arch KPSS stationarity test on the scaled hotspot series with lags=15
kpss = KPSS(df_normalisasi["hotspot"], lags=15)
# Print the test summary as plain text
print(kpss.summary().as_text())