# Imports

In [None]:
# Data processing
# ==============================================================================
import numpy as np
import pandas as pd

# Plots
# ==============================================================================
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
# Select the 'seaborn-v0_8-darkgrid' matplotlib style sheet for the figures
# drawn in this notebook.
plt.style.use('seaborn-v0_8-darkgrid')

# Modelling and Forecasting
# ==============================================================================
from lightgbm import LGBMRegressor
from sklearn.ensemble import HistGradientBoostingRegressor

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer
from sklearn.compose import make_column_selector
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor

from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregMultiOutput import ForecasterAutoregMultiOutput
from skforecast.model_selection import grid_search_forecaster
from skforecast.model_selection import backtesting_forecaster

# Configuration warnings
# ==============================================================================
import warnings
# Install a blanket 'ignore' filter: no warning of any category is shown
# after this line.
# NOTE(review): this also hides deprecation and data-quality warnings raised
# by the modelling libraries - consider narrowing the filter by category.
warnings.filterwarnings('ignore')

# Dataset

## Read Dataset

In [None]:
# Load the training data. Parsing 'Tanggal' with an explicit format makes a
# malformed date fail here rather than later.
df = pd.read_csv('train.csv')
df['Tanggal'] = pd.to_datetime(df['Tanggal'], format='%Y-%m-%d')

# The date column is not kept: the frame continues with the default
# positional (0..n-1) index, which the later integer-based split relies on.
df = df.drop(columns='Tanggal')
df.head()

## Check Autocorrelation

In [None]:
# Autocorrelation of the target series
# ======================================================================================
from statsmodels.graphics.tsaplots import plot_acf

# Correlogram of 'Penutupan' for lags up to 500.
plot_acf(x=df['Penutupan'], lags=500)
plt.show()

## Split Train Val

In [None]:
# Chronological hold-out split on the integer index.
#
# `.loc` slices by label and includes BOTH endpoints. Slicing train with
# `:end_train` and validation with `end_train:` would therefore place row
# `end_train` in both sets. Validation starts one row later so the two
# partitions are disjoint.
end_train = 800  # label of the last row that belongs to the training set
df_train = df.loc[:end_train, :]
df_val   = df.loc[end_train + 1:, :]

# The index is positional, so the values printed below are row labels.
print(f"Dates train      : {df_train.index.min()} --- {df_train.index.max()}  (n={len(df_train)})")
print(f"Dates validation : {df_val.index.min()} --- {df_val.index.max()}  (n={len(df_val)})")

# Tune

## Trial 1 - LightGBM

### Tuning

In [None]:
# Autoregressive forecaster built around a LightGBM regressor with a fixed
# seed; the target is passed through a StandardScaler via `transformer_y`.
forecaster = ForecasterAutoreg(
    regressor=LGBMRegressor(random_state=123),
    lags=24,
    transformer_y=StandardScaler(),
)

# Bare expression: shows the forecaster's summary as the cell output.
forecaster

In [None]:
# Search space for the LightGBM hyperparameters
param_grid = {
    'n_estimators': [25, 50, 75, 100, 150, 200, 300, 500, 600, 700, 1000],
    'max_depth': [-1, 1, 3, 5, 10, 15, 20, 25],
    'learning_rate': [0.05, 0.1, 0.12, 0.15, 0.2],
}

# Candidate lag sets: each entry is range(1, stop), i.e. lags 1 .. stop-1
lags_grid = [range(1, stop) for stop in (10, 15, 50, 100, 200, 300, 500)]

# Grid search over every (lags, hyperparameters) combination, scored by MAPE
# on 12-step backtesting folds after the initial training window.
# `return_best=True` asks for `forecaster` to be left configured with the
# winning combination.
results_grid = grid_search_forecaster(
    forecaster=forecaster,
    y=df['Penutupan'],
    param_grid=param_grid,
    lags_grid=lags_grid,
    steps=12,
    refit=False,
    metric='mean_absolute_percentage_error',
    initial_train_size=len(df.loc[:end_train]),
    fixed_train_size=False,
    return_best=True,
    verbose=False,
)

In [None]:
# Backtest the current forecaster with the same fold settings as the grid
# search: 12-step folds, no refit, MAPE as the score.
metric, predictions = backtesting_forecaster(
    forecaster=forecaster,
    y=df.loc[:, 'Penutupan'],
    initial_train_size=len(df.loc[:end_train]),
    fixed_train_size=False,
    steps=12,
    refit=False,
    metric='mean_absolute_percentage_error',
    verbose=False,  # set to True for per-fold details
)

print(f"Backtest error: {metric:.2f}")
predictions.head()

### Make Forecast and Submission

In [None]:
# Forecast one value per row of the test file and write the submission CSV.
subm = pd.read_csv('test.csv')
predict = forecaster.predict(steps=len(subm))

# `.values` strips the forecast's own index so the predictions are paired
# with the test dates purely by position.
submission = pd.DataFrame({
    'penutupan': predict.values,
    'tanggal': subm['Tanggal'],
})

# `to_csv` returns None when it is given a path, so its result must not be
# assigned back to `submission`; the frame stays available for inspection.
# `index=False` already keeps the index out of the file.
submission.to_csv('submission_4.csv', index=False)