<a href="https://colab.research.google.com/github/kekenziii/Pengembangan-Sistem-Peramalan-Tingkat-Hunian-Hotel-Menggunakan-LightGBM/blob/main/Model_Pengembangan_Sistem_Peramalan_Tingkat_Hunian_Hotel_Menggunakan_LightGBM_Final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Model Skripsi

In [None]:
# Print the Python version of the runtime (IPython shell escape).
!python --version

# Library

In [None]:
print("\nHolidays\n")
!pip install holidays # Buat data liburan di Indonesia

print("\nDarts\n")
!pip install darts # Buat model

print("\Optuna\n")
!pip install optuna # Parameter tuning
# !pip install streamlit
# !pip install pyngrok

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
from tabulate import tabulate

import plotly.graph_objs as go # Buat plot sistem
import holidays #Covariate
import calendar # Dipake saat display rata2 occupancy rate bulan lalu. Mungkin bakal dihapus
from datetime import timedelta

from scipy import stats
from scipy.stats import kurtosis, skew # Cek kurtosis dan skew
import statsmodels.api as sm # ACF
from darts import TimeSeries
from darts.models import LightGBMModel # Model
# from sklearn.model_selection import TimeSeriesSplit # Crossval
from darts.metrics import mae, mape, rmse, mse
from darts.metrics.metrics import smape

import optuna # Parameter tuning

import warnings # Biar gak diganggu warning
warnings.filterwarnings('ignore')

# # Sistem
# import streamlit as st # Streamlit
# from pyngrok import ngrok # Deploy sistem web based
# import plotly.graph_objs as go # Visualisasi dalam sistem

In [None]:
# Mount Google Drive at /content/drive; the CSV paths read below live under it.
from google.colab import drive
drive.mount('/content/drive')

# Import Dataset

In [None]:
# Read one CSV per hotel from the mounted Drive folder.
csv_paths = (
    '/content/drive/MyDrive/Skripsi/HotelA_Hotel_CSV.csv',
    '/content/drive/MyDrive/Skripsi/HotelB_Hotel_CSV.csv',
    '/content/drive/MyDrive/Skripsi/HotelC_Hotel_CSV.csv',
)
df_A, df_B, df_C = map(pd.read_csv, csv_paths)

# Data Preparation

## 1. Eksplorasi Data

In [None]:
# Report (rows, columns) for each hotel dataset, in A, B, C order.
for frame in (df_A, df_B, df_C):
    print(frame.shape)

In [None]:
# Print the column/dtype/non-null summary of each hotel dataset.
for frame in (df_A, df_B, df_C):
    frame.info()

In [None]:
# Preview the first rows of Hotel A.
df_A.head()

In [None]:
# Preview the last rows of Hotel A.
df_A.tail()

In [None]:
# Preview the first rows of Hotel B.
df_B.head()

In [None]:
# Preview the first rows of Hotel C.
df_C.head()

### Cek Null

In [None]:
# Count missing values per column for every hotel and show them side by side
# (one DataFrame column per dataset).
na_counts = {
    name: frame.isna().sum()
    for name, frame in (('df_A', df_A), ('df_B', df_B), ('df_C', df_C))
}
na_counts_df = pd.DataFrame(na_counts)

print(na_counts_df)

### Range Tanggal

In [None]:
# Convert the "Date" column of every hotel to datetime (in place), then
# tabulate the earliest and latest date found in each dataset.
for frame in (df_A, df_B, df_C):
    frame["Date"] = pd.to_datetime(frame["Date"])

hotel_frames = [("Hotel A", df_A), ("Hotel B", df_B), ("Hotel C", df_C)]

date_ranges = pd.DataFrame({
    "Hotel": [hotel for hotel, _ in hotel_frames],
    "From": [frame['Date'].min().strftime('%Y-%m-%d') for _, frame in hotel_frames],
    "To": [frame['Date'].max().strftime('%Y-%m-%d') for _, frame in hotel_frames],
}).set_index('Hotel')

print(tabulate(date_ranges, headers='keys'))

In [None]:
# Plot the occupancy series of the three hotels on stacked axes that share one
# x-axis, each overlaid with its rolling mean.
dfs = [df_A, df_B, df_C]
labels = ["Series A", "Series B", "Series C"]

# Window length of the rolling mean. The legend and title are derived from this
# value: previously they said "3-day" while the code actually averaged 30 rows.
rolling_window = 30

global_min_date = min(df["Date"].min() for df in dfs)
global_max_date = max(df["Date"].max() for df in dfs)

fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(60, 20), sharex=True)

for ax, df, label in zip(axes, dfs, labels):
    ax.plot(df["Date"], df["Occ (%) Sold"], label=f"{label} (Actual)", color="blue", alpha=0.7)

    rolling_avg = df["Occ (%) Sold"].rolling(window=rolling_window).mean()
    ax.plot(
        df["Date"],
        rolling_avg,
        label=f"{label} ({rolling_window}-day Avg)",
        linestyle="dashed",
        color="red",
    )

    ax.set_ylabel(label)
    ax.legend()
    ax.grid(True)

# sharex=True propagates this limit to all three axes.
axes[-1].set_xlim(global_min_date, global_max_date)

plt.xlabel("Date")
plt.suptitle(f"Three Time Series with Aligned X-Axis and {rolling_window}-Day Rolling Average")
plt.tight_layout()
plt.show()


### Cek Outlier

In [None]:
def _iqr_fences(values):
    """Return (q1, q3, iqr, lower, upper) where the fences are q1/q3 -/+ 1.5 * IQR."""
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    iqr = q3 - q1
    return q1, q3, iqr, q1 - 1.5 * iqr, q3 + 1.5 * iqr


def _draw_outlier_panel(position, values, outliers, lower, upper, title):
    """Draw one horizontal boxplot with its fences and the flagged points."""
    plt.subplot(3, 1, position)
    plt.boxplot(values, vert=False, patch_artist=True, boxprops=dict(facecolor='lightblue'))
    plt.axvline(lower, color='red', linestyle='--')
    plt.axvline(upper, color='red', linestyle='--')
    plt.scatter(outliers['Occ (%) Sold'], [1] * len(outliers), color='orange', zorder=3, label='Outliers')
    plt.title(title, fontsize=40)
    plt.xlabel("Occ (%) Sold", fontsize=20)
    plt.xticks(fontsize=15)
    plt.yticks(fontsize=15)
    plt.legend()


plt.figure(figsize=(25, 15))

# Dataset A
occ_A = df_A['Occ (%) Sold']
q1_A, q3_A, iqr_A, lower_bound_A, upper_bound_A = _iqr_fences(occ_A)
outliers_A = df_A[(occ_A < lower_bound_A) | (occ_A > upper_bound_A)]
_draw_outlier_panel(1, occ_A, outliers_A, lower_bound_A, upper_bound_A, "Outliers in Dataset A")

# Dataset B
occ_B = df_B['Occ (%) Sold']
q1_B, q3_B, iqr_B, lower_bound_B, upper_bound_B = _iqr_fences(occ_B)
outliers_B = df_B[(occ_B < lower_bound_B) | (occ_B > upper_bound_B)]
_draw_outlier_panel(2, occ_B, outliers_B, lower_bound_B, upper_bound_B, "Outliers in Dataset B")

# Dataset C
occ_C = df_C['Occ (%) Sold']
q1_C, q3_C, iqr_C, lower_bound_C, upper_bound_C = _iqr_fences(occ_C)
outliers_C = df_C[(occ_C < lower_bound_C) | (occ_C > upper_bound_C)]
_draw_outlier_panel(3, occ_C, outliers_C, lower_bound_C, upper_bound_C, "Outliers in Dataset C")

plt.tight_layout()
plt.show()


### Cek Kurtosis dan Skewness

In [None]:
def check_kurtosis_and_skewness(df):
    """Print and return the kurtosis and skewness of the 'Occ (%) Sold' column.

    Both statistics come from scipy.stats (`kurtosis` and `skew`, called with
    their default arguments). A one-line interpretation of each sign is
    printed after the raw values.

    Args:
        df: DataFrame containing an 'Occ (%) Sold' column.

    Returns:
        Tuple ``(kurtosis, skewness)``.
    """
    occupancy = df['Occ (%) Sold']
    kurt = kurtosis(occupancy)
    skw = skew(occupancy)

    print(f"Kurtosis: {kurt}")
    print(f"Skewness: {skw}")

    # Tail-weight verdict from the sign of the kurtosis.
    if kurt > 0:
        tail_verdict = "Distribution is leptokurtic (heavy tails)."
    elif kurt < 0:
        tail_verdict = "Distribution is platykurtic (light tails)."
    else:
        tail_verdict = "Distribution is mesokurtic (normal-like)."
    print(tail_verdict)

    # Asymmetry verdict from the sign of the skewness.
    if skw > 0:
        skew_verdict = "Data is positively skewed (long right tail).\n"
    elif skw < 0:
        skew_verdict = "Data is negatively skewed (long left tail).\n"
    else:
        skew_verdict = "Data is symmetric.\n"
    print(skew_verdict)

    return kurt, skw

In [None]:
# Distribution shape of the raw occupancy of each hotel, evaluated in A, B, C order.
(kurt_A, skw_A), (kurt_B, skw_B), (kurt_C, skw_C) = map(
    check_kurtosis_and_skewness, (df_A, df_B, df_C)
)

### Cek ACF

In [None]:
# For each hotel, find the first lag whose autocorrelation falls inside the
# approximate 95% band (|acf| < 1.96 / sqrt(n)) and mark it on the ACF plot.
datasets = [("A", df_A), ("B", df_B), ("C", df_C)]
fig, axes = plt.subplots(3, 1, figsize=(10, 30))

max_acf_lags = 196  # number of lags requested from acf() and drawn by plot_acf()
viable_lags = {}

for ax, (label, df) in zip(axes, datasets):
    series = df["Occ (%) Sold"]

    acf_values = sm.tsa.acf(series, nlags=max_acf_lags)
    conf_interval = 1.96 / np.sqrt(len(series))

    # Index 0 is lag 0 (always 1.0), so the search starts at lag 1.
    insignificant = np.abs(acf_values[1:]) < conf_interval
    if insignificant.any():
        acf_lag = int(np.argmax(insignificant)) + 1
    else:
        # np.argmax on an all-False mask returns 0, which used to be reported
        # as "lag 1". If no lag drops into the band, report the largest lag
        # that was actually evaluated instead.
        acf_lag = len(acf_values) - 1
    viable_lags[label] = acf_lag

    sm.graphics.tsa.plot_acf(series, lags=max_acf_lags, ax=ax)
    ax.set_title(f"ACF - {label} (Lag: {acf_lag})")
    ax.axvline(acf_lag, color='r', linestyle='dashed')

plt.tight_layout()
plt.show()

for dataset, lag in viable_lags.items():
    print(f"{dataset}: ACF Lag = {lag}")

### Visualisasi Pembagian Train dan Test

In [None]:
def plot_occupancy(df, hotel_name, color1='blue', color2='green'):
    """Plot one hotel's occupancy, coloured by the positional 80/20 train/test split.

    Args:
        df: DataFrame with "Date" and "Occ (%) Sold" columns.
        hotel_name: Used for the title and the legend entries.
        color1: Line colour of the first 80% of rows (train).
        color2: Line colour of the remaining rows (test).
    """
    plt.figure(figsize=(25, 15))

    cut = int(len(df) * 0.8)
    dates = df["Date"]
    occupancy = df["Occ (%) Sold"]

    segments = (
        (slice(None, cut), color1, f'{hotel_name} (Train)'),
        (slice(cut, None), color2, f'{hotel_name} (Test)'),
    )
    for rows, colour, legend_label in segments:
        plt.plot(dates.iloc[rows], occupancy.iloc[rows], marker='o', color=colour, label=legend_label)

    # Vertical marker on the first test date.
    plt.axvline(x=dates.iloc[cut], color='red', linestyle='--', linewidth=2, label='Train-Test Split')

    plt.xlabel("Date")
    plt.ylabel("Occupancy Rate")
    plt.title(hotel_name)
    plt.legend()
    plt.grid(True)
    plt.show()


for hotel_frame, hotel_title in ((df_A, "Hotel A"), (df_B, "Hotel B"), (df_C, "Hotel C")):
    plot_occupancy(hotel_frame, hotel_title)

### Range Data Train dan Test

In [None]:
def get_split_ranges(df, hotel_name):
    """Return the first/last dates of the positional 80/20 train/test split.

    Args:
        df: DataFrame with a "Date" column, in the row order used for splitting.
        hotel_name: Label stored under the "Hotel" key.

    Returns:
        Dict with keys "Hotel", "Train Start", "Train End", "Test Start" and
        "Test End"; the train part is the first ``int(len(df) * 0.8)`` rows.
    """
    dates = df["Date"]
    boundary = int(len(df) * 0.8)

    summary = {"Hotel": hotel_name}
    summary["Train Start"] = dates.iloc[0]
    summary["Train End"] = dates.iloc[boundary - 1]
    summary["Test Start"] = dates.iloc[boundary]
    summary["Test End"] = dates.iloc[-1]
    return summary

# One row per hotel with the date boundaries of its train and test parts.
split_rows = [
    get_split_ranges(frame, hotel)
    for frame, hotel in ((df_A, "Hotel A"), (df_B, "Hotel B"), (df_C, "Hotel C"))
]
split_table = pd.DataFrame(split_rows)

print(tabulate(split_table, headers='keys', tablefmt='grid'))

## 2. Pra-Pemrosesan Data

## Transformasi Data

#### Visualisasi Sebelum

In [None]:
# Occupancy of each hotel over time, one stacked panel per hotel.
plt.figure(figsize=(25, 15))

panels = (
    (1, df_A, 'Hotel A', 'purple', "Hotel A - Occupancy Percentage Over Time"),
    (2, df_B, 'Hotel B', 'blue', "Hotel B - Occupancy Percentage Over Time"),
    (3, df_C, 'Hotel C', 'green', "Hotel C - Occupancy Percentage Over Time"),
)

for position, frame, hotel, colour, heading in panels:
    plt.subplot(3, 1, position)
    plt.plot(frame["Date"], frame["Occ (%) Sold"], label=hotel, marker='o', color=colour)
    plt.xlabel("Date", fontsize=30)
    plt.ylabel("Occupancy Rate", fontsize=30)
    plt.title(heading, fontsize=50)
    plt.xticks(fontsize=15)
    plt.yticks(fontsize=15)
    plt.grid(True)

plt.tight_layout()
plt.show()

### Rekayasa Fitur

In [None]:
def reductions(df_list):
    """Replace every frame in ``df_list`` by its ["Date", "Occ (%) Sold"] columns.

    The list is modified in place and nothing is returned. Each entry becomes
    an independent copy rather than a column slice of the original frame:
    later cells add columns to these frames, and doing that on a slice
    triggers pandas' SettingWithCopyWarning.

    Args:
        df_list: Mutable list of DataFrames that contain both columns.
    """
    keep = ["Date", "Occ (%) Sold"]
    for i, df in enumerate(df_list):
        df_list[i] = df[keep].copy()

# reductions() replaces the entries of the list in place, so the three names
# are rebound from the list afterwards to pick up the reduced frames.
df_list = [df_A, df_B, df_C]
reductions(df_list)

df_A, df_B, df_C = df_list

In [None]:
def add_holiday_feature(df, date_column='Date'):
    """Add a boolean 'is_holiday' column to ``df`` and return the same frame.

    A date is flagged when it is contained in ``holidays.Indonesia()`` or its
    weekday is 5 or 6 (Saturday/Sunday), so the flag covers weekends as well
    as holidays. ``df`` is modified in place: ``date_column`` is converted to
    datetime and 'is_holiday' is added.

    Args:
        df: DataFrame holding the date column.
        date_column: Name of the column with the dates.

    Returns:
        The input ``df`` (same object) with the extra column.
    """
    id_holidays = holidays.Indonesia()
    df[date_column] = pd.to_datetime(df[date_column])

    def _is_day_off(day):
        return day in id_holidays or day.weekday() in [5, 6]

    df['is_holiday'] = df[date_column].apply(_is_day_off)
    return df

def prepare_data(df):
    """Build the modelling frame: calendar covariates plus the occupancy target.

    Adds 'is_holiday' (via add_holiday_feature), 'day_of_week', 'month' and
    'week_of_year' (ISO week) to ``df`` itself, then returns a selection of
    the columns in a fixed order.

    Args:
        df: DataFrame with "Date" and "Occ (%) Sold" columns.

    Returns:
        DataFrame with columns Date, is_holiday, day_of_week, month,
        week_of_year, Occ (%) Sold.
    """
    df = add_holiday_feature(df)

    dates = df['Date']
    df['day_of_week'] = dates.dt.weekday
    df['month'] = dates.dt.month
    df['week_of_year'] = dates.dt.isocalendar().week

    ordered_columns = ['Date', 'is_holiday', 'day_of_week', 'month', 'week_of_year', 'Occ (%) Sold']
    return df[ordered_columns]

In [None]:
# Feature-engineer the three hotel frames, in A, B, C order.
df_A_prepared, df_B_prepared, df_C_prepared = map(prepare_data, (df_A, df_B, df_C))

### Mengatur Data Abnormal

In [None]:
def transformation(df, method="boxcox", Hotel="Hotel A"):
    """Return a copy of ``df`` with 'Occ (%) Sold' transformed by ``method``.

    Supported methods:
        "log":        natural log of positive values; other values become 0.
        "sqrt":       square root of non-negative values; other values become 0.
        "boxcox":     Box-Cox of ``value + 1``; the fitted lambda is only
                      printed, so note it down if the inverse is needed.
        "moving_avg": 2-row trailing mean applied to the first
                      ``int(len(df) * 0.8)`` rows only (the train part).
        "none"/"No":  no transformation (case-insensitive).

    Args:
        df: DataFrame with an 'Occ (%) Sold' column; it is not modified.
        method: Name of the transformation.
        Hotel: Label used in the printed Box-Cox message.

    Returns:
        A transformed copy of ``df``.

    Raises:
        ValueError: If ``method`` is not one of the names above. Unknown names
            used to be ignored silently, which hid typos.
    """
    target = 'Occ (%) Sold'
    df_transformed = df.copy()

    # The pass-through branch used to match only "No", while the callers in
    # this notebook and inverse_transform() spell it "none"; accept both.
    if str(method).lower() in ("none", "no"):
        return df_transformed

    if method == "log":
        df_transformed[target] = df_transformed[target].apply(lambda x: np.log(x) if x > 0 else 0)

    elif method == "sqrt":
        df_transformed[target] = df_transformed[target].apply(lambda x: np.sqrt(x) if x >= 0 else 0)

    elif method == "boxcox":
        # Shift by one before fitting; the inverse must subtract it again.
        df_transformed[target] += 1
        transformed_values, lambda_value = stats.boxcox(df_transformed[target].values)
        df_transformed[target] = transformed_values
        print(f"{Hotel} Box-Cox Lambda: {lambda_value}")

    elif method == "moving_avg":
        split_index = int(len(df) * 0.8)
        occupancy = df_transformed[target].astype(float)
        smoothed = occupancy.rolling(window=2, min_periods=1).mean()
        # Select train rows by position. The previous `.loc[:split_index]` is
        # label based and inclusive, so it also smoothed the first test row.
        is_train = np.arange(len(occupancy)) < split_index
        df_transformed[target] = occupancy.where(~is_train, smoothed)

    else:
        raise ValueError(f"Unknown transformation method: {method}")

    return df_transformed

# df_A_prepared = transformation(df_A_prepared, method="moving_avg", Hotel="Hotel A")
# df_B_prepared = transformation(df_B_prepared, method="moving_avg", Hotel="Hotel B")
# df_C_prepared = transformation(df_C_prepared, method="moving_avg", Hotel="Hotel C")

# Pass the prepared frames through transformation() with method "none".
df_A_edited, df_B_edited, df_C_edited = (
    transformation(frame, method="none", Hotel=hotel)
    for frame, hotel in (
        (df_A_prepared, "Hotel A"),
        (df_B_prepared, "Hotel B"),
        (df_C_prepared, "Hotel C"),
    )
)

In [None]:
# DataFrame.info() prints its report and returns None, so embedding it in the
# f-string printed "... shape: None". Keep the info report, but print the
# actual shape in the message.
df_A_edited.info()
print(f"Hotel A training data shape: {df_A_edited.shape}")
df_B_edited.info()
print(f"Hotel B training data shape: {df_B_edited.shape}")
df_C_edited.info()
print(f"Hotel C training data shape: {df_C_edited.shape}")

In [None]:
# Re-check the distribution shape after the transformation step, then take
# independent copies that the modelling cells work on.
(kurt_A, skw_A), (kurt_B, skw_B), (kurt_C, skw_C) = map(
    check_kurtosis_and_skewness, (df_A_edited, df_B_edited, df_C_edited)
)

prepared_a, prepared_b, prepared_c = (
    frame.copy() for frame in (df_A_edited, df_B_edited, df_C_edited)
)

In [None]:
# Quick line plot of Hotel A's occupancy column against the frame's index.
prepared_a["Occ (%) Sold"].plot()

# Pemodelan

## Inverse Function

In [None]:
def inverse_log_transform(series):
    """Return a new TimeSeries holding ``exp`` of every value of ``series``."""
    restored = series.pd_dataframe().apply(np.exp)
    return TimeSeries.from_dataframe(restored)

def inverse_sqrt_transform(series):
    """Return a new TimeSeries holding the square of every value of ``series``."""
    squared = series.pd_dataframe().apply(lambda column: column ** 2)
    return TimeSeries.from_dataframe(squared)

def inverse_boxcox_transform(series, lambda_value, shift=1):
    """Undo the Box-Cox transformation applied by ``transformation()``.

    ``transformation(method="boxcox")`` adds 1 to the data before fitting
    Box-Cox, so the inverse has to subtract that shift again; previously the
    result stayed one unit too high. The ``lambda_value == 0`` case (where
    Box-Cox is the natural log) is handled explicitly instead of dividing by
    zero.

    Args:
        series: Darts TimeSeries in Box-Cox space.
        lambda_value: Lambda that was used for the forward transformation.
        shift: Constant added before the forward transformation (1 in this
            notebook); pass 0 for data that was not shifted.

    Returns:
        A new TimeSeries on the original scale.
    """
    def _undo(values):
        if lambda_value == 0:
            restored = np.exp(values)
        else:
            restored = (lambda_value * values + 1) ** (1 / lambda_value)
        return restored - shift

    return TimeSeries.from_dataframe(series.pd_dataframe().apply(_undo))


def inverse_transform(pred, target, method, lambda_value=None):
    """Map a prediction and its target back to the original scale.

    Args:
        pred: Predicted series in transformed space.
        target: Actual series in transformed space.
        method: One of "log", "sqrt", "boxcox" or "none".
        lambda_value: Box-Cox lambda; required when ``method`` is "boxcox".

    Returns:
        Tuple ``(pred, target)`` after inversion; for "none" the inputs are
        returned unchanged.

    Raises:
        ValueError: For "boxcox" without ``lambda_value`` or for an unknown
            ``method``.
    """
    if method == "none":
        return pred, target

    if method == "log":
        undo = inverse_log_transform
    elif method == "sqrt":
        undo = inverse_sqrt_transform
    elif method == "boxcox":
        if lambda_value is None:
            raise ValueError("Lambda value is required for Box-Cox transformation.")

        def undo(series):
            return inverse_boxcox_transform(series, lambda_value)
    else:
        raise ValueError(f"Unknown transformation method: {method}")

    return undo(pred), undo(target)


# lambda_value_a = 0.7886837783709969
# lambda_value_b = 1.023992820766559
# lambda_value_c = 0.8310669031702983

## Lags, Based on ACF

In [None]:
# Number of target lags per hotel. These are hard-coded here; the
# `viable_lags` dict computed in the ACF cell is not read.
lags_A, lags_B, lags_C = 17, 23, 17

## (1) No Optuna

### (1A) Model A

In [None]:
# Hotel A - LightGBM with default hyperparameters, backtested in 28-day blocks.
series_a_lightGBM = TimeSeries.from_dataframe(
    prepared_a,
    time_col='Date',
    value_cols=['Occ (%) Sold'],
    fill_missing_dates=True
)

future_covariates_a_lightGBM = TimeSeries.from_dataframe(
    prepared_a,
    time_col='Date',
    value_cols=['is_holiday', 'day_of_week', 'month', 'week_of_year'],
    fill_missing_dates=True
)

# Chronological 80/20 split of the target and its covariates.
split_index = int(len(series_a_lightGBM) * 0.8)
target_train_a_lightGBM, target_test_a_lightGBM = series_a_lightGBM[:split_index], series_a_lightGBM[split_index:]
future_cov_train_a_lightGBM, future_cov_test_a_lightGBM = future_covariates_a_lightGBM[:split_index], future_covariates_a_lightGBM[split_index:]

model_a_lightGBM = LightGBMModel(
    lags=lags_A,
    lags_future_covariates=[0, 1, 2, 3, 4, 5, 6],
    output_chunk_length=28,
    verbose=-1,
    random_state=42,
)

model_a_lightGBM.fit(target_train_a_lightGBM, future_covariates=future_cov_train_a_lightGBM)

# historical_forecasts() defaults to last_points_only=True, which keeps only
# the final (28th) step of every window, so the metrics were computed on a
# single point per 28 days. Request the full blocks and stitch them together
# so that every forecast day is scored.
forecast_blocks_a_lightGBM = model_a_lightGBM.historical_forecasts(
    series=target_train_a_lightGBM.concatenate(target_test_a_lightGBM),
    future_covariates=future_covariates_a_lightGBM,
    start=len(target_train_a_lightGBM),
    forecast_horizon=28,
    stride=28,
    retrain=False,
    last_points_only=False,
    verbose=True
)
if not forecast_blocks_a_lightGBM:
    raise ValueError("Test period is too short for a single 28-step forecast block.")

# stride == forecast_horizon, so consecutive blocks are contiguous in time.
backtest_predictions_a_lightGBM = forecast_blocks_a_lightGBM[0]
for block in forecast_blocks_a_lightGBM[1:]:
    backtest_predictions_a_lightGBM = backtest_predictions_a_lightGBM.concatenate(block)

mae_value_a_lightGBM = mae(target_test_a_lightGBM, backtest_predictions_a_lightGBM)
smape_value_a_lightGBM = smape(target_test_a_lightGBM, backtest_predictions_a_lightGBM)
rmse_value_a_lightGBM = rmse(target_test_a_lightGBM, backtest_predictions_a_lightGBM)

print(f"\nMAE: {mae_value_a_lightGBM:.4f}")
print(f"SMAPE: {smape_value_a_lightGBM:.4f}%")
print(f"RMSE: {rmse_value_a_lightGBM:.4f}")

### (1B) Model B

In [None]:
# Hotel B - LightGBM with default hyperparameters, backtested in 28-day blocks.
series_b_lightGBM = TimeSeries.from_dataframe(
    prepared_b,
    time_col='Date',
    value_cols=['Occ (%) Sold'],
    fill_missing_dates=True
)

future_covariates_b_lightGBM = TimeSeries.from_dataframe(
    prepared_b,
    time_col='Date',
    value_cols=['is_holiday', 'day_of_week', 'month', 'week_of_year'],
    fill_missing_dates=True
)

# Chronological 80/20 split of the target and its covariates.
split_index = int(len(series_b_lightGBM) * 0.8)
target_train_b_lightGBM, target_test_b_lightGBM = series_b_lightGBM[:split_index], series_b_lightGBM[split_index:]
future_cov_train_b_lightGBM, future_cov_test_b_lightGBM = future_covariates_b_lightGBM[:split_index], future_covariates_b_lightGBM[split_index:]

model_b_lightGBM = LightGBMModel(
    lags=lags_B,
    lags_future_covariates=[0, 1, 2, 3, 4, 5, 6],
    output_chunk_length=28,
    verbose=-1,
    random_state=42,
)

model_b_lightGBM.fit(target_train_b_lightGBM, future_covariates=future_cov_train_b_lightGBM)

# historical_forecasts() defaults to last_points_only=True, which keeps only
# the final (28th) step of every window, so the metrics were computed on a
# single point per 28 days. Request the full blocks and stitch them together
# so that every forecast day is scored.
forecast_blocks_b_lightGBM = model_b_lightGBM.historical_forecasts(
    series=target_train_b_lightGBM.concatenate(target_test_b_lightGBM),
    future_covariates=future_covariates_b_lightGBM,
    start=len(target_train_b_lightGBM),
    forecast_horizon=28,
    stride=28,
    retrain=False,
    last_points_only=False,
    verbose=True
)
if not forecast_blocks_b_lightGBM:
    raise ValueError("Test period is too short for a single 28-step forecast block.")

# stride == forecast_horizon, so consecutive blocks are contiguous in time.
backtest_predictions_b_lightGBM = forecast_blocks_b_lightGBM[0]
for block in forecast_blocks_b_lightGBM[1:]:
    backtest_predictions_b_lightGBM = backtest_predictions_b_lightGBM.concatenate(block)

mae_value_b_lightGBM = mae(target_test_b_lightGBM, backtest_predictions_b_lightGBM)
smape_value_b_lightGBM = smape(target_test_b_lightGBM, backtest_predictions_b_lightGBM)
rmse_value_b_lightGBM = rmse(target_test_b_lightGBM, backtest_predictions_b_lightGBM)

print(f"\nMAE: {mae_value_b_lightGBM:.4f}")
print(f"SMAPE: {smape_value_b_lightGBM:.4f}%")
print(f"RMSE: {rmse_value_b_lightGBM:.4f}")

### (1C) Model C

In [None]:
# Hotel C - LightGBM with default hyperparameters, backtested in 28-day blocks.
series_c_lightGBM = TimeSeries.from_dataframe(
    prepared_c,
    time_col='Date',
    value_cols=['Occ (%) Sold'],
    fill_missing_dates=True
)

future_covariates_c_lightGBM = TimeSeries.from_dataframe(
    prepared_c,
    time_col='Date',
    value_cols=['is_holiday', 'day_of_week', 'month', 'week_of_year'],
    fill_missing_dates=True
)

# Chronological 80/20 split of the target and its covariates.
split_index = int(len(series_c_lightGBM) * 0.8)
target_train_c_lightGBM, target_test_c_lightGBM = series_c_lightGBM[:split_index], series_c_lightGBM[split_index:]
future_cov_train_c_lightGBM, future_cov_test_c_lightGBM = future_covariates_c_lightGBM[:split_index], future_covariates_c_lightGBM[split_index:]

model_c_lightGBM = LightGBMModel(
    lags=lags_C,
    lags_future_covariates=[0, 1, 2, 3, 4, 5, 6],
    output_chunk_length=28,
    verbose=-1,
    random_state=42,
)

model_c_lightGBM.fit(target_train_c_lightGBM, future_covariates=future_cov_train_c_lightGBM)

# historical_forecasts() defaults to last_points_only=True, which keeps only
# the final (28th) step of every window, so the metrics were computed on a
# single point per 28 days. Request the full blocks and stitch them together
# so that every forecast day is scored.
forecast_blocks_c_lightGBM = model_c_lightGBM.historical_forecasts(
    series=target_train_c_lightGBM.concatenate(target_test_c_lightGBM),
    future_covariates=future_covariates_c_lightGBM,
    start=len(target_train_c_lightGBM),
    forecast_horizon=28,
    stride=28,
    retrain=False,
    last_points_only=False,
    verbose=True
)
if not forecast_blocks_c_lightGBM:
    raise ValueError("Test period is too short for a single 28-step forecast block.")

# stride == forecast_horizon, so consecutive blocks are contiguous in time.
backtest_predictions_c_lightGBM = forecast_blocks_c_lightGBM[0]
for block in forecast_blocks_c_lightGBM[1:]:
    backtest_predictions_c_lightGBM = backtest_predictions_c_lightGBM.concatenate(block)

mae_value_c_lightGBM = mae(target_test_c_lightGBM, backtest_predictions_c_lightGBM)
smape_value_c_lightGBM = smape(target_test_c_lightGBM, backtest_predictions_c_lightGBM)
rmse_value_c_lightGBM = rmse(target_test_c_lightGBM, backtest_predictions_c_lightGBM)

print(f"\nMAE: {mae_value_c_lightGBM:.4f}")
print(f"SMAPE: {smape_value_c_lightGBM:.4f}%")
print(f"RMSE: {rmse_value_c_lightGBM:.4f}")

## (2) Optuna, 50 Iteration

### (2A) Model A

In [None]:
# Hotel A - LightGBM tuned with Optuna (50 trials), backtested in 28-day blocks.
series_a_lightGBM = TimeSeries.from_dataframe(
    prepared_a,
    time_col='Date',
    value_cols=['Occ (%) Sold'],
    fill_missing_dates=True
)

future_covariates_a_lightGBM = TimeSeries.from_dataframe(
    prepared_a,
    time_col='Date',
    value_cols=['is_holiday', 'day_of_week', 'month', 'week_of_year'],
    fill_missing_dates=True
)

# Chronological 80/20 split of the target and its covariates.
split_index = int(len(series_a_lightGBM) * 0.8)
target_train_a_lightGBM, target_test_a_lightGBM = series_a_lightGBM[:split_index], series_a_lightGBM[split_index:]
future_cov_train_a_lightGBM, future_cov_test_a_lightGBM = future_covariates_a_lightGBM[:split_index], future_covariates_a_lightGBM[split_index:]


def backtest_blocks(model, series, future_covariates, start, verbose=False):
    """Backtest ``model`` in consecutive 28-step blocks and return one stitched series.

    historical_forecasts() defaults to last_points_only=True, which keeps only
    the final step of every window; with stride 28 that scores a single point
    per 28 days. Here the full blocks are requested and concatenated.
    """
    blocks = model.historical_forecasts(
        series=series,
        future_covariates=future_covariates,
        start=start,
        forecast_horizon=28,
        stride=28,
        retrain=False,
        last_points_only=False,
        verbose=verbose
    )
    if not blocks:
        raise ValueError("Test period is too short for a single 28-step forecast block.")

    # stride == forecast_horizon, so consecutive blocks are contiguous in time.
    stitched = blocks[0]
    for block in blocks[1:]:
        stitched = stitched.concatenate(block)
    return stitched


def objective(trial):
    """Optuna objective: SMAPE of the 28-day block backtest for one parameter set."""
    params = {
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'n_estimators': trial.suggest_int('n_estimators', 50, 450),
        'max_depth': trial.suggest_int('max_depth', 1, 30),
        'num_leaves': trial.suggest_int('num_leaves', 7, 2047)
    }

    model = LightGBMModel(
        lags=lags_A,
        lags_future_covariates=[0, 1, 2, 3, 4, 5, 6],
        output_chunk_length=28,
        verbose=-1,
        random_state=42,
        **params
    )

    model.fit(target_train_a_lightGBM, future_covariates=future_cov_train_a_lightGBM)

    backtest_predictions = backtest_blocks(
        model,
        target_train_a_lightGBM.concatenate(target_test_a_lightGBM),
        future_covariates_a_lightGBM,
        len(target_train_a_lightGBM),
    )

    return smape(target_test_a_lightGBM, backtest_predictions)


# NOTE(review): the objective is scored on the same test period that is reported
# below, so the final metrics are optimistically biased; consider tuning on a
# validation slice carved out of the training data.
# The sampler is seeded so that repeated runs of the study give the same trials.
study = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=50)

best_params = study.best_params
print(f"Best Parameters: {best_params}")

best_model = LightGBMModel(
    lags=lags_A,
    lags_future_covariates=[0, 1, 2, 3, 4, 5, 6],
    output_chunk_length=28,
    verbose=-1,
    random_state=42,
    **best_params
)

best_model.fit(target_train_a_lightGBM, future_covariates=future_cov_train_a_lightGBM)

final_predictions = backtest_blocks(
    best_model,
    target_train_a_lightGBM.concatenate(target_test_a_lightGBM),
    future_covariates_a_lightGBM,
    len(target_train_a_lightGBM),
    verbose=True,
)

final_mae = mae(target_test_a_lightGBM, final_predictions)
final_smape = smape(target_test_a_lightGBM, final_predictions)
final_rmse = rmse(target_test_a_lightGBM, final_predictions)

print(f"\nFinal MAE: {final_mae:.4f}")
print(f"Final SMAPE: {final_smape:.4f}%")
print(f"Final RMSE: {final_rmse:.4f}")

### (2B) Model B

In [None]:
# Hotel B - LightGBM tuned with Optuna (50 trials), backtested in 28-day blocks.
series_b_lightGBM = TimeSeries.from_dataframe(
    prepared_b,
    time_col='Date',
    value_cols=['Occ (%) Sold'],
    fill_missing_dates=True
)

future_covariates_b_lightGBM = TimeSeries.from_dataframe(
    prepared_b,
    time_col='Date',
    value_cols=['is_holiday', 'day_of_week', 'month', 'week_of_year'],
    fill_missing_dates=True
)

# Chronological 80/20 split of the target and its covariates.
split_index = int(len(series_b_lightGBM) * 0.8)
target_train_b_lightGBM, target_test_b_lightGBM = series_b_lightGBM[:split_index], series_b_lightGBM[split_index:]
future_cov_train_b_lightGBM, future_cov_test_b_lightGBM = future_covariates_b_lightGBM[:split_index], future_covariates_b_lightGBM[split_index:]


def backtest_blocks(model, series, future_covariates, start, verbose=False):
    """Backtest ``model`` in consecutive 28-step blocks and return one stitched series.

    historical_forecasts() defaults to last_points_only=True, which keeps only
    the final step of every window; with stride 28 that scores a single point
    per 28 days. Here the full blocks are requested and concatenated.
    """
    blocks = model.historical_forecasts(
        series=series,
        future_covariates=future_covariates,
        start=start,
        forecast_horizon=28,
        stride=28,
        retrain=False,
        last_points_only=False,
        verbose=verbose
    )
    if not blocks:
        raise ValueError("Test period is too short for a single 28-step forecast block.")

    # stride == forecast_horizon, so consecutive blocks are contiguous in time.
    stitched = blocks[0]
    for block in blocks[1:]:
        stitched = stitched.concatenate(block)
    return stitched


def objective(trial):
    """Optuna objective: SMAPE of the 28-day block backtest for one parameter set."""
    params = {
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'n_estimators': trial.suggest_int('n_estimators', 50, 450),
        'max_depth': trial.suggest_int('max_depth', 1, 30),
        'num_leaves': trial.suggest_int('num_leaves', 7, 2047)
    }

    model = LightGBMModel(
        lags=lags_B,
        lags_future_covariates=[0, 1, 2, 3, 4, 5, 6],
        output_chunk_length=28,
        verbose=-1,
        random_state=42,
        **params
    )

    model.fit(target_train_b_lightGBM, future_covariates=future_cov_train_b_lightGBM)

    backtest_predictions = backtest_blocks(
        model,
        target_train_b_lightGBM.concatenate(target_test_b_lightGBM),
        future_covariates_b_lightGBM,
        len(target_train_b_lightGBM),
    )

    return smape(target_test_b_lightGBM, backtest_predictions)


# NOTE(review): the objective is scored on the same test period that is reported
# below, so the final metrics are optimistically biased; consider tuning on a
# validation slice carved out of the training data.
# The sampler is seeded so that repeated runs of the study give the same trials.
study = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=50)

best_params = study.best_params
print(f"Best Parameters: {best_params}")

best_model = LightGBMModel(
    lags=lags_B,
    lags_future_covariates=[0, 1, 2, 3, 4, 5, 6],
    output_chunk_length=28,
    verbose=-1,
    random_state=42,
    **best_params
)

best_model.fit(target_train_b_lightGBM, future_covariates=future_cov_train_b_lightGBM)

final_predictions = backtest_blocks(
    best_model,
    target_train_b_lightGBM.concatenate(target_test_b_lightGBM),
    future_covariates_b_lightGBM,
    len(target_train_b_lightGBM),
    verbose=True,
)

final_mae = mae(target_test_b_lightGBM, final_predictions)
final_smape = smape(target_test_b_lightGBM, final_predictions)
final_rmse = rmse(target_test_b_lightGBM, final_predictions)

print(f"\nFinal MAE: {final_mae:.4f}")
print(f"Final SMAPE: {final_smape:.4f}%")
print(f"Final RMSE: {final_rmse:.4f}")

### (2C) Model C

In [None]:
# Hotel C - LightGBM tuned with Optuna (50 trials), backtested in 28-day blocks.
series_c_lightGBM = TimeSeries.from_dataframe(
    prepared_c,
    time_col='Date',
    value_cols=['Occ (%) Sold'],
    fill_missing_dates=True
)

future_covariates_c_lightGBM = TimeSeries.from_dataframe(
    prepared_c,
    time_col='Date',
    value_cols=['is_holiday', 'day_of_week', 'month', 'week_of_year'],
    fill_missing_dates=True
)

# Chronological 80/20 split of the target and its covariates.
split_index = int(len(series_c_lightGBM) * 0.8)
target_train_c_lightGBM, target_test_c_lightGBM = series_c_lightGBM[:split_index], series_c_lightGBM[split_index:]
future_cov_train_c_lightGBM, future_cov_test_c_lightGBM = future_covariates_c_lightGBM[:split_index], future_covariates_c_lightGBM[split_index:]


def backtest_blocks(model, series, future_covariates, start, verbose=False):
    """Backtest ``model`` in consecutive 28-step blocks and return one stitched series.

    historical_forecasts() defaults to last_points_only=True, which keeps only
    the final step of every window; with stride 28 that scores a single point
    per 28 days. Here the full blocks are requested and concatenated.
    """
    blocks = model.historical_forecasts(
        series=series,
        future_covariates=future_covariates,
        start=start,
        forecast_horizon=28,
        stride=28,
        retrain=False,
        last_points_only=False,
        verbose=verbose
    )
    if not blocks:
        raise ValueError("Test period is too short for a single 28-step forecast block.")

    # stride == forecast_horizon, so consecutive blocks are contiguous in time.
    stitched = blocks[0]
    for block in blocks[1:]:
        stitched = stitched.concatenate(block)
    return stitched


def objective(trial):
    """Optuna objective: SMAPE of the 28-day block backtest for one parameter set."""
    params = {
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'n_estimators': trial.suggest_int('n_estimators', 50, 450),
        'max_depth': trial.suggest_int('max_depth', 1, 30),
        'num_leaves': trial.suggest_int('num_leaves', 7, 2047)
    }

    model = LightGBMModel(
        lags=lags_C,
        lags_future_covariates=[0, 1, 2, 3, 4, 5, 6],
        output_chunk_length=28,
        verbose=-1,
        random_state=42,
        **params
    )

    model.fit(target_train_c_lightGBM, future_covariates=future_cov_train_c_lightGBM)

    backtest_predictions = backtest_blocks(
        model,
        target_train_c_lightGBM.concatenate(target_test_c_lightGBM),
        future_covariates_c_lightGBM,
        len(target_train_c_lightGBM),
    )

    return smape(target_test_c_lightGBM, backtest_predictions)


# NOTE(review): the objective is scored on the same test period that is reported
# below, so the final metrics are optimistically biased; consider tuning on a
# validation slice carved out of the training data.
# The sampler is seeded so that repeated runs of the study give the same trials.
study = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=50)

best_params = study.best_params
print(f"Best Parameters: {best_params}")

best_model = LightGBMModel(
    lags=lags_C,
    lags_future_covariates=[0, 1, 2, 3, 4, 5, 6],
    output_chunk_length=28,
    verbose=-1,
    random_state=42,
    **best_params
)

best_model.fit(target_train_c_lightGBM, future_covariates=future_cov_train_c_lightGBM)

final_predictions = backtest_blocks(
    best_model,
    target_train_c_lightGBM.concatenate(target_test_c_lightGBM),
    future_covariates_c_lightGBM,
    len(target_train_c_lightGBM),
    verbose=True,
)

final_mae = mae(target_test_c_lightGBM, final_predictions)
final_smape = smape(target_test_c_lightGBM, final_predictions)
final_rmse = rmse(target_test_c_lightGBM, final_predictions)

print(f"\nFinal MAE: {final_mae:.4f}")
print(f"Final SMAPE: {final_smape:.4f}%")
print(f"Final RMSE: {final_rmse:.4f}")

## (3) Optuna, 100 Iteration

### (3A) Model A

In [None]:
# Hotel A - LightGBM tuned with Optuna (100 trials), backtested in 28-day blocks.
series_a_lightGBM = TimeSeries.from_dataframe(
    prepared_a,
    time_col='Date',
    value_cols=['Occ (%) Sold'],
    fill_missing_dates=True
)

future_covariates_a_lightGBM = TimeSeries.from_dataframe(
    prepared_a,
    time_col='Date',
    value_cols=['is_holiday', 'day_of_week', 'month', 'week_of_year'],
    fill_missing_dates=True
)

# Chronological 80/20 split of the target and its covariates.
split_index = int(len(series_a_lightGBM) * 0.8)
target_train_a_lightGBM, target_test_a_lightGBM = series_a_lightGBM[:split_index], series_a_lightGBM[split_index:]
future_cov_train_a_lightGBM, future_cov_test_a_lightGBM = future_covariates_a_lightGBM[:split_index], future_covariates_a_lightGBM[split_index:]


def backtest_blocks(model, series, future_covariates, start, verbose=False):
    """Backtest ``model`` in consecutive 28-step blocks and return one stitched series.

    historical_forecasts() defaults to last_points_only=True, which keeps only
    the final step of every window; with stride 28 that scores a single point
    per 28 days. Here the full blocks are requested and concatenated.
    """
    blocks = model.historical_forecasts(
        series=series,
        future_covariates=future_covariates,
        start=start,
        forecast_horizon=28,
        stride=28,
        retrain=False,
        last_points_only=False,
        verbose=verbose
    )
    if not blocks:
        raise ValueError("Test period is too short for a single 28-step forecast block.")

    # stride == forecast_horizon, so consecutive blocks are contiguous in time.
    stitched = blocks[0]
    for block in blocks[1:]:
        stitched = stitched.concatenate(block)
    return stitched


def objective(trial):
    """Optuna objective: SMAPE of the 28-day block backtest for one parameter set."""
    params = {
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'n_estimators': trial.suggest_int('n_estimators', 50, 450),
        'max_depth': trial.suggest_int('max_depth', 1, 30),
        'num_leaves': trial.suggest_int('num_leaves', 7, 2047)
    }

    model = LightGBMModel(
        lags=lags_A,
        lags_future_covariates=[0, 1, 2, 3, 4, 5, 6],
        output_chunk_length=28,
        verbose=-1,
        random_state=42,
        **params
    )

    model.fit(target_train_a_lightGBM, future_covariates=future_cov_train_a_lightGBM)

    backtest_predictions = backtest_blocks(
        model,
        target_train_a_lightGBM.concatenate(target_test_a_lightGBM),
        future_covariates_a_lightGBM,
        len(target_train_a_lightGBM),
    )

    return smape(target_test_a_lightGBM, backtest_predictions)


# NOTE(review): the objective is scored on the same test period that is reported
# below, so the final metrics are optimistically biased; consider tuning on a
# validation slice carved out of the training data.
# The sampler is seeded so that repeated runs of the study give the same trials.
study = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

best_params = study.best_params
print(f"Best Parameters: {best_params}")

best_model = LightGBMModel(
    lags=lags_A,
    lags_future_covariates=[0, 1, 2, 3, 4, 5, 6],
    output_chunk_length=28,
    verbose=-1,
    random_state=42,
    **best_params
)

best_model.fit(target_train_a_lightGBM, future_covariates=future_cov_train_a_lightGBM)

final_predictions = backtest_blocks(
    best_model,
    target_train_a_lightGBM.concatenate(target_test_a_lightGBM),
    future_covariates_a_lightGBM,
    len(target_train_a_lightGBM),
    verbose=True,
)

final_mae = mae(target_test_a_lightGBM, final_predictions)
final_smape = smape(target_test_a_lightGBM, final_predictions)
final_rmse = rmse(target_test_a_lightGBM, final_predictions)

print(f"\nFinal MAE: {final_mae:.4f}")
print(f"Final SMAPE: {final_smape:.4f}%")
print(f"Final RMSE: {final_rmse:.4f}")

### (3B) Model B

In [None]:
series_b_lightGBM = TimeSeries.from_dataframe(
    prepared_b,
    time_col='Date',
    value_cols=['Occ (%) Sold'],
    fill_missing_dates=True
)

future_covariates_b_lightGBM = TimeSeries.from_dataframe(
    prepared_b,
    time_col='Date',
    value_cols=['is_holiday', 'day_of_week', 'month', 'week_of_year'],
    fill_missing_dates=True
)

split_index = int(len(series_b_lightGBM) * 0.8)
target_train_b_lightGBM, target_test_b_lightGBM = series_b_lightGBM[:split_index], series_b_lightGBM[split_index:]
future_cov_train_b_lightGBM, future_cov_test_b_lightGBM = future_covariates_b_lightGBM[:split_index], future_covariates_b_lightGBM[split_index:]

def objective(trial):
    """Optuna objective: SMAPE of one LightGBM candidate on hotel B's hold-out split.

    Draws a hyperparameter set from ``trial``, fits a ``LightGBMModel`` on the
    training target with the training future covariates, forecasts the
    hold-out period with ``historical_forecasts`` (horizon 28, stride 28, no
    retraining) and returns the SMAPE against ``target_test_b_lightGBM``.

    NOTE(review): the score is computed on the same hold-out series that the
    final metrics are reported on -- confirm that tuning on it is intended.
    """
    # Keyword arguments are evaluated left to right, so the suggestions are
    # requested in the order learning_rate, n_estimators, max_depth, num_leaves.
    candidate = LightGBMModel(
        lags=lags_B,
        lags_future_covariates=[0, 1, 2, 3, 4, 5, 6],
        output_chunk_length=28,
        verbose=-1,
        random_state=42,
        learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3),
        n_estimators=trial.suggest_int('n_estimators', 50, 450),
        max_depth=trial.suggest_int('max_depth', 1, 30),
        num_leaves=trial.suggest_int('num_leaves', 7, 2047),
    )
    candidate.fit(target_train_b_lightGBM, future_covariates=future_cov_train_b_lightGBM)

    # Train and test are re-joined so forecasting can start at the split point.
    full_target = target_train_b_lightGBM.concatenate(target_test_b_lightGBM)
    holdout_forecast = candidate.historical_forecasts(
        series=full_target,
        future_covariates=future_covariates_b_lightGBM,
        start=len(target_train_b_lightGBM),
        forecast_horizon=28,
        stride=28,
        retrain=False,
        verbose=False,
    )

    return smape(target_test_b_lightGBM, holdout_forecast)

# Hyperparameter search for hotel B: minimise the SMAPE returned by objective().
# The sampler is seeded so repeated runs propose the same sequence of trials,
# in line with the fixed random_state already used for the LightGBM models.
# TPESampler is Optuna's default sampler, so only the seeding changes.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=100)

best_params = study.best_params
print(f"Best Parameters: {best_params}")

# Rebuild a model with the best hyperparameters and fit it on the training split.
best_model = LightGBMModel(
    lags=lags_B,
    lags_future_covariates=[0, 1, 2, 3, 4, 5, 6],
    output_chunk_length=28,
    verbose=-1,
    random_state=42,
    **best_params
)

best_model.fit(target_train_b_lightGBM, future_covariates=future_cov_train_b_lightGBM)

# Forecast the hold-out period in 28-step windows without refitting the model.
# NOTE(review): last_points_only is left at its default; with stride=28 this
# presumably keeps one point per window rather than every forecast step --
# confirm that the metrics below cover the intended points.
final_predictions = best_model.historical_forecasts(
    series=target_train_b_lightGBM.concatenate(target_test_b_lightGBM),
    future_covariates=future_covariates_b_lightGBM,
    start=len(target_train_b_lightGBM),
    forecast_horizon=28,
    stride=28,
    retrain=False,
    verbose=True
)

# NOTE(review): these metrics use the same hold-out split that the Optuna
# objective was minimised on -- verify this is the intended evaluation protocol.
final_mae = mae(target_test_b_lightGBM, final_predictions)
final_smape = smape(target_test_b_lightGBM, final_predictions)
final_rmse = rmse(target_test_b_lightGBM, final_predictions)

print(f"\nFinal MAE: {final_mae:.4f}")
print(f"Final SMAPE: {final_smape:.4f}%")
print(f"Final RMSE: {final_rmse:.4f}")

### (3C) Model C

In [None]:
# Target series for hotel C: the 'Occ (%) Sold' column indexed by 'Date'.
series_c_lightGBM = TimeSeries.from_dataframe(
    prepared_c, time_col='Date', value_cols=['Occ (%) Sold'], fill_missing_dates=True
)

# Calendar/holiday columns that are handed to the model as future covariates.
future_covariates_c_lightGBM = TimeSeries.from_dataframe(
    prepared_c,
    time_col='Date',
    value_cols=['is_holiday', 'day_of_week', 'month', 'week_of_year'],
    fill_missing_dates=True,
)

# Chronological split: the first 80% of the time steps train, the rest test.
split_index = int(len(series_c_lightGBM) * 0.8)

target_train_c_lightGBM = series_c_lightGBM[:split_index]
target_test_c_lightGBM = series_c_lightGBM[split_index:]

future_cov_train_c_lightGBM = future_covariates_c_lightGBM[:split_index]
future_cov_test_c_lightGBM = future_covariates_c_lightGBM[split_index:]

def objective(trial):
    """Optuna objective: SMAPE of one LightGBM candidate on hotel C's hold-out split.

    Draws a hyperparameter set from ``trial``, fits a ``LightGBMModel`` on the
    training target with the training future covariates, forecasts the
    hold-out period with ``historical_forecasts`` (horizon 28, stride 28, no
    retraining) and returns the SMAPE against ``target_test_c_lightGBM``.

    NOTE(review): the score is computed on the same hold-out series that the
    final metrics are reported on -- confirm that tuning on it is intended.
    """
    # Keyword arguments are evaluated left to right, so the suggestions are
    # requested in the order learning_rate, n_estimators, max_depth, num_leaves.
    candidate = LightGBMModel(
        lags=lags_C,
        lags_future_covariates=[0, 1, 2, 3, 4, 5, 6],
        output_chunk_length=28,
        verbose=-1,
        random_state=42,
        learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3),
        n_estimators=trial.suggest_int('n_estimators', 50, 450),
        max_depth=trial.suggest_int('max_depth', 1, 30),
        num_leaves=trial.suggest_int('num_leaves', 7, 2047),
    )
    candidate.fit(target_train_c_lightGBM, future_covariates=future_cov_train_c_lightGBM)

    # Train and test are re-joined so forecasting can start at the split point.
    full_target = target_train_c_lightGBM.concatenate(target_test_c_lightGBM)
    holdout_forecast = candidate.historical_forecasts(
        series=full_target,
        future_covariates=future_covariates_c_lightGBM,
        start=len(target_train_c_lightGBM),
        forecast_horizon=28,
        stride=28,
        retrain=False,
        verbose=False,
    )

    return smape(target_test_c_lightGBM, holdout_forecast)

# Hyperparameter search for hotel C: minimise the SMAPE returned by objective().
# The sampler is seeded so repeated runs propose the same sequence of trials,
# in line with the fixed random_state already used for the LightGBM models.
# TPESampler is Optuna's default sampler, so only the seeding changes.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=100)

best_params = study.best_params
print(f"Best Parameters: {best_params}")

# Rebuild a model with the best hyperparameters and fit it on the training split.
best_model = LightGBMModel(
    lags=lags_C,
    lags_future_covariates=[0, 1, 2, 3, 4, 5, 6],
    output_chunk_length=28,
    verbose=-1,
    random_state=42,
    **best_params
)

best_model.fit(target_train_c_lightGBM, future_covariates=future_cov_train_c_lightGBM)

# Forecast the hold-out period in 28-step windows without refitting the model.
# NOTE(review): last_points_only is left at its default; with stride=28 this
# presumably keeps one point per window rather than every forecast step --
# confirm that the metrics below cover the intended points.
final_predictions = best_model.historical_forecasts(
    series=target_train_c_lightGBM.concatenate(target_test_c_lightGBM),
    future_covariates=future_covariates_c_lightGBM,
    start=len(target_train_c_lightGBM),
    forecast_horizon=28,
    stride=28,
    retrain=False,
    verbose=True
)

# NOTE(review): these metrics use the same hold-out split that the Optuna
# objective was minimised on -- verify this is the intended evaluation protocol.
final_mae = mae(target_test_c_lightGBM, final_predictions)
final_smape = smape(target_test_c_lightGBM, final_predictions)
final_rmse = rmse(target_test_c_lightGBM, final_predictions)

print(f"\nFinal MAE: {final_mae:.4f}")
print(f"Final SMAPE: {final_smape:.4f}%")
print(f"Final RMSE: {final_rmse:.4f}")

## (4) Optuna, 150 Iteration

### (4A) Model A

In [None]:
# Target series for hotel A: the 'Occ (%) Sold' column indexed by 'Date'.
series_a_lightGBM = TimeSeries.from_dataframe(
    prepared_a, time_col='Date', value_cols=['Occ (%) Sold'], fill_missing_dates=True
)

# Calendar/holiday columns that are handed to the model as future covariates.
future_covariates_a_lightGBM = TimeSeries.from_dataframe(
    prepared_a,
    time_col='Date',
    value_cols=['is_holiday', 'day_of_week', 'month', 'week_of_year'],
    fill_missing_dates=True,
)

# Chronological split: the first 80% of the time steps train, the rest test.
split_index = int(len(series_a_lightGBM) * 0.8)

target_train_a_lightGBM = series_a_lightGBM[:split_index]
target_test_a_lightGBM = series_a_lightGBM[split_index:]

future_cov_train_a_lightGBM = future_covariates_a_lightGBM[:split_index]
future_cov_test_a_lightGBM = future_covariates_a_lightGBM[split_index:]

def objective(trial):
    """Optuna objective: SMAPE of one LightGBM candidate on hotel A's hold-out split.

    Draws a hyperparameter set from ``trial``, fits a ``LightGBMModel`` on the
    training target with the training future covariates, forecasts the
    hold-out period with ``historical_forecasts`` (horizon 28, stride 28, no
    retraining) and returns the SMAPE against ``target_test_a_lightGBM``.

    NOTE(review): the score is computed on the same hold-out series that the
    final metrics are reported on -- confirm that tuning on it is intended.
    """
    # Keyword arguments are evaluated left to right, so the suggestions are
    # requested in the order learning_rate, n_estimators, max_depth, num_leaves.
    candidate = LightGBMModel(
        lags=lags_A,
        lags_future_covariates=[0, 1, 2, 3, 4, 5, 6],
        output_chunk_length=28,
        verbose=-1,
        random_state=42,
        learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3),
        n_estimators=trial.suggest_int('n_estimators', 50, 450),
        max_depth=trial.suggest_int('max_depth', 1, 30),
        num_leaves=trial.suggest_int('num_leaves', 7, 2047),
    )
    candidate.fit(target_train_a_lightGBM, future_covariates=future_cov_train_a_lightGBM)

    # Train and test are re-joined so forecasting can start at the split point.
    full_target = target_train_a_lightGBM.concatenate(target_test_a_lightGBM)
    holdout_forecast = candidate.historical_forecasts(
        series=full_target,
        future_covariates=future_covariates_a_lightGBM,
        start=len(target_train_a_lightGBM),
        forecast_horizon=28,
        stride=28,
        retrain=False,
        verbose=False,
    )

    return smape(target_test_a_lightGBM, holdout_forecast)

# Hyperparameter search for hotel A (150 trials): minimise the SMAPE returned
# by objective(). The sampler is seeded so repeated runs propose the same
# sequence of trials, in line with the fixed random_state already used for the
# LightGBM models. TPESampler is Optuna's default sampler, so only the seeding
# changes.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=150)

best_params = study.best_params
print(f"Best Parameters: {best_params}")

# Rebuild a model with the best hyperparameters and fit it on the training split.
best_model = LightGBMModel(
    lags=lags_A,
    lags_future_covariates=[0, 1, 2, 3, 4, 5, 6],
    output_chunk_length=28,
    verbose=-1,
    random_state=42,
    **best_params
)

best_model.fit(target_train_a_lightGBM, future_covariates=future_cov_train_a_lightGBM)

# Forecast the hold-out period in 28-step windows without refitting the model.
# NOTE(review): last_points_only is left at its default; with stride=28 this
# presumably keeps one point per window rather than every forecast step --
# confirm that the metrics below cover the intended points.
final_predictions = best_model.historical_forecasts(
    series=target_train_a_lightGBM.concatenate(target_test_a_lightGBM),
    future_covariates=future_covariates_a_lightGBM,
    start=len(target_train_a_lightGBM),
    forecast_horizon=28,
    stride=28,
    retrain=False,
    verbose=True
)

# NOTE(review): these metrics use the same hold-out split that the Optuna
# objective was minimised on -- verify this is the intended evaluation protocol.
final_mae = mae(target_test_a_lightGBM, final_predictions)
final_smape = smape(target_test_a_lightGBM, final_predictions)
final_rmse = rmse(target_test_a_lightGBM, final_predictions)

print(f"\nFinal MAE: {final_mae:.4f}")
print(f"Final SMAPE: {final_smape:.4f}%")
print(f"Final RMSE: {final_rmse:.4f}")

### (4B) Model B

In [None]:
# Target series for hotel B: the 'Occ (%) Sold' column indexed by 'Date'.
series_b_lightGBM = TimeSeries.from_dataframe(
    prepared_b, time_col='Date', value_cols=['Occ (%) Sold'], fill_missing_dates=True
)

# Calendar/holiday columns that are handed to the model as future covariates.
future_covariates_b_lightGBM = TimeSeries.from_dataframe(
    prepared_b,
    time_col='Date',
    value_cols=['is_holiday', 'day_of_week', 'month', 'week_of_year'],
    fill_missing_dates=True,
)

# Chronological split: the first 80% of the time steps train, the rest test.
split_index = int(len(series_b_lightGBM) * 0.8)

target_train_b_lightGBM = series_b_lightGBM[:split_index]
target_test_b_lightGBM = series_b_lightGBM[split_index:]

future_cov_train_b_lightGBM = future_covariates_b_lightGBM[:split_index]
future_cov_test_b_lightGBM = future_covariates_b_lightGBM[split_index:]

def objective(trial):
    """Optuna objective: SMAPE of one LightGBM candidate on hotel B's hold-out split.

    Draws a hyperparameter set from ``trial``, fits a ``LightGBMModel`` on the
    training target with the training future covariates, forecasts the
    hold-out period with ``historical_forecasts`` (horizon 28, stride 28, no
    retraining) and returns the SMAPE against ``target_test_b_lightGBM``.

    NOTE(review): the score is computed on the same hold-out series that the
    final metrics are reported on -- confirm that tuning on it is intended.
    """
    # Keyword arguments are evaluated left to right, so the suggestions are
    # requested in the order learning_rate, n_estimators, max_depth, num_leaves.
    candidate = LightGBMModel(
        lags=lags_B,
        lags_future_covariates=[0, 1, 2, 3, 4, 5, 6],
        output_chunk_length=28,
        verbose=-1,
        random_state=42,
        learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3),
        n_estimators=trial.suggest_int('n_estimators', 50, 450),
        max_depth=trial.suggest_int('max_depth', 1, 30),
        num_leaves=trial.suggest_int('num_leaves', 7, 2047),
    )
    candidate.fit(target_train_b_lightGBM, future_covariates=future_cov_train_b_lightGBM)

    # Train and test are re-joined so forecasting can start at the split point.
    full_target = target_train_b_lightGBM.concatenate(target_test_b_lightGBM)
    holdout_forecast = candidate.historical_forecasts(
        series=full_target,
        future_covariates=future_covariates_b_lightGBM,
        start=len(target_train_b_lightGBM),
        forecast_horizon=28,
        stride=28,
        retrain=False,
        verbose=False,
    )

    return smape(target_test_b_lightGBM, holdout_forecast)

# Hyperparameter search for hotel B (150 trials): minimise the SMAPE returned
# by objective(). The sampler is seeded so repeated runs propose the same
# sequence of trials, in line with the fixed random_state already used for the
# LightGBM models. TPESampler is Optuna's default sampler, so only the seeding
# changes.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=150)

best_params = study.best_params
print(f"Best Parameters: {best_params}")

# Rebuild a model with the best hyperparameters and fit it on the training split.
best_model = LightGBMModel(
    lags=lags_B,
    lags_future_covariates=[0, 1, 2, 3, 4, 5, 6],
    output_chunk_length=28,
    verbose=-1,
    random_state=42,
    **best_params
)

best_model.fit(target_train_b_lightGBM, future_covariates=future_cov_train_b_lightGBM)

# Forecast the hold-out period in 28-step windows without refitting the model.
# NOTE(review): last_points_only is left at its default; with stride=28 this
# presumably keeps one point per window rather than every forecast step --
# confirm that the metrics below cover the intended points.
final_predictions = best_model.historical_forecasts(
    series=target_train_b_lightGBM.concatenate(target_test_b_lightGBM),
    future_covariates=future_covariates_b_lightGBM,
    start=len(target_train_b_lightGBM),
    forecast_horizon=28,
    stride=28,
    retrain=False,
    verbose=True
)

# NOTE(review): these metrics use the same hold-out split that the Optuna
# objective was minimised on -- verify this is the intended evaluation protocol.
final_mae = mae(target_test_b_lightGBM, final_predictions)
final_smape = smape(target_test_b_lightGBM, final_predictions)
final_rmse = rmse(target_test_b_lightGBM, final_predictions)

print(f"\nFinal MAE: {final_mae:.4f}")
print(f"Final SMAPE: {final_smape:.4f}%")
print(f"Final RMSE: {final_rmse:.4f}")

### (4C) Model C

In [None]:
# Target series for hotel C: the 'Occ (%) Sold' column indexed by 'Date'.
series_c_lightGBM = TimeSeries.from_dataframe(
    prepared_c, time_col='Date', value_cols=['Occ (%) Sold'], fill_missing_dates=True
)

# Calendar/holiday columns that are handed to the model as future covariates.
future_covariates_c_lightGBM = TimeSeries.from_dataframe(
    prepared_c,
    time_col='Date',
    value_cols=['is_holiday', 'day_of_week', 'month', 'week_of_year'],
    fill_missing_dates=True,
)

# Chronological split: the first 80% of the time steps train, the rest test.
split_index = int(len(series_c_lightGBM) * 0.8)

target_train_c_lightGBM = series_c_lightGBM[:split_index]
target_test_c_lightGBM = series_c_lightGBM[split_index:]

future_cov_train_c_lightGBM = future_covariates_c_lightGBM[:split_index]
future_cov_test_c_lightGBM = future_covariates_c_lightGBM[split_index:]

def objective(trial):
    """Optuna objective: SMAPE of one LightGBM candidate on hotel C's hold-out split.

    Draws a hyperparameter set from ``trial``, fits a ``LightGBMModel`` on the
    training target with the training future covariates, forecasts the
    hold-out period with ``historical_forecasts`` (horizon 28, stride 28, no
    retraining) and returns the SMAPE against ``target_test_c_lightGBM``.

    NOTE(review): the score is computed on the same hold-out series that the
    final metrics are reported on -- confirm that tuning on it is intended.
    """
    # Keyword arguments are evaluated left to right, so the suggestions are
    # requested in the order learning_rate, n_estimators, max_depth, num_leaves.
    candidate = LightGBMModel(
        lags=lags_C,
        lags_future_covariates=[0, 1, 2, 3, 4, 5, 6],
        output_chunk_length=28,
        verbose=-1,
        random_state=42,
        learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3),
        n_estimators=trial.suggest_int('n_estimators', 50, 450),
        max_depth=trial.suggest_int('max_depth', 1, 30),
        num_leaves=trial.suggest_int('num_leaves', 7, 2047),
    )
    candidate.fit(target_train_c_lightGBM, future_covariates=future_cov_train_c_lightGBM)

    # Train and test are re-joined so forecasting can start at the split point.
    full_target = target_train_c_lightGBM.concatenate(target_test_c_lightGBM)
    holdout_forecast = candidate.historical_forecasts(
        series=full_target,
        future_covariates=future_covariates_c_lightGBM,
        start=len(target_train_c_lightGBM),
        forecast_horizon=28,
        stride=28,
        retrain=False,
        verbose=False,
    )

    return smape(target_test_c_lightGBM, holdout_forecast)

# Hyperparameter search for hotel C (150 trials): minimise the SMAPE returned
# by objective(). The sampler is seeded so repeated runs propose the same
# sequence of trials, in line with the fixed random_state already used for the
# LightGBM models. TPESampler is Optuna's default sampler, so only the seeding
# changes.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=150)

best_params = study.best_params
print(f"Best Parameters: {best_params}")

# Rebuild a model with the best hyperparameters and fit it on the training split.
best_model = LightGBMModel(
    lags=lags_C,
    lags_future_covariates=[0, 1, 2, 3, 4, 5, 6],
    output_chunk_length=28,
    verbose=-1,
    random_state=42,
    **best_params
)

best_model.fit(target_train_c_lightGBM, future_covariates=future_cov_train_c_lightGBM)

# Forecast the hold-out period in 28-step windows without refitting the model.
# NOTE(review): last_points_only is left at its default; with stride=28 this
# presumably keeps one point per window rather than every forecast step --
# confirm that the metrics below cover the intended points.
final_predictions = best_model.historical_forecasts(
    series=target_train_c_lightGBM.concatenate(target_test_c_lightGBM),
    future_covariates=future_covariates_c_lightGBM,
    start=len(target_train_c_lightGBM),
    forecast_horizon=28,
    stride=28,
    retrain=False,
    verbose=True
)

# NOTE(review): these metrics use the same hold-out split that the Optuna
# objective was minimised on -- verify this is the intended evaluation protocol.
final_mae = mae(target_test_c_lightGBM, final_predictions)
final_smape = smape(target_test_c_lightGBM, final_predictions)
final_rmse = rmse(target_test_c_lightGBM, final_predictions)

print(f"\nFinal MAE: {final_mae:.4f}")
print(f"Final SMAPE: {final_smape:.4f}%")
print(f"Final RMSE: {final_rmse:.4f}")

# Streamlit

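Token Kent: `<REDACTED>` (the auth token was committed here in plain text; rotate it and load it from Colab Secrets or an environment variable instead of storing it in the notebook)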

https://colab.research.google.com/drive/15VwwLhZsWJtFCCpT6VGWE3zQdoYr8Z6K?usp=sharing