In [8]:
%load_ext cuml.accel

UnsupportedCUDAError: A GPU with NVIDIA Volta™ (Compute Capability 7.0) or newer architecture is required.
Detected GPU 0: Tesla P100-PCIE-16GB                                                                                                                                                                                                                                            
Detected Compute Capability: 6.0

In [18]:
!pip install scikit-learn==1.5.2

Collecting scikit-learn==1.5.2
  Downloading scikit_learn-1.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading scikit_learn-1.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.3/13.3 MB[0m [31m97.0 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.6.1
    Uninstalling scikit-learn-1.6.1:
      Successfully uninstalled scikit-learn-1.6.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cesium 0.12.4 requires numpy<3.0,>=2.0, but you have numpy 1.26.4 which is incompatible.[0m[31m
[0mSuccessfully installed scikit-learn-1.5.2


In [9]:
import os
os.environ["DEVICE"] = "cuda" 

In [1]:
!pip install xgboost optuna autogluon.timeseries[all] sktime[all_extras] tsai[all]

Collecting autogluon.timeseries[all]
  Downloading autogluon.timeseries-1.3.1-py3-none-any.whl.metadata (12 kB)
Collecting sktime[all_extras]
  Downloading sktime-0.37.0-py3-none-any.whl.metadata (34 kB)
Collecting tsai[all]
  Downloading tsai-0.4.0-py3-none-any.whl.metadata (16 kB)
Collecting lightning<2.7,>=2.2 (from autogluon.timeseries[all])
  Downloading lightning-2.5.2-py3-none-any.whl.metadata (38 kB)
Collecting transformers<4.50,>=4.38.0 (from transformers[sentencepiece]<4.50,>=4.38.0->autogluon.timeseries[all])
  Downloading transformers-4.49.0-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Collecting gluonts<0.17,>=0.15.0 (from autogluon.timeseries[all])
  Downloading gluonts-0.16.1-py3-none-any.whl.metadata (9.8 kB)
Collecting statsforecast<2.0.2,>=1.7.0 (from autogluon.timeseries[all])
  Downloading statsforecast-2.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x8

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings

# General-purpose
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import torch

# Tsai
from tsai.all import *

# Sktime
import sktime
from sktime.datasets import load_from_tsfile
from sktime.classification.kernel_based import RocketClassifier
from sktime.classification.compose import ColumnEnsembleClassifier
from sktime.transformations.panel.catch22 import Catch22
from sktime.classification.hybrid import HIVECOTEV2
from sklearn.pipeline import make_pipeline
from xgboost import XGBClassifier
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.compose import DirectTabularRegressionForecaster

from xgboost import XGBRegressor
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor
from sktime.performance_metrics.forecasting import mean_absolute_error

from nltk.classify.scikitlearn import SklearnClassifier

# AutoGluon
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor
from autogluon.tabular import *
import autogluon

# Optuna
import optuna

# Settings
warnings.filterwarnings("ignore")
plt.style.use('seaborn-v0_8-whitegrid')
print(f"PyTorch version: {torch.__version__}")
print(f"tsai version: {tsai.__version__}")
print(f"sktime version: {sktime.__version__}")
print(f"optuna version: {optuna.__version__}")

# For reproducibility
def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything()

PyTorch version: 2.5.1+cu124
tsai version: 0.4.0
sktime version: 0.37.0
optuna version: 4.2.1


In [3]:
X_train, y_train, X_valid, y_valid = get_UCR_data('JapaneseVowels', return_split=True)

# FIX: This is the single most important change.
# Replace all NaN values (from padding) with 0.0.
# This ensures all resources is numeric and all series have the same length.
X_train = np.nan_to_num(X_train, nan=0.0)
X_valid = np.nan_to_num(X_valid, nan=0.0)

# Reconstruct the full dataset for tsai from the now-cleaned arrays
X = np.concatenate((X_train, X_valid))
y = np.concatenate((y_train, y_valid))
train_indices = np.arange(len(X_train))
valid_indices = np.arange(len(X_train), len(X))
splits = (train_indices, valid_indices)

print("--- Data Loaded and Zero-Padded Successfully ---")
print(f"X_train shape after padding: {X_train.shape}")
print(f"Any NaNs left in X_train? {np.isnan(X_train).any()}")
print("-" * 30)

--- Data Loaded and Zero-Padded Successfully ---
X_train shape after padding: (270, 12, 29)
Any NaNs left in X_train? False
------------------------------


In [24]:
tfms = [None, [Categorize()]]
dsets = TSDatasets(X, y, tfms=tfms, splits=splits, inplace=True)
dls = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=64, batch_tfms=[TSStandardize()])

# FIX: Add the required `seq_len` argument to the MiniRocket constructor.
# `X.shape[2]` provides the number of timesteps (sequence length).
model_tsai = MiniRocket(c_in=X.shape[1], c_out=dsets.c, seq_len=X.shape[2])

learn = Learner(dls, model_tsai, metrics=accuracy)
learn.fit_one_cycle(20, 1e-3)

tsai_probas, tsai_targets, tsai_preds = learn.get_preds(dl=dls.valid, with_decoded=True)
tsai_accuracy = accuracy_score(tsai_targets, tsai_preds)
print(f"\n✅ tsai MiniRocket Accuracy: {tsai_accuracy:.4f}")
print("-" * 30)

epoch,train_loss,valid_loss,accuracy,time
0,1.943992,2.065574,0.632432,00:00
1,1.415399,1.67699,0.718919,00:00
2,0.982688,1.138633,0.794595,00:00
3,0.722558,0.7269,0.82973,00:00
4,0.556054,0.502268,0.851351,00:00
5,0.444135,0.3801,0.87027,00:00
6,0.364673,0.313263,0.891892,00:00
7,0.305431,0.270109,0.902703,00:00
8,0.259647,0.245642,0.913514,00:00
9,0.223436,0.231955,0.913514,00:00



✅ tsai MiniRocket Accuracy: 0.9351
------------------------------


*** SIGTERM received at time=1750596384 on cpu 3 ***
PC: @     0x7f878fbeee2e  (unknown)  epoll_wait
    @     0x7f878fb0b520  (unknown)  (unknown)
[2025-06-22 12:46:24,739 E 35 35] logging.cc:497: *** SIGTERM received at time=1750596384 on cpu 3 ***
[2025-06-22 12:46:24,739 E 35 35] logging.cc:497: PC: @     0x7f878fbeee2e  (unknown)  epoll_wait
[2025-06-22 12:46:24,740 E 35 35] logging.cc:497:     @     0x7f878fb0b520  (unknown)  (unknown)


Unable to join threads to shut down before fork(). This can break multithreading in child processes.

Unable to join threads to shut down before fork(). This can break multithreading in child processes.

Unable to join threads to shut down before fork(). This can break multithreading in child processes.

Unable to join threads to shut down before fork(). This can break multithreading in child processes.

Unable to join threads to shut down before fork(). This can break multithreading in child processes.

Unable to join threads to shut down before fork(). This can break multithreading in child processes.



In [5]:
print("--- Running sktime ---")

# FIX: Update the helper function to handle variable-length series by dropping NaNs.
# This creates pd.Series of different lengths, which is what the nested format is for.
def to_sktime_nested(X_np):
    df = pd.DataFrame()
    for i in range(X_np.shape[1]):
        # For each variable, create a list of Series, dropping NaNs from each.
        df[f'dim_{i}'] = [pd.Series(x_i).dropna() for x_i in X_np[:, i]]
    return df

# Convert the pre-split resources directly using the fixed function
X_train_sk_nested = to_sktime_nested(X_train)
X_valid_sk_nested = to_sktime_nested(X_valid)

# Now, RocketClassifier will receive clean resources with no NaNs.
rocket = RocketClassifier(num_kernels=10000, random_state=42)
rocket.fit(X_train_sk_nested, y_train)
y_pred_rocket = rocket.predict(X_valid_sk_nested)
sktime_rocket_accuracy = accuracy_score(y_valid, y_pred_rocket)
print(f"✅ sktime RocketClassifier Accuracy: {sktime_rocket_accuracy:.4f}")
print("-" * 30)

--- Running sktime ---
✅ sktime RocketClassifier Accuracy: 0.9351
------------------------------


In [21]:
print("--- Running AutoGluon ---")

# FIX: Prepare resources for TabularPredictor, not TimeSeriesPredictor.
# We flatten the time series resources from 3D (samples, vars, steps) to 2D (samples, features).
def flatten_for_tabular(X_np):
    n_samples, n_vars, n_timesteps = X_np.shape
    return X_np.reshape(n_samples, n_vars * n_timesteps)

X_train_flat = flatten_for_tabular(X_train)
X_valid_flat = flatten_for_tabular(X_valid)

# Create Pandas DataFrames, which is the required input for TabularPredictor
train_df = pd.DataFrame(X_train_flat)
train_df['label'] = y_train

valid_df = pd.DataFrame(X_valid_flat)
valid_df['label'] = y_valid

# FIX: Use TabularPredictor for this classification task.
predictor = TabularPredictor(
    label='label',
    path='./autogluon_models_jpvowels_tabular',
    eval_metric='accuracy', # 'accuracy' is a supported metric for TabularPredictor!
    
)

# Fit the model. AutoGluon will use the validation resources automatically for early stopping and ensembling.
predictor.fit(
    train_data=train_df,
    tuning_data=valid_df, # Provide the validation set here
    time_limit=180,
    presets='best',
    use_bag_holdout=True,
    ag_args_fit={'num_gpus': 1}
)

print("\nAutoGluon Leaderboard (evaluated on validation resources):")
print(predictor.leaderboard(valid_df))

autogluon_accuracy = predictor.evaluate(valid_df)['accuracy']
print(f"\n✅ AutoGluon Final Ensemble Accuracy: {autogluon_accuracy:.4f}")
print("-" * 30)

Preset alias specified: 'best' maps to 'best_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.11.11
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP PREEMPT_DYNAMIC Sun Nov 10 10:07:59 UTC 2024
CPU Count:          4
Memory Avail:       17.24 GB / 31.35 GB (55.0%)
Disk Space Avail:   19.43 GB / 19.52 GB (99.5%)
Presets specified: ['best']
Setting dynamic_stacking from 'auto' to False. Reason: Skip dynamic_stacking when use_bag_holdout is enabled. (use_bag_holdout=True)
Stack configuration (auto_stack=True): num_stack_levels=0, num_bag_folds=8, num_bag_sets=1
Beginning AutoGluon training ... Time limit = 180s
AutoGluon will save models to "/kaggle/working/autogluon_models_jpvowels_tabular"
Train Data Rows:    270
Train Data Columns: 348
Tuning Data Rows:    370
Tuning Data Columns: 348
Label Column:       label
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	9 uni

--- Running AutoGluon ---


	Stage 5 Generators:
		Fitting DropDuplicatesFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 348 | ['0', '1', '2', '3', '4', ...]
	Types of features in processed data (raw dtype, special dtypes):
		('float', []) : 348 | ['0', '1', '2', '3', '4', ...]
	0.4s = Fit runtime
	348 features in original data used to generate 348 features in processed data.
	Train Data (Processed) Memory Usage: 0.85 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.44s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
	To change this, specify the eval_metric parameter of Predictor()
use_bag_holdout=True, will use tuning_data as holdout (will not be used for early stopping).
Large model count detected (112 configs) ... Only displaying the first 3 models of each family. To see all, set `verbosity=3`.
User-specified model hyperparameters to be fit:
{
	'NN_TORCH': [{}, {'activation': 'elu', 'd


AutoGluon Leaderboard (evaluated on validation data):
                    model  score_test  score_val eval_metric  pred_time_test  \
0     WeightedEnsemble_L2    0.935135   0.935135    accuracy        0.647439   
1  NeuralNetFastAI_BAG_L1    0.913514   0.913514    accuracy        0.459736   
2       LightGBMXT_BAG_L1    0.872973   0.872973    accuracy        0.178010   
3         LightGBM_BAG_L1    0.824324   0.824324    accuracy        0.211010   
4   KNeighborsUnif_BAG_L1    0.816216   0.816216    accuracy        0.007227   
5   KNeighborsDist_BAG_L1    0.808108   0.808108    accuracy        0.005798   

   pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  \
0       0.520449  115.482728                 0.002466                0.000808   
1       0.346745   55.250310                 0.459736                0.346745   
2       0.167103   60.165410                 0.178010                0.167103   
3       0.210690   68.586245                 0.211010       

In [22]:
print("--- Running Optuna with sktime ---")
# This section now uses the correctly prepared sktime resources
X_train_opt = X_train_sk_nested
y_train_opt = y_train
X_valid_opt = X_valid_sk_nested
y_valid_opt = y_valid

def objective(trial):
    num_kernels = trial.suggest_int("num_kernels", 5000, 20000, log=True)
    model = RocketClassifier(num_kernels=num_kernels, random_state=42)
    model.fit(X_train_opt, y_train_opt)
    accuracy = model.score(X_valid_opt, y_valid_opt)
    return accuracy

study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=20)

print("\nOptimization finished.")
print(f"✅ Optuna Best Trial Accuracy: {study.best_value:.4f}")
print(f"   Best params: {study.best_params}")
print("-" * 30)

[I 2025-06-22 11:39:16,854] A new study created in memory with name: no-name-5dfeae4a-62cc-416f-b306-53ff67bc564d


--- Running Optuna with sktime ---


[W 2025-06-22 11:39:21,824] Trial 0 failed with parameters: {'num_kernels': 8403} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/tmp/ipykernel_35/1952857915.py", line 11, in objective
    model.fit(X_train_opt, y_train_opt)
  File "/usr/local/lib/python3.11/dist-packages/sktime/classification/base.py", line 272, in fit
    self._fit(X, y)
  File "/usr/local/lib/python3.11/dist-packages/sktime/classification/_delegate.py", line 66, in _fit
    estimator.fit(X=X, y=y)
  File "/usr/local/lib/python3.11/dist-packages/sktime/classification/base.py", line 272, in fit
    self._fit(X, y)
  File "/usr/local/lib/python3.11/dist-packages/sktime/classification/compose/_pipeline.py", line 562, in _fit
    Xt = self.transformers_.fit_transform(X=X, y=y)
         ^^^^^^^^^^^^^^^^^^^^^^

KeyboardInterrupt: 

# Time Series

In [23]:
import kagglehub
from kagglehub import KaggleDatasetAdapter

# Set the path to the file you'd like to load
file_path = "ETTh1.csv"

# Load the latest version
df = kagglehub.load_dataset(
  KaggleDatasetAdapter.PANDAS,
  "abiridir/etth1-dataset-csv",
  file_path,
)

print("First 5 records:", df.head())

First 5 records:    Id                 date         OT  Unnamed: 3
0   0  2016-07-01 00:00:00  30.531000         NaN
1   1  2016-07-01 01:00:00  27.787001         NaN
2   2  2016-07-01 02:00:00  27.787001         NaN
3   3  2016-07-01 03:00:00  25.044001         NaN
4   4  2016-07-01 04:00:00  21.948000         NaN


In [None]:
display(df.head())

# Define forecasting parameters
forecast_horizon = 96  # We want to predict the next 96 hours
target_col = 'OT'      # 'Oil Temperature' is our target

# Split resources into train and validation sets (chronologically)
# The last `forecast_horizon` points will be our validation set
train_df = df.iloc[:-forecast_horizon]
valid_df = df.iloc[-forecast_horizon:]

print(f"Full dataset shape: {df.shape}")
print(f"Training set shape:   {train_df.shape}")
print(f"Validation set shape: {valid_df.shape}")

# Visualize the split
plt.figure(figsize=(14, 7))
plt.plot(train_df.index, train_df[target_col], label='Training Data')
plt.plot(valid_df.index, valid_df[target_col], label='Validation Data (Ground Truth)', color='orange')
plt.title(f'ETTh1 Dataset - Forecasting "{target_col}"', fontsize=16)
plt.legend()
plt.show()

In [None]:
df['date'] = pd.to_datetime(df['date'])      # convert string → Timestamp
df = df.set_index('date')                    # now index is a DatetimeIndex

def create_date_features(df):
    df = df.copy()
    df['month']        = df.index.month
    df['day_of_week']  = df.index.dayofweek
    df['day_of_year']  = df.index.dayofyear
    df['hour']         = df.index.hour
    df['week_of_year'] = df.index.isocalendar().week.astype(int)
    return df

df_featured = create_date_features(df)

# FIX for tsai: Add a unique_id column for SlidingWindowPanel
df_featured['unique_id'] = 'ETTh1'

# Define forecasting parameters
forecast_horizon = 96
target_col = 'OT'

# Split the featured resources
train_df = df_featured.iloc[:-forecast_horizon]
valid_df = df_featured.iloc[-forecast_horizon:]

print("--- Data Loaded and Engineered for Forecasting ---")
display(df_featured.head())
print("-" * 30)

In [None]:

### 2. `tsai`: The Deep Learning Approach to Forecasting (FIXED)

print("--- Running tsai for Forecasting ---")
lookback_window = forecast_horizon * 2

# FIX: Correctly identify the target and feature columns
# 1. Get the list of ALL columns from the DataFrame first.
full_cols_list = list(df_featured.columns)
# 2. Get the integer index of the target column from this full list.
target_col_idx = full_cols_list.index(target_col)

# 3. Now, create the list of FEATURE columns for the `X` part of the resources.
#    This should not include the target or the ID column.
feat_cols = [col for col in full_cols_list if col not in [target_col, 'unique_id']]

# 4. Use the correct variables in SlidingWindowPanel
X, y = SlidingWindowPanel(
    window_len=lookback_window,
    get_y=forecast_horizon,
    # The lambda now correctly uses the integer index of the target column
    y_func=lambda o: o[:, -forecast_horizon:, target_col_idx],
    unique_id_cols=['unique_id'],
    # The feat_cols argument uses the list of feature names
)(df_featured)

# The rest of the tsai logic is now correct
tfms = [None, [TSRegression()]]
# Correctly define splits using the length of the new X array
splits = (list(range(len(X) - 1)), [len(X) - 1])
dsets = TSDatasets(X, y, tfms=tfms, splits=splits)
dls = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=128)

model_tsai_fc = InceptionTimePlus(c_in=X.shape[1], c_out=forecast_horizon)
learn = Learner(dls, model_tsai_fc, loss_func=MSELossFlat(), metrics=[mae])
learn.fit_one_cycle(10, 5e-4)

raw_preds, _, _ = learn.get_preds(dl=dls.valid)
prediction_tsai = raw_preds.numpy().flatten()
mae_tsai = np.mean(np.abs(valid_df[target_col].values - prediction_tsai))
print(f"\n✅ tsai InceptionTimePlus Forecasting MAE: {mae_tsai:.4f}")
print("-" * 30)

In [None]:
print("--- Running sktime for Forecasting ---")

# The resources is now purely numeric, so this will work.
y_train = train_df[target_col]
X_train = train_df.drop(columns=[target_col, 'unique_id'])

y_valid_true = valid_df[target_col]
X_valid = valid_df.drop(columns=[target_col, 'unique_id'])

fh = ForecastingHorizon(X_valid.index, is_relative=False)
regressor = XGBRegressor(random_state=42, n_estimators=100)
forecaster_sktime = DirectTabularRegressionForecaster(estimator=regressor)

forecaster_sktime.fit(y=y_train, X=X_train)
y_pred_sktime = forecaster_sktime.predict(fh=fh, X=X_valid)

mae_sktime = mean_absolute_error(y_true=y_valid_true, y_pred=y_pred_sktime)
print(f"\n✅ sktime XGBoost Forecasting MAE: {mae_sktime:.4f}")
# (Visualization code remains the same)
print("-" * 30)

In [None]:
print("\n--- Running AutoGluon for Forecasting ---")

# AutoGluon requires a specific TimeSeriesDataFrame format
# Since we have one main time series, we'll give it a static `item_id`
df_ag = df.reset_index().rename(columns={'index': 'timestamp'})
df_ag['item_id'] = 'ETTh1'

# Convert to TimeSeriesDataFrame
data_ag = TimeSeriesDataFrame.from_data_frame(df_ag)

# Split into train and validation resources
train_data_ag = data_ag.iloc[:-forecast_horizon]
# The validation resources for `predict` should not contain the answer
validation_data_known_covariates = data_ag.iloc[-forecast_horizon:]

# Instantiate the TimeSeriesPredictor
predictor_ag = TimeSeriesPredictor(
    prediction_length=forecast_horizon,
    path='./autogluon_models_etth1_forecast',
    target=target_col,
    eval_metric='MAE'
)

# Fit the predictor
predictor_ag.fit(
    train_data_ag,
    presets="medium_quality",
    time_limit=180
)

# Get predictions
predictions_ag = predictor_ag.predict(train_data_ag)

# Evaluate the results
print("\nAutoGluon Leaderboard (evaluated on internal validation split):")
print(predictor_ag.leaderboard())

# Calculate MAE on our held-out validation set
mae_autogluon = mean_absolute_error(y_true=valid_df[target_col], y_pred=predictions_ag['mean'])
print(f"\n✅ AutoGluon Forecasting MAE: {mae_autogluon:.4f}")

# Visualize the forecast
plt.figure(figsize=(14, 7))
plt.plot(train_df.index[-200:], train_df[target_col].iloc[-200:], label='Recent History')
plt.plot(valid_df.index, valid_df[target_col], label='Ground Truth', color='orange')
plt.plot(valid_df.index, predictions_ag['mean'], label='AutoGluon Forecast', color='purple', linestyle='--')
plt.title('AutoGluon Forecasting Result', fontsize=16)
plt.legend()
plt.show()

In [None]:
print("\n--- Running Optuna for Forecasting HPO ---")

def objective_forecasting(trial):
    # Suggest hyperparameters for XGBoost
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.3, log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
    }
    
    # Create the forecaster with the tuned regressor
    regressor = XGBRegressor(random_state=42, **params)
    forecaster = DirectTabularRegressionForecaster(estimator=regressor)
    
    # Fit and predict
    forecaster.fit(y=y_train, X=X_train)
    y_pred = forecaster.predict(fh=fh, X=X_valid)
    
    # Return the metric to be minimized
    mae = mean_absolute_error(y_true=valid_df[target_col], y_pred=y_pred)
    return mae

# Create study and optimize
study_fc = optuna.create_study(direction="minimize", sampler=optuna.samplers.TPESampler(seed=42))
study_fc.optimize(objective_forecasting, n_trials=20)

print("\nOptimization finished.")
print(f"✅ Optuna Best Trial MAE: {study_fc.best_value:.4f}")
print(f"   Best params: {study_fc.best_params}")