# Hyperparameters Optimization for the 3rd Model
## *Feature-Enriched Non-Autoregressive LSTM*

## 1: Imports

In [None]:
# Custom functions
import utils
import NAR_models

# Libraries
import pandas as pd
import numpy as np
import copy
import optuna
from optuna.samplers import TPESampler

---

## 2: Hyperparameters Optimization

### 2.1: Data Preparation

In [None]:
# Load the two input tables from CSV files in the working directory
x_train_new, y_train_new = (
    pd.read_csv('x_train_new.csv'),
    pd.read_csv('y_train_sncf.csv'),
)

In [None]:
# Data preparation: build the per-station data from the two loaded tables
# (remove_covid=True is forwarded to the helper).
df_per_station = utils.prepare_backtest_data(x_train_new, y_train_new, remove_covid=True)

# Exclude recent stations
RECENT = ['QD6', 'P6E', 'BDC', 'W80', 'W14']
df_per_station = utils.filter_stations(df_per_station, RECENT)

# Split each station's data into a train part and a test part
df_train = {}
df_test = {}
for station in df_per_station:
    df_train_station, df_test_station = utils.split_dataset(df_per_station[station])
    df_train[station] = df_train_station
    df_test[station] = df_test_station

# Verification: each line reports the size and the keys of its own dictionary
print("len(df_train):", len(df_train), ":", df_train.keys())
print("len(df_test):", len(df_test), ":", df_test.keys())

### 2.2: Sampling

Hyperparameter tuning is conducted on a subset of 50 stations (the `sample_size` set in the cell below) to significantly reduce runtime, under the assumption that optimal hyperparameters generalize across stations.

In [None]:
# Sampling configuration: how many stations to draw, and the seed passed
# to the sampling helper
sample_size = 50
seed = 365

# Train side: draw the station sample from the training dictionary
sample_train = utils.sample_stations(df_train, sample_size, seed)
print("sample_train: ", sample_train.keys())

# Test side: copy the test data of exactly the stations that were sampled
sample_test = {station_id: df_test[station_id].copy() for station_id in sample_train}
print("sample_test: ", sample_test.keys())

In [None]:
# Separate copies of the sampled stations' test data, kept as the true values
# for the MAPE computation
sample_test_true = {station_id: df_test[station_id].copy() for station_id in sample_test}

### 2.3: Hyperparameter Optimization

In [None]:
# Fixed settings shared by every trial (these are not tuned by Optuna)
features = ['job','ferie','vacances','dow_sin','dow_cos','month_sin','month_cos']
activation = 'tanh'
epochs = 50
early_stop = True
keep_percentage = 0.25


def objective(trial):
    """
    Optuna objective function to minimize the average MAPE score
    across the station sample.

    Args:
        trial: Optuna trial used to draw "units", "learning_rate",
            "batch_size" and "seq_len".

    Returns:
        The mean of the per-station 'MAPE' values returned by
        NAR_models.backtest_lstm, or float('inf') when the backtest
        returns no results or raises an exception.
    """
    # 1. Define the search space
    params = {
        "units": trial.suggest_int("units", 40, 80, step=4),
        "learning_rate": trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True),
        "batch_size": trial.suggest_int("batch_size", 16, 56, step=10),
        "seq_len": trial.suggest_int("seq_len", 30, 120, step=10),
    }

    # 2. Work on a deep copy of the test data so that a trial cannot
    # overwrite the original sample_test dictionary
    trial_sample_test = copy.deepcopy(sample_test)

    try:
        # 3. Call backtest_lstm with the suggested parameters; only the
        # second return value (the MAPE results) is needed here
        _, mape_results, _ = NAR_models.backtest_lstm(
            sample_train,
            trial_sample_test,
            sample_test_true,
            sample_size,
            seq_len=params["seq_len"],
            units=params["units"],
            # Use the module-level setting instead of a hard-coded literal,
            # so changing `activation` above actually takes effect
            activation=activation,
            learning_rate=params["learning_rate"],
            batch_size=params["batch_size"],
            epochs=epochs,
            keep_percentage=keep_percentage,
            early_stop=early_stop,
            features=features,
        )

        # 4. Handle failed trials within the backtest
        if not mape_results:
            return float('inf')

        # 5. Calculate the MEAN MAPE across all stations in the sample,
        # so a trial is scored on the whole sample rather than one station
        all_mapes = [res['MAPE'] for res in mape_results]
        average_mape = np.mean(all_mapes)

        return average_mape

    except Exception as e:
        # Deliberate best-effort: report the error and return the worst
        # possible score instead of raising out of the objective
        print(f"Trial failed with error: {e}")
        return float('inf')

In [None]:
# Execution flag: the optimization cell launches the Optuna study only when
# run == 1; with any other value it just prints a message and skips the study.
run = 0

In [None]:
# Execution: launch the Optuna study only when the flag is set to 1
if run != 1:
    print("run == 0, no execution")
else:
    # Minimize the objective over 30 trials with a TPE sampler
    study = optuna.create_study(direction="minimize", sampler=TPESampler())
    study.optimize(objective, n_trials=30)

### 2.4: Results

In [None]:
# Best params found: {'units': 64, 'learning_rate': 0.006172885794840763, 'batch_size': 16, 'seq_len': 100}
# Best average MAPE: 0.956

In [None]:
# Report the outcome of the study. `study` only exists when the execution
# cell actually ran the optimization, so check for it first instead of
# failing with a NameError when the optimization was skipped.
if "study" in globals():
    print("Best parameters:", study.best_params)
    print("Best average MAPE:", study.best_value)
else:
    print("No study available (optimization was not run); nothing to report")