In [1]:
from IPython.display import display
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import optuna
from optuna.visualization import plot_optimization_history, plot_param_importances

from ium_long_stay_patterns.src.helpers.create_numerical_dataset import create_numerical_dataset, merge_with_stats
from ium_long_stay_patterns.config import ProcessedCSV, SAVED_MODELS_DIR
from ium_long_stay_patterns.src.helpers.data_loaders import prepare_and_create_loaders
from ium_long_stay_patterns.modeling.train import Trainer
from models.binary import BinaryClassifier

import warnings
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm
[32m2026-01-10 15:18:33.041[0m | [1mINFO    [0m | [36mium_long_stay_patterns.config[0m:[36m<module>[0m:[36m12[0m - [1mPROJ_ROOT path is: /home/mateusz/IUM/ium-long-stay-patterns[0m


In [2]:
# Pick the compute device once for the whole notebook: CUDA when PyTorch can
# see a GPU, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

Using device: cpu


In [3]:
# Build the numeric dataset from the processed listings CSV, then pass it
# through merge_with_stats to obtain the final modelling table.
df_numeric = create_numerical_dataset(ProcessedCSV.LISTINGS.path, strategy=True)
df_final = merge_with_stats(df_numeric, with_ids=True)

# Labels are the 'target' column; every other column is a feature.
y = df_final['target']
X = df_final.drop('target', axis=1)

# Quick sanity check on size and class balance.
print(f"Dataset shape: {X.shape}")
print(f"Target distribution:\n{y.value_counts()}")

Dataset shape: (1368, 21)
Target distribution:
target
0    994
1    374
Name: count, dtype: int64


In [4]:
def objective(trial):
    """
    Objective function for Optuna to optimize.

    Samples a network architecture and optimizer settings from ``trial``,
    trains a ``BinaryClassifier`` with a manual epoch loop, and reports the
    validation AUC to Optuna at a fixed cadence so the study's pruner can
    stop unpromising trials early.

    Reads the notebook-level globals ``X``, ``y`` and ``device``.

    Args:
        trial: The Optuna trial used to sample hyperparameters and to report
            intermediate values.

    Returns:
        The ``'auc'`` entry of the metrics returned by ``Trainer._validate``
        on the validation loader after the last epoch.

    Raises:
        optuna.exceptions.TrialPruned: If the pruner asks to stop the trial
            at one of the intermediate reports.
    """
    seed = 42          # shared by torch's RNG, the data loaders and the Trainer
    n_epochs = 50      # length of the manual training loop below
    report_every = 10  # validate and report to the pruner every N epochs

    # Seed torch's global RNG before anything random happens in this trial so
    # that weight initialisation is repeatable; otherwise two trials with the
    # same hyperparameters can score differently purely because of init noise.
    torch.manual_seed(seed)

    # Hyperparams
    n_layers = trial.suggest_int('n_layers', 1, 3)
    hidden_layers = [
        trial.suggest_int(f'n_units_l{i}', 16, 128, step=16)
        for i in range(n_layers)
    ]

    dropout_rate = trial.suggest_float('dropout_rate', 0.0, 0.5, step=0.1)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)

    # Loaders are rebuilt per trial because the batch size is a tuned
    # hyperparameter; the last two return values are not needed here.
    train_loader, val_loader, _, _ = prepare_and_create_loaders(
        X, y, batch_size=batch_size, random_state=seed, save_test_data=False, verbose=False
    )

    # Peek at one training batch to learn the feature width the loaders
    # actually produce, and size the model input accordingly.
    sample_batch, _ = next(iter(train_loader))
    input_dim = sample_batch.shape[1]

    model = BinaryClassifier(
        input_dim=input_dim,
        hidden_layers=hidden_layers,
        dropout_rate=dropout_rate
    ).to(device)

    optimizer = optim.Adam(
        model.parameters(),
        lr=learning_rate,
        weight_decay=weight_decay
    )
    criterion = nn.BCELoss()

    # NOTE(review): within this function the trainer is only used for
    # `trainer.model.train()` and `trainer._validate(...)`; its `epochs`
    # argument does not drive the loop below, which runs `n_epochs` epochs.
    trainer = Trainer(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        epochs=10,
        device=device,
        seed=seed
    )

    for epoch in range(1, n_epochs + 1):
        # Switch back to training mode at the start of every epoch.
        trainer.model.train()

        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)

            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

        if epoch % report_every == 0:
            metrics = trainer._validate(val_loader)

            # Give the pruner an intermediate score for this step.
            trial.report(metrics['auc'], epoch)

            if trial.should_prune():
                raise optuna.exceptions.TrialPruned()

    # Always score the fully trained model, independent of whether the last
    # epoch happened to fall on a reporting step.
    final_metrics = trainer._validate(val_loader)

    return final_metrics['auc']

In [5]:
# Create Optuna study
# The sampler is given a fixed seed so that the sequence of suggested
# hyperparameters is repeatable across kernel restarts. TPESampler is the
# sampler Optuna uses by default for single-objective studies, so the search
# algorithm itself is unchanged.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
    pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=10),
    study_name='binary_classifier_tuning'
)

# Run optimization
# Stops at whichever limit is hit first; if the timeout is reached the number
# of completed trials can still vary between runs.
print("Starting hyperparameter optimization...")
study.optimize(objective, n_trials=50, timeout=3600)  # 50 trials or 1 hour

print("\nOptimization complete!")

[I 2026-01-10 15:18:33,168] A new study created in memory with name: binary_classifier_tuning


Starting hyperparameter optimization...


[I 2026-01-10 15:18:35,149] Trial 0 finished with value: 0.7344863731656185 and parameters: {'n_layers': 2, 'n_units_l0': 32, 'n_units_l1': 80, 'dropout_rate': 0.5, 'learning_rate': 0.00010552911814577977, 'batch_size': 64, 'weight_decay': 6.374508558638575e-05}. Best is trial 0 with value: 0.7344863731656185.
[I 2026-01-10 15:18:36,046] Trial 1 finished with value: 0.7827044025157233 and parameters: {'n_layers': 2, 'n_units_l0': 96, 'n_units_l1': 48, 'dropout_rate': 0.2, 'learning_rate': 0.00022023953242253532, 'batch_size': 128, 'weight_decay': 0.00017889933699058878}. Best is trial 1 with value: 0.7827044025157233.
[I 2026-01-10 15:18:38,105] Trial 2 finished with value: 0.9049266247379455 and parameters: {'n_layers': 2, 'n_units_l0': 32, 'n_units_l1': 128, 'dropout_rate': 0.4, 'learning_rate': 0.0017773859407975044, 'batch_size': 32, 'weight_decay': 2.5478494884011557e-05}. Best is trial 2 with value: 0.9049266247379455.
[I 2026-01-10 15:18:39,067] Trial 3 finished with value: 0.78

KeyboardInterrupt: 

In [None]:
# Best trial: print its objective value followed by every sampled
# hyperparameter, one per line.
print("Best trial:")
trial = study.best_trial

print(f"  Value (AUC): {trial.value:.4f}")
print("\n  Params: ")
for key in trial.params:
    value = trial.params[key]
    print(f"    {key}: {value}")

In [None]:
# Optimization history: build Optuna's optimization-history figure for `study`
# and show it.
fig = plot_optimization_history(study)
fig.show()

In [None]:
# Parameter importances: build Optuna's hyperparameter-importance figure for
# `study` and show it.
fig = plot_param_importances(study)
fig.show()

In [None]:
# Top 10 trials
df_trials = study.trials_dataframe()

# Highest objective value first, then keep the first ten rows.
df_trials_sorted = df_trials.sort_values(by='value', ascending=False).iloc[:10]

# Show only the trial number, its score and a subset of the tuned parameters.
display(
    df_trials_sorted.loc[
        :,
        [
            'number',
            'value',
            'params_n_layers',
            'params_learning_rate',
            'params_batch_size',
            'params_dropout_rate',
        ],
    ]
)

# Train best model

In [None]:
# Pull the winning hyperparameter set out of the study.
best_params = study.best_params

# Rebuild the list of hidden-layer widths from the per-layer 'n_units_l{i}'
# entries, in layer order.
n_layers = best_params['n_layers']
hidden_layers = []
for i in range(n_layers):
    hidden_layers.append(best_params[f'n_units_l{i}'])

print(f"Training final model with architecture: {hidden_layers}")

# Build the loaders with the tuned batch size; this call also returns a test
# loader and the fitted scaler.
train_loader, val_loader, test_loader, fitted_scaler = prepare_and_create_loaders(
    X,
    y,
    batch_size=best_params['batch_size'],
    random_state=42,
)

# Read the feature width off one training batch.
data_iter = iter(train_loader)
sample_batch, _ = next(data_iter)
input_dim = sample_batch.shape[1]

# Instantiate the network with the tuned architecture and move it to `device`.
final_model = BinaryClassifier(
    input_dim=input_dim,
    hidden_layers=hidden_layers,
    dropout_rate=best_params['dropout_rate'],
)
final_model = final_model.to(device)

# Loss and optimizer, using the tuned learning rate and weight decay.
final_criterion = nn.BCELoss()
final_optimizer = optim.Adam(
    final_model.parameters(),
    lr=best_params['learning_rate'],
    weight_decay=best_params['weight_decay'],
)

# Wrap everything in a Trainer configured for the full 100-epoch run.
final_trainer = Trainer(
    model=final_model,
    criterion=final_criterion,
    optimizer=final_optimizer,
    epochs=100,  # Full training
    device=device,
    seed=42,
)

# Fit on the training loader, validating on the validation loader.
final_trainer.train(train_loader, val_loader)

In [None]:
# Save the final trainer's model to SAVED_MODELS_DIR / "best_binary_model.pth".
# NOTE(review): what exactly gets serialized (weights only vs. full checkpoint)
# is decided by Trainer.save_model — confirm there before loading elsewhere.
final_trainer.save_model(SAVED_MODELS_DIR / "best_binary_model.pth")