In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import optuna

from ium_long_stay_patterns.src.helpers.create_numerical_dataset import create_numerical_dataset, merge_with_stats
from ium_long_stay_patterns.config import ProcessedCSV, SAVED_MODELS_DIR
from ium_long_stay_patterns.src.helpers.data_loaders import prepare_and_create_loaders
from ium_long_stay_patterns.modeling.train import Trainer
from models.binary import BinaryClassifier

import warnings
warnings.filterwarnings('ignore')

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [7]:
df_numeric = create_numerical_dataset(ProcessedCSV.LISTINGS.path, strategy=True)
df_final = merge_with_stats(df_numeric, with_ids=True)

X = df_final.drop(columns=['target', 'id', 'host_id', 'listing_id'])
y = df_final['target']

print(f"Dataset shape: {X.shape}")
print(f"Target distribution:\n{y.value_counts()}")

Dataset shape: (1368, 18)
Target distribution:
target
0    994
1    374
Name: count, dtype: int64


In [8]:
def objective(trial):
    """
    Objective function for Optuna to optimize.
    Returns validation AUC score.
    """
    n_layers = trial.suggest_int('n_layers', 1, 3)

    hidden_layers = []
    for i in range(n_layers):
        hidden_size = trial.suggest_int(f'n_units_l{i}', 16, 128, step=16)
        hidden_layers.append(hidden_size)

    dropout_rate = trial.suggest_float('dropout_rate', 0.0, 0.5, step=0.1)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)

    train_loader, val_loader, _, _ = prepare_and_create_loaders(
        X, y, batch_size=batch_size, random_state=42, save_test_data=False, verbose=False
    )

    data_iter = iter(train_loader)
    sample_batch, _ = next(data_iter)
    input_dim = sample_batch.shape[1]

    model = BinaryClassifier(
        input_dim=input_dim,
        hidden_layers=hidden_layers,
        dropout_rate=dropout_rate
    ).to(device)

    optimizer = optim.Adam(
        model.parameters(),
        lr=learning_rate,
        weight_decay=weight_decay
    )
    criterion = nn.BCELoss()

    trainer = Trainer(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        epochs=10,
        device=device,
        seed=42
    )

    for epoch in range(1, 51):
        trainer.model.train()
        epoch_loss = 0.0

        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)

            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        if epoch % 10 == 0:
            metrics = trainer._validate(val_loader)

            trial.report(metrics['auc'], epoch)

            if trial.should_prune():
                raise optuna.exceptions.TrialPruned()

    final_metrics = trainer._validate(val_loader)

    return final_metrics['auc']

In [9]:
study = optuna.create_study(
    direction='maximize',
    pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=10),
    study_name='binary_classifier_tuning'
)

print("Starting hyperparameter optimization...")
study.optimize(objective, n_trials=50, timeout=3600)

print("\nOptimization complete!")

[I 2026-01-13 20:00:52,952] A new study created in memory with name: binary_classifier_tuning


Starting hyperparameter optimization...


[I 2026-01-13 20:01:05,702] Trial 0 finished with value: 0.8866876310272537 and parameters: {'n_layers': 3, 'n_units_l0': 112, 'n_units_l1': 32, 'n_units_l2': 80, 'dropout_rate': 0.30000000000000004, 'learning_rate': 0.0008970843137745766, 'batch_size': 32, 'weight_decay': 0.00041102629948232256}. Best is trial 0 with value: 0.8866876310272537.
[I 2026-01-13 20:01:07,855] Trial 1 finished with value: 0.8580712788259959 and parameters: {'n_layers': 3, 'n_units_l0': 96, 'n_units_l1': 64, 'n_units_l2': 80, 'dropout_rate': 0.30000000000000004, 'learning_rate': 0.0009895388972497754, 'batch_size': 128, 'weight_decay': 0.0004134944748836937}. Best is trial 0 with value: 0.8866876310272537.
[I 2026-01-13 20:01:10,865] Trial 2 finished with value: 0.7851153039832285 and parameters: {'n_layers': 1, 'n_units_l0': 112, 'dropout_rate': 0.4, 'learning_rate': 0.00021961140316981948, 'batch_size': 64, 'weight_decay': 1.5177644140769158e-05}. Best is trial 0 with value: 0.8866876310272537.
[I 2026-01-


Optimization complete!


In [10]:
# Best trial
print("Best trial:")
trial = study.best_trial

print(f"  Value (AUC): {trial.value:.4f}")
print("\n  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

Best trial:
  Value (AUC): 0.9377

  Params: 
    n_layers: 2
    n_units_l0: 16
    n_units_l1: 112
    dropout_rate: 0.0
    learning_rate: 0.005315184658015584
    batch_size: 32
    weight_decay: 3.694197024296665e-05


## Top 10 best trials

In [None]:
df = study.trials_dataframe()

df_sorted = df.sort_values("value", ascending=False)

top10 = df_sorted.head(10)

cols = ["number", "value"] + [c for c in df_sorted.columns if c.startswith("params_")]

display(top10[cols])

Unnamed: 0,number,value,params_batch_size,params_dropout_rate,params_learning_rate,params_n_layers,params_n_units_l0,params_n_units_l1,params_n_units_l2,params_weight_decay
17,17,0.937736,32,0.0,0.005315,2,16,112.0,,3.7e-05
21,21,0.935639,32,0.0,0.005944,2,48,112.0,,0.000816
10,10,0.933019,32,0.0,0.00491,2,16,128.0,,1e-06
28,28,0.928407,64,0.2,0.009645,2,80,112.0,,9.3e-05
20,20,0.926625,32,0.0,0.006055,2,48,112.0,,2.8e-05
31,31,0.924004,64,0.2,0.008963,2,80,112.0,,0.000126
12,12,0.92327,32,0.1,0.002862,2,32,128.0,,1e-06
35,35,0.920597,64,0.2,0.009718,3,96,112.0,48.0,4.7e-05
42,42,0.920021,32,0.0,0.005932,2,32,96.0,,4e-06
22,22,0.917191,32,0.0,0.008774,2,16,112.0,,0.000833


Params saved in *config.py*

# Train best model - in *classification_numeric_data.ipynb*