# 05 - Model Tuning & Comparison

**Objective**: Perform hyperparameter tuning on selected models to optimize performance.

**Approach**:
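- Use `RandomizedSearchCV` to sample a fixed number of hyperparameter combinations instead of exhaustively searching the full grid
- Focus on the models with the best baseline performance
- Optimize for F1-score, which balances precision and recall and is therefore more informative than accuracy on imbalanced data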

In [3]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide display settings: show every DataFrame column, allow wider
# console output, and apply seaborn's default plot styling.
_pandas_display_options = {
    "display.max_columns": None,
    "display.width": 120,
}
for _option_name, _option_value in _pandas_display_options.items():
    pd.set_option(_option_name, _option_value)

sns.set()


In [None]:
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from xgboost import XGBClassifier

# ======================
# Configuration
# ======================
# Everything a reader might want to tune lives here rather than being buried
# in the calls below.
SEED = 42
TEST_SIZE = 0.2
MAX_ONE_HOT_CARDINALITY = 20   # categoricals with more distinct values are left out
N_SEARCH_ITER = 20
N_CV_FOLDS = 3

# Parallelise at ONE level only. The original code requested all cores both
# inside XGBoost and across the search's worker processes; the XGBoost docs
# warn that this creates thread contention, and every search worker also holds
# its own copy of the training data. Here the search runs candidates one at a
# time while each XGBoost fit uses all cores. To parallelise across candidates
# instead, swap the two values (SEARCH_N_JOBS = -1, MODEL_N_JOBS = 1).
SEARCH_N_JOBS = 1
MODEL_N_JOBS = -1

MODEL_DIR = Path("models")
MODEL_PATH = MODEL_DIR / "best_xgb_tuned_model.pkl"

# Load engineered data
data_path = "data/flight_data_2018_2024_engineered.csv"
df = pd.read_csv(data_path)
df.columns = df.columns.str.strip()  # strip surrounding whitespace from header names

# Prepare features (same as training notebook)
target = "DELAYED"
cols_to_remove = ["DELAYED", "FlightDate", "Duplicate", "DivAirportLandings", "CRSArrTime", "ArrTimeBlk"]
# Only drop the columns that are actually present, so a missing one does not raise.
cols_to_drop_final = [c for c in cols_to_remove if c in df.columns]

X = df.drop(columns=cols_to_drop_final)
y = df[target]

# Train-test split (stratified so both parts keep the same class ratio)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED, stratify=y
)

# Identify column types: object columns are treated as categorical, everything
# else is passed through untouched. Only low-cardinality categoricals are
# one-hot encoded; the remaining object columns are excluded from the features.
cat_cols = X.select_dtypes(include="object").columns.tolist()
num_cols = X.select_dtypes(exclude="object").columns.tolist()
low_card_cats = [c for c in cat_cols if df[c].nunique() <= MAX_ONE_HOT_CARDINALITY]

X_train_tune = X_train[num_cols + low_card_cats].copy()
X_test_tune = X_test[num_cols + low_card_cats].copy()

# Preprocessor: lives inside the pipeline, so the encoder is re-fit on the
# training part of every CV fold and is saved together with the model.
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), low_card_cats),
        ("num", "passthrough", num_cols),
    ]
)

print("Data prepared for tuning")
print(f"Train shape: {X_train_tune.shape}, Test shape: {X_test_tune.shape}")

# ======================
# XGBoost Hyperparameter Tuning
# ======================

print("\n" + "="*60)
print("XGBoost Hyperparameter Tuning")
print("="*60)

# The "model__" prefix routes each parameter to the pipeline step named "model".
xgb_param_dist = {
    'model__n_estimators': [100, 200, 300],
    'model__max_depth': [5, 7, 10, 15],
    'model__learning_rate': [0.01, 0.1, 0.2],
    'model__subsample': [0.8, 0.9, 1.0],
    'model__colsample_bytree': [0.8, 0.9, 1.0],
}

xgb_pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("model", XGBClassifier(random_state=SEED, n_jobs=MODEL_N_JOBS, eval_metric='logloss'))
])

xgb_search = RandomizedSearchCV(
    xgb_pipeline,
    param_distributions=xgb_param_dist,
    n_iter=N_SEARCH_ITER,
    cv=N_CV_FOLDS,
    scoring='f1',
    n_jobs=SEARCH_N_JOBS,
    random_state=SEED,
    verbose=1
)

# Create the output directory BEFORE the expensive search, so a missing
# folder cannot make joblib.dump fail after all the fits have completed.
MODEL_DIR.mkdir(parents=True, exist_ok=True)

print("Starting XGBoost tuning (this may take a while)...")
xgb_search.fit(X_train_tune, y_train)

print(f"\nBest XGBoost parameters: {xgb_search.best_params_}")
print(f"Best XGBoost CV F1-score: {xgb_search.best_score_:.4f}")

# Evaluate on test set (held out from the search; used only for this report)
best_xgb = xgb_search.best_estimator_
y_pred_xgb = best_xgb.predict(X_test_tune)
y_prob_xgb = best_xgb.predict_proba(X_test_tune)[:, 1]  # column 1 = probability of the positive class

print("\nTest set performance:")
print(f"  Accuracy: {accuracy_score(y_test, y_pred_xgb):.4f}")
print(f"  F1-score: {f1_score(y_test, y_pred_xgb):.4f}")
print(f"  ROC-AUC: {roc_auc_score(y_test, y_prob_xgb):.4f}")

# Save tuned model (the full pipeline: preprocessor + classifier)
joblib.dump(best_xgb, MODEL_PATH)
print("\nTuned XGBoost model saved!")

Data prepared for tuning
Train shape: (465940, 32), Test shape: (116485, 32)

XGBoost Hyperparameter Tuning
Starting XGBoost tuning (this may take a while)...
Fitting 3 folds for each of 20 candidates, totalling 60 fits
