# 4.5 Tune Boosted Model Hyperparameters - Code Brief

Condensed reference for hyperparameter tuning strategies.

## Setup

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, StratifiedKFold, cross_val_score
from sklearn.metrics import roc_auc_score
from scipy.stats import uniform, randint, loguniform
from xgboost import XGBClassifier
import optuna
from optuna.samplers import TPESampler

## Grid Search

In [None]:
# Exhaustive grid: 2 * 3 * 2 * 2 * 2 = 48 candidate configurations,
# each fitted once per CV fold.
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [4, 6, 8],
    'learning_rate': [0.05, 0.1],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0]
}

# Shuffled, seeded stratified folds -- the same splitter configuration the
# Optuna objective in this notebook uses -- so the best scores reported by
# the different tuning methods are measured on identical splits.
grid_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

grid_search = GridSearchCV(
    # n_jobs=1 on the estimator: parallelism comes from the search itself
    # (n_jobs=-1 below), so XGBoost's own threads do not contend for the
    # same cores.
    estimator=XGBClassifier(
        random_state=42,
        eval_metric='logloss',
        use_label_encoder=False,
        n_jobs=1,
    ),
    param_grid=param_grid,
    scoring='roc_auc',
    cv=grid_cv,
    n_jobs=-1,
    # Keep train scores in cv_results_ so over-fitting can be inspected.
    return_train_score=True
)

# X_train / y_train are expected to be defined earlier in the notebook.
grid_search.fit(X_train, y_train)

print(f"Best params: {grid_search.best_params_}")
print(f"Best score: {grid_search.best_score_:.4f}")

## Randomized Search

In [None]:
# Sampling distributions for the randomized search.
#   * scipy's randint(low, high) excludes `high`, so the upper bounds are one
#     past the intended maximum (n_estimators 50-500, max_depth 3-12,
#     min_child_weight 1-10), matching the inclusive ranges of the Optuna
#     search space in this notebook.
#   * uniform(loc, scale) samples from [loc, loc + scale], e.g.
#     uniform(0.6, 0.4) covers 0.6-1.0.
#   * loguniform spreads samples evenly across orders of magnitude.
param_distributions = {
    'n_estimators': randint(50, 501),
    'max_depth': randint(3, 13),
    'learning_rate': loguniform(0.01, 0.3),
    'subsample': uniform(0.6, 0.4),
    'colsample_bytree': uniform(0.6, 0.4),
    'min_child_weight': randint(1, 11),
    'gamma': uniform(0, 0.5),
    'reg_alpha': loguniform(1e-5, 1),
    'reg_lambda': loguniform(1e-5, 1)
}

# Shuffled, seeded stratified folds -- the same splitter configuration the
# Optuna objective in this notebook uses -- so scores are comparable.
random_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

random_search = RandomizedSearchCV(
    # n_jobs=1 on the estimator: the search's n_jobs=-1 already uses every
    # core, so XGBoost's own threads should not contend for them.
    estimator=XGBClassifier(
        random_state=42,
        eval_metric='logloss',
        use_label_encoder=False,
        n_jobs=1,
    ),
    param_distributions=param_distributions,
    n_iter=50,  # number of sampled configurations
    scoring='roc_auc',
    cv=random_cv,
    n_jobs=-1,
    random_state=42  # makes the sampled configurations reproducible
)

# X_train / y_train are expected to be defined earlier in the notebook.
random_search.fit(X_train, y_train)

print(f"Best params: {random_search.best_params_}")
print(f"Best score: {random_search.best_score_:.4f}")

## Bayesian Optimization with Optuna

In [None]:
def objective(trial):
    """Score one Optuna trial by its mean cross-validated ROC AUC.

    An XGBoost configuration is drawn from ``trial`` and evaluated on the
    notebook-level ``X_train`` / ``y_train`` using three shuffled, seeded
    stratified folds.

    Args:
        trial: Optuna trial object used to sample each hyperparameter.

    Returns:
        The mean ROC AUC over the three folds.
    """
    # Sampling order is kept fixed: with a seeded sampler, the order of
    # suggest_* calls influences the values that are drawn.
    sampled = dict(
        n_estimators=trial.suggest_int('n_estimators', 50, 500),
        max_depth=trial.suggest_int('max_depth', 3, 12),
        learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        subsample=trial.suggest_float('subsample', 0.6, 1.0),
        colsample_bytree=trial.suggest_float('colsample_bytree', 0.6, 1.0),
        min_child_weight=trial.suggest_int('min_child_weight', 1, 10),
        gamma=trial.suggest_float('gamma', 0, 0.5),
        reg_alpha=trial.suggest_float('reg_alpha', 1e-5, 1, log=True),
        reg_lambda=trial.suggest_float('reg_lambda', 1e-5, 1, log=True),
    )

    clf = XGBClassifier(
        random_state=42,
        eval_metric='logloss',
        use_label_encoder=False,
        **sampled,
    )
    # Fixed seed: every trial is scored on the same three splits.
    folds = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

    return cross_val_score(clf, X_train, y_train, scoring='roc_auc', cv=folds).mean()

# Lower Optuna's log level *before* creating the study so that the
# study-creation message is suppressed along with the per-trial messages.
optuna.logging.set_verbosity(optuna.logging.WARNING)

# Seeded TPE sampler for reproducible suggestions; the objective returns
# ROC AUC, so the study maximizes it.
study = optuna.create_study(direction='maximize', sampler=TPESampler(seed=42))
study.optimize(objective, n_trials=50)

print(f"Best params: {study.best_params}")
print(f"Best score: {study.best_value:.4f}")

## Optuna Analysis

In [None]:
# Estimate how strongly each hyperparameter influenced the objective value.
importance = optuna.importance.get_param_importances(study)

# Export the study's trial history for further inspection.
trials_df = study.trials_dataframe()

# Refit on the training data using the best configuration the study found,
# with the same fixed estimator settings used during tuning.
best_config = dict(study.best_params)
final_model = XGBClassifier(
    random_state=42,
    eval_metric='logloss',
    use_label_encoder=False,
    **best_config,
)
final_model.fit(X_train, y_train)

## Key Hyperparameters

| Parameter | Description | Typical range |
|:----------|:------------|:------|
| n_estimators | Number of trees | 100-1000 |
| max_depth | Tree depth | 3-10 |
| learning_rate | Step size | 0.01-0.3 |
| subsample | Row sampling | 0.5-1.0 |
| colsample_bytree | Column sampling | 0.5-1.0 |
| reg_alpha | L1 regularization | 0-1 |
| reg_lambda | L2 regularization | 0-1 |

## Tuning Strategy Comparison

| Method | Pros | Best For |
|:-------|:-----|:---------|
| Grid Search | Exhaustive, reproducible | Small parameter spaces |
| Random Search | Faster, continuous params | Medium searches |
| Optuna | Intelligent, most efficient | Large searches, production |