In [7]:
# ==============================================================================
# PROJECT: Kaggle Playground Series - S6E2 (Heart Disease)
# MISSION: Hyperparameter Optimization (Corrected Indexing)
# ==============================================================================

!pip install optuna -q

import pandas as pd
import numpy as np
import optuna
from xgboost import XGBClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from google.colab import drive

# --- STEP 1: LOAD & ALIGN DATA ---
# Mount Google Drive so the training CSV is readable from the Colab runtime.
drive.mount('/content/drive')
# NOTE(review): the folder name says "S6E1" while the file header says S6E2 —
# confirm this is the intended dataset.
train = pd.read_csv('/content/drive/MyDrive/Nihal Data/kaggle/S6E1 - heart/train.csv')

# Encode the string target as 0/1. Series.map() yields NaN for every label that
# is missing from the mapping, so validate here and fail with a clear message
# instead of hitting an obscure error later inside the CV / XGBoost loop.
target_mapping = {'Absence': 0, 'Presence': 1}
raw_target = train['Heart Disease']
y = raw_target.map(target_mapping)
unmapped = y.isna()
if unmapped.any():
    unexpected_labels = raw_target[unmapped].unique().tolist()
    raise ValueError(
        f"'Heart Disease' contains {int(unmapped.sum())} value(s) outside "
        f"{list(target_mapping)}: {unexpected_labels[:10]}"
    )
y = y.reset_index(drop=True)

# Features: everything except the row identifier and the target.
X = train.drop(['id', 'Heart Disease'], axis=1)

# Categorical Encoding: replace each object (string) column with its integer
# category codes so XGBoost receives purely numeric input.
for col in X.select_dtypes(include=['object']).columns:
    X[col] = X[col].astype('category').cat.codes

# THE FIX: Ensuring X has a clean 0 to N index for .iloc
# (kept in lockstep with y, which was reset the same way above).
X = X.reset_index(drop=True)

print(f" Alignment Check: X shape {X.shape}, y shape {y.shape}")

# --- STEP 2: THE OBJECTIVE FUNCTION ---
def objective(trial):
    """Score one Optuna trial: mean 3-fold ROC AUC of an XGBoost classifier.

    Samples learning rate, tree depth, row/column subsampling and minimum
    child weight from ``trial``, then trains one model per stratified fold of
    the module-level ``X`` / ``y`` and averages the validation AUCs.

    Args:
        trial: Optuna trial used to draw the hyperparameter values.

    Returns:
        The mean of the per-fold validation ROC AUC scores.
    """
    # Draw the tunable values first. The suggest order is kept stable so the
    # sampler sees the parameters in the same sequence on every trial.
    learning_rate = trial.suggest_float('learning_rate', 0.01, 0.1, log=True)
    max_depth = trial.suggest_int('max_depth', 3, 10)
    subsample = trial.suggest_float('subsample', 0.5, 1.0)
    colsample_bytree = trial.suggest_float('colsample_bytree', 0.5, 1.0)
    min_child_weight = trial.suggest_int('min_child_weight', 1, 10)

    # Fixed settings plus the sampled ones; early stopping halts boosting after
    # 50 rounds without improvement on the eval_set AUC.
    model_kwargs = dict(
        n_estimators=1000,
        learning_rate=learning_rate,
        max_depth=max_depth,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        min_child_weight=min_child_weight,
        tree_method='hist',
        device='cuda',
        eval_metric='auc',
        random_state=42,
        early_stopping_rounds=50,
    )

    def fold_auc(fit_rows, holdout_rows):
        """Train on ``fit_rows`` and return the ROC AUC on ``holdout_rows``."""
        X_fit, y_fit = X.iloc[fit_rows], y.iloc[fit_rows]
        X_holdout, y_holdout = X.iloc[holdout_rows], y.iloc[holdout_rows]

        classifier = XGBClassifier(**model_kwargs)
        # The held-out fold doubles as the early-stopping eval set.
        classifier.fit(
            X_fit,
            y_fit,
            eval_set=[(X_holdout, y_holdout)],
            verbose=False,
        )

        positive_proba = classifier.predict_proba(X_holdout)[:, 1]
        return roc_auc_score(y_holdout, positive_proba)

    # Same seeded, shuffled 3-fold stratified split for every trial, so trials
    # are compared on identical folds.
    splitter = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    fold_scores = [
        fold_auc(fit_rows, holdout_rows)
        for fit_rows, holdout_rows in splitter.split(X, y)
    ]
    return np.mean(fold_scores)

# --- STEP 3: RUNNING THE SEARCH ---
print(" Launching the Optimizer. Hunting for the Top 100 parameters...")
# Seed the sampler so the sequence of suggested hyperparameters is repeatable
# between runs, in line with the fixed random_state=42 already used for the CV
# split and the model. TPESampler is what Optuna uses by default for a
# single-objective study, so only the seeding changes, not the algorithm.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=20) # Reduced to 20 for faster initial results

# Report the best mean CV AUC and the hyperparameters that produced it.
print("\n Best Trial Found!")
print(f"  Value (AUC): {study.best_value:.5f}")
print(f"  Best Params: {study.best_params}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


[I 2026-02-11 08:11:24,503] A new study created in memory with name: no-name-08527857-cc7f-477f-a5f2-dd07d5cd3ca8


 Alignment Check: X shape (630000, 13), y shape (630000,)
 Launching the Optimizer. Hunting for the Top 100 parameters...


[I 2026-02-11 08:11:36,948] Trial 0 finished with value: 0.9552933083349938 and parameters: {'learning_rate': 0.02753262621868414, 'max_depth': 4, 'subsample': 0.5160507040988652, 'colsample_bytree': 0.7415645082502119, 'min_child_weight': 6}. Best is trial 0 with value: 0.9552933083349938.
[I 2026-02-11 08:11:53,381] Trial 1 finished with value: 0.9549107989843763 and parameters: {'learning_rate': 0.010602894608873587, 'max_depth': 6, 'subsample': 0.6303585438923989, 'colsample_bytree': 0.7518791191704088, 'min_child_weight': 5}. Best is trial 0 with value: 0.9552933083349938.
[I 2026-02-11 08:12:04,710] Trial 2 finished with value: 0.9549954084673645 and parameters: {'learning_rate': 0.0453056107957274, 'max_depth': 9, 'subsample': 0.8910474675682108, 'colsample_bytree': 0.6569285404221153, 'min_child_weight': 9}. Best is trial 0 with value: 0.9552933083349938.
[I 2026-02-11 08:12:12,544] Trial 3 finished with value: 0.9553745344904828 and parameters: {'learning_rate': 0.075920501613


 Best Trial Found!
  Value (AUC): 0.95548
  Best Params: {'learning_rate': 0.0524336233119705, 'max_depth': 4, 'subsample': 0.8174286638784904, 'colsample_bytree': 0.5096944467506977, 'min_child_weight': 10}
