In [None]:
# ==============================================================================
# PROJECT: Kaggle Playground Series - S6E2 (Heart Disease)
# MISSION: Optuna Tuning for LightGBM (The Second Titan)
# ==============================================================================

!pip install optuna -q

import pandas as pd
import numpy as np
import optuna
from lightgbm import LGBMClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from google.colab import drive

# --- STEP 1: MOUNT & LOAD ---
# Mount Google Drive so the training CSV stored there can be read from Colab.
drive.mount('/content/drive')
# NOTE(review): the header says S6E2 but this folder is named "S6E1 - heart" —
# confirm this is the intended dataset.
TRAIN_PATH = '/content/drive/MyDrive/Nihal Data/kaggle/S6E1 - heart/train.csv'
train = pd.read_csv(TRAIN_PATH)

# Encode the string target as 0/1.
target_mapping = {'Absence': 0, 'Presence': 1}
y = train['Heart Disease'].map(target_mapping)

# Series.map() turns every label that is not a key of target_mapping (including
# missing values) into NaN without complaint; fail fast here instead of letting
# the NaNs surface later inside the cross-validation loop.
unmapped_labels = train.loc[y.isna(), 'Heart Disease'].unique()
if len(unmapped_labels) > 0:
    raise ValueError(
        f"Unexpected 'Heart Disease' labels (not in target_mapping): {list(unmapped_labels)}"
    )

# Features are every column except the row id and the target.
X = train.drop(['id', 'Heart Disease'], axis=1)

# Replace each object (string) column by its integer category codes.
# pandas encodes missing values as -1 in `.cat.codes`.
for col in X.select_dtypes(include=['object']).columns:
    X[col] = X[col].astype('category').cat.codes

# --- STEP 2: OPTUNA OBJECTIVE ---
def objective(trial):
    """Evaluate one Optuna trial and return its cross-validated ROC AUC.

    Samples a LightGBM hyperparameter set from ``trial``, fits an
    ``LGBMClassifier`` on each of 3 stratified folds of the module-level
    ``X`` / ``y``, and scores the held-out fold with ROC AUC.

    Args:
        trial: Optuna trial used to draw ``learning_rate``, ``num_leaves``,
            ``max_depth``, ``min_child_samples``, ``subsample``,
            ``colsample_bytree``, ``reg_alpha`` and ``reg_lambda``.

    Returns:
        The mean of the three per-fold ROC AUC scores (higher is better).
    """
    param = {
        'n_estimators': 1000,
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1, log=True),
        'num_leaves': trial.suggest_int('num_leaves', 20, 100),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        # LightGBM only performs row bagging when the bagging frequency is
        # non-zero; with the default of 0 the tuned `subsample` is ignored.
        'subsample_freq': 1,
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 10.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 10.0, log=True),
        'device': 'gpu',  # Use GPU
        'metric': 'auc',
        'verbosity': -1,
        'random_state': 42
    }

    # Same fold split for every trial (fixed seed) so trial scores are comparable.
    skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    scores = []

    for train_idx, val_idx in skf.split(X, y):
        X_t, X_v = X.iloc[train_idx], X.iloc[val_idx]
        y_t, y_v = y.iloc[train_idx], y.iloc[val_idx]

        model = LGBMClassifier(**param)
        # NOTE(review): eval_set is supplied but no early-stopping callback is
        # registered, so presumably all 1000 rounds are trained on every fold —
        # consider adding one if the fixed round count is not intentional.
        model.fit(X_t, y_t, eval_set=[(X_v, y_v)])

        # Probability of the positive class (column 1) on the held-out fold.
        preds = model.predict_proba(X_v)[:, 1]
        scores.append(roc_auc_score(y_v, preds))

    return np.mean(scores)

# --- STEP 3: RUN OPTIMIZATION ---
print("🚀 Tuning LightGBM... Finding the perfect settings.")
# Seed the sampler so the sequence of suggested hyperparameters is repeatable,
# in line with the fixed random_state=42 used for the model and the CV split.
# TPESampler is Optuna's default sampler, so the search algorithm is unchanged.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=30)

print("\n⭐ Best LightGBM Params:")
print(study.best_params)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


[I 2026-02-11 09:39:52,619] A new study created in memory with name: no-name-3a947ea1-4301-4f30-8396-70e57ef5f723


🚀 Tuning LightGBM... Finding the perfect settings.


[I 2026-02-11 09:45:29,060] Trial 0 finished with value: 0.9549776323349567 and parameters: {'learning_rate': 0.011729572519326855, 'num_leaves': 47, 'max_depth': 10, 'min_child_samples': 88, 'subsample': 0.5608508056825651, 'colsample_bytree': 0.7795555591890273, 'reg_alpha': 0.00039745089110748916, 'reg_lambda': 0.0034644209193370507}. Best is trial 0 with value: 0.9549776323349567.
[I 2026-02-11 09:51:47,034] Trial 1 finished with value: 0.9548142213773585 and parameters: {'learning_rate': 0.010849239047989543, 'num_leaves': 98, 'max_depth': 8, 'min_child_samples': 24, 'subsample': 0.6695505785567991, 'colsample_bytree': 0.8454420659760947, 'reg_alpha': 9.501218878036874e-07, 'reg_lambda': 9.82909653755036e-07}. Best is trial 0 with value: 0.9549776323349567.
[I 2026-02-11 09:56:50,561] Trial 2 finished with value: 0.9550794430282221 and parameters: {'learning_rate': 0.01720686333702, 'num_leaves': 44, 'max_depth': 9, 'min_child_samples': 30, 'subsample': 0.5970122130203415, 'colsam


⭐ Best LightGBM Params:
{'learning_rate': 0.06348126876706298, 'num_leaves': 20, 'max_depth': 3, 'min_child_samples': 15, 'subsample': 0.996806358768483, 'colsample_bytree': 0.6200893337791418, 'reg_alpha': 0.03100193186233627, 'reg_lambda': 5.554526533061371}
