In [9]:
# ==============================================================================
# PROJECT: Kaggle Playground Series - S6E2 (Heart Disease)
# MISSION: The Master Blend (Optimized XGBoost + LightGBM)
# ==============================================================================

import pandas as pd
import numpy as np
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from google.colab import drive

# --- STEP 1: MOUNT & LOAD ---
# Attach Google Drive at /content/drive so the CSV paths below resolve.
drive.mount('/content/drive')

# NOTE(review): the folder is named 'S6E1 - heart' while the notebook header
# says S6E2 -- confirm these are the intended competition files.
TRAIN_PATH = '/content/drive/MyDrive/Nihal Data/kaggle/S6E1 - heart/train.csv'
TEST_PATH = '/content/drive/MyDrive/Nihal Data/kaggle/S6E1 - heart/test.csv'

# Read both frames in one pass: train first, then test.
train, test = (pd.read_csv(csv_path) for csv_path in (TRAIN_PATH, TEST_PATH))

# --- STEP 2: PREPARATION ---
# Encode the string target as 0/1.
target_mapping = {'Absence': 0, 'Presence': 1}
y = train['Heart Disease'].map(target_mapping)

# Series.map() yields NaN for any label that is not a key of target_mapping.
# Stop here with a clear message instead of passing NaN labels to the models.
if y.isna().any():
    unmapped_labels = sorted(set(train.loc[y.isna(), 'Heart Disease'].astype(str)))
    raise ValueError(f"Unmapped 'Heart Disease' labels: {unmapped_labels}")

# Feature matrices: drop the row identifier (and the target, for train).
X = train.drop(['id', 'Heart Disease'], axis=1)
X_test = test.drop(['id'], axis=1)

# Integer-encode string columns.
# Both frames must share ONE category -> code table. Encoding each frame with
# its own .astype('category') derives the codes from that frame's own values,
# so the same string can receive different codes in train and test whenever
# the two sets of values differ. The table is taken from the training data;
# test values not seen in training (and missing values) get code -1.
for col in X.select_dtypes(include=['object']).columns:
    train_categories = X[col].astype('category').cat.categories
    shared_dtype = pd.CategoricalDtype(categories=train_categories)
    X[col] = X[col].astype(shared_dtype).cat.codes
    X_test[col] = X_test[col].astype(shared_dtype).cat.codes

# --- STEP 3: THE EXPERT COUNCIL ---

# XGBoost: the hyperparameters that won tuning Trial 4.
xgb_params = {
    # runtime / reproducibility
    'tree_method': 'hist',
    'device': 'cuda',
    'random_state': 42,
    # boosting schedule
    'n_estimators': 3000,
    'learning_rate': 0.0524336233119705,
    # tree shape
    'max_depth': 4,
    'min_child_weight': 10,
    # row / column sampling
    'subsample': 0.8174286638784904,
    'colsample_bytree': 0.5096944467506977,
}

# LightGBM: the companion configuration for the blend.
lgb_params = {
    # runtime / reproducibility / logging
    'device': 'gpu',
    'random_state': 42,
    'verbosity': -1,
    # boosting schedule
    'n_estimators': 3000,
    'learning_rate': 0.03,
    # tree shape
    'num_leaves': 31,
}

# --- STEP 4: TRAINING THE COUNCIL ---
def _fit_and_score(estimator):
    """Fit `estimator` on (X, y) and return column 1 of its predict_proba(X_test)."""
    estimator.fit(X, y)
    return estimator.predict_proba(X_test)[:, 1]


# Each model is trained on the full training frame; no hold-out set is used here.
print("Training Optimized XGBoost...")
xgb_model = XGBClassifier(**xgb_params)
xgb_preds = _fit_and_score(xgb_model)

print("Training Optimized LightGBM...")
lgb_model = LGBMClassifier(**lgb_params)
lgb_preds = _fit_and_score(lgb_model)

# --- STEP 5: THE WEIGHTED BLEND ---
# XGBoost is weighted more heavily (0.7 vs 0.3) because it is regarded as the
# stronger of the two models.
xgb_weight, lgb_weight = 0.7, 0.3
final_preds = (xgb_weight * xgb_preds) + (lgb_weight * lgb_preds)

# --- STEP 6: FINAL SUBMISSION ---
# One row per test id, with the blended score in the 'Heart Disease' column.
submission = test[['id']].assign(**{'Heart Disease': final_preds})
submission.to_csv('submission_v6_master_blend.csv', index=False)

print("Final Master Blend 'submission_v6_master_blend.csv' generated.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Training Optimized XGBoost...
Training Optimized LightGBM...
Final Master Blend 'submission_v6_master_blend.csv' generated.
