# 🩺 Diabetes Risk Prediction using Machine Learning

End-to-end ML pipeline using CatBoost, LightGBM, and XGBoost.

## 1. Install & Import Libraries

In [None]:
!pip install catboost lightgbm xgboost

In [None]:

import pandas as pd
import numpy as np

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier


## 2. Load Data

In [None]:

# Read the training and test tables from the working directory
# (train first, then test), report their shapes, and preview the
# first rows of the training set as the cell output.
train, test = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))

print(train.shape, test.shape)
train.head()


## 3. Feature & Target Separation

In [None]:

# Column roles: the label to predict and the row identifier.
TARGET = 'diagnosed_diabetes'
ID_COL = 'id'

# Neither the label nor the identifier is used as a model input.
non_feature_cols = [TARGET, ID_COL]

y = train[TARGET]
X = train.drop(columns=non_feature_cols)

# The test table is only stripped of the identifier here.
X_test = test.drop(columns=[ID_COL])


## 4. Identify Categorical Features

In [None]:

# Columns stored with object dtype are treated as categorical features.
object_frame = X.select_dtypes(include='object')
cat_cols = list(object_frame.columns)

# Positional index of each categorical column within X; these indices are
# what gets passed to CatBoost as `cat_features`.
cat_features_idx = list(map(X.columns.get_loc, cat_cols))

cat_cols


## 5. Train CatBoost (Final Model)

In [None]:

# CatBoost hyperparameters, collected in one place so they are easy to tweak.
catboost_params = {
    'iterations': 500,
    'depth': 6,
    'learning_rate': 0.05,
    'eval_metric': 'AUC',
    'random_seed': 42,   # fixed seed for reproducible runs
    'verbose': 100,      # integer verbosity; presumably a log line every 100 iterations
}

cat_model = CatBoostClassifier(**catboost_params)

# Fit on the full training set; the categorical columns are identified by
# their positional indices in X.
cat_model.fit(X, y, cat_features=cat_features_idx)


## 6. Train LightGBM

In [None]:

# LightGBM rejects object-dtype (string) DataFrame columns, so fitting the
# classifier directly on X fails whenever X contains such columns. The
# classifier is therefore wrapped in a scikit-learn pipeline that first
# ordinal-encodes every object-dtype column and passes all other columns
# through untouched. The pipeline exposes the same fit / predict_proba
# interface, so later cells can keep calling `lgb_model.predict_proba(X_test)`
# on the raw test frame; the fitted classifier itself is `lgb_model[-1]`.
from sklearn.compose import make_column_selector, make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OrdinalEncoder

lgb_model = make_pipeline(
    make_column_transformer(
        (
            # Categories seen only at prediction time are encoded as -1
            # instead of raising an error.
            OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1),
            make_column_selector(dtype_include=object),
        ),
        remainder='passthrough',
    ),
    LGBMClassifier(
        n_estimators=500,
        learning_rate=0.05,
        random_state=42,
    ),
)

lgb_model.fit(X, y)


## 7. Train XGBoost

In [None]:

# XGBoost rejects object-dtype (string) DataFrame columns, so fitting the
# classifier directly on X fails whenever X contains such columns. The
# classifier is therefore wrapped in a scikit-learn pipeline that first
# ordinal-encodes every object-dtype column and passes all other columns
# through untouched. The pipeline exposes the same fit / predict_proba
# interface, so later cells can keep calling `xgb_model.predict_proba(X_test)`
# on the raw test frame; the fitted classifier itself is `xgb_model[-1]`.
from sklearn.compose import make_column_selector, make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OrdinalEncoder

xgb_model = make_pipeline(
    make_column_transformer(
        (
            # Categories seen only at prediction time are encoded as -1
            # instead of raising an error.
            OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1),
            make_column_selector(dtype_include=object),
        ),
        remainder='passthrough',
    ),
    XGBClassifier(
        n_estimators=500,
        learning_rate=0.05,
        max_depth=6,
        subsample=0.8,
        colsample_bytree=0.8,
        eval_metric='auc',
        random_state=42,
    ),
)

xgb_model.fit(X, y)


## 8. Ensemble Prediction

In [None]:

# Score the test set with each fitted model, in the order CatBoost, LightGBM,
# XGBoost. Column 1 of predict_proba is kept for each model (presumably the
# probability of the positive class; confirm against the label encoding).
fitted_models = (cat_model, lgb_model, xgb_model)
cat_preds, lgb_preds, xgb_preds = (
    model.predict_proba(X_test)[:, 1] for model in fitted_models
)

# Unweighted average of the three models' probabilities.
final_preds = (cat_preds + lgb_preds + xgb_preds) / 3


## 9. Create Submission

In [None]:

# Build the submission table: one row per test id with the ensembled
# probability. The column names come from the ID_COL / TARGET constants
# rather than repeated string literals, so they cannot drift apart from the
# names used when the features were prepared.
submission = pd.DataFrame({
    ID_COL: test[ID_COL],
    TARGET: final_preds,
})

# Write without the DataFrame index so the file holds only the two columns.
submission.to_csv('submission.csv', index=False)
submission.head()
