In [None]:
import pandas as pd
from sklearn.calibration import CalibratedClassifierCV
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier


from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,recall_score,precision_score,classification_report,roc_auc_score

In [None]:
# Load the cleaned cardio dataset; the path is relative to the working directory.
data_path = "model/cleaned_cardio_data.csv"
df = pd.read_csv(data_path)

In [17]:
# List the available columns before choosing the model features.
df.columns

Index(['age', 'gender', 'height', 'weight', 'ap_hi', 'ap_lo', 'cholesterol',
       'gluc', 'smoke', 'alco', 'active', 'cardio', 'Bmi', 'pulse_pressure',
       'age_bp_inter', 'gluc_bmi_inter', 'composite_risk', 'BMI_category'],
      dtype='object')

In [18]:
# Model inputs are a hand-picked subset of the columns (order matters for the
# downstream transformers); `cardio` is the prediction target.
feature_columns = [
    'gender',
    'cholesterol',
    'pulse_pressure',
    'age_bp_inter',
    'gluc_bmi_inter',
    'BMI_category',
    'composite_risk',
]
target_column = 'cardio'

X = df[feature_columns]
y = df[target_column]

- Train/test split: hold out 25% of the rows for evaluation (fixed `random_state` for reproducibility).

In [19]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
# NOTE(review): the split is not stratified on `y` -- consider `stratify=y` if
# the class balance of the two splits needs to match.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

- Split the selected features into numerical columns (to be scaled) and categorical columns (to be one-hot encoded).

In [20]:
# Partition the feature columns by dtype so that every column is routed to
# exactly one branch of the preprocessor.
num_features = X.select_dtypes(include='number').columns.to_list()

# Treat every non-numeric column as categorical. Selecting only `object`
# columns would leave bool / category / string-dtype columns in neither list,
# and a ColumnTransformer silently drops unlisted columns by default
# (remainder='drop'). For purely numeric + object data the result is unchanged.
cat_features = X.select_dtypes(exclude='number').columns.to_list()


- Build a preprocessor that standardizes the numerical features and one-hot encodes the categorical features.

In [21]:
# Numerical columns are standardized; categorical columns are one-hot encoded
# with the first level of each feature dropped.
# NOTE(review): with drop='first' and handle_unknown="ignore", a category unseen
# during fit is encoded as all zeros -- the same encoding as the dropped
# first category. Confirm that this is acceptable.
numeric_step = ("num", StandardScaler(), num_features)
categorical_step = (
    "cat",
    OneHotEncoder(drop='first', handle_unknown="ignore"),
    cat_features,
)

preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

# XGBoost MODEL

In [22]:
# XGBoost hyper-parameters: 1200 boosting rounds at a 0.05 learning rate, trees
# of depth 5, and 80% row / column subsampling per tree. The seed is fixed so
# the subsampling is repeatable.
xgb_params = dict(
    n_estimators=1200,
    learning_rate=0.05,
    max_depth=5,
    subsample=0.8,
    colsample_bytree=0.8,
    eval_metric="auc",
    random_state=42,
    n_jobs=-1,
)
xgb = XGBClassifier(**xgb_params)

In [23]:
# Chain preprocessing and the XGBoost classifier so both are fit together on
# whatever training data the pipeline receives.
xgb_steps = [
    ("preprocess", preprocessor),
    ("model", xgb),
]
Xgb_model = Pipeline(steps=xgb_steps)

In [24]:
# Wrap the whole pipeline in 5-fold cross-validated isotonic calibration so the
# predicted probabilities are calibrated on data the pipeline was not fit on.
calibrated_Xgboost = CalibratedClassifierCV(
    estimator=Xgb_model,
    cv=5,
    method="isotonic",
)

# Fit on the training split only; the test split stays untouched for evaluation.
calibrated_Xgboost.fit(X_train, y_train)

In [25]:
# Evaluate the calibrated XGBoost model on the held-out test split.
y_prob_Xgb = calibrated_Xgboost.predict_proba(X_test)[:, 1]  # second column of the probability matrix
y_pred_Xgb = calibrated_Xgboost.predict(X_test)

# Label-based metrics use the hard predictions; ROC AUC uses the probabilities.
xgb_scores = {
    "Accuracy:": accuracy_score(y_test, y_pred_Xgb),
    "Recall:": recall_score(y_test, y_pred_Xgb),
    "Precision:": precision_score(y_test, y_pred_Xgb),
    "ROC AUC:": roc_auc_score(y_test, y_prob_Xgb),
}
for label, score in xgb_scores.items():
    print(label, score)
print(classification_report(y_test, y_pred_Xgb))

Accuracy: 0.9114569114569114
Recall: 0.8913881748071979
Precision: 0.8137283660897624
ROC AUC: 0.9729020548255912
              precision    recall  f1-score   support

           0       0.96      0.92      0.94      7877
           1       0.81      0.89      0.85      3112

    accuracy                           0.91     10989
   macro avg       0.88      0.91      0.89     10989
weighted avg       0.92      0.91      0.91     10989



# LightGBM MODEL

In [None]:
# LightGBM with the same number of boosting rounds and learning rate as the
# XGBoost model; all other settings are left at the library defaults.
lgbm_params = dict(
    n_estimators=1200,
    learning_rate=0.05,
    random_state=42,
)
lgbm = LGBMClassifier(**lgbm_params)

In [None]:
# Same preprocessing as the XGBoost pipeline, followed by the LightGBM classifier.
lgbm_steps = [
    ("preprocess", preprocessor),
    ("model", lgbm),
]
LGBM_model = Pipeline(steps=lgbm_steps)

In [None]:
# 5-fold cross-validated isotonic calibration around the LightGBM pipeline.
calibrated_lgbm = CalibratedClassifierCV(
    estimator=LGBM_model,
    cv=5,
    method="isotonic",
)

# Fit on the training split only.
calibrated_lgbm.fit(X_train, y_train)

In [None]:
# Evaluate the calibrated LightGBM model on the held-out test split.
y_prob_lgbm = calibrated_lgbm.predict_proba(X_test)[:, 1]  # second column of the probability matrix
y_pred_lgbm = calibrated_lgbm.predict(X_test)

# (label, value) pairs are printed in the same order as for the other models.
lgbm_scores = (
    ("Accuracy:", accuracy_score(y_test, y_pred_lgbm)),
    ("Recall:", recall_score(y_test, y_pred_lgbm)),
    ("Precision:", precision_score(y_test, y_pred_lgbm)),
    ("ROC AUC:", roc_auc_score(y_test, y_prob_lgbm)),
)
for label, score in lgbm_scores:
    print(label, score)
print(classification_report(y_test, y_pred_lgbm))

# Neural Network

In [None]:
# The network consumes plain feature matrices, so apply the preprocessor
# outside of a Pipeline here. It is fit on the training split only and then
# reused, already fitted, on the test split, so no test-set statistics are
# used for scaling or encoding. This fits the shared `preprocessor` object
# in place.
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed  = preprocessor.transform(X_test)

In [None]:
def create_mlp(input_dim):
    """Build and compile a small feed-forward binary classifier.

    The network stacks an input of width ``input_dim``, two ReLU hidden
    layers (32 then 16 units) and a single sigmoid output unit. It is
    compiled with the Adam optimizer, binary cross-entropy loss and accuracy
    as the tracked metric.

    NOTE(review): ``Sequential``, ``Input`` and ``Dense`` are not imported in
    the visible import cell -- presumably they come from Keras; confirm and
    add the import so the notebook survives Restart & Run All.

    Args:
        input_dim: Number of features per sample.

    Returns:
        The compiled model, not yet trained.
    """
    hidden_widths = (32, 16)

    # Layers are created in input -> hidden -> output order.
    layers = [Input(shape=(input_dim,))]
    layers.extend(Dense(width, activation='relu') for width in hidden_widths)
    layers.append(Dense(1, activation='sigmoid'))

    network = Sequential(layers)
    network.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

In [None]:
# The network's input width is the number of columns produced by the preprocessor.
n_rows, input_dim = X_train_processed.shape

mlp_model = create_mlp(input_dim=input_dim)

In [None]:
# Train for 50 epochs in mini-batches of 32, setting aside 20% of the training
# data for validation; `history` keeps the object returned by fit().
fit_options = dict(
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    verbose=1,
)
history = mlp_model.fit(X_train_processed, y_train, **fit_options)

In [None]:
# Model scores for the test split, then hard labels via a 0.5 cut-off
# (scores strictly above 0.5 become 1, everything else 0).
y_pred_prob = mlp_model.predict(X_test_processed)
above_cutoff = y_pred_prob > 0.5
y_pred = above_cutoff.astype(int)

In [None]:
# Evaluate the neural network on the held-out test split with the same metrics
# as the tree models: label metrics from `y_pred`, ROC AUC from the raw scores.
mlp_scores = {
    "Accuracy:": accuracy_score(y_test, y_pred),
    "Recall:": recall_score(y_test, y_pred),
    "Precision:": precision_score(y_test, y_pred),
    "ROC AUC:": roc_auc_score(y_test, y_pred_prob),
}
for label, score in mlp_scores.items():
    print(label, score)

print(classification_report(y_test, y_pred))