In [2]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import xgboost as xgb
import pandas as pd
from sklearn.metrics import classification_report, roc_auc_score

# Load the preprocessed table and separate the 'preeclampsia' target column
# from the remaining columns, which are passed to the models as features.
df = pd.read_csv('data/processed_data.csv')
X = df.drop(columns='preeclampsia')
y = df['preeclampsia']

# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# ratio the same in both partitions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Four heterogeneous base learners, each seeded so repeated runs match.
# SVC is built with probability=True so it exposes predict_proba, which
# soft voting requires from every member.
# NOTE(review): LogisticRegression and the RBF-kernel SVC are sensitive to
# feature scale and nothing in this cell scales X -- confirm that
# processed_data.csv is already standardised upstream.
log_clf = LogisticRegression(random_state=42, max_iter=1000)
svm_clf = SVC(kernel='rbf', probability=True, random_state=42)
rf_clf = RandomForestClassifier(random_state=42, n_estimators=100)
xgb_clf = xgb.XGBClassifier(random_state=42, eval_metric='logloss')

# Soft voting: the ensemble averages the members' predicted class
# probabilities instead of counting hard votes.
ensemble_clf = VotingClassifier(
    estimators=list(zip(
        ('LogReg', 'SVM', 'RF', 'XGB'),
        (log_clf, svm_clf, rf_clf, xgb_clf),
    )),
    voting='soft',
)

# Train on the training partition (fit returns the estimator itself, so the
# call can be chained) and produce hard labels for the held-out rows.
y_pred_ensemble = ensemble_clf.fit(X_train, y_train).predict(X_test)

# Column 1 of predict_proba is used as the positive-class score for ROC AUC.
# Assumes the labels are 0/1, as the recorded classification report shows.
y_proba_ensemble = ensemble_clf.predict_proba(X_test)[:, 1]

# NOTE(review): the recorded run shows recall of only 0.41 on class 1 even
# though AUC is ~0.98 -- consider tuning the decision threshold or class
# weighting if missed positives matter for this use case.
print("ensemble model", classification_report(y_test, y_pred_ensemble), sep="\n")
print("AUC-ROC:", roc_auc_score(y_test, y_proba_ensemble))

ensemble model
              precision    recall  f1-score   support

           0       0.96      0.99      0.98       536
           1       0.67      0.41      0.51        34

    accuracy                           0.95       570
   macro avg       0.82      0.70      0.74       570
weighted avg       0.95      0.95      0.95       570

AUC-ROC: 0.9787093942054433
