In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.metrics import brier_score_loss, log_loss, roc_auc_score


# Imports above: numpy/pandas for data handling, scikit-learn and XGBoost estimators, and the evaluation metrics.

In [None]:
# Load the dataset and separate the 'target' label column from the features.
df = pd.read_csv("data.csv")
y = df['target']
X = df.drop(columns=['target'])

# Split BEFORE scaling: fitting the scaler on the full frame would let the
# held-out rows influence the mean/variance applied to the training rows
# (test-set leakage). The split arguments themselves are unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply that
# same fitted transform to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so any cell that still reads X_scaled keeps working; it is now the
# full feature matrix transformed with the train-fitted scaler.
X_scaled = scaler.transform(X)

In [None]:

# Base learners. Every estimator gets the same fixed seed so a fresh
# Restart & Run All reproduces the same fitted models; the liblinear solver
# shuffles data internally, so it needs random_state as well.
log_reg = LogisticRegression(solver='liblinear', random_state=42)
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)
# NOTE(review): use_label_encoder appears to be deprecated in newer XGBoost
# releases — confirm against the installed version before removing it.
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)

# Soft-voting ensemble over the three base learners above.
ensemble_model = VotingClassifier(
    estimators=[('lr', log_reg), ('rf', random_forest), ('xgb', xgb)],
    voting='soft',
)

# Shared splitter for cross-validation: 5 stratified folds, shuffled with a
# fixed seed.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Display name -> estimator for every model being compared.
models = {
    'Logistic Regression': log_reg,
    'Random Forest': random_forest,
    'XGBoost': xgb,
    'Ensemble': ensemble_model,
}


def _score_model(estimator):
    """Fit ``estimator`` on the training split and collect its metrics.

    Reads ``X_train``, ``y_train``, ``X_test``, ``y_test`` and ``cv`` from the
    notebook namespace.

    Returns a dict with the held-out 'Brier Score', 'Log Loss' and 'AUC-ROC',
    plus 'CV Brier Score', the mean Brier score over the ``cv`` folds of the
    training split.
    """
    estimator.fit(X_train, y_train)
    # Second probability column (index 1) is what every metric below scores.
    proba = estimator.predict_proba(X_test)[:, 1]

    metrics = {
        'Brier Score': brier_score_loss(y_test, proba),
        'Log Loss': log_loss(y_test, proba),
        'AUC-ROC': roc_auc_score(y_test, proba),
    }

    # The scorer returns negated Brier scores, so flip the sign of the mean.
    fold_scores = cross_val_score(
        estimator, X_train, y_train, cv=cv, scoring='neg_brier_score'
    )
    metrics['CV Brier Score'] = -np.mean(fold_scores)
    return metrics


# Train and evaluate models, in registry order.
results = {label: _score_model(estimator) for label, estimator in models.items()}

# Transpose so each row is a model and each column is a metric.
results_df = pd.DataFrame(results).T

# Show the table through the notebook's rich display (last expression of the
# cell). The previous mid-cell `import ace_tools` is not among the notebook's
# declared imports and raises ModuleNotFoundError on a fresh kernel where it
# is not installed.
results_df
