In [1]:

from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split

from sklearn.ensemble import (
    RandomForestClassifier,
    ExtraTreesClassifier,
    GradientBoostingClassifier
)

In [2]:


def _compute_roc_auc(clf, X_test, y_test):
    """Return the ROC-AUC of a fitted classifier rounded to 4 decimals, or "N/A".

    The score is best-effort: "N/A" is returned when the estimator exposes no
    usable ``predict_proba`` or when ``roc_auc_score`` rejects the inputs.
    """
    try:
        proba = clf.predict_proba(X_test)
        n_classes = proba.shape[1]
        if n_classes == 2:
            # Binary target: score with the probability of the second class.
            auc = roc_auc_score(y_test, proba[:, 1])
        elif n_classes > 2:
            # Multiclass target: one-vs-rest AUC, weighted like the F1-score.
            # Passing the fitted classes keeps the columns aligned with labels.
            auc = roc_auc_score(
                y_test,
                proba,
                multi_class="ovr",
                average="weighted",
                labels=clf.classes_,
            )
        else:
            # A single probability column cannot be turned into a ROC curve.
            return "N/A"
    except (AttributeError, IndexError, ValueError):
        # Only the expected failure modes are swallowed; anything else
        # (including KeyboardInterrupt) propagates to the caller.
        return "N/A"
    return round(auc, 4)


def run_classification_models(X, y, preprocessor, test_size: float = 0.2, random_state: int = 42) -> dict:
    """
    Train several tree-ensemble classifiers and return their test scores.

    The data is split once with a stratified train/test split; each model is
    then wrapped in a ``Pipeline`` behind ``preprocessor``, fitted on the
    training part and scored on the test part.

    Parameters
    ----------
    X, y :
        Features and target, passed as-is to ``train_test_split``.
    preprocessor :
        First step of every pipeline. The same object is reused (and refitted)
        for each model.
    test_size :
        Forwarded to ``train_test_split``.
    random_state :
        Seed used for the split and for every model.

    Returns
    -------
    dict
        ``{model_name: {"Accuracy": float, "F1-score": float, "ROC-AUC": float | "N/A"}}``
        with numeric scores rounded to 4 decimals. F1 is the weighted average.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )

    # Candidate models; they share the caller's seed so a run is reproducible
    # from `random_state` alone.
    # TODO: register an RLT classifier here once one is implemented.
    models = {
        "RandomForest": RandomForestClassifier(random_state=random_state),
        "ExtraTrees": ExtraTreesClassifier(random_state=random_state),
        "GradientBoosting": GradientBoostingClassifier(random_state=random_state),
    }

    results = {}

    for model_name, model in models.items():
        clf = Pipeline([
            ("preprocessing", preprocessor),
            ("classifier", model)
        ])

        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        acc = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average="weighted")

        results[model_name] = {
            "Accuracy": round(acc, 4),
            "F1-score": round(f1, 4),
            "ROC-AUC": _compute_roc_auc(clf, X_test, y_test),
        }

    return results

