In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.utils.class_weight import compute_class_weight

# Charger les données
data = pd.read_excel("dataset_new.xlsx")

# Séparer les caractéristiques (features) et la variable cible
X = data.drop('y', axis=1)
y = data['y']

# Diviser les données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Calculer les poids de classe pour la pondération
class_weights = compute_class_weight('balanced', np.unique(y_train), y_train)

# Créer les modèles avec les options spécifiques pour gérer le déséquilibre de classe
models = {
    'Logistic Regression': LogisticRegression(class_weight={0: class_weights[0], 1: class_weights[1]}),
    'Random Forest': RandomForestClassifier(class_weight={0: class_weights[0], 1: class_weights[1]}),
    'Support Vector Machine': SVC(probability=True),
    'Gradient Boosting': GradientBoostingClassifier(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Naive Bayes': GaussianNB(),
    'XGBoost': XGBClassifier(scale_pos_weight=class_weights[1]),  # Utilisation du paramètre scale_pos_weight
    'LightGBM': LGBMClassifier(class_weight={0: class_weights[0], 1: class_weights[1]}),  # Pondération de classe
    'CatBoost': CatBoostClassifier(verbose=0, class_weights=class_weights)  # Utilisation de la pondération de classe
}

# Définir les paramètres pour GridSearchCV
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.5],
    'max_depth': [3, 5, 7]
}

# Définir la stratégie de cross-validation
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Entraîner et évaluer les modèles avec GridSearchCV
for model_name, model in models.items():
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=cv, scoring='accuracy', n_jobs=-1)
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_
    
    y_pred = best_model.predict(X_test)
    
    # Accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f'{model_name} - Best params: {grid_search.best_params_}')
    print(f'{model_name} - Accuracy: {accuracy:.4f}')
    
    # Matrice de Confusion
    confusion_mat = confusion_matrix(y_test, y_pred)
    print(f'{model_name} - Confusion Matrix:\n{confusion_mat}')
    
    # Précision, Recall, F1-Score
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    print(f'{model_name} - Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}')
    
    # AUC-ROC
    if hasattr(best_model, 'predict_proba'):
        y_pred_prob = best_model.predict_proba(X_test)[:, 1]
        auc_roc = roc_auc_score(y_test, y_pred_prob)
        print(f'{model_name} - AUC-ROC: {auc_roc:.4f}')
    
    print('\n')  # Ajout d'une ligne vide entre les résultats de différents modèles


TypeError: compute_class_weight() takes 1 positional argument but 3 were given