In [1]:
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_curve, auc,
    precision_recall_curve, classification_report, cohen_kappa_score, log_loss
)
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Name of the label column in `data`.
# TODO(review): the original cell left the column selection blank (`data[]`, which
# does not even parse). "target" is a placeholder -- set it to the real label column.
TARGET_COLUMN = "target"

# Fail early with a readable message instead of an opaque KeyError further down.
# Assumes `data` is a pandas DataFrame loaded in an earlier cell -- confirm.
if TARGET_COLUMN not in data.columns:
    raise KeyError(
        f"TARGET_COLUMN={TARGET_COLUMN!r} is not a column of `data`; "
        f"available columns: {list(data.columns)}"
    )

# Features are every column except the label, so the target cannot leak into X.
# Append any other non-feature columns (ids, free text, ...) to this list if needed.
X = data.drop([TARGET_COLUMN], axis = 1)
y = data[TARGET_COLUMN]

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Candidate models keyed by their display name.
# Insertion order matters: the accuracy list built by the evaluation loop is
# later paired with these keys position by position.
# Every estimator that accepts a seed gets random_state=42 for reproducibility.
classifiers = dict([
    ('Gaussian Naive Bayes', GaussianNB()),
    ('Multinomial Naive Bayes', MultinomialNB()),
    ('Random Forest', RandomForestClassifier(random_state=42)),
    ('Logistic Regression', LogisticRegression(random_state=42)),
    ('K-Nearest Neighbors', KNeighborsClassifier()),
    ('Support Vector Machine', SVC(random_state=42)),
    ('Decision Tree', DecisionTreeClassifier(random_state=42)),
    ('Gradient Boosting', GradientBoostingClassifier(random_state=42)),
    ('XGBoost', xgb.XGBClassifier(random_state=42)),
    ('AdaBoost', AdaBoostClassifier(random_state=42)),
])

# Fit every model in `classifiers` on the training split and report, per model:
# accuracy, confusion matrix, weighted precision/recall/F1, the full
# classification report and Cohen's kappa. Models exposing `predict_proba`
# additionally get log loss plus one-vs-rest ROC and precision-recall plots.
# `accuracy` collects one score per model, in the iteration order of `classifiers`.
accuracy  = []
for name, classifier in classifiers.items():
    print("Classification Report of ", classifier)
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)

    # Accuracy
    current_accuracy = accuracy_score(y_test, y_pred)
    accuracy.append(current_accuracy)

    # Confusion Matrix
    conf_matrix = confusion_matrix(y_test, y_pred)
    print(f"Confusion Matrix:\n{conf_matrix}")

    # Precision, Recall, F1-Score (support-weighted average across classes)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    print(f"Precision: {precision:.2f}, Recall: {recall:.2f}, F1-Score: {f1:.2f}")

    # Classification Report
    print(f"Classification Report:\n{classification_report(y_test, y_pred)}")

    # Cohen's Kappa
    kappa = cohen_kappa_score(y_test, y_pred)
    print(f"Cohen's Kappa: {kappa:.2f}")

    # Probability-based metrics only make sense for models that expose them.
    if hasattr(classifier, "predict_proba"):
        # Column i of y_proba is the probability of class_labels[i].
        y_proba = classifier.predict_proba(X_test)
        class_labels = classifier.classes_

        # Pass the label order explicitly so the columns stay aligned even when
        # the test split happens to miss one of the training classes.
        loss = log_loss(y_test, y_proba, labels=class_labels)
        print(f"Log Loss: {loss:.2f}")

        # One-vs-rest ROC curve and AUC per class
        fpr = {}
        tpr = {}
        roc_auc = {}
        n_classes = y_proba.shape[1]

        for i, label in enumerate(class_labels):
            # Compare against the actual label value, not the column index:
            # labels are not guaranteed to be the integers 0..n_classes-1.
            fpr[i], tpr[i], _ = roc_curve(y_test == label, y_proba[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])

        # Cycle the palette (and switch line style on every wrap-around) so that
        # classes beyond the third are still drawn instead of silently dropped.
        colors = ['aqua', 'darkorange', 'cornflowerblue']
        linestyles = ['-', '--', ':', '-.']

        # Plot ROC curves
        plt.figure()
        for i, label in enumerate(class_labels):
            plt.plot(fpr[i], tpr[i], color=colors[i % len(colors)], lw=2,
                     linestyle=linestyles[(i // len(colors)) % len(linestyles)],
                     label=f'ROC curve of class {label} (area = {roc_auc[i]:.2f})')

        # Diagonal = performance of a random classifier
        plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        # Include the model name so the per-model figures can be told apart.
        plt.title(f'Receiver Operating Characteristic for Multiclass - {name}')
        plt.legend(loc="lower right")
        plt.show()

        # One-vs-rest Precision-Recall curve per class
        precision_vals = {}
        recall_vals = {}
        for i, label in enumerate(class_labels):
            precision_vals[i], recall_vals[i], _ = precision_recall_curve(y_test == label, y_proba[:, i])

        plt.figure()
        for i, label in enumerate(class_labels):
            plt.plot(recall_vals[i], precision_vals[i], color=colors[i % len(colors)], lw=2,
                     linestyle=linestyles[(i // len(colors)) % len(linestyles)],
                     label=f'Precision-Recall curve of class {label}')

        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.title(f'Precision-Recall Curve for Multiclass - {name}')
        plt.legend(loc="lower left")
        plt.show()

# Side-by-side accuracy summary: one row per model, in the order the models
# were evaluated, followed by a bar chart of the same table.
print('Comparison of Accuracy')
performance_df = pd.DataFrame({
    'Model': list(classifiers),
    'Accuracy': accuracy,
})
print(performance_df)

g = sns.catplot(
    data=performance_df,
    x='Model',
    y='Accuracy',
    kind='bar',
    height=5,
)
# Model names are long; rotate them so the tick labels do not overlap.
g.set_xticklabels(rotation=90)

## Lazy predict

In [None]:
# Baseline sweep with LazyPredict's LazyClassifier on the scaled splits.
from lazypredict.Supervised import LazyClassifier

clf = LazyClassifier(
    verbose=0,
    ignore_warnings=True,
    custom_metric=None,
)

# NOTE(review): X_train_scaled / X_test_scaled are not defined in the cells
# visible here -- confirm an earlier cell creates them from this same split.
models, predictions = clf.fit(X_train_scaled, X_test_scaled, y_train, y_test)

# Print the result table as markdown text.
df = pd.DataFrame(models)
print(df.to_markdown())