In [None]:
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score, RepeatedStratifiedKFold
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import LinearSVC

In [None]:
# Load data and split into features / target

# NOTE(review): no visible cell creates `df`; the otherwise-unused `load_iris` import
# suggests the iris frame (four feature columns plus 'target') is the intended dataset.
# Confirm, and replace this line if the data is meant to come from somewhere else.
df = load_iris(as_frame=True).frame

# Features are every column except the label; the label is a 1-D Series,
# which is the shape scikit-learn estimators and stratified splitters expect.
X = df.drop(columns='target')
y = df['target']

Naive Bayes: Naive Bayes handles multi-class classification natively. It estimates the probability of each class given the features and predicts the class with the highest probability.

Logistic Regression: Logistic Regression can be extended to multi-class classification in several ways. One common approach is the one-vs-rest (OvR) strategy, in which a separate binary classifier is trained for each class to distinguish that class from all the others. Alternatively, the one-vs-one (OvO) strategy trains a binary classifier for each pair of classes. A third option is multinomial (softmax) logistic regression, which fits a single model over all classes jointly.

SVM: Support Vector Machines are binary classifiers by design, but they can be adapted to multi-class classification using either the one-vs-one (OvO) or the one-vs-rest (OvR) strategy. In OvO, a binary classifier is trained for each pair of classes, and the class that receives the most votes is chosen. In OvR, a separate binary classifier is trained for each class to distinguish that class from all the others. The `LinearSVC` used below follows the OvR strategy by default.



In [None]:
# Initialize models

# Seed every estimator that has a stochastic component so the scores are
# reproducible from run to run (the CV splitter is already seeded with 42).
naive_bayes_model = GaussianNB()
logistic_model = LogisticRegression(max_iter=1000)
svm_model = LinearSVC(max_iter=10000, random_state=42)  # Increase max_iter for convergence
random_forest_model = RandomForestClassifier(random_state=42)
gradient_boosting_model = GradientBoostingClassifier(random_state=42)

In [1]:
# util function to run models 

def repeated_cross_validation_model(model, X, y, n_splits=5, n_repeats=3):
    """
    Run repeated stratified cross-validation and return the accuracy for each class.

    For every fold of every repetition a fresh clone of ``model`` is fitted on the
    training part and used to predict the held-out part. Per-class accuracy is the
    fraction of held-out samples of a class that were predicted as that class,
    pooled over all folds and repetitions.

    ``cross_val_predict`` is deliberately not used: it only accepts splitters whose
    test sets partition the data, which repeated cross-validation does not.

    Parameters:
    - model: The multi-class scikit-learn estimator to be evaluated. It is cloned
      for each fold, so the instance passed in is left unfitted.
    - X: The feature matrix (array-like or DataFrame; converted to a NumPy array).
    - y: The target vector (1-D array-like, Series, or a single-column DataFrame).
    - n_splits: Number of folds in each repeated cross-validation.
    - n_repeats: Number of repetitions of cross-validation.

    Returns:
    - accuracy_per_class: pandas Series of accuracy scores, indexed by class label
      and named after the model's class.

    Raises:
    - ValueError: If ``y`` is not one-dimensional (or a single column).
    """

    features = np.asarray(X)
    labels = np.asarray(y)

    # Accept a single-column 2-D target (e.g. df[['target']]) but reject anything wider.
    if labels.ndim == 2 and labels.shape[1] == 1:
        labels = labels.ravel()
    if labels.ndim != 1:
        raise ValueError(
            f"y must be a 1-D target vector, got an array of shape {labels.shape}"
        )

    # `encoded` maps every sample to the position of its class in `classes`,
    # which lets np.bincount tally per-class counts directly.
    classes, encoded = np.unique(labels, return_inverse=True)
    n_classes = len(classes)
    correct_per_class = np.zeros(n_classes, dtype=np.int64)
    total_per_class = np.zeros(n_classes, dtype=np.int64)

    # Initialize RepeatedStratifiedKFold
    rskf = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=n_repeats, random_state=42)

    # Perform repeated cross-validation
    for train_idx, test_idx in rskf.split(features, labels):
        fold_model = clone(model)  # fresh, unfitted copy so folds never share state
        fold_model.fit(features[train_idx], labels[train_idx])
        predicted = fold_model.predict(features[test_idx])

        true_codes = encoded[test_idx]
        is_correct = predicted == labels[test_idx]
        total_per_class += np.bincount(true_codes, minlength=n_classes)
        correct_per_class += np.bincount(true_codes[is_correct], minlength=n_classes)

    # Calculate accuracy for each class. Every sample is held out once per
    # repetition, so no class has a zero denominator.
    accuracy_per_class = pd.Series(
        correct_per_class / total_per_class,
        index=classes,
        name=type(model).__name__,
    )

    return accuracy_per_class

In [None]:
# Run models
# Every model is evaluated with the same cross-validation settings so scores are comparable.
cv_settings = {"n_splits": 5, "n_repeats": 3}

naive_bayes_model_results = repeated_cross_validation_model(model=naive_bayes_model, X=X, y=y, **cv_settings)
logistic_model_results = repeated_cross_validation_model(model=logistic_model, X=X, y=y, **cv_settings)
svm_model_results = repeated_cross_validation_model(model=svm_model, X=X, y=y, **cv_settings)
random_forest_model_results = repeated_cross_validation_model(model=random_forest_model, X=X, y=y, **cv_settings)
gradient_boosting_model_results = repeated_cross_validation_model(model=gradient_boosting_model, X=X, y=y, **cv_settings)

# Collect the scores side by side with one labeled column per model.
# pd.Series(...) leaves Series results untouched and wraps scalar results,
# so pd.concat accepts either kind of return value.
model_results = {
    "naive_bayes": naive_bayes_model_results,
    "logistic_regression": logistic_model_results,
    "svm": svm_model_results,
    "random_forest": random_forest_model_results,
    "gradient_boosting": gradient_boosting_model_results,
}
results = pd.concat(
    {name: pd.Series(scores) for name, scores in model_results.items()},
    axis=1,
)
results