In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the HCV-Egy dataset from CSV.
data = pd.read_csv('HCV-Egy-Data.csv')
# Target: the 'Baselinehistological staging' column.
y = data['Baselinehistological staging']
# Features: every remaining column.
# Commented-out alternative that would also drop the RNA columns:
#X = data.drop(['Baselinehistological staging', 'RNA Base', 'RNA 4', 'RNA 12', 'RNA EOT', 'RNA EF'], axis=1)
X = data.drop(columns=['Baselinehistological staging'])

In [3]:
from sklearn.model_selection import StratifiedKFold

In [4]:
# Configure the cross-validation splitter: k shuffled folds, seeded so the
# same splits are produced on every run.
k = 5
skf = StratifiedKFold(
    random_state=42,
    shuffle=True,
    n_splits=k,
)

In [5]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Per-fold metric accumulators: each list receives one value per CV fold.
accuracies = []
precisions = []
recalls = []
f1_scores = []

# Fix the class order up front so that every fold's confusion matrix has the
# same shape and the same row/column meaning. Without an explicit `labels=`,
# confusion_matrix sizes itself from the labels seen in that fold only, and a
# fold missing a class would break the running sum below.
class_labels = np.unique(y)
n_classes = len(class_labels)
total_conf_matrix = np.zeros((n_classes, n_classes), dtype=int)  # running sum over folds

# Perform k-fold CV: fit a fresh model on each training split and score it on
# the held-out split.
for train_index, test_index in skf.split(X, y):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # A new model per fold so nothing learned from one fold carries over.
    # The boosting algorithm is pinned to 'SAMME' explicitly.
    model = AdaBoostClassifier(algorithm='SAMME', random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Macro-averaged metrics weight every class equally.
    # zero_division=0 scores a class with no predicted (or no true) samples
    # as 0 -- the same value as the default, but without emitting a warning.
    accuracies.append(accuracy_score(y_test, y_pred))
    precisions.append(precision_score(y_test, y_pred, average='macro', zero_division=0))
    recalls.append(recall_score(y_test, y_pred, average='macro', zero_division=0))
    f1_scores.append(f1_score(y_test, y_pred, average='macro', zero_division=0))

    # Accumulate this fold's confusion matrix (rows: true class, columns:
    # predicted class, both in `class_labels` order).
    total_conf_matrix += confusion_matrix(y_test, y_pred, labels=class_labels)

In [6]:
# Report the mean of each per-fold metric, followed by the confusion matrix
# accumulated across all folds. One print call, one item per line.
print(
    f"Average Accuracy: {np.mean(accuracies)}",
    f"Average Precision: {np.mean(precisions)}",
    f"Average Recall: {np.mean(recalls)}",
    f"Average F1 Score: {np.mean(f1_scores)}",
    "Total Confusion Matrix (summed over folds):",
    total_conf_matrix,
    sep="\n",
)

Average Accuracy: 0.24548736462093865
Average Precision: 0.2428468002610334
Average Recall: 0.2433150160763308
Average F1 Score: 0.23755582724137062
Total Confusion Matrix (summed over folds):
[[ 61  63 113  99]
 [ 60  66  84 122]
 [ 72  72  84 127]
 [ 65  69  99 129]]
