## Read and understand the data

In [171]:
import pandas as pd

data = pd.read_csv('dataset/data.csv')
data

Unnamed: 0,cap-shape=b,cap-shape=c,cap-shape=f,cap-shape=k,cap-shape=s,cap-shape=x,cap-surface=f,cap-surface=g,cap-surface=s,cap-surface=y,...,population=v,population=y,habitat=d,habitat=g,habitat=l,habitat=m,habitat=p,habitat=u,habitat=w,class
0,F,F,F,F,F,T,F,F,T,F,...,F,F,F,F,F,F,F,T,F,B
1,F,F,F,F,F,T,F,F,T,F,...,F,F,F,T,F,F,F,F,F,A
2,T,F,F,F,F,F,F,F,T,F,...,F,F,F,F,F,T,F,F,F,A
3,F,F,F,F,F,T,F,F,F,T,...,F,F,F,F,F,F,F,T,F,B
4,F,F,F,F,F,T,F,F,T,F,...,F,F,F,T,F,F,F,F,F,A
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,F,F,F,T,F,F,F,F,T,F,...,F,F,F,F,T,F,F,F,F,A
8120,F,F,F,F,F,T,F,F,T,F,...,T,F,F,F,T,F,F,F,F,A
8121,F,F,T,F,F,F,F,F,T,F,...,F,F,F,F,T,F,F,F,F,A
8122,F,F,F,T,F,F,F,F,F,T,...,T,F,F,F,T,F,F,F,F,B


## Sklearn naive_bayes does not support categorical values in X 
## Therefore, I'll use label_encoder which transform T to 1 and F to 0

In [172]:
from sklearn.preprocessing import LabelEncoder

X = data.drop(columns=['class'])
y = data['class']
label_encoder = LabelEncoder()
for column in X.columns:
    if X[column].dtype == 'object' and len(X[column].unique()) == 2:
        X[column] = label_encoder.fit_transform(X[column])

## Use Naive Bayes algorithm for binary classification. Use stratified 10-folds cross-validation to measure the performance of the algorithm.
## 10-folds means in each iteration of cross-validation, the model is trained on 9*(dataset_size/10) samples and rest will be using on test

## To measure performance metrics I will store TP, TN, FP, FN in each stratified k-fold iteration

In [141]:
from sklearn.model_selection import StratifiedKFold
from sklearn.naive_bayes import GaussianNB


nb_classifier = GaussianNB()


# use n_splits = 10 for 10-folds
# Initialize stratified 10-fold cross-validation
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Parameters for calculating macro-average metrics
tn_A, fp_A, fn_A, tp_A = 0,0,0,0
tn_B, fp_B, fn_B, tp_B = 0,0,0,0


# Initialize variables for accumulating macro-average metrics for class A
total_fold_accuracy_macro_A = 0
total_fold_precision_macro_A = 0
total_fold_recall_macro_A = 0
total_fold_tn_rate_macro_A = 0
total_fold_f1_macro_A = 0

# Initialize variables for accumulating macro-average metrics for class B
total_fold_accuracy_macro_B = 0
total_fold_precision_macro_B = 0
total_fold_recall_macro_B = 0
total_fold_tn_rate_macro_B = 0
total_fold_f1_macro_B = 0

# Parameters for calculating micro-average metrics

total_fold_accuracy_micro = 0
total_fold_precision_micro = 0
total_fold_recall_micro = 0
total_fold_tn_rate_micro = 0 
total_fold_f1_micro = 0


# Iterate over each fold
for train_index, test_index in skf.split(X, y):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    
    # Train Naive Bayes classifier
    nb_classifier.fit(X_train, y_train)
    
    # Predict on test data
    y_pred = nb_classifier.predict(X_test)
    
    
    # tn means the model predicted the negative and the actual label was also negative
    # fp means the model predicted the positive and the actual label was negative
    # fn means the model predicted the negative and the actual label was positive
    # tn means the model predicted the negative and the actual label was also negative

    # our negative class is B
    # our positive class is A
    tn_A = sum((y_pred == 'B')&(y_test == 'B'))
    fp_A = sum((y_pred == 'A')&(y_test == 'B'))
    fn_A = sum((y_pred == 'B')&(y_test == 'A'))
    tp_A = sum((y_pred == 'A')&(y_test == 'A'))
    
    # our negative class is A
    # our positive class is B
    tn_B = sum((y_pred == 'A')&(y_test == 'A'))
    fp_B = sum((y_pred == 'B')&(y_test == 'A'))
    fn_B = sum((y_pred == 'A')&(y_test == 'B'))
    tp_B = sum((y_pred == 'B')&(y_test == 'B'))

    # Calculate each iteration's macro average metrics
    # First calculate for class A
    accuracy_A = (tp_A + tn_A) / (tp_A + tn_A + fp_A + fn_A)
    precision_A = tp_A / (tp_A + fp_A)
    recall_A = tp_A / (tp_A + fn_A)
    tn_rate_A = tn_A / (tn_A + fp_A)
    f1_A = 2 * (precision_A * recall_A) / (precision_A + recall_A)

    # Calculate for class B
    accuracy_B = (tp_B + tn_B) / (tp_B + tn_B + fp_B + fn_B)
    precision_B = tp_B / (tp_B + fp_B)
    recall_B = tp_B / (tp_B + fn_B) 
    tn_rate_B = tn_B / (tn_B + fp_B)
    f1_B = 2 * (precision_B * recall_B) / (precision_B + recall_B)

    # Accumulate macro-average metrics for each class
    total_fold_accuracy_macro_A += accuracy_A
    total_fold_precision_macro_A += precision_A
    total_fold_recall_macro_A += recall_A
    total_fold_tn_rate_macro_A += tn_rate_A
    total_fold_f1_macro_A += f1_A
    
    total_fold_accuracy_macro_B += accuracy_B
    total_fold_precision_macro_B += precision_B
    total_fold_recall_macro_B += recall_B
    total_fold_tn_rate_macro_B += tn_rate_B
    total_fold_f1_macro_B += f1_B

    # Micro-average metrics variables
    total_tp = tp_A + tp_B
    total_tn = tn_A + tn_B
    total_fp = fp_A + fp_B
    total_fn = fn_A + fn_B

    # Calculate micro-average metrics
    fold_accuracy_micro = (total_tp + total_tn) / (total_tp + total_tn + total_fp + total_fn)
    fold_precision_micro = (total_tp / (total_tp + total_fp))
    fold_recall_micro = (total_tp / (total_tp+ total_fn))
    fold_tn_rate_micro = (total_tn / (total_tn + total_fp))
    fold_f1_micro = 2 * (fold_precision_micro * fold_recall_micro) / (fold_precision_micro + fold_recall_micro)

    total_fold_accuracy_micro += fold_accuracy_micro
    total_fold_precision_micro += fold_precision_micro
    total_fold_recall_micro += fold_recall_micro
    total_fold_tn_rate_micro += fold_tn_rate_micro
    total_fold_f1_micro += fold_f1_micro

# Calculate macro average metrics
# Calculate macro-average metrics by averaging across all folds
accuracy_macro_A = total_fold_accuracy_macro_A / 10
precision_macro_A = total_fold_precision_macro_A / 10
recall_macro_A = total_fold_recall_macro_A / 10
tn_rate_macro_A = total_fold_tn_rate_macro_A / 10
f1_macro_A = total_fold_f1_macro_A / 10

accuracy_macro_B = total_fold_accuracy_macro_B / 10
precision_macro_B = total_fold_precision_macro_B / 10
recall_macro_B = total_fold_recall_macro_B / 10
tn_rate_macro_B = total_fold_tn_rate_macro_B / 10
f1_macro_B = total_fold_f1_macro_B / 10

# Combine the macro-average metrics for class A and class B
accuracy_macro = (accuracy_macro_A + accuracy_macro_B) / 2
precision_macro = (precision_macro_A + precision_macro_B) / 2
recall_macro = (recall_macro_A + recall_macro_B) / 2
tn_rate_macro = (tn_rate_macro_A + tn_rate_macro_B) / 2
f1_macro = (f1_macro_A + f1_macro_B) / 2

# Micro-average metrics
accuracy_micro = total_fold_accuracy_micro / 10
recall_micro = total_fold_recall_micro / 10
tn_rate_micro = total_fold_tn_rate_micro / 10
precision_micro = total_fold_precision_micro / 10
f1_micro = total_fold_f1_micro / 10

print("Macro Averages:")
print("Accuracy: ",accuracy_macro)
print("TPrate (Recall): " ,recall_macro)
print("TNrate: ", tn_rate_macro)
print("Precision: ",precision_macro)
print("F-Score: " ,f1_macro)



print("\n\nMicro Averages:")
print("Accuracy: ",accuracy_micro)
print("Tprate (Recall): ",recall_micro)
print("Tnrate: ", tn_rate_micro)
print("Precision: ", precision_micro)
print("F-Score: ",f1_micro)

Macro Averages:
Accuracy:  0.9438706911699659
TPrate (Recall):  0.9457895952969642
TNrate:  0.9457895952969642
Precision:  0.947778909104917
F-Score:  0.9438461751393117


Micro Averages:
Accuracy:  0.9438706911699659
Tprate (Recall):  0.9438706911699659
Tnrate:  0.9438706911699659
Precision:  0.9438706911699659
F-Score:  0.9438706911699659


# Calculate evaluation metrics
    
    # accuracy = (tp + tn) / (tp + tn + fp + fn)
    # precision = tp / (tp + fp)
    # recall = tp / (tp + fn) 
    # tn_rate = tn / (tn + fp)
    # f1 = 2 * (precision * recall) / (precision + recall)

## Apply feature-selection methods

## Mutual-information

In [142]:
from sklearn.feature_selection import mutual_info_classif
from sklearn.preprocessing import LabelEncoder
import pandas as pd

data = pd.read_csv('dataset/data.csv')

X = data.drop(columns=['class'])
y = data['class']
label_encoder = LabelEncoder()
for column in X.columns:
    if X[column].dtype == 'object' and len(X[column].unique()) == 2:
        X[column] = label_encoder.fit_transform(X[column])

# X_train: Feature matrix, y_train: Target vector
# Compute mutual information

mi_scores = mutual_info_classif(X, y, random_state=32)

feature_scores = dict(zip(X.columns, mi_scores))

sorted_features = sorted(feature_scores.items(), key = lambda x: x[1], reverse=True)

top_features = [feature[0] for feature in sorted_features[:90]]

## I will create a data frame with selected features 
## Then will run naive bayes with the dataset

In [143]:
X2 = X[top_features].copy()

In [144]:
from sklearn.model_selection import StratifiedKFold
from sklearn.naive_bayes import GaussianNB


nb_classifier = GaussianNB()


# use n_splits = 10 for 10-folds
# Initialize stratified 10-fold cross-validation
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Parameters for calculating macro-average metrics
tn_A, fp_A, fn_A, tp_A = 0,0,0,0
tn_B, fp_B, fn_B, tp_B = 0,0,0,0


# Initialize variables for accumulating macro-average metrics for class A
total_fold_accuracy_macro_A = 0
total_fold_precision_macro_A = 0
total_fold_recall_macro_A = 0
total_fold_tn_rate_macro_A = 0
total_fold_f1_macro_A = 0

# Initialize variables for accumulating macro-average metrics for class B
total_fold_accuracy_macro_B = 0
total_fold_precision_macro_B = 0
total_fold_recall_macro_B = 0
total_fold_tn_rate_macro_B = 0
total_fold_f1_macro_B = 0

# Parameters for calculating micro-average metrics

total_fold_accuracy_micro = 0
total_fold_precision_micro = 0
total_fold_recall_micro = 0
total_fold_tn_rate_micro = 0 
total_fold_f1_micro = 0


# Iterate over each fold
for train_index, test_index in skf.split(X2, y):
    X_train, X_test = X2.iloc[train_index], X2.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    
    # Train Naive Bayes classifier
    nb_classifier.fit(X_train, y_train)
    
    # Predict on test data
    y_pred = nb_classifier.predict(X_test)
    
    
    # tn means the model predicted the negative and the actual label was also negative
    # fp means the model predicted the positive and the actual label was negative
    # fn means the model predicted the negative and the actual label was positive
    # tn means the model predicted the negative and the actual label was also negative

    # our negative class is B
    # our positive class is A
    tn_A = sum((y_pred == 'B')&(y_test == 'B'))
    fp_A = sum((y_pred == 'A')&(y_test == 'B'))
    fn_A = sum((y_pred == 'B')&(y_test == 'A'))
    tp_A = sum((y_pred == 'A')&(y_test == 'A'))
    
    # our negative class is A
    # our positive class is B
    tn_B = sum((y_pred == 'A')&(y_test == 'A'))
    fp_B = sum((y_pred == 'B')&(y_test == 'A'))
    fn_B = sum((y_pred == 'A')&(y_test == 'B'))
    tp_B = sum((y_pred == 'B')&(y_test == 'B'))

    # Calculate each iteration's macro average metrics
    # First calculate for class A
    accuracy_A = (tp_A + tn_A) / (tp_A + tn_A + fp_A + fn_A)
    precision_A = tp_A / (tp_A + fp_A)
    recall_A = tp_A / (tp_A + fn_A)
    tn_rate_A = tn_A / (tn_A + fp_A)
    f1_A = 2 * (precision_A * recall_A) / (precision_A + recall_A)

    # Calculate for class B
    accuracy_B = (tp_B + tn_B) / (tp_B + tn_B + fp_B + fn_B)
    precision_B = tp_B / (tp_B + fp_B)
    recall_B = tp_B / (tp_B + fn_B) 
    tn_rate_B = tn_B / (tn_B + fp_B)
    f1_B = 2 * (precision_B * recall_B) / (precision_B + recall_B)

    # Accumulate macro-average metrics for each class
    total_fold_accuracy_macro_A += accuracy_A
    total_fold_precision_macro_A += precision_A
    total_fold_recall_macro_A += recall_A
    total_fold_tn_rate_macro_A += tn_rate_A
    total_fold_f1_macro_A += f1_A
    
    total_fold_accuracy_macro_B += accuracy_B
    total_fold_precision_macro_B += precision_B
    total_fold_recall_macro_B += recall_B
    total_fold_tn_rate_macro_B += tn_rate_B
    total_fold_f1_macro_B += f1_B

    # Micro-average metrics variables
    total_tp = tp_A + tp_B
    total_tn = tn_A + tn_B
    total_fp = fp_A + fp_B
    total_fn = fn_A + fn_B

    # Calculate micro-average metrics
    fold_accuracy_micro = (total_tp + total_tn) / (total_tp + total_tn + total_fp + total_fn)
    fold_precision_micro = (total_tp / (total_tp + total_fp))
    fold_recall_micro = (total_tp / (total_tp+ total_fn))
    fold_tn_rate_micro = (total_tn / (total_tn + total_fp))
    fold_f1_micro = 2 * (fold_precision_micro * fold_recall_micro) / (fold_precision_micro + fold_recall_micro)

    total_fold_accuracy_micro += fold_accuracy_micro
    total_fold_precision_micro += fold_precision_micro
    total_fold_recall_micro += fold_recall_micro
    total_fold_tn_rate_micro += fold_tn_rate_micro
    total_fold_f1_micro += fold_f1_micro

# Calculate macro average metrics
# Calculate macro-average metrics by averaging across all folds
accuracy_macro_A = total_fold_accuracy_macro_A / 10
precision_macro_A = total_fold_precision_macro_A / 10
recall_macro_A = total_fold_recall_macro_A / 10
tn_rate_macro_A = total_fold_tn_rate_macro_A / 10
f1_macro_A = total_fold_f1_macro_A / 10

accuracy_macro_B = total_fold_accuracy_macro_B / 10
precision_macro_B = total_fold_precision_macro_B / 10
recall_macro_B = total_fold_recall_macro_B / 10
tn_rate_macro_B = total_fold_tn_rate_macro_B / 10
f1_macro_B = total_fold_f1_macro_B / 10

# Combine the macro-average metrics for class A and class B
accuracy_macro = (accuracy_macro_A + accuracy_macro_B) / 2
precision_macro = (precision_macro_A + precision_macro_B) / 2
recall_macro = (recall_macro_A + recall_macro_B) / 2
tn_rate_macro = (tn_rate_macro_A + tn_rate_macro_B) / 2
f1_macro = (f1_macro_A + f1_macro_B) / 2

# Micro-average metrics
accuracy_micro = total_fold_accuracy_micro / 10
recall_micro = total_fold_recall_micro / 10
tn_rate_micro = total_fold_tn_rate_micro / 10
precision_micro = total_fold_precision_micro / 10
f1_micro = total_fold_f1_micro / 10

print("Macro Averages:")
print("Accuracy: ",accuracy_macro)
print("TPrate (Recall): " ,recall_macro)
print("TNrate: ", tn_rate_macro)
print("Precision: ",precision_macro)
print("F-Score: " ,f1_macro)



print("\n\nMicro Averages:")
print("Accuracy: ",accuracy_micro)
print("Tprate (Recall): ",recall_micro)
print("Tnrate: ", tn_rate_micro)
print("Precision: ", precision_micro)
print("F-Score: ",f1_micro)

Macro Averages:
Accuracy:  0.9927371712140767
TPrate (Recall):  0.9929263899611274
TNrate:  0.9929263899611274
Precision:  0.992606191219768
F-Score:  0.9927305753368805


Micro Averages:
Accuracy:  0.9927371712140767
Tprate (Recall):  0.9927371712140767
Tnrate:  0.9927371712140767
Precision:  0.9927371712140767
F-Score:  0.9927371712140767


##### Random state 32, k = 90, 0.9927
## Selected Features:

In [13]:
top_features

['odor=n',
 'odor=f',
 'stalk-surface-above-ring=k',
 'stalk-surface-below-ring=k',
 'gill-color=b',
 'gill-size',
 'spore-print-color=h',
 'ring-type=l',
 'ring-type=p',
 'stalk-surface-above-ring=s',
 'bruises?',
 'population=v',
 'stalk-surface-below-ring=s',
 'spore-print-color=n',
 'spore-print-color=k',
 'stalk-root=b',
 'gill-spacing=w',
 'gill-spacing=c',
 'spore-print-color=w',
 'habitat=p',
 'odor=y',
 'stalk-color-above-ring=g',
 'odor=s',
 'gill-color=n',
 'stalk-color-below-ring=g',
 'stalk-color-above-ring=b',
 'population=n',
 'odor=a',
 'ring-type=e',
 'population=a',
 'odor=l',
 'stalk-color-above-ring=n',
 'stalk-color-below-ring=b',
 'stalk-color-above-ring=p',
 'gill-color=u',
 'stalk-color-above-ring=w',
 'stalk-root=e',
 'stalk-color-below-ring=n',
 'stalk-root=c',
 'cap-shape=b',
 'odor=p',
 'stalk-surface-below-ring=y',
 'stalk-color-below-ring=w',
 'habitat=l',
 'stalk-color-below-ring=p',
 'stalk-color-above-ring=o',
 'ring-number=t',
 'odor=c',
 'gill-color=w

In [264]:
from sklearn.feature_selection import chi2

# Calculate chi-square scores and p-values for the entire dataset
chi2_scores, _ = chi2(X, y)

# Create a dictionary mapping feature names to their chi-square scores
feature_scores = dict(zip(X.columns, chi2_scores))

# Sort features based on their chi-square scores in descending order
sorted_features = sorted(feature_scores.items(), key=lambda x: x[1], reverse=True)

# Select the top k features with the highest chi-square scores
k = 120 # Number of features to select
top_features = [feature[0] for feature in sorted_features[:k]]

X3 = X[top_features].copy()


In [265]:
from sklearn.model_selection import StratifiedKFold
from sklearn.naive_bayes import GaussianNB


nb_classifier = GaussianNB()


# use n_splits = 10 for 10-folds
# Initialize stratified 10-fold cross-validation
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Parameters for calculating macro-average metrics
tn_A, fp_A, fn_A, tp_A = 0,0,0,0
tn_B, fp_B, fn_B, tp_B = 0,0,0,0


# Initialize variables for accumulating macro-average metrics for class A
total_fold_accuracy_macro_A = 0
total_fold_precision_macro_A = 0
total_fold_recall_macro_A = 0
total_fold_tn_rate_macro_A = 0
total_fold_f1_macro_A = 0

# Initialize variables for accumulating macro-average metrics for class B
total_fold_accuracy_macro_B = 0
total_fold_precision_macro_B = 0
total_fold_recall_macro_B = 0
total_fold_tn_rate_macro_B = 0
total_fold_f1_macro_B = 0

# Parameters for calculating micro-average metrics

total_fold_accuracy_micro = 0
total_fold_precision_micro = 0
total_fold_recall_micro = 0
total_fold_tn_rate_micro = 0 
total_fold_f1_micro = 0


# Iterate over each fold
for train_index, test_index in skf.split(X3, y):
    X_train, X_test = X3.iloc[train_index], X3.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    
    # Train Naive Bayes classifier
    nb_classifier.fit(X_train, y_train)
    
    # Predict on test data
    y_pred = nb_classifier.predict(X_test)
    
    
    # tn means the model predicted the negative and the actual label was also negative
    # fp means the model predicted the positive and the actual label was negative
    # fn means the model predicted the negative and the actual label was positive
    # tn means the model predicted the negative and the actual label was also negative

    # our negative class is B
    # our positive class is A
    tn_A = sum((y_pred == 'B')&(y_test == 'B'))
    fp_A = sum((y_pred == 'A')&(y_test == 'B'))
    fn_A = sum((y_pred == 'B')&(y_test == 'A'))
    tp_A = sum((y_pred == 'A')&(y_test == 'A'))
    
    # our negative class is A
    # our positive class is B
    tn_B = sum((y_pred == 'A')&(y_test == 'A'))
    fp_B = sum((y_pred == 'B')&(y_test == 'A'))
    fn_B = sum((y_pred == 'A')&(y_test == 'B'))
    tp_B = sum((y_pred == 'B')&(y_test == 'B'))

    # Calculate each iteration's macro average metrics
    # First calculate for class A
    accuracy_A = (tp_A + tn_A) / (tp_A + tn_A + fp_A + fn_A)
    precision_A = tp_A / (tp_A + fp_A)
    recall_A = tp_A / (tp_A + fn_A)
    tn_rate_A = tn_A / (tn_A + fp_A)
    f1_A = 2 * (precision_A * recall_A) / (precision_A + recall_A)

    # Calculate for class B
    accuracy_B = (tp_B + tn_B) / (tp_B + tn_B + fp_B + fn_B)
    precision_B = tp_B / (tp_B + fp_B)
    recall_B = tp_B / (tp_B + fn_B) 
    tn_rate_B = tn_B / (tn_B + fp_B)
    f1_B = 2 * (precision_B * recall_B) / (precision_B + recall_B)

    # Accumulate macro-average metrics for each class
    total_fold_accuracy_macro_A += accuracy_A
    total_fold_precision_macro_A += precision_A
    total_fold_recall_macro_A += recall_A
    total_fold_tn_rate_macro_A += tn_rate_A
    total_fold_f1_macro_A += f1_A
    
    total_fold_accuracy_macro_B += accuracy_B
    total_fold_precision_macro_B += precision_B
    total_fold_recall_macro_B += recall_B
    total_fold_tn_rate_macro_B += tn_rate_B
    total_fold_f1_macro_B += f1_B

    # Micro-average metrics variables
    total_tp = tp_A + tp_B
    total_tn = tn_A + tn_B
    total_fp = fp_A + fp_B
    total_fn = fn_A + fn_B

    # Calculate micro-average metrics
    fold_accuracy_micro = (total_tp + total_tn) / (total_tp + total_tn + total_fp + total_fn)
    fold_precision_micro = (total_tp / (total_tp + total_fp))
    fold_recall_micro = (total_tp / (total_tp+ total_fn))
    fold_tn_rate_micro = (total_tn / (total_tn + total_fp))
    fold_f1_micro = 2 * (fold_precision_micro * fold_recall_micro) / (fold_precision_micro + fold_recall_micro)

    total_fold_accuracy_micro += fold_accuracy_micro
    total_fold_precision_micro += fold_precision_micro
    total_fold_recall_micro += fold_recall_micro
    total_fold_tn_rate_micro += fold_tn_rate_micro
    total_fold_f1_micro += fold_f1_micro

# Calculate macro average metrics
# Calculate macro-average metrics by averaging across all folds
accuracy_macro_A = total_fold_accuracy_macro_A / 10
precision_macro_A = total_fold_precision_macro_A / 10
recall_macro_A = total_fold_recall_macro_A / 10
tn_rate_macro_A = total_fold_tn_rate_macro_A / 10
f1_macro_A = total_fold_f1_macro_A / 10

accuracy_macro_B = total_fold_accuracy_macro_B / 10
precision_macro_B = total_fold_precision_macro_B / 10
recall_macro_B = total_fold_recall_macro_B / 10
tn_rate_macro_B = total_fold_tn_rate_macro_B / 10
f1_macro_B = total_fold_f1_macro_B / 10

# Combine the macro-average metrics for class A and class B
accuracy_macro = (accuracy_macro_A + accuracy_macro_B) / 2
precision_macro = (precision_macro_A + precision_macro_B) / 2
recall_macro = (recall_macro_A + recall_macro_B) / 2
tn_rate_macro = (tn_rate_macro_A + tn_rate_macro_B) / 2
f1_macro = (f1_macro_A + f1_macro_B) / 2

# Micro-average metrics
accuracy_micro = total_fold_accuracy_micro / 10
recall_micro = total_fold_recall_micro / 10
tn_rate_micro = total_fold_tn_rate_micro / 10
precision_micro = total_fold_precision_micro / 10
f1_micro = total_fold_f1_micro / 10

print("Macro Averages:")
print("Accuracy: ",accuracy_macro)
print("TPrate (Recall): " ,recall_macro)
print("TNrate: ", tn_rate_macro)
print("Precision: ",precision_macro)
print("F-Score: " ,f1_macro)



print("\n\nMicro Averages:")
print("Accuracy: ",accuracy_micro)
print("Tprate (Recall): ",recall_micro)
print("Tnrate: ", tn_rate_micro)
print("Precision: ", precision_micro)
print("F-Score: ",f1_micro)

Macro Averages:
Accuracy:  0.9018968243869632
TPrate (Recall):  0.903012466269168
TNrate:  0.903012466269168
Precision:  0.9229343425773788
F-Score:  0.8971061950648327


Micro Averages:
Accuracy:  0.9018968243869632
Tprate (Recall):  0.9018968243869632
Tnrate:  0.9018968243869632
Precision:  0.9018968243869632
F-Score:  0.9018968243869632
