In [1]:
import numpy as np

# Confusion matrix (rows: predicted, columns: actual)
cm = np.array([
    [52, 3, 7, 2],
    [2, 28, 2, 0],
    [5, 2, 25, 12],
    [1, 1, 9, 40]
])

# Number of samples per class
samples = np.array([60, 34, 43, 54])

# Initialize lists to store metrics per class
precision_list = []
recall_list = []
f1_list = []

# Calculate precision, recall, and F1 score for each class
for i in range(len(samples)):
    tp = cm[i, i]
    fp = cm[:, i].sum() - tp
    fn = cm[i, :].sum() - tp

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0

    precision_list.append(precision)
    recall_list.append(recall)
    f1_list.append(f1)

# Macro averages (unweighted mean)
macro_precision = np.mean(precision_list)
macro_recall = np.mean(recall_list)
macro_f1 = np.mean(f1_list)

# Weighted F1 score (weighted by the number of samples per class)
weighted_f1 = np.sum(f1_list * samples) / np.sum(samples)

# Net precision and net recall (micro average)
total_tp = np.trace(cm)
total_pred = cm.sum(axis=0).sum()
total_actual = cm.sum(axis=1).sum()

net_precision = total_tp / total_pred if total_pred > 0 else 0
net_recall = total_tp / total_actual if total_actual > 0 else 0

precision_list, recall_list, f1_list, macro_precision, macro_recall, macro_f1, weighted_f1, net_precision, net_recall


([np.float64(0.8666666666666667),
  np.float64(0.8235294117647058),
  np.float64(0.5813953488372093),
  np.float64(0.7407407407407407)],
 [np.float64(0.8125),
  np.float64(0.875),
  np.float64(0.5681818181818182),
  np.float64(0.7843137254901961)],
 [np.float64(0.8387096774193549),
  np.float64(0.8484848484848485),
  np.float64(0.5747126436781609),
  np.float64(0.7619047619047618)],
 np.float64(0.7530830420023307),
 np.float64(0.7599988859180036),
 np.float64(0.7559529828717815),
 np.float64(0.7593013943176136),
 np.float64(0.7591623036649214),
 np.float64(0.7591623036649214))

In [2]:
# Print the results in a readable format
print("Per-Class Metrics:")
print(f"{'Class':<6}{'Precision':<12}{'Recall':<10}{'F1 Score':<10}")
for i, (p, r, f1) in enumerate(zip(precision_list, recall_list, f1_list), 1):
    print(f"{i:<6}{p:<12.3f}{r:<10.3f}{f1:<10.3f}")

print("\nAggregate Metrics:")
print(f"Macro Precision : {macro_precision:.3f}")
print(f"Macro Recall    : {macro_recall:.3f}")
print(f"Macro F1 Score  : {macro_f1:.3f}")
print(f"Net Precision   : {net_precision:.3f} (Micro Average)")
print(f"Net Recall      : {net_recall:.3f} (Micro Average)")
print(f"Weighted F1 Score: {weighted_f1:.3f}")


Per-Class Metrics:
Class Precision   Recall    F1 Score  
1     0.867       0.812     0.839     
2     0.824       0.875     0.848     
3     0.581       0.568     0.575     
4     0.741       0.784     0.762     

Aggregate Metrics:
Macro Precision : 0.753
Macro Recall    : 0.760
Macro F1 Score  : 0.756
Net Precision   : 0.759 (Micro Average)
Net Recall      : 0.759 (Micro Average)
Weighted F1 Score: 0.759
