In [None]:
import numpy as np
from sklearn.metrics import (
    f1_score, precision_score, recall_score, roc_auc_score
)


# Per-class decision-threshold tuning: pick, for every class, the probability
# cut-off that maximises F1 on the validation split, then report metrics on
# the test split using those cut-offs.

# Predicted probabilities and ground-truth labels for both splits.
# NOTE(review): assumes every array is (n_samples, n_classes) with binary
# indicator labels — confirm against the code that wrote these files.
val_probs = np.load("y_probs_val.npy")
val_labels = np.load("y_true_val.npy")
test_probs = np.load("y_probs.npy")
test_labels = np.load("y_true.npy")

# Take the class count from the data so the script keeps working when the
# label set changes, instead of relying on a hard-coded constant.
num_classes = val_probs.shape[1]

# Threshold kept for a class when no candidate reaches F1 > 0 (for example a
# class with no positive validation labels). A neutral 0.5 is used because a
# fallback of 0 would flag every sample with a non-negative score as positive.
default_threshold = 0.5
best_thresholds = np.full(num_classes, default_threshold)

# Candidate cut-offs: 0.00, 0.01, ..., 1.00.
thresholds = np.linspace(0, 1, 101)

for i, (class_probs, class_labels) in enumerate(zip(val_probs.T, val_labels.T)):
    best_f1 = 0
    for t in thresholds:
        preds = (class_probs >= t).astype(int)
        f1 = f1_score(class_labels, preds, zero_division=0)
        # Strict comparison: on ties the lowest candidate threshold wins,
        # because candidates are visited in ascending order.
        if f1 > best_f1:
            best_f1 = f1
            best_thresholds[i] = t

print("Optimal thresholds per class:\n", best_thresholds)

# Broadcasting compares column j of test_probs against best_thresholds[j].
test_preds = (test_probs >= best_thresholds).astype(int)

print("\n--- Test Set Metrics After Thresholding ---")
# Exact match: a sample counts only if every class prediction is correct.
print("Exact Match Ratio  :", np.all(test_preds == test_labels, axis=1).mean())
print("Macro F1           :", f1_score(test_labels, test_preds, average='macro', zero_division=0))
print("Macro Precision    :", precision_score(test_labels, test_preds, average='macro', zero_division=0))
print("Macro Recall       :", recall_score(test_labels, test_preds, average='macro', zero_division=0))
# AUC is computed from the raw probabilities, so it does not depend on the
# tuned thresholds.
print("AUC (macro average):", roc_auc_score(test_labels, test_probs, average='macro'))

# Per-class breakdown of test F1 next to the threshold that produced it.
for i, threshold in enumerate(best_thresholds):
    f1 = f1_score(test_labels[:, i], test_preds[:, i], zero_division=0)
    print(f"Class {i:2d} | F1: {f1:.4f} | Threshold: {threshold:.2f}")
