In [None]:
import pandas as pd
import os
import numpy as np
from sklearn.metrics import precision_score, recall_score

In [None]:
# Identify which run / checkpoint / evaluation is being summarised.
run_name = "vitb_CT-RATE"
checkpoint_name = "training_549999"

test_name = "cross_validation_cls_logits"

results_path = os.path.join("results", run_name, checkpoint_name, test_name)

# Every label is a sub-directory of results_path holding its result CSVs.
# os.listdir returns entries in arbitrary order and also lists stray files,
# so keep directories only and sort them for a stable, reproducible row order.
labels = sorted(
    entry
    for entry in os.listdir(results_path)
    if os.path.isdir(os.path.join(results_path, entry))
)

In [None]:
def get_pr_f1(logits, labels):
    """Find the F1-maximising decision threshold and report metrics at it.

    The logits are passed through a sigmoid and 100 evenly spaced thresholds
    in [0, 1] are tried; the first threshold reaching the highest F1 wins.

    Args:
        logits: 1-D array-like (list, ndarray, pandas Series) of raw scores.
        labels: 1-D array-like of binary ground-truth labels, same length.

    Returns:
        dict with keys "precision", "recall", "f1" (all measured at the
        selected threshold) and "threshold" (the selected threshold). When no
        threshold yields a positive F1, every metric is 0 and the threshold is
        the 0.5 default.

    Note:
        The threshold is tuned on the very data it is scored on, so the
        returned numbers are an optimistic estimate for that data.
    """
    logits = np.asarray(logits, dtype=float)
    labels = np.asarray(labels)

    # exp() overflows to inf for very negative logits; the resulting
    # probability (1 / inf == 0.0) is still right, so only the warning is muted.
    with np.errstate(over="ignore"):
        probabilities = 1 / (1 + np.exp(-logits))

    best = {"precision": 0.0, "recall": 0.0, "f1": 0.0, "threshold": 0.5}

    for threshold in np.linspace(0, 1, 100):
        predictions = (probabilities >= threshold).astype(int)
        precision = precision_score(labels, predictions, zero_division=0)
        recall = recall_score(labels, predictions, zero_division=0)

        # F1 is undefined (0/0) when both components are zero; skip.
        if precision + recall <= 0:
            continue

        f1 = 2 * (precision * recall) / (precision + recall)
        # Strict ">" keeps the lowest threshold among ties.
        if f1 > best["f1"]:
            # Remember precision/recall together with F1 so they never have
            # to be recomputed (and can never disagree with the reported F1).
            best = {
                "precision": precision,
                "recall": recall,
                "f1": f1,
                "threshold": float(threshold),
            }

    return best

In [None]:
# Collect cross-validation and test-split metrics for every label.
#
# Files read from results_path/<label>/ :
#   <fold>.csv         - the last row supplies roc_auc and pr_auc for the fold
#   logits_<fold>.csv  - "logits" / "labels" columns, scored with get_pr_f1
#   test.csv           - the last row supplies roc_auc and pr_auc for the test split
#   logits_test.csv    - "logits" / "labels" columns for the test split
#
# Results:
#   cv_results[label][metric]   -> (mean, std, ci) across the n_folds folds
#   test_results[label][metric] -> single value from the test split
metrics = ["roc_auc", "pr_auc", "f1", "precision", "recall"]
cv_results = {label: {m: None for m in metrics} for label in labels}
test_results = {label: {m: None for m in metrics} for label in labels}
n_folds = 5

for label in labels:
    label_result_path = os.path.join(results_path, label)

    label_cv_results = {m: [] for m in metrics}

    for i in range(1, n_folds + 1):
        cv_fold_path = os.path.join(label_result_path, f"{i}.csv")
        df = pd.read_csv(cv_fold_path)
        final_row = df.iloc[-1].to_dict()

        for m in ["roc_auc", "pr_auc"]:
            label_cv_results[m].append(final_row[m])

        cv_fold_logits_path = os.path.join(label_result_path, f"logits_{i}.csv")
        df = pd.read_csv(cv_fold_logits_path)
        t_metrics = get_pr_f1(df["logits"], df["labels"])

        for m in ["precision", "recall", "f1"]:
            label_cv_results[m].append(t_metrics[m])

    # Read the test split's own file. Previously this re-read the last CV
    # fold's CSV, so the reported test roc_auc / pr_auc were really fold values.
    test_path = os.path.join(label_result_path, "test.csv")
    df = pd.read_csv(test_path)
    final_row = df.iloc[-1].to_dict()

    for m in ["roc_auc", "pr_auc"]:
        test_results[label][m] = final_row[m]

    # NOTE(review): get_pr_f1 selects its threshold on the data it is given,
    # so these test precision/recall/f1 values are tuned on the test split.
    test_logits_path = os.path.join(label_result_path, "logits_test.csv")
    df = pd.read_csv(test_logits_path)
    t_metrics = get_pr_f1(df["logits"], df["labels"])

    for m in ["precision", "recall", "f1"]:
        test_results[label][m] = t_metrics[m]

    for m in metrics:
        mean = float(np.mean(label_cv_results[m]))
        # NOTE(review): np.std defaults to ddof=0 (population std) and 1.96 is
        # the normal-approximation multiplier; with only n_folds samples this
        # yields a narrower interval than a sample-std / t-based one. Confirm
        # this is the intended definition before quoting the interval.
        std = float(np.std(label_cv_results[m]))
        ci = std / (n_folds**0.5) * 1.96
        cv_results[label][m] = (mean, std, ci)

In [None]:
# Print the cross-validation summary as a Markdown table: one row per label,
# one column per metric, each cell formatted as "mean ± ci" using the first
# and third entries of the (mean, std, ci) tuples stored in cv_results.
header = f"| Label | {' | '.join(metrics)} |"
separator = "|-" + "-|-".join("----" for _ in range(len(metrics) + 1)) + "|"

print(header, separator, sep="\n")

for label in labels:
    cells = []
    for m in metrics:
        mean, _std, ci = cv_results[label][m]
        cells.append(f"| {mean:.4f} ± {ci:.4f} ")
    row = f"| {label} " + "".join(cells) + "|"
    print(row)

In [None]:
# Print the test-split results as a Markdown table: one row per label, one
# column per metric, each cell holding the single value from test_results.
header = f"| Label | {' | '.join(metrics)} |"
separator = "|-" + "-|-".join("----" for _ in range(len(metrics) + 1)) + "|"

print(header, separator, sep="\n")

for label in labels:
    cells = "".join(f"| {test_results[label][m]:.4f} " for m in metrics)
    row = f"| {label} {cells}|"
    print(row)