In [None]:
%cd ..
import os
import pandas as pd
import numpy as np
from dotenv import load_dotenv
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
from sklearn.metrics import roc_auc_score
import seaborn as sns
from tqdm import tqdm

from typing import List, Dict, Tuple

In [None]:
# Load settings from a local .env file into the process environment.
load_dotenv()

# Root of the project; every results path below is built from it.
project_path = os.getenv("PROJECTPATH")
# Read for completeness; not referenced by the cells visible in this notebook.
data_path = os.getenv("DATAPATH")

# Fail fast: os.getenv returns None for a missing variable, which would
# otherwise surface much later as an opaque TypeError inside os.path.join.
if project_path is None:
    raise RuntimeError(
        "PROJECTPATH is not set; define it in the environment or in the .env file."
    )

In [None]:
def get_logits(
    result_path: str, valid_name: str, epoch: int
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Load the predictions stored for one validation split at one epoch.

    The file read is
    ``<result_path>/<valid_name>/predictions/epoch_<epoch:02d>.csv`` and must
    contain the columns ``logits`` and ``labels``.

    Folds and epochs are 0-indexed.

    Args:
        result_path: Directory that holds one sub-directory per split.
        valid_name: Name of the split sub-directory (e.g. ``"fold_0"`` or
            ``"test"``).
        epoch: Epoch number; zero-padded to two digits in the file name.

    Returns:
        ``(logits, labels)`` as NumPy arrays, in file order.

    Raises:
        FileNotFoundError: If the predictions file does not exist.
        KeyError: If one of the expected columns is missing.
    """
    epoch_predictions_path = os.path.join(
        result_path, valid_name, "predictions", f"epoch_{epoch:02d}.csv"
    )
    epoch_predictions = pd.read_csv(epoch_predictions_path)

    logits = epoch_predictions["logits"].to_numpy()
    labels = epoch_predictions["labels"].to_numpy()

    return logits, labels

In [None]:
def get_roc_auc_se(auc, labels):
    """
    Standard error of a ROC AUC using the Hanley & McNeil (1982) approximation.

    Args:
        auc: Observed area under the ROC curve.
        labels: Binary ground-truth labels (1 = positive, 0 = negative); only
            the class counts are used.

    Returns:
        The approximate standard error of ``auc``.
    """
    n_pos = np.sum(labels)
    n_neg = len(labels) - n_pos

    auc_sq = auc**2
    # q1 / q2 are the closed-form terms of the Hanley-McNeil variance formula.
    q1 = auc / (2 - auc)
    q2 = 2 * auc_sq / (1 + auc)

    variance = (
        auc * (1 - auc) + (n_pos - 1) * (q1 - auc_sq) + (n_neg - 1) * (q2 - auc_sq)
    ) / (n_pos * n_neg)

    return np.sqrt(variance)

In [None]:
def get_cv_results(
    results_path: str, n_folds: int = 5, n_epochs: int = 10
) -> List[Tuple[float, float]]:
    """
    Summarise cross-validation ROC AUC per epoch.

    For every epoch the ROC AUC of each fold (``fold_0`` .. ``fold_{n_folds-1}``)
    is computed from the stored predictions, then reduced to a
    ``(mean, scaled_std)`` pair.

    The standard deviation (population form, ``np.std`` default) is scaled by
    ``sqrt(1/k + 1/(k-1))`` with ``k = n_folds``. For the default ``k = 5``
    this is the previously hard-coded ``sqrt(1/5 + 1/4)``.
    NOTE(review): this looks like the Nadeau-Bengio corrected-variance factor
    ``1/k + n_test/n_train`` for k-fold CV -- confirm that is the intent.

    Args:
        results_path: Directory containing the per-fold prediction folders.
        n_folds: Number of cross-validation folds (>= 1).
        n_epochs: Number of epochs to evaluate, starting at epoch 0.

    Returns:
        One ``(mean_auc, scaled_std)`` tuple per epoch, in epoch order.

    Raises:
        ValueError: If ``n_folds`` is smaller than 1.
    """
    if n_folds < 1:
        raise ValueError(f"n_folds must be >= 1, got {n_folds}")

    # With a single fold there is no train/test ratio term (and the std is 0).
    test_train_ratio = 1 / (n_folds - 1) if n_folds > 1 else 0.0
    se_scale = (1 / n_folds + test_train_ratio) ** 0.5

    epoch_summaries = []
    for ep in range(n_epochs):
        fold_aucs = []
        for i in range(n_folds):
            logits, labels = get_logits(results_path, f"fold_{i}", ep)
            fold_aucs.append(roc_auc_score(labels, logits))

        epoch_summaries.append((np.mean(fold_aucs), np.std(fold_aucs) * se_scale))

    return epoch_summaries

In [None]:
def get_test_results(
    results_path: str, n_epochs: int = 10
) -> List[Tuple[float, float]]:
    """
    Compute the test-set ROC AUC and its standard error for every epoch.

    Predictions are read from the ``test`` split under ``results_path``; the
    standard error comes from ``get_roc_auc_se``.

    Args:
        results_path: Directory containing the ``test`` prediction folder.
        n_epochs: Number of epochs to evaluate, starting at epoch 0.

    Returns:
        One ``(roc_auc, roc_auc_se)`` tuple per epoch, in epoch order.
    """
    epochs_performance = []

    for ep in range(n_epochs):
        logits, labels = get_logits(results_path, "test", ep)

        roc_auc = roc_auc_score(labels, logits)
        roc_auc_se = get_roc_auc_se(roc_auc, labels)

        epochs_performance.append((roc_auc, roc_auc_se))

    return epochs_performance

In [None]:
def get_best_epoch(epochs_performance: List[Tuple[float, float]]) -> int:
    """
    Return the index of the epoch with the highest score.

    Only the first element of each entry is compared; when several epochs tie,
    the earliest one wins.
    """
    scores = []
    for entry in epochs_performance:
        scores.append(entry[0])

    return int(np.argmax(scores))

In [None]:
def process_experiment(experiment_name, all_labels, epochs_train=10, epochs_test=10):
    """
    Collect the best-epoch CV and test results for every label of an experiment.

    Results for a label are read from
    ``<project_path>/runs/<experiment_name>/results/<label>``.

    Args:
        experiment_name: Name of the run directory under ``runs``.
        all_labels: Labels (task names) to process.
        epochs_train: Number of epochs available for the cross-validation runs.
        epochs_test: Number of epochs available for the test runs.

    Returns:
        ``(cv_all, test_all)``: two dicts keyed by label. ``cv_all[label]`` is
        the ``(mean, scaled_std)`` pair of the best CV epoch and
        ``test_all[label]`` the ``(roc_auc, se)`` pair of the best test epoch.
    """
    cv_all = {}
    test_all = {}

    # Iterate the labels passed in by the caller, not the notebook-level
    # global `labels`, so the result does not depend on hidden kernel state.
    for label in tqdm(all_labels):
        results_path = os.path.join(
            project_path, "runs", experiment_name, "results", label
        )

        cv = get_cv_results(results_path, n_epochs=epochs_train)
        test = get_test_results(results_path, n_epochs=epochs_test)

        best_epoch_cv = get_best_epoch(cv)
        # NOTE(review): the test epoch is picked by its own test AUC, which is
        # optimistic; confirm this is intended rather than reusing the CV epoch.
        best_epoch_test = get_best_epoch(test)

        cv_all[label] = cv[best_epoch_cv]
        test_all[label] = test[best_epoch_test]

    return cv_all, test_all

In [None]:
def multimodel_barplot(
    results,
    critical=2.776,
    x_label="Abnormality",
    y_label="ROC AUC",
    title="",
    output_filename=None,
    bar_ratio=0.8,
    top_limit=1.0
):
    """
    Draw a grouped bar chart (one group per task, one bar per model) with
    error bars, optionally save it, and show it.

    Args:
        results: Mapping ``model -> {task -> (mean, std)}``.
        critical: Multiplier applied to ``std`` to obtain the error-bar
            half-width. NOTE(review): 2.776 is presumably the two-sided 95%
            Student-t critical value for 4 degrees of freedom (5 folds) --
            confirm.
        x_label: X-axis label.
        y_label: Y-axis label.
        title: Figure title.
        output_filename: If given, the figure is saved there at 300 dpi; a
            failure to save is reported with ``print`` and does not abort.
        bar_ratio: Fraction of each task slot covered by that task's bars.
        top_limit: Upper limit of the y-axis (the lower limit is 0).

    Raises:
        ValueError: If ``results`` contains no (model, task) entry.
    """
    data = []
    for model, tasks_data in results.items():
        for task, (mean, std) in tasks_data.items():
            data.append({
                "Model": model,
                "Task": task,
                "Mean Performance": mean,
                "CI_Half_Width": std * critical,
            })

    if not data:
        raise ValueError("results contains no (model, task) entries to plot")

    df = pd.DataFrame(data)

    # Tasks are ordered left to right by the best mean any model reaches.
    max_performance_per_task = (
        df.groupby("Task")["Mean Performance"].max().sort_values(ascending=False)
    )
    sorted_tasks = max_performance_per_task.index.tolist()

    sns.set_theme(style="whitegrid", palette="viridis")

    fig, ax = plt.subplots(figsize=(12, 10))

    models = df["Model"].unique()
    n_models = len(models)
    bar_width = bar_ratio / n_models

    palette = sns.color_palette("viridis", n_models)

    x = np.arange(len(sorted_tasks))

    for i, model in enumerate(models):
        model_df = df[df["Model"] == model].set_index("Task").reindex(sorted_tasks)

        # A model may lack some tasks. Select bar positions with the same mask
        # as the values, so x and heights always have matching lengths and
        # each bar stays in its own task slot.
        has_result = model_df["Mean Performance"].notna().to_numpy()

        means = model_df.loc[has_result, "Mean Performance"].to_numpy()
        ci_half_widths = model_df.loc[has_result, "CI_Half_Width"].to_numpy()

        # Offsets are symmetric around each task's tick position.
        current_x = (x + (i - n_models / 2 + 0.5) * bar_width)[has_result]

        ax.bar(
            current_x,
            means,
            width=bar_width,
            yerr=ci_half_widths,
            capsize=2,
            color=palette[i],
            label=model,
            edgecolor=".2",
            linewidth=0.1,
            error_kw={'elinewidth': 1.0, 'capthick': 1.0}
        )

    plt.title(title, fontsize=16, pad=50)
    plt.xlabel(x_label, fontsize=14, labelpad=15)
    plt.ylabel(y_label, fontsize=14, labelpad=15)

    plt.ylim(0, top_limit)
    ax.set_xlim(-bar_width*(n_models-1), len(sorted_tasks) - 1 + bar_width*(n_models-1))

    ax.set_xticks(x + (bar_width / n_models / 2))
    ax.set_xticklabels(sorted_tasks, rotation=45, ha='right', fontsize=14)

    ax.yaxis.set_major_locator(MultipleLocator(0.2))
    ax.yaxis.set_minor_locator(MultipleLocator(0.1))

    # Legend sits above the axes, one column per model.
    plt.legend(
        loc='upper center',
        bbox_to_anchor=(0.5, 1.1),
        ncol=len(models),
        frameon=False,
        fontsize=14
    )

    plt.tight_layout(rect=[0, 0, 0.88, 1])

    ax.grid(True, axis='y', alpha=0.7)

    ax.grid(which='major', linestyle='-', linewidth=0.5, color='black')
    ax.grid(which='minor', linestyle='-', linewidth=0.2, color='black')

    # Only horizontal grid lines are wanted.
    ax.grid(False, axis='x')

    if output_filename:
        # Saving is best-effort: report the problem but still show the figure.
        try:
            plt.savefig(output_filename, dpi=300, bbox_inches='tight')
            print(f"Figure saved to {output_filename}")
        except Exception as e:
            print(f"Error saving figure to {output_filename}: {e}")

    plt.show()

In [None]:
# CT-RATE multi-label abnormality experiments.
# Each entry is (experiment directory name, epochs_train, epochs_test), matching
# the unpacking order of the loop at the bottom of this cell.
experiment_epochs = [
    ("cv_base", 10, 20),
    ("cv_large", 10, 20),
    ("cv_base_ctrate", 10, 20),
    ("cv_ct_clip", 10, 10),
    ("cv_ct_fm", 10, 10)
]
# Display name used for each experiment in figures and tables.
experiment_alias = {
    "cv_base":"DINO-B",
    "cv_large": "DINO-L",
    "cv_base_ctrate": "DINO-B(CT-RATE)",
    "cv_ct_clip": "CT-CLIP",
    "cv_ct_fm": "CT-FM"
}

# Task names; each one is also the name of a results sub-directory.
# NOTE: `labels` is a notebook-level global that later cells rebind.
labels = [
    "Arterial wall calcification",
    "Cardiomegaly",
    "Pericardial effusion",
    "Coronary artery wall calcification",
    "Emphysema",
    "Atelectasis",
    "Lung nodule",
    "Lung opacity",
    "Pulmonary fibrotic sequela",
    "Pleural effusion",
    "Mosaic attenuation pattern",
    "Peribronchial thickening",
    "Consolidation",
    "Bronchiectasis",
    "Interlobular septal thickening",
]
# NOTE(review): not read by any cell visible in this notebook -- confirm it is still needed.
select_metric = "roc_auc"

# Best-epoch results per model alias: {alias: {label: (value, spread)}}.
cv_all_experiments = {}
test_all_experiments = {}
for experiment, epochs_train, epochs_test in experiment_epochs:
    cv_exp, test_exp = process_experiment(experiment, labels, epochs_train=epochs_train, epochs_test=epochs_test)
    cv_all_experiments[experiment_alias[experiment]] = cv_exp
    test_all_experiments[experiment_alias[experiment]] = test_exp

In [None]:
# Per-model average of the best test ROC AUC across all tasks, with a spread.
for exp, exp_data in test_all_experiments.items():
    print(exp)
    task_aucs = [entry[0] for entry in exp_data.values()]
    # NOTE(review): 0.717 is presumably 2.776 / sqrt(15) (t critical value over
    # the square root of the number of CT-RATE labels) -- confirm and name it.
    print(np.mean(task_aucs), np.std(task_aucs) * 0.717)

In [None]:
# Grouped bar chart of the best-epoch cross-validation ROC AUC per abnormality.
# The output path is relative to the working directory set by `%cd ..` in the first cell.
multimodel_barplot(
    cv_all_experiments,
    output_filename="evaluation/tasks/ct_rate/figures/ctrate_cv.png",
    #title="Cross-Validation Results for Multi-Label Abnormality Detection in CT-RATE"
)

In [None]:
# Grouped bar chart of the best-epoch test ROC AUC per abnormality.
# NOTE(review): the default critical=2.776 also scales the test-set standard
# errors produced by get_test_results -- confirm that multiplier is intended here.
multimodel_barplot(
    test_all_experiments,
    output_filename="evaluation/tasks/ct_rate/figures/ctrate_test.png",
    #title="Test dataset Results for Multi-Label Abnormality Detection in CT-RATE"
)

In [None]:
# Print LaTeX table rows ("label & model1 & model2 \\") for the CT-RATE tasks,
# one block for cross-validation and one for the test set.
# NOTE: reads the notebook globals `labels`, `experiment_alias`,
# `cv_all_experiments` and `test_all_experiments`; later cells rebind them, so
# this cell must run before those cells.
label_perf_cv = {name: [] for name in labels}
label_perf_test = {name: [] for name in labels}

exp_names = ["cv_ct_clip", "cv_ct_fm"] # ["cv_base", "cv_large", "cv_base_ctrate"]

# Multiplier turning the stored spread into the reported half-width (same
# value as the default `critical` of multimodel_barplot).
ci_scale = 2.776

for exp in exp_names:

    model_name = experiment_alias[exp]

    experiment_cv = cv_all_experiments[model_name]
    experiment_test = test_all_experiments[model_name]

    print(model_name)

    for label in labels:
        cv_mean = experiment_cv[label][0]
        cv_half_width = experiment_cv[label][1] * ci_scale
        test_mean = experiment_test[label][0]
        test_half_width = experiment_test[label][1] * ci_scale

        # "\\pm" emits the same text as before without relying on the invalid
        # "\p" escape sequence, which newer Python versions warn about.
        label_perf_cv[label].append(f"{cv_mean:.03f}$\\pm${cv_half_width:.03f}")
        label_perf_test[label].append(f"{test_mean:.03f}$\\pm${test_half_width:.03f}")

for label, row in label_perf_cv.items():

    print(f"{label} & " + " & ".join(row) + "\\\\")

print()

for label, row in label_perf_test.items():

    print(f"{label} & " + " & ".join(row) + "\\\\")


In [None]:
# External COVID experiments.
# NOTE: this cell rebinds the notebook globals `experiment_epochs`,
# `experiment_alias`, `labels`, `cv_all_experiments` and `test_all_experiments`,
# discarding the CT-RATE values held under the same names.
# Each entry is (experiment directory name, epochs_train, epochs_test).
experiment_epochs = [
    ("cv_COVID_base", 50, 50),
    ("cv_COVID_large", 50, 50),
    ("cv_COVID_base_ctrate", 50, 50),
    ("cv_COVID_ctclip", 50, 50),
    ("cv_COVID_ctfm", 50, 50)
]
# Display name used for each experiment in figures and tables.
experiment_alias = {
    "cv_COVID_base":"DINO-B",
    "cv_COVID_large": "DINO-L",
    "cv_COVID_base_ctrate": "DINO-B(CT-RATE)",
    "cv_COVID_ctclip": "CT-CLIP",
    "cv_COVID_ctfm": "CT-FM"
}

# Task names; each one is also the name of a results sub-directory.
labels = [
    "Covid-19",
    "Pneumonia(general)",
]

# Fresh result containers for the external tasks: {alias: {label: (value, spread)}}.
cv_all_experiments = {}
test_all_experiments = {}
for experiment, epochs_train, epochs_test in experiment_epochs:
    cv_exp, test_exp = process_experiment(experiment, labels, epochs_train=epochs_train, epochs_test=epochs_test)
    cv_all_experiments[experiment_alias[experiment]] = cv_exp
    test_all_experiments[experiment_alias[experiment]] = test_exp

In [None]:
# External fibrosis-survival experiments.
# NOTE: this cell does not recreate `cv_all_experiments` / `test_all_experiments`;
# it adds the fibrosis task to the per-alias dicts that already exist, so the
# cell that creates those aliases must have been run first (otherwise the
# indexing below raises KeyError).
# Each entry is (experiment directory name, epochs_train, epochs_test).
experiment_epochs = [
    ("cv_FIBROSIS_base", 10, 10),
    ("cv_FIBROSIS_large", 10, 10),
    ("cv_FIBROSIS_base_ctrate", 10, 10),
    ("cv_FIBROSIS_ct_clip", 10, 10),
    ("cv_FIBROSIS_ctfm", 10, 10)
]
# Display name used for each experiment; the aliases match the previous cell's
# so results merge under the same model.
experiment_alias = {
    "cv_FIBROSIS_base":"DINO-B",
    "cv_FIBROSIS_large": "DINO-L",
    "cv_FIBROSIS_base_ctrate": "DINO-B(CT-RATE)",
    "cv_FIBROSIS_ct_clip": "CT-CLIP",
    "cv_FIBROSIS_ctfm": "CT-FM"
}

# Task names; each one is also the name of a results sub-directory.
labels = [
    "Fibrosis_survival(5yr)",
]

for experiment, epochs_train, epochs_test in experiment_epochs:
    cv_exp, test_exp = process_experiment(experiment, labels, epochs_train=epochs_train, epochs_test=epochs_test)
    # Merge into the existing per-model results instead of replacing them.
    cv_all_experiments[experiment_alias[experiment]].update(cv_exp)
    test_all_experiments[experiment_alias[experiment]].update(test_exp)

In [None]:
# Grouped bar chart of the best-epoch cross-validation ROC AUC for the external tasks.
# bar_ratio is lowered from the default 0.8 to 0.5.
multimodel_barplot(
    cv_all_experiments,
    output_filename="evaluation/tasks/ct_rate/figures/external_cv.png",
    x_label="External Task",
    bar_ratio=0.5
)

In [None]:
# Grouped bar chart of the best-epoch test ROC AUC for the external tasks.
# top_limit is raised slightly above 1.0 (presumably so error bars near 1.0 are
# not clipped -- confirm).
multimodel_barplot(
    test_all_experiments,
    output_filename="evaluation/tasks/ct_rate/figures/external_test.png",
    x_label="External Task",
    bar_ratio=0.5,
    top_limit=1.02
)

In [None]:
# Print LaTeX table rows ("label & model1 & model2 \\") for the external tasks,
# one block for cross-validation and one for the test set.
# NOTE: reads the notebook globals `experiment_alias`, `cv_all_experiments` and
# `test_all_experiments` as left by the preceding cells.
labels = [
    "Covid-19",
    "Pneumonia(general)",
    "Fibrosis_survival(5yr)",
]

label_perf_cv = {name: [] for name in labels}
label_perf_test = {name: [] for name in labels}

exp_names = ["cv_FIBROSIS_ct_clip", "cv_FIBROSIS_ctfm"]#["cv_FIBROSIS_base", "cv_FIBROSIS_large", "cv_FIBROSIS_base_ctrate"]#,

# Multiplier turning the stored spread into the reported half-width (same
# value as the default `critical` of multimodel_barplot).
ci_scale = 2.776

for exp in exp_names:

    model_name = experiment_alias[exp]

    experiment_cv = cv_all_experiments[model_name]
    experiment_test = test_all_experiments[model_name]

    print(model_name)

    for label in labels:
        cv_mean = experiment_cv[label][0]
        cv_half_width = experiment_cv[label][1] * ci_scale
        test_mean = experiment_test[label][0]
        test_half_width = experiment_test[label][1] * ci_scale

        # "\\pm" emits the same text as before without relying on the invalid
        # "\p" escape sequence, which newer Python versions warn about.
        label_perf_cv[label].append(f"{cv_mean:.03f}$\\pm${cv_half_width:.03f}")
        label_perf_test[label].append(f"{test_mean:.03f}$\\pm${test_half_width:.03f}")

for label, row in label_perf_cv.items():

    print(f"{label} & " + " & ".join(row) + "\\\\")

print()

for label, row in label_perf_test.items():

    print(f"{label} & " + " & ".join(row) + "\\\\")
