In [None]:
# Copyright (C) 2024 Mila - Institut québécois d'intelligence artificielle
# SPDX-License-Identifier: CC-BY-4.0

In [None]:
import glob
import os
import sys

import pandas as pd
from sklearn.metrics import auc, precision_recall_curve, roc_curve
from sklearn.preprocessing import MinMaxScaler

In [None]:
# TODO: point `results_folder` at the directory that contains predictions.csv.
results_folder = "results"
# CSV file with the model predictions, expected directly inside `results_folder`.
pred_file = os.path.join(results_folder, "predictions.csv")

In [None]:
# Load the predictions table; later cells read its "score" and "image_path" columns.
pred_df = pd.read_csv(pred_file)
pred_df

In [None]:
# The "score" column is presumably a stringified tensor such as "tensor([0.1234])"
# (TODO confirm against the prediction writer). Strip the wrapper and parse the number.
# - astype(str) keeps this cell working when the CSV already holds plain numbers.
# - regex=False is required: "tensor([" and "])" are not valid regular expressions,
#   and pandas < 2.0 treats the pattern as a regex by default.
score_text = pred_df["score"].astype(str)
score_text = score_text.str.replace("tensor([", "", regex=False)
score_text = score_text.str.replace("])", "", regex=False)
# Anything that still fails to parse becomes NaN instead of raising.
pred_df["anomaly_score"] = pd.to_numeric(score_text, errors="coerce")

pred_df

In [None]:
# Root folder of the k-fold label CSV files; a later cell globs one sub-folder per cable.
kfold_labels = "CableInspect-AD/kfold_labels"
# Cable identifiers to evaluate; each is used as a sub-folder name under `kfold_labels`.
cables = ["C01", "C02", "C03"]

In [None]:
def calculate_aupr(y_true, y_score):
    """Return the area under the precision-recall curve.

    Args:
        y_true: Ground-truth labels, passed to ``precision_recall_curve``.
        y_score: Predicted scores, passed to ``precision_recall_curve``.

    Returns:
        The area computed by ``auc`` with recall on the x-axis and precision
        on the y-axis.
    """
    pr_curve = precision_recall_curve(y_true, y_score)
    precisions, recalls = pr_curve[0], pr_curve[1]
    return auc(recalls, precisions)


def calculate_auroc(y_true, y_score):
    """Return the area under the ROC curve.

    Args:
        y_true: Ground-truth labels, passed to ``roc_curve``.
        y_score: Predicted scores, passed to ``roc_curve``.

    Returns:
        The area computed by ``auc`` with the false-positive rate on the x-axis
        and the true-positive rate on the y-axis.
    """
    roc_points = roc_curve(y_true, y_score)
    false_positive_rate, true_positive_rate = roc_points[0], roc_points[1]
    return auc(false_positive_rate, true_positive_rate)


def generate_and_save_prediction_stats(
    val_predictions: pd.DataFrame, run_name: str, test_predictions: pd.DataFrame
) -> None:
    """Compute threshold-independent metrics on the test split and save them as CSV.

    A ``MinMaxScaler`` is fitted on the ``anomaly_score`` column of
    ``val_predictions`` and applied to the test scores. AUPR and AUROC are then
    computed on the test split and written to
    ``<results_folder>/results/<run_name>/indp_metrics.csv``.

    Neither input frame is modified.

    Args:
        val_predictions: Frame with an ``anomaly_score`` column; used only to
            fit the score scaler.
        run_name: Name of the run; used as the output sub-folder name.
        test_predictions: Frame with ``anomaly_score`` and ``label_index``
            columns on which the metrics are computed.

    Returns:
        None. The metrics are written to disk.
    """
    output_dir = os.path.join(results_folder, "results", run_name)
    # to_csv does not create missing parent folders, so make sure the run folder exists.
    os.makedirs(output_dir, exist_ok=True)

    scaler = MinMaxScaler()
    scaler.fit(val_predictions[["anomaly_score"]])
    # Keep the normalized scores in a local list instead of writing a new column
    # into the caller's frame (which is typically a slice of a larger frame).
    test_scores = scaler.transform(test_predictions[["anomaly_score"]]).ravel().tolist()
    test_labels = test_predictions["label_index"].tolist()

    aupr = calculate_aupr(test_labels, test_scores)
    auroc = calculate_auroc(test_labels, test_scores)

    indp_metrics = pd.DataFrame({"AUPR": [aupr], "AUROC": [auroc]})
    metrics_fname = os.path.join(output_dir, "indp_metrics.csv")
    indp_metrics.to_csv(metrics_fname, index=False)


In [None]:
# Evaluate every k-fold label file of every cable and write its metrics to disk.
for cable in cables:
    # glob returns files in filesystem order; sort so runs are processed reproducibly.
    labels = sorted(glob.glob(os.path.join(kfold_labels, cable, "*")))
    for label in labels:
        print(label)
        # Run name = label file name without its extension.
        run_name = os.path.splitext(os.path.basename(label))[0]
        label_df = pd.read_csv(label)
        # Drop every row whose image_path occurs more than once (keep=False removes all copies).
        duplicates = label_df["image_path"].duplicated(keep=False)
        label_df = label_df[~duplicates]
        # Keep only images that have both a prediction and a label.
        df_merged = pd.merge(pred_df, label_df, on="image_path", how="inner")
        # .copy() hands independent frames to the stats function rather than slices of df_merged.
        train_preds = df_merged[df_merged["split"] == "train"].copy()
        test_preds = df_merged[df_merged["split"] == "test"].copy()

        # The train split is passed as the validation set used to fit the score scaler.
        generate_and_save_prediction_stats(train_preds, run_name, test_preds)

In [None]:
# Folder under which generate_and_save_prediction_stats writes one sub-folder per run.
results_stats_folder = os.path.join(results_folder, "results")

In [None]:
# For each cable, stack the metrics of all its runs and save them as one CSV.
for cable in cables:
    # Sort the run folders so the row order of the aggregated CSV is reproducible.
    cable_results_files = sorted(
        glob.glob(os.path.join(results_stats_folder, f"label_cable-{cable}_anomaly_id*"))
    )
    # Read every run first and concatenate once, instead of growing a frame inside the loop.
    run_metrics = [
        pd.read_csv(os.path.join(cable_file, "indp_metrics.csv")) for cable_file in cable_results_files
    ]
    # pd.concat raises on an empty list; fall back to an empty frame when no run matched.
    cable_results_df = pd.concat(run_metrics, axis=0) if run_metrics else pd.DataFrame()

    cable_results_df.to_csv(os.path.join(results_stats_folder, f"{cable}_aggregated_results.csv"), index=False)
# Shows the aggregated metrics of the last cable only.
cable_results_df

In [None]:
# Combine the per-cable aggregated metrics into one frame, tagging each row with its cable.
# Rows are ordered with the last cable in `cables` first.
cable_frames = [
    pd.read_csv(os.path.join(results_stats_folder, f"{cable}_aggregated_results.csv")).assign(cable=cable)
    for cable in reversed(cables)
]
# pd.concat raises on an empty list; fall back to an empty frame when `cables` is empty.
all_results = pd.concat(cable_frames, axis=0) if cable_frames else pd.DataFrame()

In [None]:
# Display the combined per-run metrics of all cables.
all_results

In [None]:
# Mean and standard deviation of AUROC over all rows of `all_results`, rounded to two decimals.
all_results["AUROC"].agg(["mean", "std"]).round(2)