In [None]:
# Copyright (C) 2022 Mila - Institut québécois d'intelligence artificielle
# SPDX-License-Identifier: Apache-2.0

In [None]:
# This notebook visualizes the metrics per unique anomaly.

In [None]:
import glob
import os
import sys

import pandas as pd

sys.path.append("../")
from utils import compute_metrics, plot_distribution_per_group

%matplotlib inline

In [None]:
# To adapt: base directory under which the "results/" tree is looked up.
# expanduser("~") resolves the user's home directory without requiring the HOME
# environment variable to exist (os.environ["HOME"] raises KeyError when it is
# unset, e.g. on Windows); on POSIX it still honours HOME when it is set.
root_directory = os.path.expanduser("~")

In [None]:
cables = ["C01", "C02", "C03"]
metrics_lst = ["F1Score", "Precision", "Recall", "AUPR"]
# One list of per-run values for every (cable, metric) pair. The dict is rebuilt
# from scratch each time this cell runs, so re-running does not duplicate entries.
metrics_dict = {cable: {k: [] for k in metrics_lst} for cable in cables}
for cable in cables:
    experiment_directory = os.path.join(root_directory, f"results/patchcore/hq/hq_kfold_unsupervised_{cable}")
    # glob returns entries in arbitrary order; sort them so runs are always
    # processed (and their metrics stored) in the same order.
    runs_directories = sorted(glob.glob(f"{experiment_directory}/*/"))
    for run_directory in runs_directories:
        predictions_fname = os.path.join(run_directory, "test_identification_predictions.csv")
        normalization_stats_fname = os.path.join(run_directory, "normalization_stats.csv")
        # A run is usable only if both files exist; report and skip it otherwise
        # instead of failing the whole cell on the first incomplete run.
        if not (os.path.isfile(predictions_fname) and os.path.isfile(normalization_stats_fname)):
            print(f"Broken run: {run_directory}")
            continue
        # Load predictions
        predictions = pd.read_csv(predictions_fname)
        # Get image threshold (first row of the stats file, rounded to 6 decimals)
        normalization_stats = pd.read_csv(normalization_stats_fname)
        image_threshold = round(normalization_stats["image_threshold"].values[0], 6)
        # Compute the requested metrics for this run.
        # NOTE(review): compute_metrics is expected to return one value per entry
        # of metrics_lst, in the same order (they are zipped below) - confirm in utils.
        scores = compute_metrics(
            predictions["target"],
            predictions["anomaly_score"],
            image_threshold,
            metrics_lst,
        )
        for k, v in zip(metrics_lst, scores):
            metrics_dict[cable][k].append(v)

In [None]:
# Stack the per-cable metrics into one long frame: one row per run, one column
# per metric, plus a "cable" column identifying the group.
# The frames are built first and concatenated once, rather than growing the
# result with a concat on every iteration.
cables = sorted(metrics_dict.keys())
df = pd.concat(
    [pd.DataFrame(metrics_dict[cable]).assign(cable=cable) for cable in cables],
    ignore_index=True,
)

In [None]:
# Human-readable cable names used as group labels in the plot.
mapping_cable = {"C01": "Cable 1", "C02": "Cable 2", "C03": "Cable 3"}
# Assign the result back to the column: calling replace(..., inplace=True) on
# df["cable"] acts on an intermediate Series, which raises a chained-assignment
# warning in recent pandas and leaves df unchanged under Copy-on-Write.
df["cable"] = df["cable"].replace(mapping_cable)

In [None]:
# Distribution of a single metric, grouped by cable
# (ID level test set, multiple folds).
# Supported values: "F1Score", "Precision", "Recall", "AUPR"
metric = "Recall"
# Bounds handed to the plot helper through its ylim argument
ylim = dict(ymax=1.0, ymin=0.6)
plot_distribution_per_group(
    df,
    "cable",
    [metric],
    "",  # alternative: "Cable ID (# of folds)"
    metric,
    ylim=ylim,
    title="",  # alternative: f"{metric} ID level test set (multiple folds)"
)