In [None]:
# Copyright (C) 2024 Mila - Institut québécois d'intelligence artificielle
# SPDX-License-Identifier: Apache-2.0

In [None]:
# This notebook visualizes the metrics per anomaly type.

In [None]:
import os
import sys
import glob

import pandas as pd

sys.path.append("../")
from utils import compute_metrics_per_anomaly_types, plot_distribution_per_group

%matplotlib inline

In [None]:
# User configuration: adapt these two values to your environment.
# expanduser("~") resolves the home directory even when the HOME environment
# variable is not set (e.g. on Windows), where os.environ["HOME"] raises KeyError.
root_directory = os.path.expanduser("~")
# Cable identifier; it is interpolated into the experiment directory name.
cable = "C01"

In [None]:
# Directory holding the pre-processed data (labels.csv is read from here).
data_directory = os.path.join(root_directory, "hq/preprocess_data/tight_crop")
# Directory holding the results of the experiment for the selected cable.
experiment_directory = os.path.join(root_directory, f"results/patchcore/hq/hq_kfold_unsupervised_{cable}")
# Every immediate sub-directory of the experiment is treated as one run.
# glob returns matches in arbitrary order, so sort them to make the
# processing order reproducible across machines and re-runs.
runs_directories = sorted(glob.glob(f"{experiment_directory}/*/"))

In [None]:
# Load and pre-process labels
labels = pd.read_csv(os.path.join(data_directory, "labels.csv"))
# Combine type and grade into a single "<type> <grade>" label; a missing type
# becomes "good" and a missing grade becomes the empty string.
labels["anomaly_types"] = labels["anomaly_type"].fillna("good") + " " + labels["anomaly_grade"].fillna("")
# When both are missing the concatenation yields "good " (trailing space).
# Assign the result back instead of calling replace(..., inplace=True) on the
# column selection: that chained in-place form warns in recent pandas and does
# not update the frame once Copy-on-Write is enabled.
labels["anomaly_types"] = labels["anomaly_types"].replace("good ", "good")
labels["identification"] = labels["identification"].fillna("good")
# Keep only the columns that are merged into the predictions later on.
column_names = ["image_path", "frame_id", "anomaly_types", "identification"]
labels = labels[column_names]

In [None]:
# Accumulate the per-anomaly-type metrics over all the runs of the experiment.
metrics_dict = {}
for run_directory in runs_directories:
    predictions_fname = os.path.join(run_directory, "test_image_predictions.csv")
    if os.path.isfile(predictions_fname):
        # Read the image-level predictions and attach the label columns;
        # the left join keeps every prediction row.
        predictions = pd.read_csv(predictions_fname)
        predictions = predictions.merge(labels, how="left", on="image_path")

        # The image threshold is taken from the first row of the run's
        # normalization statistics and rounded to 6 decimals.
        stats_fname = os.path.join(run_directory, "normalization_stats.csv")
        normalization_stats = pd.read_csv(stats_fname)
        image_threshold = round(normalization_stats["image_threshold"].values[0], 6)

        # Per the original note, the helper updates metrics_dict in place;
        # the returned value is rebound to the same name anyway.
        metrics_dict = compute_metrics_per_anomaly_types(predictions, image_threshold, metrics_dict)
    else:
        # No prediction file: report the run and leave it out of the metrics.
        print(f"Broken run: {run_directory}")

In [None]:
# Stack the metrics of every anomaly type into a single long-format frame with
# an "anomaly_type" column identifying the group of each row.
anomaly_types = sorted(metrics_dict.keys())
# Collect one frame per anomaly type and concatenate once at the end; calling
# pd.concat inside the loop re-copies the accumulated rows at every iteration.
frames = []
for anomaly_type in anomaly_types:
    frame = pd.DataFrame(metrics_dict[anomaly_type])
    frame["anomaly_type"] = anomaly_type
    frames.append(frame)
# df stays None when no metrics were collected (e.g. no valid run was found).
df = pd.concat(frames, ignore_index=True) if frames else None

In [None]:
# Distribution of one metric per anomaly type on the test set (multiple folds).
# Possible options for `metric`: "F1Score", "Precision", "Recall", "AUPR"
metric = "Recall"
# The title is intentionally left empty; a suggested alternative is
# f"Cable {cable[-1]} test set (multiple folds)\n{metric} per anomaly type"
plot_distribution_per_group(df, "anomaly_type", [metric], "Anomaly types (# of folds)", metric, title="")