# OOD Detection Results Analysis

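This notebook loads the OOD detection results from `stats.json`, keeps only the entries whose confusion matrix covers at least 100 samples, and displays them sorted by AUROC (best first), followed by a summary of AUROC per scoring function.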

In [None]:
import json
import pandas as pd
import numpy as np
from pathlib import Path

In [None]:
# Load the raw results from stats.json (path is relative to the notebook's
# working directory).
stats_file = Path("stats.json")

# JSON text is UTF-8; pass the encoding explicitly so the read does not depend
# on the platform's default locale encoding.
with open(stats_file, "r", encoding="utf-8") as f:
    stats_data = json.load(f)

print(f"Loaded {len(stats_data)} results")

In [None]:
# Convert the loaded records to a DataFrame
df = pd.DataFrame(stats_data)

# Unpack the confusion matrix into scalar columns for easier viewing.
# The confusion matrix is [[TN, FP], [FN, TP]]
confusion_cells = {
    'true_negatives': (0, 0),
    'false_positives': (0, 1),
    'false_negatives': (1, 0),
    'true_positives': (1, 1),
}
for column, (row, col) in confusion_cells.items():
    # Bind row/col as defaults so each lambda reads its own matrix cell.
    df[column] = df['confusion_matrix'].map(lambda cm, r=row, c=col: cm[r][c])

# Calculate total samples as the sum of all four confusion-matrix cells
df['total_samples'] = (
    df['true_negatives']
    + df['false_positives']
    + df['false_negatives']
    + df['true_positives']
)

# Minimum number of samples a result must cover to be kept in the analysis.
MIN_TOTAL_SAMPLES = 100
df = df[df['total_samples'] >= MIN_TOTAL_SAMPLES].copy()

# Sort by AUROC (descending - best first). A stable sort keeps results with
# identical AUROC in their original input order, so the ranking (and anything
# exported from it) is reproducible from run to run.
df_sorted = (
    df.sort_values('auroc', ascending=False, kind='mergesort')
    .reset_index(drop=True)
)

print(f"Total results: {len(df_sorted)}")
print(f"\nBest AUROC: {df_sorted['auroc'].max():.4f}")
print(f"Worst AUROC: {df_sorted['auroc'].min():.4f}")
print(f"Mean AUROC: {df_sorted['auroc'].mean():.4f}")

In [None]:
# Build the presentation table from the AUROC-sorted results.
# Columns appear in the table in the order listed here.
display_columns = [
    'transformations', 'scoring_function',
    'auroc', 'true_positive_rate', 'false_positive_rate',
    'true_positives', 'true_negatives', 'false_positives', 'false_negatives',
]

df_display = df_sorted.loc[:, display_columns].copy()

# Render the metric columns as fixed 4-decimal strings for readability.
# Note: from here on these three columns hold text, not numbers.
four_decimals = "{:.4f}".format
for metric in ('auroc', 'true_positive_rate', 'false_positive_rate'):
    df_display[metric] = df_display[metric].map(four_decimals)

# Export the formatted table (written after formatting, so the CSV carries the
# 4-decimal strings).
df_display.to_csv('df_display.csv', index=False)

# Show the whole table as the cell output
df_display


In [None]:
# Preview the first 20 rows of the display table. df_display is derived from
# the AUROC-sorted frame, so these are the highest-AUROC results.
top_20_heading = "Top 20 Results by AUROC:\n"
print(top_20_heading)
df_display.iloc[:20]

In [None]:
# Aggregate AUROC per scoring function, ranked by mean AUROC (best first)
print("Summary by Scoring Function:\n")

auroc_by_scoring = df_sorted.groupby('scoring_function')['auroc']
scoring_summary = (
    auroc_by_scoring
    .agg(['count', 'mean', 'std', 'min', 'max'])
    .sort_values('mean', ascending=False)
    .rename(columns={
        'count': 'Count',
        'mean': 'Mean AUROC',
        'std': 'Std AUROC',
        'min': 'Min AUROC',
        'max': 'Max AUROC',
    })
)

# Format the AUROC statistics as 4-decimal strings; 'Count' is left numeric.
as_four_decimals = "{:.4f}".format
for stat_column in ('Mean AUROC', 'Std AUROC', 'Min AUROC', 'Max AUROC'):
    scoring_summary[stat_column] = scoring_summary[stat_column].map(as_four_decimals)

scoring_summary