# OOD Detection Results Analysis

This notebook loads the results from `stats.json` and displays them sorted by AUROC.

In [1]:
import json
import pandas as pd
import numpy as np
from pathlib import Path

In [2]:
# Load the stats.json file
stats_file = Path("stats.json")

with open(stats_file, "r") as f:
    stats_data = json.load(f)

print(f"Loaded {len(stats_data)} results")

Loaded 281 results


In [3]:
# Convert to DataFrame
df = pd.DataFrame(stats_data)

# Extract confusion matrix values for easier viewing (optional)
# The confusion matrix is [[TN, FP], [FN, TP]]
df['true_negatives'] = df['confusion_matrix'].apply(lambda x: x[0][0])
df['false_positives'] = df['confusion_matrix'].apply(lambda x: x[0][1])
df['false_negatives'] = df['confusion_matrix'].apply(lambda x: x[1][0])
df['true_positives'] = df['confusion_matrix'].apply(lambda x: x[1][1])

# Calculate total samples
df['total_samples'] = df['true_negatives'] + df['false_positives'] + df['false_negatives'] + df['true_positives']

# Filter rows with at least 100 total samples
df = df[df['total_samples'] >= 100].copy()

# Sort by AUROC (descending - best first)
df_sorted = df.sort_values('auroc', ascending=False).reset_index(drop=True)

print(f"Total results: {len(df_sorted)}")
print(f"\nBest AUROC: {df_sorted['auroc'].max():.4f}")
print(f"Worst AUROC: {df_sorted['auroc'].min():.4f}")
print(f"Mean AUROC: {df_sorted['auroc'].mean():.4f}")

Total results: 275

Best AUROC: 0.7759
Worst AUROC: 0.3388
Mean AUROC: 0.5268


In [4]:
# Display the full table sorted by AUROC
# Select columns to display
display_columns = [
    'transformations',
    'scoring_function',
    'auroc',
    'true_positive_rate',
    'false_positive_rate',
    'true_positives',
    'true_negatives',
    'false_positives',
    'false_negatives'
]

df_display = df_sorted[display_columns].copy()

# Format numeric columns for better readability
df_display['auroc'] = df_display['auroc'].apply(lambda x: f"{x:.4f}")
df_display['true_positive_rate'] = df_display['true_positive_rate'].apply(lambda x: f"{x:.4f}")
df_display['false_positive_rate'] = df_display['false_positive_rate'].apply(lambda x: f"{x:.4f}")

df_display.to_csv('df_display.csv', index=False)

# Display the table
df_display


Unnamed: 0,transformations,scoring_function,auroc,true_positive_rate,false_positive_rate,true_positives,true_negatives,false_positives,false_negatives
0,pool_mean_std_PCA(n_components=50),knn(k=3),0.7759,0.4500,0.1500,204,108,132,36
1,pool_mean_std_PCA(n_components=50),knn(k=2),0.7754,0.4417,0.1583,202,106,134,38
2,pool_mean_std_PCA(n_components=50),knn(k=5),0.7750,0.4292,0.1708,199,103,137,41
3,pool_mean_std_PCA(n_components=50),knn(k=5),0.7749,0.4250,0.1750,198,102,138,42
4,pool_mean_std_PCA(n_components=50),knn(k=1),0.7732,0.4542,0.1458,205,109,131,35
...,...,...,...,...,...,...,...,...,...
270,pool_last_k_tokens(k=10)_KMeansDistance(n_clus...,mahalanobis_distance,0.3621,0.2042,0.3958,145,49,191,95
271,pool_last_k_tokens(k=5)_KMeansDistance(n_clust...,mahalanobis_distance,0.3585,0.2292,0.3708,151,55,185,89
272,pool_last_k_tokens(k=5)_KMeansDistance(n_clust...,mahalanobis_distance,0.3562,0.2125,0.3875,147,51,189,93
273,pool_last_k_tokens(k=10)_KMeansDistance(n_clus...,mahalanobis_distance,0.3465,0.2083,0.3917,146,50,190,94


In [5]:
# Show top 20 results
print("Top 20 Results by AUROC:\n")
df_display.head(20)

Top 20 Results by AUROC:



Unnamed: 0,transformations,scoring_function,auroc,true_positive_rate,false_positive_rate,true_positives,true_negatives,false_positives,false_negatives
0,pool_mean_std_PCA(n_components=50),knn(k=3),0.7759,0.45,0.15,204,108,132,36
1,pool_mean_std_PCA(n_components=50),knn(k=2),0.7754,0.4417,0.1583,202,106,134,38
2,pool_mean_std_PCA(n_components=50),knn(k=5),0.775,0.4292,0.1708,199,103,137,41
3,pool_mean_std_PCA(n_components=50),knn(k=5),0.7749,0.425,0.175,198,102,138,42
4,pool_mean_std_PCA(n_components=50),knn(k=1),0.7732,0.4542,0.1458,205,109,131,35
5,pool_mean_std_PCA(n_components=50),knn(k=10),0.7635,0.425,0.175,198,102,138,42
6,pool_mean_std_PCA(n_components=50),knn(k=20),0.7489,0.3917,0.2083,190,94,146,50
7,pool_mean_std_PCA(n_components=20),knn(k=10),0.7389,0.4083,0.1917,194,98,142,46
8,pool_mean_std_Subsample(50),knn(k=5),0.733,0.4458,0.1542,203,107,133,37
9,pool_mean_std_PCA(n_components=20),knn(k=20),0.7293,0.4125,0.1875,195,99,141,45


In [6]:
# Summary statistics by scoring function
print("Summary by Scoring Function:\n")
scoring_summary = df_sorted.groupby('scoring_function')['auroc'].agg([
    'count', 'mean', 'std', 'min', 'max'
]).sort_values('mean', ascending=False)
scoring_summary.columns = ['Count', 'Mean AUROC', 'Std AUROC', 'Min AUROC', 'Max AUROC']
scoring_summary['Mean AUROC'] = scoring_summary['Mean AUROC'].apply(lambda x: f"{x:.4f}")
scoring_summary['Std AUROC'] = scoring_summary['Std AUROC'].apply(lambda x: f"{x:.4f}")
scoring_summary['Min AUROC'] = scoring_summary['Min AUROC'].apply(lambda x: f"{x:.4f}")
scoring_summary['Max AUROC'] = scoring_summary['Max AUROC'].apply(lambda x: f"{x:.4f}")
scoring_summary

Summary by Scoring Function:



Unnamed: 0_level_0,Count,Mean AUROC,Std AUROC,Min AUROC,Max AUROC
scoring_function,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
knn(k=3),2,0.7118,0.0906,0.6477,0.7759
knn(k=2),2,0.7052,0.0992,0.6351,0.7754
knn(k=1),2,0.7046,0.097,0.636,0.7732
knn(k=5),10,0.6517,0.0935,0.5311,0.775
identity,60,0.5513,0.0877,0.444,0.7256
knn(k=10),50,0.5217,0.0798,0.4154,0.7635
knn(k=20),50,0.518,0.0813,0.4036,0.7489
knn(k=50),50,0.5128,0.0818,0.3907,0.7196
mahalanobis_distance,49,0.4778,0.0778,0.3388,0.6752
