# OOD Detection Results Analysis

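This notebook loads out-of-distribution (OOD) detection results from `stats.json`, keeps only the runs whose confusion matrix covers at least 100 samples, and displays them sorted by AUROC (best first), followed by a per-scoring-function summary.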

In [1]:
import json
import pandas as pd
import numpy as np
from pathlib import Path

In [2]:
# Load the stats.json file (path is relative to the notebook's working directory)
stats_file = Path("stats.json")

# JSON text is UTF-8 by specification; state the encoding explicitly so the
# read does not depend on the platform's locale default (e.g. cp1252 on Windows).
with open(stats_file, "r", encoding="utf-8") as f:
    stats_data = json.load(f)

# Quick sanity check on how many entries were read
print(f"Loaded {len(stats_data)} results")

Loaded 302 results


In [3]:
# Build a DataFrame from the loaded stats entries
df = pd.DataFrame(stats_data)

# Unpack each confusion matrix, laid out as [[TN, FP], [FN, TP]], into four
# scalar columns for easier viewing (optional). The mapping gives the
# (row, column) position of every cell inside the 2x2 matrix.
cell_positions = {
    'true_negatives': (0, 0),
    'false_positives': (0, 1),
    'false_negatives': (1, 0),
    'true_positives': (1, 1),
}
for cell_name, (cm_row, cm_col) in cell_positions.items():
    # Bind the position through default arguments so each lambda keeps its own cell
    df[cell_name] = df['confusion_matrix'].apply(
        lambda matrix, r=cm_row, c=cm_col: matrix[r][c]
    )

# Total number of samples covered by the confusion matrix
df['total_samples'] = (
    df['true_negatives']
    + df['false_positives']
    + df['false_negatives']
    + df['true_positives']
)

# Drop rows with fewer than 100 total samples; .copy() detaches the result
# from the unfiltered frame
has_enough_samples = df['total_samples'] >= 100
df = df[has_enough_samples].copy()

# Order by AUROC, best first, and renumber the rows 0..n-1
df_sorted = (
    df
    .sort_values(by='auroc', ascending=False)
    .reset_index(drop=True)
)

# Headline numbers for the filtered set
print(f"Total results: {len(df_sorted)}")
print(f"\nBest AUROC: {df_sorted['auroc'].max():.4f}")
print(f"Worst AUROC: {df_sorted['auroc'].min():.4f}")
print(f"Mean AUROC: {df_sorted['auroc'].mean():.4f}")

Total results: 267

Best AUROC: 0.6912
Worst AUROC: 0.3671
Mean AUROC: 0.4969


In [7]:
# Presentation table: the AUROC-sorted results restricted to the columns
# worth showing, in display order
display_columns = [
    'transformations', 'scoring_function',
    'auroc', 'true_positive_rate', 'false_positive_rate',
    'true_positives', 'true_negatives', 'false_positives', 'false_negatives',
]

# Work on a copy so df_sorted keeps its numeric values
df_display = df_sorted[display_columns].copy()

# Render the AUROC and rate columns as fixed 4-decimal strings for readability.
# From here on these three columns hold text, not numbers.
for rate_column in ('auroc', 'true_positive_rate', 'false_positive_rate'):
    df_display[rate_column] = df_display[rate_column].apply(lambda x: f"{x:.4f}")

# Save the formatted table (without the row index); the CSV therefore
# contains the 4-decimal strings produced above
df_display.to_csv('df_display.csv', index=False)

# Last expression of the cell, so the table is rendered as the cell output
df_display


Unnamed: 0,transformations,scoring_function,auroc,true_positive_rate,false_positive_rate,true_positives,true_negatives,false_positives,false_negatives
0,pool_mean_std_PCA(n_components=50),knn(k=10),0.6912,0.4040,0.1960,402,202,298,98
1,pool_mean_std_PCA(n_components=50),knn(k=20),0.6827,0.4140,0.1860,407,207,293,93
2,pool_mean_std_PCA(n_components=50),knn(k=50),0.6740,0.3840,0.2160,392,192,308,108
3,pool_mean_std_PCA(n_components=20),knn(k=10),0.6708,0.3820,0.2180,391,191,309,109
4,pool_mean_std_PCA(n_components=20),knn(k=20),0.6675,0.3680,0.2320,384,184,316,116
...,...,...,...,...,...,...,...,...,...
262,pool_last_k_tokens(k=5)_KMeansDistance(n_clust...,knn(k=50),0.3796,0.2200,0.3800,310,110,390,190
263,pool_last_k_tokens(k=5)_KMeansDistance(n_clust...,knn(k=10),0.3792,0.2180,0.3820,309,109,391,191
264,pool_last_k_tokens(k=5)_KMeansDistance(n_clust...,knn(k=20),0.3790,0.2160,0.3840,308,108,392,192
265,pool_last_k_tokens(k=10)_KMeansDistance(n_clus...,knn(k=50),0.3790,0.2180,0.3820,309,109,391,191


In [5]:
# Show the top results. df_display inherits its row order from df_sorted
# (AUROC descending), so the first rows are the best-scoring ones.
# One constant drives both the heading and the slice so they cannot drift apart.
TOP_N = 20
print(f"Top {TOP_N} Results by AUROC:\n")
df_display.head(TOP_N)

Top 20 Results by AUROC:



Unnamed: 0,transformations,scoring_function,auroc,true_positive_rate,false_positive_rate,true_positives,true_negatives,false_positives,false_negatives
0,pool_mean_std_PCA(n_components=50),knn(k=10),0.6912,0.404,0.196,402,202,298,98
1,pool_mean_std_PCA(n_components=50),knn(k=20),0.6827,0.414,0.186,407,207,293,93
2,pool_mean_std_PCA(n_components=50),knn(k=50),0.674,0.384,0.216,392,192,308,108
3,pool_mean_std_PCA(n_components=20),knn(k=10),0.6708,0.382,0.218,391,191,309,109
4,pool_mean_std_PCA(n_components=20),knn(k=20),0.6675,0.368,0.232,384,184,316,116
5,pool_mean_std_PCA(n_components=50),mahalanobis_distance,0.6533,0.398,0.202,399,199,301,101
6,pool_mean_std_PCA(n_components=10),mahalanobis_distance,0.6532,0.404,0.196,402,202,298,98
7,pool_mean_std_PCA(n_components=20),knn(k=50),0.653,0.35,0.248,376,175,325,124
8,pool_mean_std_PCA(n_components=10),knn(k=10),0.6517,0.38,0.22,390,190,310,110
9,pool_mean_std_PCA(n_components=10),knn(k=20),0.6478,0.368,0.232,384,184,316,116


In [6]:
# AUROC statistics per scoring function, highest mean first
print("Summary by Scoring Function:\n")

# Aggregate the AUROC values of each scoring function
auroc_by_scorer = df_sorted.groupby('scoring_function')['auroc']
scoring_summary = auroc_by_scorer.agg(['count', 'mean', 'std', 'min', 'max'])
scoring_summary = scoring_summary.sort_values('mean', ascending=False)

# Replace the raw aggregate names with presentation headers (same order)
scoring_summary.columns = ['Count', 'Mean AUROC', 'Std AUROC', 'Min AUROC', 'Max AUROC']

# Every statistic except the count is shown as a 4-decimal string
for stat_column in ('Mean AUROC', 'Std AUROC', 'Min AUROC', 'Max AUROC'):
    scoring_summary[stat_column] = scoring_summary[stat_column].apply(lambda x: f"{x:.4f}")

# Last expression of the cell, so the summary is rendered as the cell output
scoring_summary

Summary by Scoring Function:



Unnamed: 0_level_0,Count,Mean AUROC,Std AUROC,Min AUROC,Max AUROC
scoring_function,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
identity,55,0.5191,0.0466,0.4338,0.6109
knn(k=10),54,0.5004,0.076,0.3792,0.6912
knn(k=20),53,0.4957,0.0762,0.379,0.6827
knn(k=50),53,0.4883,0.0778,0.379,0.674
mahalanobis_distance,52,0.4799,0.0697,0.3671,0.6533
