# KuiperHunter Robustness Analysis
This notebook demonstrates the evaluation pipeline: from injection to tracking and metrics.

In [None]:
%load_ext autoreload
%autoreload 2
import torch
import yaml
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from src.models.unet3d import UNet3D
from src.evaluation.robustness import RobustnessSweeper

In [None]:
# Setup: choose a compute device, read the run configuration, and build the model.
CONFIG_PATH = '../config/smoke_test.yaml'
CHECKPOINT_PATH = '../data/checkpoints/model_epoch_5.pth'

# Device preference order: CUDA first, then the MPS backend, otherwise CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

with open(CONFIG_PATH, 'r') as f:
    config = yaml.safe_load(f)

model = UNet3D(n_channels=1, n_classes=1).to(device)
try:
    # NOTE(review): depending on the installed torch version, torch.load may
    # unpickle arbitrary objects -- only point this at checkpoints you trust.
    state_dict = torch.load(CHECKPOINT_PATH, map_location=device)
    model.load_state_dict(state_dict)
    print("Loaded checkpoint.")
except Exception as exc:
    # Deliberate best-effort fallback: the notebook keeps running without the
    # checkpoint. Catching Exception (instead of a bare except) lets
    # KeyboardInterrupt/SystemExit propagate, and printing the reason makes a
    # missing file distinguishable from an incompatible checkpoint.
    print(f"Using random weights. (checkpoint not loaded: {exc!r})")
model.eval();

In [None]:
# Run Sweep: Recall vs Magnitude
SEED = 0
NUM_SAMPLES_PER_POINT = 5  # samples evaluated at each magnitude value

# Seed the global NumPy and PyTorch RNGs so a Restart & Run All repeats the same
# sweep. NOTE(review): this only helps if RobustnessSweeper draws from these
# global generators -- confirm against its implementation.
np.random.seed(SEED)
torch.manual_seed(SEED)

sweeper = RobustnessSweeper(config, model, device)

# Test broad magnitude range
mags = [15.0, 18.0, 20.0, 22.0, 24.0]
df_mag = sweeper.run_sweep('magnitude', mags, num_samples_per_point=NUM_SAMPLES_PER_POINT)

# Leave the result as the cell's last expression so the notebook renders it with
# its rich display (presumably a DataFrame, given the column lookups used for
# plotting) instead of plain printed text.
df_mag

In [None]:
# Plot Results
# Draw recall and precision against magnitude on a single explicit Axes.
fig, ax = plt.subplots(figsize=(8, 5))
for column, marker, label in (('recall', 'o', 'Recall'), ('precision', 's', 'Precision')):
    ax.plot(df_mag['magnitude'], df_mag[column], marker=marker, label=label)
ax.set(
    xlabel='Magnitude (Fainter ->)',
    ylabel='Score',
    title='Detection Performance vs Magnitude',
)
ax.grid(True)
ax.legend()
plt.show()