# Baseline Evaluation
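This notebook computes baseline DTW verification performance (EER and ROC AUC) for each distance variant using a simple global threshold rule, and then examines how the EER varies from user to user.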

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, roc_auc_score
import pathlib

## Load and merge data

In [None]:
# Constants shared by the evaluation cells below.
label_map = {'genuine': 1, 'forgery': 0}
distance_cols = ['d_raw', 'd_norm1', 'd_norm2']

# Join the pair metadata with the cached DTW distances on the shared pair id,
# then encode the string labels as 1/0 for scikit-learn.
pairs = pd.read_parquet('data/pairs_meta.parquet')
dtw = pd.read_parquet('data/dtw_cache.parquet')
df = pd.merge(pairs, dtw, on='pair_id')
y = df['label'].map(label_map).values

## Helper functions

In [None]:
def compute_metrics(scores, labels):
    """Compute the ROC curve, AUC and equal error rate for distance scores.

    ``scores`` are treated as distances (smaller means "more likely
    positive"), so they are negated before being handed to scikit-learn,
    which ranks larger values as more positive.

    Parameters
    ----------
    scores : array-like
        One numeric distance per sample.
    labels : array-like
        Binary ground truth aligned with ``scores`` (1 = positive class).

    Returns
    -------
    fpr, tpr : ndarray
        ROC operating points as returned by ``roc_curve``.
    auc : float
        Area under the ROC curve.
    eer : float
        Mean of FPR and FNR at the operating point where they are closest.
    eer_thr : float
        Threshold at that operating point, expressed in the units of
        ``scores``: a sample is predicted positive when ``score <= eer_thr``.

    Raises
    ------
    ValueError
        If ``labels`` contains fewer than two distinct classes, since the
        ROC curve and EER are undefined in that case.
    """
    # Coerce up front so plain lists work with the unary minus below.
    scores = np.asarray(scores, dtype=float)
    labels = np.asarray(labels)
    if np.unique(labels).size < 2:
        raise ValueError(
            "compute_metrics needs both classes in labels to compute ROC/EER"
        )

    similarity = -scores
    fpr, tpr, thr = roc_curve(labels, similarity)
    auc = roc_auc_score(labels, similarity)
    fnr = 1 - tpr
    # Nearest-point EER: pick the threshold where FPR and FNR differ least.
    idx = np.nanargmin(np.abs(fnr - fpr))
    eer = (fpr[idx] + fnr[idx]) / 2
    # roc_curve thresholds live in negated-score space; flip the sign so the
    # returned threshold can be applied directly to the original distances.
    eer_thr = -thr[idx]
    return fpr, tpr, auc, eer, eer_thr

## Global evaluation

In [None]:
# Evaluate every distance variant over all pairs: keep the ROC points for
# plotting and collect EER/AUC into a summary table.
results = []
roc_data = {}
for col in distance_cols:
    fpr, tpr, auc, eer, thr = compute_metrics(df[col].values, y)
    results.append({'distance': col, 'eer': eer, 'auc': auc})
    roc_data[col] = (fpr, tpr)
metrics_df = pd.DataFrame(results)
# Make sure the output directory exists so the CSV write does not fail on a
# fresh checkout.
pathlib.Path('results').mkdir(exist_ok=True)
metrics_df.to_csv('results/baseline_metrics.csv', index=False)
metrics_df

## Plot ROC and DET curves

In [None]:
# Left panel: ROC (FPR vs TPR). Right panel: FPR vs FNR trade-off, titled DET.
# One line per distance column in both panels.
fig, ax = plt.subplots(1, 2, figsize=(12,5))
roc_ax, det_ax = ax
for col, (fpr, tpr) in roc_data.items():
    fnr = 1 - tpr
    roc_ax.plot(fpr, tpr, label=col)
    det_ax.plot(fpr, fnr, label=col)
roc_ax.set(title='ROC', xlabel='FPR', ylabel='TPR')
det_ax.set(title='DET', xlabel='FPR', ylabel='FNR')
roc_ax.legend()
det_ax.legend()
fig.tight_layout()
# Ensure the output directory exists before writing the figure.
pathlib.Path('figures').mkdir(exist_ok=True)
fig.savefig('figures/baseline_curves.png')
fig

## Per-user threshold analysis

In [None]:
# Compute one EER per user (grouped by 'userA') and per distance variant.
user_metrics = []
for user, grp in df.groupby('userA'):
    entry = {'user': user}
    # Map the labels once per user instead of once per distance column.
    labels = grp['label'].map(label_map)
    # ROC/EER are undefined when a user has only one class; record NaN for
    # that user instead of letting compute_metrics abort the whole loop.
    has_both_classes = labels.nunique(dropna=False) >= 2
    for col in distance_cols:
        if has_both_classes:
            _, _, _, eer, _ = compute_metrics(grp[col].values, labels.values)
        else:
            eer = np.nan
        entry[col] = eer
    user_metrics.append(entry)
user_df = pd.DataFrame(user_metrics)
user_df.describe()[distance_cols]

In [None]:
# Histogram of the per-user EER, one panel per distance column, saved to disk
# before being shown.
user_df.hist(column=distance_cols, bins=20, figsize=(10,4), layout=(1,3))
plt.tight_layout()
plt.savefig('figures/user_eer_hist.png')
plt.show()