# 06: Anomaly Detection Validation

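Validate the anomaly detector by injecting synthetic anomalies into the feature data and measuring how well the detector recovers them (precision, recall, and F1-score).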

In [None]:
import sys
import os

# Set working directory to project root.
# `__file__` is not defined inside a notebook kernel, and the previous
# `os.path.abspath('__file__')` used the quoted *string*, which simply resolved
# back to the current directory. Only the notebooks/ check below has any effect:
# when the kernel was started inside notebooks/, step up one level.
if os.path.basename(os.getcwd()) == 'notebooks':
    os.chdir('..')
print(f"Working dir: {os.getcwd()}")

# Register src/ by absolute path so imports keep resolving even if the working
# directory changes later, and skip the insert when the cell is re-run.
src_dir = os.path.abspath('src')
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)

import pandas as pd
import numpy as np
import plotly.express as px

from validation_anomaly import (
    inject_synthetic_anomalies,
    run_anomaly_detector,
    evaluate_anomaly_detection,
    run_anomaly_validation
)

## Inject Synthetic Anomalies

In [None]:
# Load the processed model features from disk.
features = pd.read_parquet('data/processed/model_features.parquet')

# Injection settings: 5% anomaly rate, plus the spike and drop magnitudes
# handed to the helper (presumably multiplicative factors; confirm in
# validation_anomaly).
injection_settings = dict(
    anomaly_rate=0.05,
    spike_magnitude=3.0,
    drop_magnitude=0.3,
)
df_corrupted, ground_truth = inject_synthetic_anomalies(features, **injection_settings)

# ground_truth is summed for the injected count and measured with len() for the row total.
print(f"Injected {ground_truth.sum()} synthetic anomalies out of {len(ground_truth)} rows")

## Run Detection

In [None]:
# Candidate feature columns; only those present in the corrupted frame are kept,
# in the order listed here.
available_columns = df_corrupted.columns
feature_cols = [
    name
    for name in (
        'bio_update_child',
        'demo_update_child',
        'update_backlog_child',
        'completion_rate_child',
    )
    if name in available_columns
]
print(f"Using features: {feature_cols}")

# Run the detector on the corrupted data; the sum of its output is reported as
# the number of detected anomalies.
predictions = run_anomaly_detector(df_corrupted, feature_cols)
print(f"Detected: {predictions.sum()} anomalies")

## Evaluate Metrics

In [None]:
# Score the detector's predictions against the injected ground truth.
# The result is read below through the keys 'precision', 'recall' and 'f1_score'.
metrics = evaluate_anomaly_detection(ground_truth, predictions)

# Acceptance targets, named once so the comparison and the message cannot drift apart.
precision_target = 0.8
recall_target = 0.6

print("ANOMALY DETECTION METRICS")
print("="*40)
print(f"Precision: {metrics['precision']:.3f}")
print(f"Recall: {metrics['recall']:.3f}")
print(f"F1-Score: {metrics['f1_score']:.3f}")

if metrics['precision'] >= precision_target:
    print(f"\n✅ PASS: Precision ≥ {precision_target}")
else:
    # Three decimals, matching the summary above: with two decimals a failing
    # value such as 0.797 was displayed as "0.80 < 0.8 target".
    print(f"\n⚠️ Precision {metrics['precision']:.3f} < {precision_target} target")

if metrics['recall'] >= recall_target:
    print(f"✅ PASS: Recall ≥ {recall_target}")
else:
    # Same precision as the summary so a near-miss is not rounded up to the target.
    print(f"⚠️ Recall {metrics['recall']:.3f} < {recall_target} target")

## Multi-Trial Validation

In [None]:
# Run the validation helper on the processed feature file with n_trials=5.
results = run_anomaly_validation(
    features_path='data/processed/model_features.parquet',
    n_trials=5,
)

# Plot only when a non-empty 'trial_results' table came back.
has_trial_results = 'trial_results' in results and not results['trial_results'].empty

if not has_trial_results:
    print("No trial results to visualize")
else:
    # One line per metric, plotted against the trial column.
    metric_columns = ['precision', 'recall', 'f1_score']
    fig = px.line(
        results['trial_results'],
        x='trial',
        y=metric_columns,
        title='Detection Metrics Across Trials',
    )
    fig.show()