
# Anomaly Detection POC (Synthetic, Binder-ready) — hack2025

Run cells sequentially. Plots and metrics will appear inline.


In [None]:

import numpy as np, pandas as pd, matplotlib.pyplot as plt, os, warnings
warnings.filterwarnings('ignore')
from datetime import datetime, timedelta
from sklearn.ensemble import IsolationForest
from sklearn.metrics import precision_recall_fscore_support
from sklearn.preprocessing import StandardScaler
%matplotlib inline

# Create the local 'outputs' directory up front; exist_ok=True makes re-running
# this cell a no-op when the directory is already there.
# NOTE(review): presumably later cells save artifacts here -- confirm.
os.makedirs(name='outputs', exist_ok=True)
print('Environment ready.')


In [None]:

# --- Synthetic hourly series with injected, labelled anomalies -------------
# Seed the global NumPy RNG so every draw below is reproducible. The order of
# the np.random calls matters: reordering them changes the generated data.
np.random.seed(42)

# 90 days of hourly timestamps starting 2024-01-01 00:00.
start = datetime(2024, 1, 1)
periods = 24 * 90
index = [start + timedelta(hours=i) for i in range(periods)]

# Hour-of-day phase in radians, shared by the daily cycle of the target
# series and of the temperature feature.
hour_of_day = np.arange(periods) % 24
phase = 2 * np.pi * hour_of_day / 24

# Baseline signal = level 50 + linear trend (0 -> 10) + daily sinusoid
# (amplitude 5) + Gaussian noise (sigma 0.8).
trend = np.linspace(0, 10, periods)
daily = 5 * np.sin(phase)
noise = np.random.normal(0, 0.8, periods)
value = 50 + trend + daily + noise

# Point anomalies: 25 distinct positions (kept 50 steps away from both ends),
# each pushed up or down by 15 plus Gaussian jitter (sigma 3).
anomaly_idx = np.random.choice(np.arange(50, periods-50), size=25, replace=False)
spike_offset = np.random.choice([15, -15], size=25)
spike_jitter = np.random.normal(0, 3, 25)
value[anomaly_idx] += spike_offset + spike_jitter

# Ground-truth labels: 1 for every point anomaly and for every sample that
# falls inside a level-shift window, 0 elsewhere.
labels = np.zeros(periods, dtype=int)
labels[anomaly_idx] = 1

# Level shifts: two 48-hour windows raised by 8; shift and label together so
# the two cannot drift apart.
for start_shift in [1000, 2000]:
    window = slice(start_shift, start_shift + 48)
    value[window] += 8
    labels[window] = 1

# Auxiliary features: a temperature-like daily cycle that is independent of
# the anomalies, and a load signal that is a noisy linear function of the
# (already anomalous) value.
feature_temp = 20 + 10*np.sin(phase) + np.random.normal(0,1,periods)
feature_load = 0.3*value + np.random.normal(0,2,periods)

df = pd.DataFrame(dict(
    timestamp=index,
    value=value,
    is_anomaly=labels,
    feature_temp=feature_temp,
    feature_load=feature_load,
))


In [None]:

# --- Baseline detector: global z-score threshold ----------------------------
# Pull the target series and the ground-truth labels out of the frame as
# plain NumPy arrays.
y_true = df['is_anomaly'].to_numpy().astype(int)
series = df['value'].to_numpy().astype(float)

# Standardize the whole series at once (StandardScaler expects a 2-D column),
# then flatten the result back to 1-D.
scaler = StandardScaler()
series_column = series.reshape(-1, 1)
z = scaler.fit(series_column).transform(series_column).ravel()

# Flag every sample whose standardized magnitude exceeds the threshold.
z_thresh = 3.0
exceeds_thresh = np.abs(z) > z_thresh
pred_baseline = exceeds_thresh.astype(int)

# Precision / recall / F1 for the positive (anomaly) class; zero_division=0
# reports 0 instead of warning when nothing is predicted positive.
p_b, r_b, f_b, _ = precision_recall_fscore_support(
    y_true,
    pred_baseline,
    average='binary',
    zero_division=0,
)
baseline_metrics = {'precision': p_b, 'recall': r_b, 'f1': f_b}
print(baseline_metrics)


In [None]:

# --- Multivariate detector: Isolation Forest --------------------------------
# Feature matrix: the target value plus the two auxiliary features.
X = df[['value', 'feature_temp', 'feature_load']].values

# Expected anomaly share = labelled anomaly rate + 1% margin, floored at 1e-3
# and capped at 0.5, because IsolationForest only accepts a float
# contamination in (0, 0.5]. Without the cap a heavily anomalous dataset
# would make fit() raise.
# NOTE: this uses the ground-truth labels, which is only possible because the
# data is synthetic; on real data the rate has to be estimated or tuned.
contam = float(min(0.5, max(1e-3, y_true.mean()+0.01)))

iso = IsolationForest(n_estimators=200, contamination=contam, random_state=42)
iso.fit(X)

# score_samples is higher for more normal points; negate it so that a larger
# score means "more anomalous".
scores = -iso.score_samples(X)

# Flag the top `contam` fraction of scores as anomalies.
percentile = 100*contam
thresh = np.percentile(scores, 100 - percentile)
pred_iso = (scores >= thresh).astype(int)

# Precision / recall / F1 for the positive (anomaly) class.
p_i, r_i, f_i, _ = precision_recall_fscore_support(y_true, pred_iso, average='binary', zero_division=0)
print({'precision': p_i, 'recall': r_i, 'f1': f_i})
