# Drift Detector Evaluation — Report Notebook

_VN_: Notebook này **tự động** tạo pipeline, chạy thí nghiệm và tạo bảng/biểu đồ **đủ cho báo cáo**.
- Detectors: ADWIN, DDM, EDDM, HDDM_A, HDDM_W, KSWIN, Page-Hinkley (River)
- Datasets: SEA (synthetic, có ground-truth drift), Elec2 (real-world), RandomRBFDrift (gradual)
- Chỉ số: Accuracy, F1, #alarms, β-score (SEA), delay (samples & %window), **F1@AR**, **Accuracy@AR** + Global scores

_EN_: One-click River-based benchmark that outputs report-ready tables and plots.

In [1]:
# 0) Install dependencies (safe to re-run)
import sys, subprocess
# Packages needed by the notebook. matplotlib is included because the plotting
# cells import matplotlib.pyplot and would otherwise fail on a clean environment.
pkgs = ['river>=0.17','pandas>=2.0','numpy>=1.23','pyyaml>=6.0','pyarrow>=14.0','rich>=13.0','matplotlib>=3.5']
# Install with the interpreter running this kernel so the packages land in the right environment.
subprocess.check_call([sys.executable,'-m','pip','install','--quiet', *pkgs])
print('Installed:', pkgs)


[31mERROR: Operation cancelled by user[0m[31m
[0m

KeyboardInterrupt: 

In [None]:
# 1) Bootstrap helper package: driftlab/
from pathlib import Path
import textwrap, os
# Lay out the helper package skeleton: source package, results and configs folders.
root = Path('driftlab')
pkg_dir = root / 'src' / 'driftlab'
for folder in (pkg_dir, root / 'results', root / 'configs'):
    folder.mkdir(parents=True, exist_ok=True)
# Empty marker file for src/, and a package __init__ that pulls in the submodules.
(root / 'src' / '__init__.py').write_text('')
(pkg_dir / '__init__.py').write_text('from . import datasets, detectors, metrics, runner\n')

(root/'src/driftlab/datasets.py').write_text(textwrap.dedent('''\
from __future__ import annotations
from typing import Iterator, Dict, Any, List, Optional
from dataclasses import dataclass
from river import datasets as r_datasets

@dataclass
class StreamBatch:
    """One labelled sample produced by the stream iterators in this module."""
    x: dict  # feature mapping for the sample
    y: Any  # target label for the sample
    t: int  # position of the sample in the stream (the iterators count from 1)

def iter_elec2(limit: Optional[int] = None) -> Iterator[StreamBatch]:
    """Yield the Elec2 dataset sample by sample as StreamBatch items.

    Args:
        limit: maximum number of samples to yield; None streams the whole dataset.

    Yields:
        StreamBatch with t counting from 1.
    """
    for index, (features, label) in enumerate(r_datasets.Elec2()):
        if limit is not None and index >= limit:
            return
        yield StreamBatch(x=features, y=label, t=index + 1)

def iter_sea(n_samples: int = 100000, drift_positions: Optional[List[int]] = None, seed: int = 42):
    """Yield a SEA stream whose concept switches at the given positions.

    The stream is cut into consecutive segments at the drift positions and each
    segment is drawn from a different SEA variant, so every position is an
    abrupt drift.

    Args:
        n_samples: total number of samples to yield.
        drift_positions: sample indices at which the concept changes. None
            selects the default [30000, 60000]; an empty list gives a
            drift-free stream from variant 0.
        seed: base seed; segment i is generated with seed + i.

    Yields:
        StreamBatch with t counting from 1 across all segments.
    """
    if drift_positions is None:
        # Default built per call instead of a shared mutable default argument.
        drift_positions = [30000, 60000]
    # Clamp positions into [0, n_samples] so a drift placed past the end of the
    # stream cannot produce a negative segment length.
    cuts = [min(max(p, 0), n_samples) for p in sorted(drift_positions)]
    bounds = [0] + cuts + [n_samples]
    n_variants = 4  # variants 0-3, cycled so more than three drifts are still honoured
    t = 0
    for seg_idx, (start, stop) in enumerate(zip(bounds, bounds[1:])):
        sea = r_datasets.synth.SEA(variant=seg_idx % n_variants, seed=seed + seg_idx)
        for x, y in sea.take(stop - start):
            t += 1
            yield StreamBatch(x=x, y=y, t=t)

def sea_ground_truth(n_samples: int, drift_positions: List[int]) -> Dict[str, Any]:
    """Describe the true drift locations of a SEA stream.

    Args:
        n_samples: stream length, used as the window length when there are no drifts.
        drift_positions: sample indices at which the concept changes.

    Returns:
        Dict with "drift_times" (sorted positions) and "window_len" (the first
        drift position, or n_samples when the list is empty).
    """
    ordered = sorted(drift_positions)
    if ordered:
        window = ordered[0]
    else:
        window = n_samples
    return {"drift_times": ordered, "window_len": window}

def iter_randomrbf(n_samples: int = 100000, speed: float = 0.87, n_centroids: int = 50, seed: int = 42):
    """Yield a two-class, ten-feature RandomRBFDrift stream as StreamBatch items.

    Args:
        n_samples: number of samples to yield.
        speed: value passed as change_speed to the generator.
        n_centroids: number of centroids in the generator.
        seed: model seed; the sampling seed is seed + 1.

    Yields:
        StreamBatch with t counting from 1.
    """
    generator = r_datasets.synth.RandomRBFDrift(
        seed_model=seed,
        seed_sample=seed + 1,
        n_classes=2,
        n_features=10,
        n_centroids=n_centroids,
        change_speed=speed,
    )
    for position, (features, label) in enumerate(generator.take(n_samples), start=1):
        yield StreamBatch(x=features, y=label, t=position)

def get_stream(name: str, **kwargs):
    """Build a sample iterator and its ground-truth description by dataset name.

    Args:
        name: dataset name, case-insensitive. Accepted values are
            "elec2"/"electricity", "sea", and "rbf"/"randomrbf"/"randomrbfdrift".
        **kwargs: dataset options. Elec2 reads "limit"; SEA reads "n_samples",
            "drift_positions" and "seed"; RandomRBF reads "n_samples", "speed",
            "n_centroids" and "seed". Missing options, and "seed" or
            "drift_positions" given as None, fall back to the defaults of the
            underlying iterator.

    Returns:
        Tuple (iterator, ground_truth). ground_truth has the keys "drift_times"
        and "window_len"; both are None for datasets without known drifts.

    Raises:
        ValueError: if the dataset name is not recognised.
    """
    key = name.lower()
    # Forward the seed so callers can re-seed the synthetic streams; 42 matches
    # the default of the iterators.
    seed = kwargs.get("seed")
    seed = 42 if seed is None else int(seed)

    if key in ("elec2", "electricity"):
        return iter_elec2(limit=kwargs.get("limit")), {"drift_times": None, "window_len": None}

    if key == "sea":
        n = int(kwargs.get("n_samples", 100000))
        drifts = kwargs.get("drift_positions")
        if drifts is None:
            drifts = [30000, 60000]
        iterator = iter_sea(n_samples=n, drift_positions=drifts, seed=seed)
        return iterator, sea_ground_truth(n, drifts)

    if key in ("rbf", "randomrbf", "randomrbfdrift"):
        n = int(kwargs.get("n_samples", 100000))
        speed = float(kwargs.get("speed", 0.87))
        n_centroids = int(kwargs.get("n_centroids", 50))
        iterator = iter_randomrbf(n_samples=n, speed=speed, n_centroids=n_centroids, seed=seed)
        return iterator, {"drift_times": None, "window_len": None}

    raise ValueError(f"Unknown dataset: {key}")
'''))

(root/'src/driftlab/detectors.py').write_text(textwrap.dedent('''\
from river.drift import ADWIN, KSWIN, PageHinkley
try:
    # Recent River releases keep the error-rate based detectors in drift.binary.
    from river.drift.binary import DDM, EDDM, HDDM_A, HDDM_W
except ImportError:
    # Older releases exposed them directly from river.drift.
    from river.drift import DDM, EDDM, HDDM_A, HDDM_W

# Lower-case detector name -> detector class.
_DETECTORS = {
    'adwin': ADWIN, 'ddm': DDM, 'eddm': EDDM,
    'hddm_a': HDDM_A, 'hddm_w': HDDM_W, 'kswin': KSWIN,
    'pagehinkley': PageHinkley,
}

def make_detector(name: str, **kwargs):
    """Instantiate a drift detector by name.

    Args:
        name: detector name, case-insensitive; one of adwin, ddm, eddm,
            hddm_a, hddm_w, kswin, pagehinkley.
        **kwargs: passed unchanged to the detector constructor.

    Returns:
        A new detector instance.

    Raises:
        KeyError: if the name is not a known detector.
    """
    key = name.lower()
    try:
        detector_cls = _DETECTORS[key]
    except KeyError:
        raise KeyError(f"Unknown detector: {name!r}; expected one of {sorted(_DETECTORS)}") from None
    return detector_cls(**kwargs)

def update_supervised(det, error01: int) -> bool:
    """Feed one 0/1 prediction error to a detector and report whether it fired.

    Args:
        det: drift detector exposing update() and a drift flag attribute.
        error01: 1 if the prediction was wrong, 0 if it was right.

    Returns:
        True if the detector signals a drift after this update.

    Raises:
        AttributeError: if the detector exposes neither drift_detected nor
            change_detected.
    """
    det.update(error01)
    # Current River detectors expose drift_detected; older releases named the
    # same flag change_detected, so fall back to it when needed.
    if hasattr(det, 'drift_detected'):
        return bool(det.drift_detected)
    return bool(det.change_detected)

def update_univariate(det, value: float) -> bool:
    """Feed one real-valued observation to a detector and report whether it fired.

    Args:
        det: drift detector exposing update() and a drift flag attribute.
        value: the observation to monitor.

    Returns:
        True if the detector signals a drift after this update.

    Raises:
        AttributeError: if the detector exposes neither drift_detected nor
            change_detected.
    """
    det.update(value)
    # Current River detectors expose drift_detected; older releases named the
    # same flag change_detected, so fall back to it when needed.
    if hasattr(det, 'drift_detected'):
        return bool(det.drift_detected)
    return bool(det.change_detected)
'''))

(root/'src/driftlab/metrics.py').write_text(textwrap.dedent('''\
from __future__ import annotations
from typing import List, Optional, Dict, Any
import numpy as np

def match_alarms_to_drifts(alarms: List[int], drifts: List[int], max_delay: Optional[int]=None) -> Dict[str, Any]:
    """Pair each true drift with the first unused alarm at or after it.

    Drifts are processed in increasing order. An alarm is consumed by at most
    one drift, and a pairing counts only if its delay does not exceed max_delay.

    Args:
        alarms: sample indices at which the detector fired.
        drifts: sample indices of the true drifts.
        max_delay: largest accepted alarm-minus-drift gap; None accepts any gap.

    Returns:
        Dict with "tp" (matched drifts), "fp" (alarms matched to no drift),
        "fn" (drifts without an accepted alarm) and "delays" (one gap per match).
    """
    alarm_seq = sorted(alarms)
    n_alarms = len(alarm_seq)
    matched = set()
    delays = []
    missed = 0
    cursor = 0
    for drift_t in sorted(drifts):
        # Skip alarms raised before this drift; the cursor never moves backwards.
        while cursor < n_alarms and alarm_seq[cursor] < drift_t:
            cursor += 1
        if cursor >= n_alarms:
            missed += 1
            continue
        lag = alarm_seq[cursor] - drift_t
        accepted = max_delay is None or lag <= max_delay
        if not accepted:
            # A too-late alarm is left unconsumed so a later drift can still claim it.
            missed += 1
            continue
        delays.append(lag)
        matched.add(cursor)
        cursor += 1
    return {"tp": len(delays), "fp": n_alarms - len(matched), "fn": missed, "delays": delays}

def beta_score(tp:int, fp:int, p:int, beta:float=0.5) -> float:
    """Score detections against the number of true drifts, penalising false alarms.

    Args:
        tp: number of correctly detected drifts.
        fp: number of false alarms.
        p: number of true drifts.
        beta: weight applied to each false alarm in the denominator.

    Returns:
        tp / (p + beta * fp), or NaN when p is not positive.
    """
    if p <= 0:
        return float('nan')
    penalised_total = p + beta * fp
    return tp / penalised_total

def delay_stats(delays: List[int]) -> Dict[str,float]:
    """Summarise detection delays.

    Args:
        delays: one delay per matched drift.

    Returns:
        Dict with "mean", "median", "min" and "max" as floats; every value is
        NaN when the list is empty.
    """
    if not delays:
        return {key: float('nan') for key in ("mean", "median", "min", "max")}
    values = np.array(delays, dtype=float)
    reducers = {"mean": np.mean, "median": np.median, "min": np.min, "max": np.max}
    return {key: float(reduce(values)) for key, reduce in reducers.items()}
'''))

(root/'src/driftlab/runner.py').write_text(textwrap.dedent('''\
from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, Any, List, Optional
import math
from river import tree, metrics as r_metrics
from .datasets import get_stream
from .detectors import make_detector, update_supervised, update_univariate
from .metrics import match_alarms_to_drifts, beta_score, delay_stats

@dataclass
class RunConfig:
    """Settings for a single detector-on-dataset run, consumed by run_once."""
    dataset: str  # dataset name: 'sea', 'rbf'/'randomrbf'/'randomrbfdrift' or 'elec2'/'electricity'
    detector: str  # detector name handed to make_detector
    classifier: str = 'HT'  # classifier label; handed to make_classifier and echoed in the results
    n_samples: int = 100000  # number of samples to process (used as the sample limit for Elec2)
    sea_drifts: Optional[List[int]] = None  # SEA drift positions; run_once falls back to [30000, 60000]
    rbf_speed: float = 0.87  # speed setting passed to the RandomRBF stream
    seed: int = 42  # random seed for the run
    max_delay: Optional[int] = None  # largest alarm delay still counted as a detection; None = no limit
    beta: float = 0.5  # false-alarm weight used by the beta-score

def make_classifier(name:str):
    """Build the online classifier used for a run.

    Args:
        name: classifier label. It is currently not inspected; a Hoeffding
            tree classifier is returned for every value.

    Returns:
        A new tree.HoeffdingTreeClassifier instance.
    """
    classifier = tree.HoeffdingTreeClassifier()
    return classifier

def run_once(cfg: RunConfig) -> Dict[str, Any]:
    """Run one test-then-train experiment and summarise classifier and detector results.

    For every sample the classifier first predicts, the detector is updated
    with the outcome, the metrics are updated, and only then does the
    classifier learn from the sample.

    Args:
        cfg: run settings. cfg.seed is forwarded to get_stream for the
            synthetic streams.

    Returns:
        Dict with 'dataset', 'detector', 'classifier', 'n_seen', 'alarms'
        (count), 'acc', 'f1' and 'alarm_times'. When the dataset has known
        drift times it also holds 'tp', 'fp', 'fn', 'delays', 'beta_score',
        'delay_mean', 'delay_median', 'delay_min', 'delay_max' and
        'delay_percent_mean'; the delay values are NaN when no drift was
        matched.

    Raises:
        ValueError: if cfg.dataset is not a supported dataset name.
    """
    dataset = cfg.dataset.lower()
    if dataset == 'sea':
        iterator, gt = get_stream('sea', n_samples=cfg.n_samples, drift_positions=cfg.sea_drifts or [30000,60000], seed=cfg.seed)
    elif dataset in ('rbf','randomrbf','randomrbfdrift'):
        iterator, gt = get_stream('randomrbf', n_samples=cfg.n_samples, speed=cfg.rbf_speed, seed=cfg.seed)
    elif dataset in ('elec2','electricity'):
        iterator, gt = get_stream('elec2', limit=cfg.n_samples)
    else:
        raise ValueError('Unsupported dataset')

    model = make_classifier(cfg.classifier)
    det = make_detector(cfg.detector)
    # Only KSWIN is fed the predicted-class probability, so the extra
    # predict_proba_one call is skipped for every other detector.
    use_proba = cfg.detector.lower() == 'kswin' and hasattr(model, 'predict_proba_one')

    acc = r_metrics.Accuracy()
    f1 = r_metrics.F1()
    alarms: List[int] = []
    n = 0

    for batch in iterator:
        n += 1
        x, y = batch.x, batch.y

        y_pred = model.predict_one(x)
        prob_pred = None
        if use_proba and y_pred is not None:
            prob_pred = model.predict_proba_one(x).get(y_pred, None)

        if prob_pred is not None:
            fired = update_univariate(det, float(prob_pred))
        else:
            # No prediction yet counts as an error for the error-rate detectors.
            err01 = 1 if (y_pred is None or y_pred != y) else 0
            fired = update_supervised(det, err01)
        if fired:
            alarms.append(n)

        if y_pred is not None:
            # Score the actual prediction; scoring y against itself would
            # report perfect accuracy and F1 for every run.
            acc.update(y_true=y, y_pred=y_pred)
            f1.update(y_true=y, y_pred=y_pred)

        model.learn_one(x, y)
        if cfg.n_samples and n >= cfg.n_samples:
            break

    out: Dict[str, Any] = {
        'dataset': cfg.dataset,
        'detector': cfg.detector,
        'classifier': cfg.classifier,
        'n_seen': n,
        'alarms': len(alarms),
        'acc': float(acc.get() or 0.0),
        'f1': float(f1.get() or 0.0),
        'alarm_times': alarms,
    }

    if gt.get('drift_times'):
        res = match_alarms_to_drifts(alarms, gt['drift_times'], cfg.max_delay)
        out.update({'tp':res['tp'],'fp':res['fp'],'fn':res['fn'],'delays':res['delays']})
        p = len(gt['drift_times'])
        out['beta_score'] = float(beta_score(res['tp'], res['fp'], p, beta=cfg.beta))
        # delay_stats returns NaN for every statistic on an empty list, so the
        # same delay_* keys are present whether or not any drift was matched.
        dstats = delay_stats(res['delays'])
        out.update({f'delay_{k}': v for k,v in dstats.items()})
        window_len = gt.get('window_len')
        if window_len and not math.isnan(dstats['mean']):
            out['delay_percent_mean'] = 100.0 * dstats['mean'] / float(window_len)
        else:
            # Always emit the key so downstream tables and plots can rely on it.
            out['delay_percent_mean'] = float('nan')
    return out
'''))

print('Bootstrap done in', root.resolve())


In [None]:
# 2) Run batch experiments (SEA + Elec2 + RBF)
import sys
sys.path.append('driftlab/src')
from driftlab.runner import RunConfig, run_once
import pandas as pd

detectors = ['adwin', 'ddm', 'eddm', 'hddm_a', 'hddm_w', 'kswin', 'pagehinkley']

# One entry per benchmark: (context label, RunConfig settings shared by all detectors).
experiments = [
    # SEA (ground-truth drifts)
    ('sea', dict(dataset='sea', n_samples=60000, sea_drifts=[20000, 40000], beta=0.5, max_delay=5000)),
    # Elec2 (real-world)
    ('elec2', dict(dataset='elec2', n_samples=30000)),
    # RandomRBFDrift (gradual)
    ('rbf', dict(dataset='rbf', n_samples=50000, rbf_speed=0.8)),
]

jobs = []
for context, params in experiments:
    for det in detectors:
        res = run_once(RunConfig(detector=det, **params))
        res['context'] = context
        jobs.append(res)

df = pd.DataFrame(jobs)
df.head(10)


In [None]:
# 3) Save raw results (CSV/Parquet)
from pathlib import Path
out = Path('driftlab/results/notebook_runs')
out.mkdir(parents=True, exist_ok=True)
csv_path = out / 'combined.csv'
parquet_path = out / 'combined.parquet'
df.to_csv(csv_path, index=False)
# Parquet is best-effort: a failure is reported and the CSV remains the result.
try:
    df.to_parquet(parquet_path, index=False)
except Exception as exc:
    print('Parquet skipped:', exc)
out.resolve()


## 4) Visual summaries (matplotlib)
- SEA: β-score, mean delay (% of first segment)
- Elec2: Accuracy
- RBF: F1

_Rule_: one chart per figure; no manual colors.

In [None]:
import matplotlib.pyplot as plt
# SEA runs ranked by beta-score, best first; missing scores are drawn as 0.
sea = df.loc[df['context'] == 'sea'].copy().sort_values('beta_score', ascending=False)
plt.figure()
plt.bar(sea['detector'], sea['beta_score'].fillna(0))
plt.title('SEA: β-score')
plt.xlabel('Detector')
plt.ylabel('β-score')
plt.xticks(rotation=45)
plt.show()


In [None]:
# The delay column only exists when at least one run produced a matched
# detection, so fall back to zeros instead of raising KeyError.
if 'delay_percent_mean' in sea.columns:
    delay_pct = sea['delay_percent_mean'].fillna(0)
else:
    delay_pct = [0] * len(sea)
plt.figure()
plt.bar(sea['detector'], delay_pct)
plt.title('SEA: mean delay (%)')
plt.xlabel('Detector')
plt.ylabel('delay %')
plt.xticks(rotation=45)
plt.show()


In [None]:
# Elec2 runs ranked by accuracy, best first.
elec = df.loc[df['context'] == 'elec2'].copy().sort_values('acc', ascending=False)
plt.figure()
plt.bar(elec['detector'], elec['acc'].fillna(0))
plt.title('Elec2: Accuracy')
plt.xlabel('Detector')
plt.ylabel('Accuracy')
plt.xticks(rotation=45)
plt.show()


In [None]:
# RandomRBF runs ranked by F1, best first.
rbf = df.loc[df['context'] == 'rbf'].copy().sort_values('f1', ascending=False)
plt.figure()
plt.bar(rbf['detector'], rbf['f1'].fillna(0))
plt.title('RBF: F1')
plt.xlabel('Detector')
plt.ylabel('F1')
plt.xticks(rotation=45)
plt.show()


## 5) Aggregate metrics: F1@AR & Accuracy@AR (plus normalized Global Scores)
**Alarm Rate** per 10k samples: $AR = (\#\text{alarms}/N)\times 10^4$.  
**F1@AR**: $F1 - \lambda \cdot AR$ (default $\lambda=0.01$).  
**Accuracy@AR**: $Acc - \lambda_{acc} \cdot AR$ (default $\lambda_{acc}=0.01$).  
Chuẩn hoá min–max riêng trên từng dataset, sau đó macro-average qua datasets → GlobalScore.

In [None]:
import pandas as pd
# Penalty applied per unit of alarm rate (alarms per 10k samples).
LAMBDA_F1 = 0.01
LAMBDA_ACC = 0.01

alarm_rate = df['alarms'] / df['n_seen'] * 1e4
# assign() returns a new frame, so df itself is left untouched.
dfm = df.assign(**{
    'AR_per10k': alarm_rate,
    'F1@AR': df['f1'] - LAMBDA_F1 * alarm_rate,
    'Acc@AR': df['acc'] - LAMBDA_ACC * alarm_rate,
})

def _minmax(s):
    mx, mn = s.max(), s.min()
    return (s - mn) / (mx - mn) if mx > mn else 1.0

# Normalise each penalised metric within its own dataset.
for metric in ('F1@AR', 'Acc@AR'):
    dfm[f'{metric}_norm'] = dfm.groupby('dataset')[metric].transform(_minmax)

# Average the normalised scores per detector across datasets, best first.
f1_by_detector = dfm.groupby('detector')['F1@AR_norm'].mean()
acc_by_detector = dfm.groupby('detector')['Acc@AR_norm'].mean()
global_f1ar = f1_by_detector.sort_values(ascending=False).rename('GlobalScore_F1@AR')
global_accar = acc_by_detector.sort_values(ascending=False).rename('GlobalScore_Acc@AR')

for scores in (global_f1ar, global_accar):
    display(scores.to_frame())


In [None]:
# 6) Export report tables
from pathlib import Path
report_dir = Path('driftlab/results/report')
report_dir.mkdir(parents=True, exist_ok=True)

# Per-run tables are written without the row index.
for table, filename in ((df, 'raw_combined.csv'), (dfm, 'with_F1@AR_and_Acc@AR.csv')):
    table.to_csv(report_dir / filename, index=False)

# Global scores keep their index, which holds the detector name.
for scores, filename in ((global_f1ar, 'global_F1@AR_scores.csv'), (global_accar, 'global_Acc@AR_scores.csv')):
    scores.to_csv(report_dir / filename)

print('Saved to', report_dir.resolve())
