# 02 — Marker QC & seizure intervals (ds003029)

Mục tiêu:
- Kiểm tra consistency của seizure markers trong `*_events.tsv`
- Tạo bảng intervals an toàn cho labeling (multi-seizure / pairing)

Đầu vào:
- `eda_outputs/ds003029_run_summary.csv` (từ notebook 01)

Đầu ra (trong `eda_outputs/`):
- `ds003029_marker_qc_by_run.csv`
- `ds003029_seizure_intervals_by_run.csv`
- `ds003029_trial_type_*_vocab.csv`

In [None]:
from __future__ import annotations

import sys
from pathlib import Path

import pandas as pd

# Locate the workspace root: use the current directory if it contains `src/`,
# otherwise fall back to the parent directory when that one contains `src/`.
ws = Path.cwd().resolve()
src_dir = ws / 'src'
if not src_dir.exists() and (ws.parent / 'src').exists():
    ws = ws.parent
src_dir = (ws / 'src').resolve()
# Put the project sources first on the import path so the imports below resolve.
sys.path.insert(0, str(src_dir))

from ds003029_eda.paths import get_paths
from ds003029_eda.marker_qc import build_marker_qc, export_marker_qc

paths = get_paths()
run_summary_csv = paths.outputs_dir / 'ds003029_run_summary.csv'
# Raise explicitly rather than `assert`: assertions are stripped under `python -O`,
# which would defer the failure to a less informative error inside read_csv.
if not run_summary_csv.exists():
    raise FileNotFoundError(f'Missing {run_summary_csv}. Run notebook 01 first.')

run_summary = pd.read_csv(run_summary_csv)
# Keep only the run identifier and its events.tsv path; if the summary has no
# `events_tsv` column, synthesise an empty one so the filter below drops every row.
if 'events_tsv' in run_summary.columns:
    events_paths = run_summary[['base', 'events_tsv']].copy()
else:
    events_paths = run_summary[['base']].assign(events_tsv='')
# read_csv loads empty cells as NaN; normalise them to '' before the string
# conversion, otherwise astype(str) yields the 3-character string 'nan' and the
# length filter keeps runs that have no events.tsv at all.
has_events_tsv = events_paths['events_tsv'].fillna('').astype(str).str.len() > 0
events_paths = events_paths[has_events_tsv].copy()
print('runs in run_summary:', len(run_summary))
print('runs with events.tsv:', len(events_paths))

In [None]:
# Build the marker-QC tables from the per-run events paths, then export them
# into the outputs directory.
outputs = build_marker_qc(events_paths)
export_marker_qc(paths.outputs_dir, outputs)

# Expose the two result tables under short names for inspection below.
per_run = outputs.per_run
intervals = outputs.intervals

print('events.tsv readable:', int(per_run['events_exists'].sum()))
# Restrict the summary to runs whose `events_exists` flag is True.
ok = per_run.loc[per_run['events_exists'].eq(True)]
if len(ok):
    # (label, column, count_positive): when count_positive is True the printed
    # number is how many runs have a value > 0 in that column; otherwise the
    # column is summed directly.
    summary_spec = (
        ('runs with >=1 onset:', 'n_onset_markers', True),
        ('runs with >=1 offset:', 'n_offset_markers', True),
        ('runs with paired intervals >=1:', 'n_intervals_paired', True),
        ('multi-seizure candidates:', 'multi_seizure_candidate', False),
        ('has unpaired onset:', 'has_unpaired_onset', False),
        ('has orphan offset:', 'has_orphan_offset', False),
    )
    for label, column, count_positive in summary_spec:
        values = ok[column]
        total = (values > 0).sum() if count_positive else values.sum()
        print(label, int(total))

# Preview the first rows of both tables in the notebook output.
display(per_run.head())
display(intervals.head())

# Report the absolute locations of the two main CSVs
# (presumably written by export_marker_qc above — verify against that helper).
for written_name in (
    'ds003029_marker_qc_by_run.csv',
    'ds003029_seizure_intervals_by_run.csv',
):
    print('Wrote:', (paths.outputs_dir / written_name).resolve())