# Lightning Detection with NCD
This notebook demonstrates compression-based lightning detection using **Normalised Compression Distance** (NCD) and compares it against a simple amplitude-threshold baseline.

In [None]:
import json, numpy as np, matplotlib.pyplot as plt
from pathlib import Path
from pandas import Series
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix
from tqdm import tqdm
import seaborn as sns
from leela_ml.datamodules_npy import StrikeDataset
from leela_ml.ncd import ncd_adjacent, ncd_first


## 1. Generate synthetic data

In [None]:
# Generate a synthetic recording with the project simulator.
from leela_ml.signal_sim.simulator import simulate

out_prefix = Path('data/demo')
prefix_str = str(out_prefix)
# NOTE(review): the meaning of the first positional argument (1) is not
# visible here -- check simulate()'s signature. seed=0 is passed explicitly.
simulate(1, prefix_str, seed=0)

# Files loaded by the following cells; presumably written by simulate()
# next to out_prefix -- verify the naming against the simulator.
npy, meta = 'data/demo_LON.npy', 'data/demo_meta.json'


## 2. Load dataset

In [None]:
# Build the windowed dataset from the recording and its metadata file.
ds = StrikeDataset(npy, meta, chunk_size=512, overlap=0.9)

# NOTE(review): `_windows` is a private attribute of StrikeDataset; the
# later cells index it along axis 1, so it is assumed to be 2-D -- confirm.
win = ds._windows.astype(np.float32, copy=False)
lab = ds.labels.astype(bool)  # boolean per-window ground truth
fs, hop = ds.fs, ds.hop

# Quick sanity check: number of windows and number of positive labels.
n_pos = int(lab.sum())
print("windows", ds.n_win, "positives", n_pos)


## 3. NCD computation

In [None]:
# NCD score per window from the project helper `ncd_adjacent`.
err = ncd_adjacent(win, per_win_norm=True)

# Rolling-window length (in windows): 0.01 * fs / hop truncated to an int,
# never below 1. Presumably ~10 ms of signal if fs is in Hz and hop is in
# samples -- confirm. This value is reused by the later detector cells.
win_len = max(1, int(0.01 * fs / hop))

# Adaptive threshold: centred rolling median + 6 * rolling MAD
# (median absolute deviation). The rolling object is built once and shared.
roll_ncd = Series(err).rolling(win_len, center=True, min_periods=1)
mad_ncd = roll_ncd.apply(lambda v: np.median(np.abs(v - np.median(v))), raw=True)
thr = roll_ncd.median() + 6 * mad_ncd
mask = err > thr.values

# Pin the label set so the confusion matrix is always 2x2. Without it,
# sklearn returns a 1x1 matrix when labels and predictions contain a single
# class (e.g. no strikes and no detections) and the 4-way unpack fails.
tn, fp, fn, tp = confusion_matrix(lab, mask, labels=[False, True]).ravel()
# zero_division=0 keeps the same 0.0 result as the default but without the
# UndefinedMetricWarning when there are no predicted/true positives.
P, R, F, _ = precision_recall_fscore_support(
    lab, mask, average='binary', zero_division=0
)
metrics_ncd = dict(
    P=float(P), R=float(R), F1=float(F),
    TP=int(tp), FP=int(fp), FN=int(fn), TN=int(tn),
)


In [None]:
# NCD score per window from the project helper `ncd_first`, using window 0
# as the baseline index.
err_first = ncd_first(win, baseline_idx=0, per_win_norm=True)

# Adaptive threshold: centred rolling median + 6 * rolling MAD
# (median absolute deviation), using the same `win_len` as the NCD cell.
roll_first = Series(err_first).rolling(win_len, center=True, min_periods=1)
mad_first = roll_first.apply(lambda v: np.median(np.abs(v - np.median(v))), raw=True)
thr_first = roll_first.median() + 6 * mad_first
mask_first = err_first > thr_first.values

# Pin the label set so the confusion matrix is always 2x2. Without it,
# sklearn returns a 1x1 matrix when labels and predictions contain a single
# class and the 4-way unpack fails.
tn, fp, fn, tp = confusion_matrix(lab, mask_first, labels=[False, True]).ravel()
# zero_division=0 keeps the same 0.0 result as the default but without the
# UndefinedMetricWarning when there are no predicted/true positives.
P1, R1, F1, _ = precision_recall_fscore_support(
    lab, mask_first, average='binary', zero_division=0
)
metrics_first = dict(
    P=float(P1), R=float(R1), F1=float(F1),
    TP=int(tp), FP=int(fp), FN=int(fn), TN=int(tn),
)


## 4. Simple amplitude threshold baseline

In [None]:
# Baseline feature: root-mean-square amplitude of each window (mean over axis 1).
amp = np.sqrt((win**2).mean(axis=1))

# Same adaptive rule as the NCD detectors: centred rolling median
# + 6 * rolling MAD (median absolute deviation) over `win_len` windows.
roll_amp = Series(amp).rolling(win_len, center=True, min_periods=1)
mad_amp = roll_amp.apply(lambda v: np.median(np.abs(v - np.median(v))), raw=True)
thr_amp = roll_amp.median() + 6 * mad_amp
mask_amp = amp > thr_amp.values

# Pin the label set so the confusion matrix is always 2x2. Without it,
# sklearn returns a 1x1 matrix when labels and predictions contain a single
# class and the 4-way unpack fails.
tn, fp, fn, tp = confusion_matrix(lab, mask_amp, labels=[False, True]).ravel()
# zero_division=0 keeps the same 0.0 result as the default but without the
# UndefinedMetricWarning when there are no predicted/true positives.
Pa, Ra, Fa, _ = precision_recall_fscore_support(
    lab, mask_amp, average='binary', zero_division=0
)
metrics_amp = dict(
    P=float(Pa), R=float(Ra), F1=float(Fa),
    TP=int(tp), FP=int(fp), FN=int(fn), TN=int(tn),
)


## 5. Compare

In [None]:
# Report all three detectors side by side. The first-window NCD metrics are
# computed earlier in the notebook but were previously never printed.
print('NCD metrics', metrics_ncd)
print('NCD (first-window baseline) metrics', metrics_first)
print('Amplitude metrics', metrics_amp)


### Plot NCD curve and threshold

In [None]:
# Draw the per-window NCD score together with its adaptive threshold.
fig, ax = plt.subplots(figsize=(15, 4))
ax.plot(err, label='NCD', lw=0.4)
ax.plot(thr, '--', label='threshold', lw=0.8)
ax.legend()
ax.set_title('NCD curve')
