In [1]:
# Cosmetic only: applies the 'oceans16' notebook theme via the `jt` command-line tool
# (presumably the jupyterthemes package — must be installed; see `jt --help` for the flags).
!jt -t oceans16 -T -N -kl

In [2]:
# Drop the bytecode cache of the local `utils` package before it is imported below.
# Pure-Python replacement for the Windows-only `del /Q` shell command, so the
# notebook also runs on other platforms; a missing cache directory is not an error.
import shutil
shutil.rmtree('utils/__pycache__', ignore_errors=True)

In [3]:
import pandas as pd
from tqdm.notebook import tqdm

In [4]:
# from adtk.metrics import precision, recall, f1_score

In [5]:
from utils.metrics import time_span_metrics

In [6]:
from utils.datasets import GhlKasperskyDataset, TepHarvardDataset, TepKasperskyDataset
from utils.watchmen import LimitWatchman, LimitPcaWatchman, SpePcaWatchman
from utils.custom_plots import plot_stacked

In [7]:
# Seed passed as `random_state` to every `datasets[d].shake_not_stir(...)` call in this notebook.
SEED = 31

# Preparing datasets

In [8]:
# Datasets under evaluation. The integer key is the dataset id that also
# indexes the per-dataset watchman dicts and result tables in this notebook.
datasets = dict(enumerate((
    GhlKasperskyDataset(),
    TepHarvardDataset(),
    TepKasperskyDataset(),
)))

# Preparing watchmen

## LimitWatchman

In [9]:
# One LimitWatchman per dataset id; only dataset 1 is built with a
# non-default argument (ewma='3 min'), the others use the defaults.
limit_watchmen = {
    d: LimitWatchman(**kwargs)
    for d, kwargs in enumerate(({}, {'ewma': '3 min'}, {}))
}

## LimitPcaWatchman

In [10]:
# One LimitPcaWatchman per dataset id, each with its own n_components
# (dataset id -> n_components: 0 -> 1, 1 -> 12, 2 -> 3).
limit_pca_watchmen = {
    d: LimitPcaWatchman(n_components=n)
    for d, n in {0: 1, 1: 12, 2: 3}.items()
}

In [11]:
# Fit each LimitPcaWatchman in two full passes over its dataset's training data:
# first the scaler, then the PCA.
# NOTE(review): presumably the PCA pass relies on the completed scaler fit — confirm in utils.watchmen.
for d in datasets:
    datasets[d].shake_not_stir(random_state=SEED)
    # Only the data chunk is used; the other two yielded items are discarded
    # (bound to `_` so they do not leak into the notebook namespace).
    for data, _, _ in tqdm(datasets[d].train_generator(), desc=f'fit scaler on dataset {d}'):
        limit_pca_watchmen[d].partial_fit_scaler(data)
    for data, _, _ in tqdm(datasets[d].train_generator(), desc=f'fit pca on dataset {d}'):
        limit_pca_watchmen[d].partial_fit_pca(data)

fit scaler on dataset 0: 0it [00:00, ?it/s]

fit pca on dataset 0: 0it [00:00, ?it/s]

fit scaler on dataset 1: 0it [00:00, ?it/s]

fit pca on dataset 1: 0it [00:00, ?it/s]

fit scaler on dataset 2: 0it [00:00, ?it/s]

fit pca on dataset 2: 0it [00:00, ?it/s]

## SpePcaWatchman

In [12]:
# One SpePcaWatchman per dataset id; n_components for datasets 0, 1, 2
# are 1, 12 and 3 respectively.
spe_pca_watchmen = {
    d: SpePcaWatchman(n_components=n)
    for d, n in enumerate((1, 12, 3))
}

In [13]:
# Two passes over each dataset's training data for its SpePcaWatchman:
# the first feeds every chunk to the scaler, the second to the PCA.
for d, dataset in datasets.items():
    dataset.shake_not_stir(random_state=SEED)
    watchman = spe_pca_watchmen[d]
    for data, _, _ in tqdm(dataset.train_generator(), desc=f'fit scaler on dataset {d}'):
        watchman.partial_fit_scaler(data)
    for data, _, _ in tqdm(dataset.train_generator(), desc=f'fit pca on dataset {d}'):
        watchman.partial_fit_pca(data)

fit scaler on dataset 0: 0it [00:00, ?it/s]

fit pca on dataset 0: 0it [00:00, ?it/s]

fit scaler on dataset 1: 0it [00:00, ?it/s]

fit pca on dataset 1: 0it [00:00, ?it/s]

fit scaler on dataset 2: 0it [00:00, ?it/s]

fit pca on dataset 2: 0it [00:00, ?it/s]

## Bring them all together

In [14]:
# Watchman families keyed by an integer family id; each value is itself a
# dict keyed by dataset id, so a detector is addressed as watchmen[w][d].
watchmen = dict(enumerate((
    limit_watchmen,
    limit_pca_watchmen,
    spe_pca_watchmen,
)))

# Learning

In [15]:
# Feed every training chunk of each dataset to that dataset's watchman
# from every family, via partial_fit.
for d, dataset in datasets.items():
    dataset.shake_not_stir(random_state=SEED)
    for data, _, _ in tqdm(dataset.train_generator(), desc=f'learn dataset {d}'):
        for family in watchmen.values():
            family[d].partial_fit(data)

learn dataset 0: 0it [00:00, ?it/s]

learn dataset 1: 0it [00:00, ?it/s]

learn dataset 2: 0it [00:00, ?it/s]

# Examine

In [16]:
# Row labels: the class name of each watchman family, read from its dataset-0 instance.
exam_index = [family[0].__class__.__name__ for family in watchmen.values()]
# Column labels: the metrics stored for every watchman.
exam_metric = ['precision', 'recall', 'f1_score']
# One empty (watchman x metric) results table per dataset id.
exam_table = {
    key: pd.DataFrame(index=exam_index, columns=exam_metric)
    for key in datasets
}

In [17]:
# Run every watchman on each dataset's test data and store the per-dataset
# mean of each metric in exam_table[d].
for d in datasets:
    datasets[d].shake_not_stir(random_state=SEED)
    # Per-family score table for this dataset: one row per test item, labelled by `info`.
    # Columns come from exam_metric so the metric list is defined in one place only.
    exam_list = {w: pd.DataFrame(columns=exam_metric, dtype='float') for w in watchmen}
    for data, faults, info in tqdm(datasets[d].test_generator(), desc=f'examine dataset {d}'):
        for w in watchmen:
            detect = watchmen[w][d].predict(data)
            exam_list[w].loc[info, exam_metric] = time_span_metrics(faults, detect)
    # exam_index was built in watchmen iteration order, so pair labels with family
    # keys by position instead of assuming the keys are valid list indices.
    for label, w in zip(exam_index, watchmen):
        # pandas .mean() skips NaN by default, so a NaN cell means no test item
        # had a defined value for that metric.
        exam_table[d].loc[label, exam_metric] = exam_list[w][exam_metric].mean()

examine dataset 0: 0it [00:00, ?it/s]

examine dataset 1: 0it [00:00, ?it/s]

examine dataset 2: 0it [00:00, ?it/s]

In [18]:
# Report: print each dataset's repr, then render its results table,
# followed by a blank line as a separator.
for d, dataset in datasets.items():
    print(dataset)
    display(exam_table[d])
    print()

GhlKasperskyDataset(E:\Datasets\GHL)


Unnamed: 0,precision,recall,f1_score
LimitWatchman,0.32572,0.643899,0.378634
LimitPcaWatchman,0.5,0.382242,0.426135
SpePcaWatchman,,0.0,0.0



TepHarvardDataset(E:\Datasets\TEP\dataverse)


Unnamed: 0,precision,recall,f1_score
LimitWatchman,0.989649,0.64215,0.676454
LimitPcaWatchman,0.991836,0.499568,0.539571
SpePcaWatchman,1.0,0.559246,0.595541



TepKasperskyDataset(E:\Datasets\TEP\kaspersky)


Unnamed: 0,precision,recall,f1_score
LimitWatchman,0.749245,0.435298,0.406428
LimitPcaWatchman,0.906977,0.243597,0.245449
SpePcaWatchman,0.976389,0.245598,0.253069



