In [1]:
# Remove stale bytecode from utils/__pycache__ before the project modules are imported.
# Pure-Python replacement for the Windows-only `!del /Q utils\__pycache__`, so the
# notebook also runs on Linux/macOS. Like `del`, it removes only the files directly
# inside the directory and leaves the directory itself in place.
from pathlib import Path

pycache_dir = Path('utils') / '__pycache__'
if pycache_dir.is_dir():  # nothing to clean on a fresh checkout
    for cached_file in pycache_dir.glob('*'):
        if cached_file.is_file():
            cached_file.unlink()

In [2]:
import pandas as pd
from tqdm.notebook import tqdm

In [3]:
# from adtk.metrics import precision, recall, f1_score

In [4]:
from utils.metrics import time_span_metrics

In [5]:
from utils.datasets import GhlKasperskyDataset, TepHarvardDataset
from utils.watchmen import LimitWatchman, LimitPcaWatchman, SpePcaWatchman
from utils.custom_plots import plot_stacked

# Preparing datasets

## GHL

In [6]:
# Instantiate the GHL dataset wrapper with its default arguments; the bare name on
# the last line renders the object's repr as the cell output.
# NOTE(review): the repr shows an absolute local path (E:\Datasets\GHL), apparently a
# default inside utils.datasets — consider making the data root configurable.
ghl = GhlKasperskyDataset()
ghl

GhlKasperskyDataset(E:\Datasets\GHL)

## TEP Harvard

In [7]:
# Instantiate the TEP (Harvard dataverse) dataset wrapper with its default arguments;
# the bare name on the last line renders the object's repr as the cell output.
# NOTE(review): the repr shows an absolute local path (E:\Datasets\TEP\dataverse),
# apparently a default inside utils.datasets — consider making the data root configurable.
tep1 = TepHarvardDataset()
tep1

TepHarvardDataset(E:\Datasets\TEP\dataverse)

## Bring them all

In [8]:
# Registry of datasets keyed by an integer id (0 = GHL, 1 = TEP). The per-dataset
# watchman dicts defined later reuse these ids, so a detector is paired with its
# dataset by looking both up with the same key.
datasets = {
    0: ghl,
    1: tep1,
}

# Preparing watchmen

## LimitWatchman

In [9]:
# One LimitWatchman per dataset, keyed by the same integer ids as `datasets`.
# NOTE(review): only the TEP detector (key 1) is given ewma='3 min' — presumably a
# smoothing window applied before the limit check; confirm in utils.watchmen.
limit_watchmen = {
    0: LimitWatchman(),
    1: LimitWatchman(ewma='3 min'),
}

## LimitPcaWatchman

In [10]:
# One LimitPcaWatchman per dataset, keyed by the same integer ids as `datasets`.
# NOTE(review): n_components (1 for GHL, 12 for TEP) is hard-coded with no stated
# rationale — presumably the number of retained principal components; confirm.
limit_pca_watchmen = {
    0: LimitPcaWatchman(n_components=1),
    1: LimitPcaWatchman(n_components=12),
}

In [11]:
# Two-pass fit of the LimitPcaWatchman detectors: for each dataset the scaler is
# fitted incrementally over the whole training stream first, and only then the PCA,
# which is why train_generator() is consumed twice.
for d in datasets:
    datasets[d].shake_not_stir()
    # Only the data item is needed for fitting; the other two values yielded by
    # train_generator() are discarded, matching the SpePcaWatchman fitting cell.
    for data, _, _ in tqdm(datasets[d].train_generator(), desc=f'fit scaler on dataset {d}'):
        limit_pca_watchmen[d].partial_fit_scaler(data)
    for data, _, _ in tqdm(datasets[d].train_generator(), desc=f'fit pca on dataset {d}'):
        limit_pca_watchmen[d].partial_fit_pca(data)

fit scaler on dataset 0: 0it [00:00, ?it/s]

fit pca on dataset 0: 0it [00:00, ?it/s]

fit scaler on dataset 1: 0it [00:00, ?it/s]

fit pca on dataset 1: 0it [00:00, ?it/s]

## SpePcaWatchman

In [12]:
# One SpePcaWatchman per dataset, keyed by the same integer ids as `datasets`.
# NOTE(review): n_components (1 for GHL, 12 for TEP) mirrors the LimitPcaWatchman
# settings and is hard-coded with no stated rationale — confirm the choice.
spe_pca_watchmen = {
    0: SpePcaWatchman(n_components=1),
    1: SpePcaWatchman(n_components=12),
}

In [13]:
# Fit the SpePcaWatchman detectors in two passes per dataset: scaler first, then PCA.
# Each pass consumes a fresh train_generator(); only the data item of each yielded
# triple is used.
for d, dataset in datasets.items():
    dataset.shake_not_stir()
    watchman = spe_pca_watchmen[d]
    scaler_pass = tqdm(dataset.train_generator(), desc=f'fit scaler on dataset {d}')
    for data, _, _ in scaler_pass:
        watchman.partial_fit_scaler(data)
    pca_pass = tqdm(dataset.train_generator(), desc=f'fit pca on dataset {d}')
    for data, _, _ in pca_pass:
        watchman.partial_fit_pca(data)

fit scaler on dataset 0: 0it [00:00, ?it/s]

fit pca on dataset 0: 0it [00:00, ?it/s]

fit scaler on dataset 1: 0it [00:00, ?it/s]

fit pca on dataset 1: 0it [00:00, ?it/s]

## Bring them all

In [14]:
# Registry of all detector families, keyed by an integer family id. Each value is
# itself a dict keyed by dataset id, so a single detector is addressed as
# watchmen[w][d].
watchmen = {
    0: limit_watchmen,
    1: limit_pca_watchmen,
    2: spe_pca_watchmen,
}

# Learning

In [15]:
# Pass every training item of each dataset to learn() on all detectors that
# belong to that dataset, one detector family after another.
for d, dataset in datasets.items():
    dataset.shake_not_stir()
    training_stream = tqdm(dataset.train_generator(), desc=f'learn dataset {d}')
    for data, _, _ in training_stream:
        for family in watchmen.values():
            family[d].learn(data)

learn dataset 0: 0it [00:00, ?it/s]

learn dataset 1: 0it [00:00, ?it/s]

# Examine

In [16]:
# Column labels of the result tables.
exam_metric = ['precision', 'recall', 'f1_score']
# Row labels: the class name of each detector family, taken from its dataset-0 member
# and listed in the iteration order of `watchmen`.
exam_index = [family[0].__class__.__name__ for family in watchmen.values()]
# One empty (all-NaN) result table per dataset, filled in by the examine step.
exam_table = {
    dataset_id: pd.DataFrame(index=exam_index, columns=exam_metric)
    for dataset_id in datasets
}

In [17]:
# Evaluate every detector on every test item and store the per-dataset mean scores
# in exam_table[d].
for d in datasets:
    datasets[d].shake_not_stir()
    # Per-family scratch table: one row per test item (labelled by `info`), one column
    # per metric. Columns come from `exam_metric` so they cannot drift from the
    # labels used in the assignments below.
    exam_list = {w: pd.DataFrame(columns=exam_metric, dtype='float') for w in watchmen}
    for data, faults, info in tqdm(datasets[d].test_generator(), desc=f'examine dataset {d}'):
        for w in watchmen:
            detect = watchmen[w][d].examine(data)  # dataframe or series
            exam_list[w].loc[info, exam_metric] = time_span_metrics(faults, detect)
    # `exam_index` was built by iterating `watchmen` in order, so pair row labels with
    # keys positionally instead of using the key itself as a list index (which only
    # works while the keys happen to be 0..n-1).
    for row_label, w in zip(exam_index, watchmen):
        # DataFrame.mean() skips NaN by default; a metric that is NaN for every test
        # item therefore stays NaN in the summary table.
        exam_table[d].loc[row_label, exam_metric] = exam_list[w][exam_metric].mean()

examine dataset 0: 0it [00:00, ?it/s]

examine dataset 1: 0it [00:00, ?it/s]

In [18]:
# Report: for each dataset print its description, render its score table, then
# print an empty line as a separator.
for d, dataset in datasets.items():
    print(dataset)
    scores = exam_table[d]
    display(scores)
    print()

GhlKasperskyDataset(E:\Datasets\GHL)


Unnamed: 0,precision,recall,f1_score
LimitWatchman,0.32572,0.643899,0.378634
LimitPcaWatchman,0.5,0.382242,0.426135
SpePcaWatchman,,0.0,0.0



TepHarvardDataset(E:\Datasets\TEP\dataverse)


Unnamed: 0,precision,recall,f1_score
LimitWatchman,0.895785,0.654288,0.630264
LimitPcaWatchman,0.914738,0.531437,0.530534
SpePcaWatchman,1.0,0.593725,0.627698



