In [1]:
!del /Q utils\__pycache__

In [2]:
import pandas as pd
from tqdm.notebook import tqdm

In [3]:
from adtk.metrics import precision, recall, f1_score

In [4]:
from utils.datasets import GhlKasperskyDataset, TepHarvardDataset
from utils.watchmen import LimitWatchman, LimitPcaWatchman, SpePcaWatchman
from utils.custom_plots import plot_stacked

# Preparing datasets

## GHL

In [5]:
ghl = GhlKasperskyDataset()
ghl

GhlKasperskyDataset(E:\Datasets\GHL)

## TEP Harvard

In [6]:
tep1 = TepHarvardDataset()
tep1

TepHarvardDataset(E:\Datasets\TEP\dataverse)

## Bring them all

In [7]:
datasets = {
    0: ghl,
    1: tep1,
}

# Preparing watchmen

## LimitWatchman

In [8]:
# using datasets keys
limit_watchmen = {
    0: LimitWatchman(),
    1: LimitWatchman(ewma='3 min'),
}

## LimitPcaWatchman

In [9]:
# using datasets keys
limit_pca_watchmen = {
    0: LimitPcaWatchman(n_components=1),
    1: LimitPcaWatchman(n_components=12),
}

In [10]:
for d in datasets:
    datasets[d].shake_not_stir()
    for data, faults, info in tqdm(datasets[d].train_generator(), desc=f'fit scaler on dataset {d}'):
        limit_pca_watchmen[d].partial_fit_scaler(data)
    for data, faults, info in tqdm(datasets[d].train_generator(), desc=f'fit pca on dataset {d}'):
        limit_pca_watchmen[d].partial_fit_pca(data)        

fit scaler on dataset 0: 0it [00:00, ?it/s]

fit pca on dataset 0: 0it [00:00, ?it/s]

fit scaler on dataset 1: 0it [00:00, ?it/s]

fit pca on dataset 1: 0it [00:00, ?it/s]

## SpePcaWatchman

In [11]:
# using datasets keys
spe_pca_watchmen = {
    0: SpePcaWatchman(n_components=1),
    1: SpePcaWatchman(n_components=12),
}

In [12]:
for d in datasets:
    datasets[d].shake_not_stir()
    for data, _, _ in tqdm(datasets[d].train_generator(), desc=f'fit scaler on dataset {d}'):
        spe_pca_watchmen[d].partial_fit_scaler(data)
    for data, _, _ in tqdm(datasets[d].train_generator(), desc=f'fit pca on dataset {d}'):
        spe_pca_watchmen[d].partial_fit_pca(data)        

fit scaler on dataset 0: 0it [00:00, ?it/s]

fit pca on dataset 0: 0it [00:00, ?it/s]

fit scaler on dataset 1: 0it [00:00, ?it/s]

fit pca on dataset 1: 0it [00:00, ?it/s]

## Bring them all

In [13]:
watchmen = {
    0: limit_watchmen,
    1: limit_pca_watchmen,
    2: spe_pca_watchmen,
}

# Learning

In [14]:
for d in datasets:
    datasets[d].shake_not_stir()
    for data, _, _ in tqdm(datasets[d].train_generator(), desc=f'learn dataset {d}'):
        for w in watchmen:
            watchmen[w][d].learn(data)

learn dataset 0: 0it [00:00, ?it/s]

learn dataset 1: 0it [00:00, ?it/s]

# Examine

In [15]:
exam_index = [watchmen[w][0].__class__.__name__ for w in watchmen]
exam_metric = ['precision', 'recall', 'f1_score']
exam_table = {d: pd.DataFrame(index=exam_index, columns=exam_metric) for d in datasets}

In [16]:
for d in datasets:
    datasets[d].shake_not_stir()
    exam_list = {w: pd.DataFrame(columns=['precision', 'recall', 'f1_score'], dtype='float') for w in watchmen}
    for data, faults, info in tqdm(datasets[d].test_generator(), desc=f'examine dataset {d}'):
        for w in watchmen:
            detect = watchmen[w][d].examine(data) # dataframe or series
            if isinstance(detect, pd.DataFrame):
                detect = detect.sum(axis=1) # dataframe to series
            exam_list[w].loc[info, 'precision'] = precision(faults, detect)
            exam_list[w].loc[info, 'recall'] = recall(faults, detect)
            exam_list[w].loc[info, 'f1_score'] = f1_score(faults, detect)
    for w in watchmen:
        exam_table[d].loc[exam_index[w], 'precision'] = exam_list[w]['precision'].mean()
        exam_table[d].loc[exam_index[w], 'recall'] = exam_list[w]['recall'].mean()
        exam_table[d].loc[exam_index[w], 'f1_score'] = exam_list[w]['f1_score'].mean()

examine dataset 0: 0it [00:00, ?it/s]

examine dataset 1: 0it [00:00, ?it/s]

In [18]:
for d in datasets:
    print(datasets[d])
    display(exam_table[d])

GhlKasperskyDataset(E:\Datasets\GHL)


Unnamed: 0,precision,recall,f1_score
LimitWatchman,0.354426,0.35489,0.362392
LimitPcaWatchman,0.5,0.2572,0.664033
SpePcaWatchman,,0.0,


TepHarvardDataset(E:\Datasets\TEP\dataverse)


Unnamed: 0,precision,recall,f1_score
LimitWatchman,0.896402,0.623058,0.748686
LimitPcaWatchman,0.914768,0.490085,0.64627
SpePcaWatchman,1.0,0.54021,0.723497
