In [1]:
# Windows-only shell escape: quietly (/Q) delete the files inside utils\__pycache__.
# NOTE(review): presumably this drops stale bytecode of the local `utils` package
# before it is imported below — confirm; it will not work on a non-Windows shell.
!del /Q utils\__pycache__

In [2]:
import pandas as pd
from tqdm.notebook import tqdm

In [3]:
from utils.metrics import time_span_metrics

In [4]:
from utils.datasets import GhlKasperskyDataset, TepHarvardDataset, TepKasperskyDataset, SwatItrustDataset
from utils.watchmen import LimitWatchman, LimitPcaWatchman, SpePcaWatchman, IsolatingWatchman, LinearPredictWatchman
from utils.custom_plots import plot_stacked

In [5]:
# Shared seed: passed as `random_state` to `shake_not_stir()` and to the
# IsolatingWatchman / LinearPredictWatchman constructors in the cells below.
SEED = 1729

# Preparing datasets

In [6]:
# Datasets keyed by an integer id; the same id selects the matching watchman
# inside every per-watchman dictionary defined further down.
datasets = {
    dataset_id: dataset_cls()
    for dataset_id, dataset_cls in (
        (0, GhlKasperskyDataset),
        # (1, TepHarvardDataset),  -- currently disabled
        (2, TepKasperskyDataset),
        (3, SwatItrustDataset),
    )
}

# Preparing watchmen

## LimitWatchman

In [7]:
# One LimitWatchman per dataset id, built from per-dataset constructor kwargs
# (only dataset 1 overrides a default).
limit_watchmen = {
    dataset_id: LimitWatchman(**kwargs)
    for dataset_id, kwargs in {
        0: {},
        1: {'ewma': '3 min'},
        2: {},
        3: {},
    }.items()
}

## LimitPcaWatchman

In [8]:
# One LimitPcaWatchman per dataset id; the mapping gives n_components per dataset.
limit_pca_watchmen = {
    dataset_id: LimitPcaWatchman(n_components=n_components)
    for dataset_id, n_components in {0: 3, 1: 12, 2: 3, 3: 3}.items()
}

## SpePcaWatchman

In [9]:
# One SpePcaWatchman per dataset id; the tuple position is the dataset id and
# the value is the n_components used for that dataset.
spe_pca_watchmen = {
    dataset_id: SpePcaWatchman(n_components=n_components)
    for dataset_id, n_components in enumerate((3, 12, 3, 3))
}

## IsolatingWatchman

In [10]:
# One IsolatingWatchman per dataset id; all share SEED, only max_samples varies.
isolating_watchmen = {
    dataset_id: IsolatingWatchman(max_samples=max_samples, random_state=SEED)
    for dataset_id, max_samples in {0: 256, 1: 256, 2: 1024, 3: 256}.items()
}

## LinearPredictWatchman

In [11]:
# One identically configured LinearPredictWatchman for each dataset id 0..3.
linear_predict_watchmen = {
    dataset_id: LinearPredictWatchman(random_state=SEED)
    for dataset_id in range(4)
}

## Bring them all together

In [12]:
# Two-level lookup: watchmen[watchman_id][dataset_id] -> watchman instance.
# Watchman ids 0..4 follow the order of the tuple below.
watchmen = dict(enumerate((
    limit_watchmen,
    limit_pca_watchmen,
    spe_pca_watchmen,
    isolating_watchmen,
    linear_predict_watchmen,
)))

# Learning

## Prefit

In [13]:
# Prefit pass: feed every training batch of each dataset to the watchman of
# every type that is assigned to that dataset.
for dataset_id, dataset in datasets.items():
    dataset.shake_not_stir(valid_test_ratio=0.0, random_state=SEED)
    train_batches = tqdm(dataset.train_generator(), desc=f'prefit on dataset {dataset_id}')
    for batch, _faults, _info in train_batches:
        for per_dataset in watchmen.values():
            per_dataset[dataset_id].prefit(batch)

prefit on dataset 0: 0it [00:00, ?it/s]

prefit on dataset 2: 0it [00:00, ?it/s]

prefit on dataset 3: 0it [00:00, ?it/s]

## Fit

In [14]:
# Fit pass: same traversal as the prefit cell, but calling partial_fit on
# each training batch.
for dataset_id, dataset in datasets.items():
    dataset.shake_not_stir(valid_test_ratio=0.0, random_state=SEED)
    train_batches = tqdm(dataset.train_generator(), desc=f'fit on dataset {dataset_id}')
    for batch, _faults, _info in train_batches:
        for per_dataset in watchmen.values():
            per_dataset[dataset_id].partial_fit(batch)

fit on dataset 0: 0it [00:00, ?it/s]

fit on dataset 2: 0it [00:00, ?it/s]

fit on dataset 3: 0it [00:00, ?it/s]

# Examine

In [15]:
# TODO: build this table differently!!

In [16]:
# Column labels of every result table.
exam_metric = ['precision', 'recall', 'f1_score']
# Row labels: class name of each watchman type, read from its dataset-0 instance.
exam_index = [type(per_dataset[0]).__name__ for per_dataset in watchmen.values()]
# One empty (watchman x metric) table per dataset, filled in by the next cell.
exam_table = {}
for dataset_id in datasets:
    exam_table[dataset_id] = pd.DataFrame(index=exam_index, columns=exam_metric)

In [17]:
# Score every watchman on every test item of its dataset, then store the
# per-dataset mean of each metric in exam_table.
for dataset_id, dataset in datasets.items():
    dataset.shake_not_stir(valid_test_ratio=0.0, random_state=SEED)

    # Per-watchman frame collecting one row of metrics per test item (indexed by `info`).
    exam_list = {}
    for watchman_id in watchmen:
        exam_list[watchman_id] = pd.DataFrame(columns=['precision', 'recall', 'f1_score'], dtype='float')

    test_items = tqdm(dataset.test_generator(), desc=f'examine dataset {dataset_id}')
    for data, faults, info in test_items:
        for watchman_id, per_dataset in watchmen.items():
            detect = per_dataset[dataset_id].predict(data)
            exam_list[watchman_id].loc[info, exam_metric] = time_span_metrics(faults, detect)

    # Collapse the per-item scores into one mean row per watchman.
    for watchman_id, scores in exam_list.items():
        exam_table[dataset_id].loc[exam_index[watchman_id], exam_metric] = scores[exam_metric].mean()

examine dataset 0: 0it [00:00, ?it/s]

examine dataset 2: 0it [00:00, ?it/s]

examine dataset 3: 0it [00:00, ?it/s]

In [19]:
# Show each dataset's repr followed by its table of averaged metrics.
for dataset_id, dataset in datasets.items():
    print(dataset)
    display(exam_table[dataset_id])
    print()

GhlKasperskyDataset(E:\Datasets\GHL)


Unnamed: 0,precision,recall,f1_score
LimitWatchman,0.32572,0.643899,0.378634
LimitPcaWatchman,0.369174,0.383978,0.355303
SpePcaWatchman,0.0,0.0,0.0
IsolatingWatchman,0.016294,0.970982,0.031934
LinearPredictWatchman,0.013827,1.0,0.027189



TepKasperskyDataset(E:\Datasets\TEP\kaspersky)


Unnamed: 0,precision,recall,f1_score
LimitWatchman,0.742059,0.433985,0.404575
LimitPcaWatchman,0.788462,0.243649,0.235529
SpePcaWatchman,0.976871,0.245566,0.253022
IsolatingWatchman,0.075873,1.0,0.116781
LinearPredictWatchman,0.352244,0.478348,0.250919



SwatItrustDataset(E:\Datasets\SWaT\dataset12)


Unnamed: 0,precision,recall,f1_score
LimitWatchman,0.3,1.0,0.461538
LimitPcaWatchman,0.285714,0.333333,0.307692
SpePcaWatchman,,0.0,0.0
IsolatingWatchman,0.3,1.0,0.461538
LinearPredictWatchman,0.25,0.666667,0.363636



