In [12]:
# Shell escape: Windows `del` in quiet mode (/Q) on the files inside
# utils\__pycache__.
# NOTE(review): presumably here so that edited `utils` modules are recompiled
# on the next import; this line only works in a Windows shell - confirm it is
# still needed.
!del /Q utils\__pycache__

In [13]:
import pandas as pd
from tqdm.notebook import tqdm

In [14]:
from utils.datasets import GhlKasperskyDataset, TepHarvardDataset, TepKasperskyDataset, SwatItrustDataset
from utils.custom_plots import plot_stacked
from utils.metrics import time_span_metrics
from utils.watchmen import LimitWatchman, LimitPcaWatchman, SpePcaWatchman, IsolatingWatchman, LinearPredictWatchman

In [15]:
# One seed for the whole notebook: it is used as `random_state` in
# `shake_kwargs` and in the IsolatingWatchman / LinearPredictWatchman
# constructors.
SEED: int = 1729

# Preparing datasets

In [16]:
# Datasets keyed by position (0..3). The `watchmen` table uses the same keys,
# so the order of this tuple must stay in sync with it.
datasets = dict(enumerate((
    GhlKasperskyDataset(),
    TepHarvardDataset(),
    TepKasperskyDataset(),
    SwatItrustDataset(),
)))

In [19]:
# Options handed to every `shake_not_stir` call in the cells below.
shake_kwargs = dict(
    random_state=SEED,
    valid_test_ratio=0.3,
)

# Preparing watchmen

In [17]:
def _build_watchmen(n_components, max_samples, **limit_kwargs):
    """Create the five watchmen used for one dataset.

    Parameters
    ----------
    n_components
        Forwarded to both LimitPcaWatchman and SpePcaWatchman.
    max_samples
        Forwarded to IsolatingWatchman.
    **limit_kwargs
        Forwarded unchanged to LimitWatchman (empty means its defaults).

    Returns
    -------
    dict
        Watchman instances keyed 0..4, in a fixed order.
    """
    return {
        0: LimitWatchman(**limit_kwargs),
        1: LimitPcaWatchman(n_components=n_components),
        2: SpePcaWatchman(n_components=n_components),
        3: IsolatingWatchman(max_samples=max_samples, random_state=SEED),
        4: LinearPredictWatchman(random_state=SEED),
    }


# Outer key = dataset id (same keys as `datasets`), inner key = watchman id.
watchmen = {
    0: _build_watchmen(n_components=3, max_samples=256),  # GhlKasperskyDataset
    1: _build_watchmen(n_components=12, max_samples=256, ewma='3 min'),  # TepHarvardDataset
    2: _build_watchmen(n_components=3, max_samples=1024),  # TepKasperskyDataset
    3: _build_watchmen(n_components=3, max_samples=256),  # SwatItrustDataset
}

# Learning

## Prefit

In [20]:
# First pass over the training data: every watchman of a dataset gets
# `prefit` called on each training batch of that dataset.
for d, dataset in datasets.items():
    # NOTE(review): `shake_kwargs` is passed as one positional dict, not
    # unpacked with ** - confirm that this is what `shake_not_stir` expects.
    dataset.shake_not_stir(shake_kwargs)
    train_batches = tqdm(dataset.train_generator(), desc=f'Prefit on train {d}')
    for data, _, _ in train_batches:
        for watchman in watchmen[d].values():
            watchman.prefit(data)

Prefit on train 0: 0it [00:00, ?it/s]

Prefit on train 1: 0it [00:00, ?it/s]

Prefit on train 2: 0it [00:00, ?it/s]

Prefit on train 3: 0it [00:00, ?it/s]

## Fit

In [21]:
# Second pass over the training data: every watchman of a dataset gets
# `partial_fit` called on each training batch of that dataset.
for d, dataset in datasets.items():
    # Same kwargs (and therefore the same random_state) as in the prefit cell.
    # NOTE(review): `shake_kwargs` is passed as one positional dict, not
    # unpacked with ** - confirm that this is what `shake_not_stir` expects.
    dataset.shake_not_stir(shake_kwargs)
    train_batches = tqdm(dataset.train_generator(), desc=f'Fit on train {d}')
    for data, _, _ in train_batches:
        for watchman in watchmen[d].values():
            watchman.partial_fit(data)

Fit on train 0: 0it [00:00, ?it/s]

Fit on train 1: 0it [00:00, ?it/s]

Fit on train 2: 0it [00:00, ?it/s]

Fit on train 3: 0it [00:00, ?it/s]

# Examine

## Throw stones

In [39]:
# stones[dataset id][watchman id] -> list that the detection cell fills with
# one frame per validation sample. Every list starts out empty.
stones = {
    d: {w: list() for w in watchmen[d]}
    for d in datasets
}

In [24]:
# Run every watchman on every validation sample and keep, per sample, a frame
# with the fault labels first and the watchman's detections after them.
for d, dataset in datasets.items():
    dataset.shake_not_stir(shake_kwargs)
    # Start this dataset from empty lists. Without the reset, re-running the
    # cell (for example after interrupting it) appends every frame a second
    # time and the duplicates end up in the metrics.
    stones[d] = {w: [] for w in watchmen[d]}
    for data, faults, info in tqdm(dataset.valid_generator(), desc=f'Detect on validation {d}'):
        for w, watchman in watchmen[d].items():
            detect = watchman.predict(data)
            stone = pd.concat([faults, detect], axis=1)
            # The sample identifier travels with the frame as its index name.
            stone.index.name = info
            stones[d][w].append(stone)

Detect on validation 0: 0it [00:00, ?it/s]

Detect on validation 1: 0it [00:00, ?it/s]

KeyboardInterrupt: 

## Individual results

In [None]:
# Build one results table per dataset: rows are watchman class names, columns
# are the metrics. This is the layout of the saved output of the display cell
# below.
#
# NOTE(review): the previous version of this cell could not run - two
# assignments had no right-hand side. The completion below rests on two
# assumptions that must be confirmed against utils.metrics:
#   * `time_span_metrics(faults, detections)` returns precision, recall and
#     f1_score for one sample (as a 3-tuple, or a dict / Series with those
#     keys) - TODO confirm;
#   * the per-dataset figure is the mean over validation samples - TODO confirm.
metric_names = ['precision', 'recall', 'f1_score']

indi_results = dict()
for d in datasets:
    # Float frame pre-filled with NaN; a watchman without samples stays NaN.
    exam_paper = pd.DataFrame(
        index=[type(watchmen[d][w]).__name__ for w in watchmen[d]],
        columns=metric_names,
        dtype=float,
    )
    for row, w in enumerate(watchmen[d]):
        # Column 0 of each stone holds the fault labels, the rest detections.
        scores = [
            time_span_metrics(st.iloc[:, 0], st.iloc[:, 1:])
            for st in stones[d][w]
        ]
        if scores:
            # Positional row write, so duplicate class names cannot collide.
            exam_paper.iloc[row] = (
                pd.DataFrame(scores, columns=metric_names).mean().to_numpy()
            )

    indi_results[str(datasets[d])] = exam_paper


In [19]:
# Show each dataset's results table under a plain-text heading with its key.
for dataset_title, results_table in indi_results.items():
    print(dataset_title)
    display(results_table)

GhlKasperskyDataset(E:\Datasets\GHL)


Unnamed: 0,precision,recall,f1_score
LimitWatchman,0.32572,0.643899,0.378634
LimitPcaWatchman,0.369174,0.383978,0.355303
SpePcaWatchman,0.0,0.0,0.0
IsolatingWatchman,0.016294,0.970982,0.031934
LinearPredictWatchman,0.013827,1.0,0.027189



TepKasperskyDataset(E:\Datasets\TEP\kaspersky)


Unnamed: 0,precision,recall,f1_score
LimitWatchman,0.742059,0.433985,0.404575
LimitPcaWatchman,0.788462,0.243649,0.235529
SpePcaWatchman,0.976871,0.245566,0.253022
IsolatingWatchman,0.075873,1.0,0.116781
LinearPredictWatchman,0.352244,0.478348,0.250919



SwatItrustDataset(E:\Datasets\SWaT\dataset12)


Unnamed: 0,precision,recall,f1_score
LimitWatchman,0.3,1.0,0.461538
LimitPcaWatchman,0.285714,0.333333,0.307692
SpePcaWatchman,,0.0,0.0
IsolatingWatchman,0.3,1.0,0.461538
LinearPredictWatchman,0.25,0.666667,0.363636





In [16]:
# Metric columns reported for every watchman.
exam_metric = ['precision', 'recall', 'f1_score']

# `watchmen` is keyed by dataset first and by watchman second. The row labels
# therefore come from the watchmen of ONE dataset; indexing `watchmen[w][0]`
# over the outer keys would return the first watchman of every dataset, i.e.
# the same class name repeated once per dataset.
exam_index = [
    type(watchman).__name__
    for watchman in next(iter(watchmen.values()), {}).values()
]

# One empty (NaN-filled) table per dataset, with rows taken from that
# dataset's own watchmen.
exam_table = {
    d: pd.DataFrame(
        index=[type(watchman).__name__ for watchman in watchmen[d].values()],
        columns=exam_metric,
    )
    for d in datasets
}