In [1]:
import shutil  # imported here because this cell runs before the notebook's import cell

# Drop the bytecode cache of the local `utils` package so the imports below
# recompile it from source. Pure Python instead of the Windows-only `del`
# shell command, so the cell also runs on Linux/macOS kernels; a missing
# cache directory is silently ignored.
shutil.rmtree('utils/__pycache__', ignore_errors=True)

In [1]:
from collections import defaultdict

import pandas as pd
from tqdm.notebook import tqdm

In [3]:
from utils.datasets import GhlKasperskyDataset, TepHarvardDataset, TepKasperskyDataset, SwatItrustDataset
from utils.metrics import time_span_metrics
from utils.watchmen import WatchSquad

# Preparing datasets

In [4]:
# Benchmark datasets, keyed by their position (0..3) in this tuple.
datasets = dict(enumerate((
    GhlKasperskyDataset(),
    TepHarvardDataset(),
    TepKasperskyDataset(),
    SwatItrustDataset(),
)))

In [5]:
# Call `shake_not_stir()` once on every dataset, in insertion order.
for d, dataset in datasets.items():
    dataset.shake_not_stir()

# Preparing watchhouse

In [6]:
# One fresh WatchSquad per dataset key (0..3); later cells pair
# `squads[d]` with `datasets[d]`.
squads = {index: WatchSquad() for index in range(4)}

## You can fit the squads ...

In [None]:
# First of the three training passes: hand every training chunk of
# dataset `d` to the `prefit` step of the squad paired with it.
for d in tqdm(datasets):
    squad = squads[d]
    for data, _, _ in datasets[d].train_generator():
        squad.prefit(data)

In [None]:
# Second training pass: replay the training chunks of dataset `d`
# through the `fit` step of its squad.
for d in tqdm(datasets):
    squad = squads[d]
    for data, _, _ in datasets[d].train_generator():
        squad.fit(data)

In [None]:
# Third training pass: replay the training chunks of dataset `d`
# through the `postfit` step of its squad.
for d in tqdm(datasets):
    squad = squads[d]
    for data, _, _ in datasets[d].train_generator():
        squad.postfit(data)

In [None]:
# Dump every squad under the class name of the dataset it belongs to
# (e.g. 'GhlKasperskyDataset').
for d in tqdm(datasets):
    dump_name = datasets[d].__class__.__name__
    squads[d].dump(dump_name)

## ... or just load previously dumped squads

In [8]:
# Load every squad from the name used by the dump cell: the class name
# of the dataset it belongs to.
for d in tqdm(datasets):
    load_name = datasets[d].__class__.__name__
    squads[d].load(load_name)

  0%|          | 0/4 [00:00<?, ?it/s]

# Examine

## Throw stones

In [10]:
# stones[d][w] collects one frame per test run for watchman `w` on dataset `d`.
# NOTE(review): `watchhouse` is not defined in any cell visible here (the
# cells above build `squads`) — confirm where it comes from before relying
# on a fresh-kernel run.
stones = {}
for d in datasets:
    per_watchman = stones.setdefault(d, {})
    for w in watchhouse[d]:
        per_watchman[w] = []

In [None]:
# Run every watchman of dataset `d` over each test chunk and keep the
# ground-truth faults side by side with its detections.
# NOTE(review): `watchhouse` is not defined in any cell visible here — confirm.
for d in datasets:
    progress = tqdm(datasets[d].test_generator(), desc=f'Detect on test {d}')
    for data, faults, info in progress:
        for w in watchhouse[d]:
            detect = watchhouse[d][w].predict(data)
            # Faults first, detections after: the scoring cell relies on this column order.
            labelled = pd.concat([faults, detect], axis=1)
            stones[d][w].append(labelled)
            # Tag the stored frame with the chunk's `info` via its index name.
            labelled.index.name = info

Detect on test 0:   0%|          | 0/48 [00:00<?, ?it/s]

Detect on test 1:   0%|          | 0/10500 [00:00<?, ?it/s]

## Results

In [None]:
# Metric columns reported for every watchman.
metrics = ('precision', 'recall', 'f1_score')

# One results table per dataset, created on first access and keyed by the
# dataset's name. The cells below index `results[<dataset name>]` and expect a
# DataFrame with the metric columns (rows are added per watchman), so a single
# flat DataFrame cannot serve here: looking up a dataset name in it is a
# column lookup that raises KeyError.
results = defaultdict(lambda: pd.DataFrame(columns=metrics))

### Individual

In [None]:
# Score every watchman of every dataset: compute the span metrics for each
# collected test run, then store the mean over runs as that watchman's row
# in the dataset's results table.
for d in datasets:
    # Iterate the watchmen that have detections stored for this dataset.
    # Without this loop `w` was only whatever value an earlier cell's loop
    # left behind, so at most one watchman per dataset was ever scored.
    for w in stones[d]:
        exam_paper = pd.DataFrame(columns=metrics)
        for i_st, st in enumerate(stones[d][w]):
            # First column is the ground truth, the remaining columns are the
            # detections (assuming `faults` was a single column when the
            # frames were concatenated — TODO confirm).
            exam_paper.loc[i_st, metrics] = time_span_metrics(st.iloc[:, 0], st.iloc[:, 1:])
        # NOTE(review): `watchhouse` is not defined in any cell visible here — confirm.
        # `results` is indexed by dataset name first, so it must map each
        # name to a per-dataset table with the metric columns.
        results[str(datasets[d])].loc[str(watchhouse[d][w]), metrics] = exam_paper.mean().values

### Let's see

In [None]:
# Show each entry of `results` under its key, best F1 score first.
for d, table in results.items():
    print(d)
    display(table.sort_values(by='f1_score', ascending=False))