In [1]:
import os
import sys
import warnings

# Silence every warning for the rest of the session so that library warnings
# do not interleave with the evaluation output printed by later cells.
warnings.filterwarnings("ignore")

# Resolve the parent of the current working directory only once per kernel.
# A relative `os.chdir("..")` climbs one more level every time this cell is
# re-run, which breaks the relative "configs/..." and "saved_models/..." paths
# used further down; an absolute target makes the cell idempotent.
if "dir1" not in globals():
    dir2 = os.path.abspath("")  # current working directory
    dir1 = os.path.dirname(dir2)  # its parent directory

# Expose the parent directory to the import system (the `src.*` imports in
# the following cell are resolved after this point).
if dir1 not in sys.path:
    sys.path.append(dir1)

# Work from the parent directory; re-running the cell is now a no-op.
os.chdir(dir1)

In [6]:
import numpy as np
import yaml
from src.datasets.datasets import CPDDatasets
from src.ensembles.ensembles import EnsembleCPDModel
from src.metrics.evaluation_pipelines import (
    all_cusums_evaluation_pipeline,
    evaluation_pipeline,
)
from src.metrics.metrics_utils import (
    compute_stds,
)
from torch.utils.data import DataLoader

%load_ext autoreload
%autoreload
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# HAR (human activity dataset): ensemble trained with BCE loss

In [3]:
# Experiment identity: together these two names select the YAML config file.
experiments_name = "human_activity"
model_type = "seq2seq"

path_to_config = f"configs/{experiments_name}_{model_type}.yaml"

# Parse the experiment configuration directly from the open file handle.
with open(path_to_config) as config_file:
    args_config = yaml.safe_load(config_file)

# Values set on top of whatever the file contains.
args_config.update(
    experiments_name=experiments_name,
    model_type=model_type,
    loss_type="bce",
    num_workers=2,
)

# Optional overrides, left disabled:
# args_config["learning"]["gpus"] = 1
# args_config["learning"]["epochs"] = 100

In [4]:
# Fetch both datasets for the selected experiment; only the test one is
# wrapped in a loader in this cell.
train_dataset, test_dataset = CPDDatasets(experiments_name).get_dataset_()

# Fixed sample order; batch size comes from the "learning" section of the config.
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=args_config["learning"]["batch_size"],
    shuffle=False,
)

In [5]:
# Alternative model folder, kept for reference:
# path_to_models_folder = "saved_models/bce/explosion/sgld_adam"
path_to_models_folder = "saved_models/bce/human_activity/full_sample"

# Ten-model ensemble built from the config, then populated from the folder above.
ens_bce = EnsembleCPDModel(
    args_config,
    n_models=10,
    boot_sample_size=None,
)
ens_bce.load_models_list(path_to_models_folder)

In [12]:
# Threshold grid: `threshold_number` points evenly spaced on [-5, 5], passed
# through a sigmoid, then padded with one value just below 0 and one just above 1.
threshold_number = 100
logit_grid = np.linspace(-5, 5, threshold_number)
sigmoid_grid = 1 / (1 + np.exp(-logit_grid))
threshold_list = [-0.001, *sigmoid_grid, 1.001]

# Run the evaluation pipeline for the ensemble on CPU with margins 1, 2 and 4.
evaluation_output = evaluation_pipeline(
    ens_bce,
    test_dataloader,
    threshold_list,
    device="cpu",
    model_type="ensemble",
    verbose=True,
    margin_list=[1, 2, 4],
)

# Keep the metrics and the two per-margin dictionaries; the last two returned
# items are discarded.
metrics, (max_th_f1_margins_dict, max_f1_margins_dic), _, _ = evaluation_output

  0%|          | 0/21 [00:00<?, ?it/s]

  5%|▍         | 1/21 [00:00<00:02,  9.01it/s]

Collectting model's outputs


100%|██████████| 21/21 [00:01<00:00, 16.95it/s]


TN: 0, FP: 1313, FN: 0, TP: 24, DELAY:0.0, FP_DELAY:0.0, COVER: 0.6667726252804786
TN: 0, FP: 1313, FN: 0, TP: 24, DELAY:0.0, FP_DELAY:0.0, COVER: 0.6667726252804786
TN: 0, FP: 1313, FN: 0, TP: 24, DELAY:0.0, FP_DELAY:0.0, COVER: 0.6667726252804786
TN: 0, FP: 1313, FN: 0, TP: 24, DELAY:0.0, FP_DELAY:0.0, COVER: 0.6667726252804786
TN: 0, FP: 1313, FN: 0, TP: 24, DELAY:0.0, FP_DELAY:0.0, COVER: 0.6667726252804786
TN: 0, FP: 1306, FN: 0, TP: 31, DELAY:0.0, FP_DELAY:0.052356019616127014, COVER: 0.6693305908750935
TN: 1, FP: 939, FN: 0, TP: 397, DELAY:0.0, FP_DELAY:2.6335079669952393, COVER: 0.7798021087729445
TN: 2, FP: 670, FN: 0, TP: 665, DELAY:0.0, FP_DELAY:5.219895362854004, COVER: 0.8735963463658389
TN: 6, FP: 582, FN: 0, TP: 749, DELAY:0.0, FP_DELAY:5.987285137176514, COVER: 0.9046518215099233
TN: 7, FP: 530, FN: 0, TP: 800, DELAY:0.0, FP_DELAY:6.463724613189697, COVER: 0.918633507070319
TN: 11, FP: 489, FN: 0, TP: 837, DELAY:0.0, FP_DELAY:6.753178596496582, COVER: 0.9295027987353394

In [16]:
# Run `compute_stds` on the ensemble for half-window sizes 1 through 4.
# The trailing semicolon keeps the returned value out of the cell output.
compute_stds(
    ens_bce,
    test_dataloader,
    half_windows_list=list(range(1, 5)),
    verbose=True,
    device="cpu",
);

  5%|▍         | 1/21 [00:00<00:02,  9.95it/s]

Computing model's outputs...


100%|██████████| 21/21 [00:01<00:00, 16.46it/s]


Half-window: 1
CP stds list:
Mean = 0.03207298448696197, number is 1141
Normal stds list:
Mean = 0.0164996036672979, number is 1304
p_val analytical = 5.307543072958713e-16, p_val permutational = 9.999000099990002e-05
Stds are not statistically equal
--------------------------------------------------
Half-window: 2
CP stds list:
Mean = 0.026167749766199982, number is 1141
Normal stds list:
Mean = 0.01637909608475911, number is 1268
p_val analytical = 3.2219743337247323e-07, p_val permutational = 9.999000099990002e-05
Stds are not statistically equal
--------------------------------------------------
Half-window: 3
CP stds list:
Mean = 0.023172813579505416, number is 1141
Normal stds list:
Mean = 0.016361490972081408, number is 1224
p_val analytical = 0.0004631365179694354, p_val permutational = 0.0004999500049995
Stds are not statistically equal
--------------------------------------------------
Half-window: 4
CP stds list:
Mean = 0.02135552823568852, number is 1141
Normal stds list:
M

In [None]:
# CUSUM evaluation using the same config, model folder, test loader and
# margins as the cells above, with 50 thresholds between the 0.1 and 0.9
# threshold quantiles. Metrics are not written to a file.
res = all_cusums_evaluation_pipeline(
    args_config=args_config,
    n_models=10,
    save_path=path_to_models_folder,
    test_dataloader=test_dataloader,
    threshold_number=50,
    min_th_quant=0.1,
    max_th_quant=0.9,
    margin_list=[1, 2, 4],
    # var_coeff=1.0,
    device="cpu",
    verbose=True,
    write_metrics_filename=None,
)

In [26]:
# NOTE(review): positional unpacking depends on the order and number of keys
# in the config's "cusum" section — confirm against the YAML file.
cusum_values = tuple(args_config["cusum"].values())
normal_sigma, cp_sigma, half_window = cusum_values

global_sigma = normal_sigma

# Inverse squares (1 / sigma^2) of the two sigmas read from the config.
lambda_inf = 1.0 / normal_sigma**2
lambda_null = 1.0 / cp_sigma**2

In [38]:
# NOTE(review): `collect_model_predictions_on_set` and `OutputDataset` are not
# imported in any cell shown in this notebook — add them to the import cell.
import torch

# Collect the ensemble's outputs, uncertainties and labels over the test loader.
test_out_bank, test_uncertainties_bank, test_labels_bank = (
    collect_model_predictions_on_set(
        ens_bce, test_dataloader, model_type="ensemble", device="cpu"
    )
)

out_dataset = OutputDataset(test_out_bank, test_uncertainties_bank, test_labels_bank)

# The loader is shuffled and only its first batch is consumed by the threshold
# range estimation below, so the shuffle is seeded: without a fixed generator
# every run draws a different batch and the estimated range is not reproducible.
out_dataloader = DataLoader(
    out_dataset,
    batch_size=128,
    shuffle=True,
    generator=torch.Generator().manual_seed(0),
)

 10%|▉         | 2/21 [00:00<00:01, 14.46it/s]

Collectting model's outputs


100%|██████████| 21/21 [00:01<00:00, 18.54it/s]


In [37]:
# CUSUM ensemble configured with the sigma / lambda / half-window values
# derived from the config's "cusum" section.
test_cusum_model = CusumEnsembleCPDModel(
    args=args_config,
    n_models=10,
    boot_sample_size=None,
    train_anomaly_num=None,
    global_sigma=global_sigma,
    lambda_null=lambda_null,
    lambda_inf=lambda_inf,
    half_wnd=half_window,
    var_coeff=1.0,
    cusum_threshold=0.0,
    cusum_mode="old",
    conditional=True,
)

# Take one batch from the output loader; its second item is discarded.
first_batch = next(iter(out_dataloader))
(out_series_batch, out_series_std_batch), _ = first_batch

# Estimate the threshold range from that batch using the 0.1 and 0.9 quantiles.
threshold_range = estimate_threshold_range(
    model=test_cusum_model,
    out_series_batch=out_series_batch,
    out_series_std_batch=out_series_std_batch,
    quant_min=0.1,
    quant_max=0.9,
)
min_th, max_th = threshold_range

# Display the estimated range as the cell result.
min_th, max_th

(0.0, 90.84622192382812)