In [1]:
import os
import sys
import warnings

# Silence every warning so that library notices do not clutter the cell outputs.
warnings.filterwarnings("ignore")

# Resolve the notebook's own directory only once per kernel session. This cell
# changes the working directory below, so recomputing the path on a re-run
# would climb one level higher every time the cell is executed.
try:
    dir2 = _NOTEBOOK_DIR
except NameError:
    dir2 = _NOTEBOOK_DIR = os.path.abspath("")

# Parent of the notebook directory; later cells import `src.*` and open
# `configs/...` / `saved_models/...` relative to it.
dir1 = os.path.dirname(dir2)
if dir1 not in sys.path:
    sys.path.append(dir1)

# Switch to the parent directory through its absolute path (same target as
# `os.chdir("..")` on the first run, but stable when the cell is re-executed).
os.chdir(dir1)

In [2]:
import numpy as np
from scipy.stats import wasserstein_distance, wasserstein_distance_nd

In [6]:
# Two independent 1-D samples of ten standard-normal draws each
# (`a` is drawn first, then `b`).
sample_size = 10
a, b = (np.random.randn(sample_size) for _ in range(2))

# 1-D Wasserstein distance between the two empirical samples.
wasserstein_distance(u_values=a, v_values=b)

0.5098217169106859

In [7]:
c = np.random.randn(10, 4)  # (n_samples, dim_size)
d = np.random.randn(10, 4)

wasserstein_distance_nd(c, d)

2.2806969028909956

In [4]:
# Small integer array with two rows and three columns, used to check how
# `.shape` reports the dimensions.
rows = ([0, 2, 3], [1, 2, 5])
e = np.array(rows)
e.shape

(2, 3)

In [2]:
import itertools

import numpy as np
import yaml
from src.datasets.datasets import AllModelsOutputDataset, CPDDatasets
from src.ensembles.ensembles import (
    EnsembleCPDModel,
)
from src.metrics.evaluation_pipelines import evaluate_distance_ensemble_model
from src.metrics.metrics_utils import collect_model_predictions_on_set
from torch.utils.data import DataLoader

%load_ext autoreload
%autoreload
%matplotlib inline

comet_ml is installed but `COMET_API_KEY` is not set.


# BCE Explosion

In [1]:
import torch

In [4]:
# Random scores shaped batch_size x seq_len.
a = torch.randn((10, 50))

# 5% quantile; with no `dim` given it is taken over all elements of the tensor.
a.quantile(0.05)

tensor(-1.5627)

In [3]:
# Experiment setup: seq2seq model trained with BCE loss on the "explosion" data.
model_type = "seq2seq"
experiments_name = "explosion"

# The video configs are stored as configs/video_<model_type>.yaml.
path_to_config = f"configs/video_{model_type}.yaml"

with open(path_to_config, "r") as config_file:
    args_config = yaml.safe_load(config_file)

# Top-level overrides applied on top of the values read from the YAML file.
args_config.update(
    {
        "experiments_name": experiments_name,
        "model_type": model_type,
        "loss_type": "bce",
        "num_workers": 4,
    }
)

# Overrides inside the nested "learning" section.
args_config["learning"].update({"gpus": 1, "epochs": 100})

In [4]:
# Build the train/test datasets for the selected experiment; only the test
# dataset is wrapped in a loader in this cell.
train_dataset, test_dataset = CPDDatasets(experiments_name).get_dataset_()

# Batch size comes from the config; shuffling is off so the sample order is kept.
test_loader_kwargs = {
    "batch_size": args_config["learning"]["batch_size"],
    "shuffle": False,
}
test_dataloader = DataLoader(test_dataset, **test_loader_kwargs)

In [5]:
# Folder holding the ensemble checkpoints used in this section.
# Alternative folder tried earlier: "saved_models/bce/explosion/sgld_adam".
path_to_models_folder = "saved_models/bce/explosion/layer_norm/train_anomaly_num_155"

# Ensemble of ten models with `boot_sample_size` left unset; its members are
# then loaded from the folder above.
ensemble_kwargs = {"n_models": 10, "boot_sample_size": None}
ens_bce = EnsembleCPDModel(args_config, **ensemble_kwargs)
ens_bce.load_models_list(path_to_models_folder)

In [6]:
# Collect the predictions of the ensemble on the test loader; the middle
# element of the returned triple is not needed here.
collect_kwargs = {"model_type": "ensemble_all_models", "device": "cuda:1"}
test_out_bank, _, test_labels_bank = collect_model_predictions_on_set(
    ens_bce, test_dataloader, **collect_kwargs
)

# Wrap the collected outputs and labels so they can be batched again.
all_models_out_dataset = AllModelsOutputDataset(test_out_bank, test_labels_bank)
all_models_out_dataloader = DataLoader(
    all_models_out_dataset,
    batch_size=128,
    shuffle=False,
)

# Sanity check: shapes of the first item (predictions, labels).
preds, labels = all_models_out_dataset[0]
(preds.shape, labels.shape)

  0%|          | 0/20 [00:00<?, ?it/s]

Collectting model's outputs


100%|██████████| 20/20 [02:40<00:00,  8.01s/it]


(torch.Size([10, 16]), torch.Size([16]))

In [None]:
# Results of the sweep, keyed by (window_size, anchor_window_type).
res_dict = {}

# Swept hyper-parameters and the thresholds evaluated for each combination.
window_size_list = [1, 2, 3]
anchor_window_type_list = ["start", "prev"]
threshold_list = np.linspace(0, 3, 25)

sweep_grid = itertools.product(window_size_list, anchor_window_type_list)

for window_size, anchor_window_type in sweep_grid:
    print(f"window_size = {window_size}, anchor_window_type = {anchor_window_type}")

    # Evaluate one configuration over all thresholds.
    res, best_th = evaluate_distance_ensemble_model(
        window_size=window_size,
        anchor_window_type=anchor_window_type,
        # kernel="rbf",
        distance="wasserstein_nd",
        threshold_list=threshold_list,
        margin_list=[1, 2, 4],
        output_dataloader=all_models_out_dataloader,
        args_config=args_config,
        n_models=10,
        save_path=path_to_models_folder,
        device="cpu",
        verbose=False,
    )

    # Keep only the entry that belongs to the best threshold.
    config_key = (window_size, anchor_window_type)
    res_dict[config_key] = res[best_th]

In [17]:
# Display the sweep results: one entry per (window_size, anchor_window_type)
# key, holding `res[best_th]` for that configuration.
# NOTE(review): each value looks like (AUDC, time to FA, DD, F1, cover,
# max cover, {margin: max F1}) judging by the verbose log printed elsewhere in
# this notebook — confirm against evaluate_distance_ensemble_model.
res_dict

{(1, 'start'): (0.0,
  15.7396821975708,
  0.20317460596561432,
  0.72,
  0.9828878974861117,
  0.9828878974861117,
  {1: 0.11764705882352941, 2: 0.5454545454545454, 4: 0.6086956521739131}),
 (1, 'prev'): (0.0,
  15.688888549804688,
  0.1428571492433548,
  0.6923076923076923,
  0.9866544577556483,
  0.9866544577556483,
  {1: 0.5833333333333334, 2: 0.5833333333333334, 4: 0.6923076923076923}),
 (2, 'start'): (0.0,
  15.777777671813965,
  0.21587301790714264,
  0.6956521739130435,
  0.9831601516869374,
  0.9831601516869374,
  {1: 0.0, 2: 0.42105263157894735, 4: 0.5714285714285714}),
 (2, 'prev'): (0.0,
  15.666666984558105,
  0.13650794327259064,
  0.7407407407407407,
  0.9859919131794131,
  0.9859919131794131,
  {1: 0.38095238095238093, 2: 0.64, 4: 0.7857142857142857}),
 (3, 'start'): (0.0,
  15.7619047164917,
  0.22857142984867096,
  0.6666666666666666,
  0.9809333969601828,
  0.9809333969601828,
  {1: 0.0, 2: 0.0, 4: 0.5454545454545454}),
 (3, 'prev'): (0.0,
  15.631746292114258,
  0.1

# BCE Road Accidents

In [3]:
# Experiment setup: seq2seq model trained with BCE loss on the "road_accidents" data.
model_type, experiments_name = "seq2seq", "road_accidents"

# The video configs are stored as configs/video_<model_type>.yaml.
path_to_config = "configs/video_{}.yaml".format(model_type)

with open(path_to_config, "r") as config_stream:
    args_config = yaml.safe_load(config_stream)

# Top-level overrides applied on top of the values read from the YAML file.
args_config.update(
    experiments_name=experiments_name,
    model_type=model_type,
    loss_type="bce",
    num_workers=4,
)

# Overrides inside the nested "learning" section.
learning_cfg = args_config["learning"]
learning_cfg["gpus"] = 1
learning_cfg["epochs"] = 100

In [4]:
# Build the train/test datasets for the selected experiment.
dataset_factory = CPDDatasets(experiments_name)
train_dataset, test_dataset = dataset_factory.get_dataset_()

# Test loader: batch size from the config, original sample order (no shuffling).
test_batch_size = args_config["learning"]["batch_size"]
test_dataloader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)

In [5]:
# Folder holding the ensemble checkpoints used in this section.
path_to_models_folder = "saved_models/bce/road_accidents/layer_norm"

# Ensemble of ten models with `boot_sample_size` left unset; its members are
# then loaded from the folder above.
ens_bce = EnsembleCPDModel(
    args_config,
    boot_sample_size=None,
    n_models=10,
)
ens_bce.load_models_list(path_to_models_folder)

In [6]:
# Gather the ensemble's predictions on the test loader; only the first and
# last elements of the returned triple are used.
test_out_bank, _, test_labels_bank = collect_model_predictions_on_set(
    ens_bce,
    test_dataloader,
    device="cuda:1",
    model_type="ensemble_all_models",
)

# Re-wrap the collected outputs and labels as a dataset plus an ordered loader.
all_models_out_dataset = AllModelsOutputDataset(test_out_bank, test_labels_bank)
out_loader_kwargs = {"batch_size": 128, "shuffle": False}
all_models_out_dataloader = DataLoader(all_models_out_dataset, **out_loader_kwargs)

# Sanity check: shapes of the first item (predictions, labels).
preds, labels = all_models_out_dataset[0]
(preds.shape, labels.shape)

  0%|          | 0/20 [00:00<?, ?it/s]

Collectting model's outputs


100%|██████████| 20/20 [02:40<00:00,  8.03s/it]


(torch.Size([10, 16]), torch.Size([16]))

In [7]:
# Results of the sweep, keyed by (window_size, anchor_window_type).
res_dict = {}

# Only the "prev" anchor window is swept here, over three window sizes.
window_size_list = [1, 2, 3]
anchor_window_type_list = ["prev"]

# Twenty evenly spaced thresholds between 0 and 3 (inclusive).
threshold_list = np.linspace(0, 3, 20)

for sweep_point in itertools.product(window_size_list, anchor_window_type_list):
    window_size, anchor_window_type = sweep_point
    print(f"window_size = {window_size}, anchor_window_type = {anchor_window_type}")

    # Evaluate one configuration over all thresholds; verbose mode prints the
    # metrics for the best threshold.
    res, best_th = evaluate_distance_ensemble_model(
        threshold_list=threshold_list,
        margin_list=[1, 2, 4],
        window_size=window_size,
        anchor_window_type=anchor_window_type,
        # kernel="rbf",
        distance="wasserstein_1d",
        output_dataloader=all_models_out_dataloader,
        args_config=args_config,
        n_models=10,
        save_path=path_to_models_folder,
        device="cpu",
        verbose=True,
    )

    # Keep only the entry that belongs to the best threshold.
    res_dict[(window_size, anchor_window_type)] = res[best_th]

  0%|          | 0/20 [00:00<?, ?it/s]

window_size = 1, anchor_window_type = prev


100%|██████████| 3/3 [00:00<00:00,  9.51it/s]
100%|██████████| 3/3 [00:00<00:00,  9.44it/s]]
100%|██████████| 3/3 [00:00<00:00,  9.54it/s]]
100%|██████████| 3/3 [00:00<00:00,  9.38it/s]]
100%|██████████| 3/3 [00:00<00:00,  9.33it/s]]
100%|██████████| 3/3 [00:00<00:00,  9.51it/s]]
100%|██████████| 3/3 [00:00<00:00,  9.49it/s]]
100%|██████████| 3/3 [00:00<00:00,  9.54it/s]]
100%|██████████| 3/3 [00:00<00:00,  9.39it/s]]
100%|██████████| 3/3 [00:00<00:00,  9.48it/s]]
100%|██████████| 3/3 [00:00<00:00,  9.41it/s]t]
100%|██████████| 3/3 [00:00<00:00,  9.50it/s]t]
100%|██████████| 3/3 [00:00<00:00,  9.53it/s]t]
100%|██████████| 3/3 [00:00<00:00,  9.23it/s]t]
100%|██████████| 3/3 [00:00<00:00,  9.32it/s]t]
100%|██████████| 3/3 [00:00<00:00,  8.98it/s]t]
100%|██████████| 3/3 [00:00<00:00,  9.37it/s]t]
100%|██████████| 3/3 [00:00<00:00,  9.56it/s]t]
100%|██████████| 3/3 [00:00<00:00,  9.64it/s]t]
100%|██████████| 3/3 [00:00<00:00,  7.50it/s]t]
100%|██████████| 20/20 [02:11<00:00,  6.56s/it]
  0

Results for best threshold = 0.15789473684210525
AUDC: 0.0, Time to FA: 15.527, DD: 0.1079, F1: 0.5882, Cover: 0.9822, Max Cover: 0.9822
Max F1 with margin 1: 0.5
Max F1 with margin 2: 0.5
Max F1 with margin 4: 0.5882
window_size = 2, anchor_window_type = prev


100%|██████████| 3/3 [00:00<00:00,  7.86it/s]
100%|██████████| 3/3 [00:00<00:00,  7.96it/s]]
100%|██████████| 3/3 [00:00<00:00,  7.64it/s]]
100%|██████████| 3/3 [00:00<00:00,  8.01it/s]]
100%|██████████| 3/3 [00:00<00:00,  7.94it/s]]
100%|██████████| 3/3 [00:00<00:00,  8.03it/s]]
100%|██████████| 3/3 [00:00<00:00,  8.12it/s]]
100%|██████████| 3/3 [00:00<00:00,  7.54it/s]]
100%|██████████| 3/3 [00:00<00:00,  8.00it/s]]
100%|██████████| 3/3 [00:00<00:00,  8.08it/s]]
100%|██████████| 3/3 [00:00<00:00,  8.00it/s]t]
100%|██████████| 3/3 [00:00<00:00,  8.00it/s]t]
100%|██████████| 3/3 [00:00<00:00,  8.14it/s]t]
100%|██████████| 3/3 [00:00<00:00,  7.85it/s]t]
100%|██████████| 3/3 [00:00<00:00,  7.48it/s]t]
100%|██████████| 3/3 [00:00<00:00,  8.03it/s]t]
100%|██████████| 3/3 [00:00<00:00,  7.96it/s]t]
100%|██████████| 3/3 [00:00<00:00,  7.83it/s]t]
100%|██████████| 3/3 [00:00<00:00,  7.98it/s]t]
100%|██████████| 3/3 [00:00<00:00,  7.99it/s]t]
100%|██████████| 20/20 [02:15<00:00,  6.77s/it]
  0

Results for best threshold = 0.3157894736842105
AUDC: 0.0, Time to FA: 15.7048, DD: 0.1683, F1: 0.6154, Cover: 0.9848, Max Cover: 0.9848
Max F1 with margin 1: 0.2857
Max F1 with margin 2: 0.5
Max F1 with margin 4: 0.6667
window_size = 3, anchor_window_type = prev


100%|██████████| 3/3 [00:00<00:00,  8.32it/s]
100%|██████████| 3/3 [00:00<00:00,  8.31it/s]]
100%|██████████| 3/3 [00:00<00:00,  7.50it/s]]
100%|██████████| 3/3 [00:00<00:00,  8.21it/s]]
100%|██████████| 3/3 [00:00<00:00,  8.24it/s]]
100%|██████████| 3/3 [00:00<00:00,  8.51it/s]]
100%|██████████| 3/3 [00:00<00:00,  8.45it/s]]
100%|██████████| 3/3 [00:00<00:00,  8.37it/s]]
100%|██████████| 3/3 [00:00<00:00,  8.50it/s]]
100%|██████████| 3/3 [00:00<00:00,  8.36it/s]]
100%|██████████| 3/3 [00:00<00:00,  8.22it/s]t]
100%|██████████| 3/3 [00:00<00:00,  7.65it/s]t]
100%|██████████| 3/3 [00:00<00:00,  8.45it/s]t]
100%|██████████| 3/3 [00:00<00:00,  8.10it/s]t]
100%|██████████| 3/3 [00:00<00:00,  8.49it/s]t]
100%|██████████| 3/3 [00:00<00:00,  8.42it/s]t]
100%|██████████| 3/3 [00:00<00:00,  8.49it/s]t]
100%|██████████| 3/3 [00:00<00:00,  8.45it/s]t]
100%|██████████| 3/3 [00:00<00:00,  8.42it/s]t]
100%|██████████| 3/3 [00:00<00:00,  8.17it/s]t]
100%|██████████| 20/20 [02:13<00:00,  6.66s/it]

Results for best threshold = 0.3157894736842105
AUDC: 0.0, Time to FA: 15.6413, DD: 0.1492, F1: 0.7143, Cover: 0.9828, Max Cover: 0.9828
Max F1 with margin 1: 0.2
Max F1 with margin 2: 0.56
Max F1 with margin 4: 0.7143





# TS-CP HAR