# Experiments with custom models and datasets

In [None]:
# importing our utils
from utils import datasets, metrics, core_models, cpd_models, klcpd, tscp
from utils.model_utils import fix_seeds

import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn

import pytorch_lightning as pl

from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

%load_ext autoreload
%autoreload
%matplotlib inline

## Fix seeds

In [2]:
# Seed used for this notebook run.
SEED = 42
# Project helper imported from utils.model_utils; presumably seeds the random
# number generators used during training — TODO confirm which libraries it covers.
fix_seeds(SEED)

## Create dataset

You should define your own CustomDataset class with self.data and self.labels attributes and then use our CPDDatasets wrapper. As an example, we use our HumanActivityDataset defined in utils/datasets.py.

In [3]:
# Name of the experiment; it selects the dataset and is reused for `args`.
experiments_name = "human_activity"

# Build the CPDDatasets wrapper first, then ask it for the train/test pair.
cpd_datasets = datasets.CPDDatasets(experiments_name=experiments_name)
train_dataset, test_dataset = cpd_datasets.get_dataset_()

## Define your custom core model

You should wrap your core model in a torch.nn.Module and define its forward method. As an example, we use our BaseRnn model defined in utils/core_models.py, with the default parameters specified in the configs/human_activity_seq2seq.yaml file.

In [4]:
# Core network for the experiment with our 'indid' loss.
# Hyper-parameters are collected in one mapping and unpacked into BaseRnn.
indid_rnn_params = {
    "input_size": 28,
    "hidden_dim": 8,
    "n_layers": 1,
    "drop_prob": 0.25,
}
core_model_indid = core_models.BaseRnn(**indid_rnn_params)



In [5]:
# Nested argument dictionary, kept for consistency with our interface.
# Values are defined as they are set in the corresponding config file.
learning = {
    "batch_size": 16,
    "lr": 0.001,
    "epochs": 5,
    "grad_clip": 0.0,
}

loss = {"T": 5}

# `args` references the two dictionaries above rather than copying them.
args = {
    "learning": learning,
    "loss": loss,
    "experiments_name": experiments_name,
    "num_workers": 2,
}

In [6]:
# Wrap the core network into a CPDModel that is trained with our 'indid' loss.
indid_model = cpd_models.CPDModel(
    model=core_model_indid,
    loss_type="indid",
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    args=args,
)

## Train the model using pytorch_lightning.trainer

In [None]:
# use logger
model_name = f'seq2seq_indid_seed_{SEED}'
logger = TensorBoardLogger(save_dir=f'logs/{experiments_name}', name=model_name)

# use early stopping on the monitored "val_loss" metric
# NOTE(review): patience equals max_epochs below, so with one validation check
# per epoch early stopping is unlikely to trigger before the epoch limit is
# reached — confirm this is intended.
early_stopping = EarlyStopping(monitor="val_loss", min_delta=0, patience=10)

# define trainer with custom parameters
trainer = pl.Trainer(
    max_epochs=10,
    # train on CPU; `accelerator` replaces the deprecated `gpus=0` flag,
    # which newer pytorch_lightning releases no longer accept
    accelerator="cpu",
    benchmark=True,
    check_val_every_n_epoch=1,
    gradient_clip_val=0.,
    logger=logger,
    callbacks=[early_stopping],
)

trainer.fit(indid_model)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type    | Params
----------------------------------
0 | model | BaseRnn | 1.2 K 
1 | loss  | CPDLoss | 0     
----------------------------------
1.2 K     Trainable params
0         Non-trainable params
1.2 K     Total params
0.005     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

## Evaluate your model

In [7]:
# Thresholds: `threshold_number` sigmoid-spaced values inside (0, 1), padded
# with one value just below 0 and one value just above 1.
threshold_number = 100
threshold_logits = np.linspace(-5, 5, threshold_number)
threshold_list = [-0.001, *(1 / (1 + np.exp(-threshold_logits))), 1.001]

# Run the evaluation pipeline for the InDiD model over every threshold.
# NOTE(review): the output saved with this cell ends in
# `RuntimeError: expected scalar type Double but found Float`; presumably a
# dtype mismatch between the model and its inputs — confirm before relying on
# the results used below.
indid_val_loader = indid_model.val_dataloader()
(metrics_local, delay_list, fp_delay_list) = metrics.evaluation_pipeline(
    indid_model,
    indid_val_loader,
    threshold_list,
    device="cpu",  # choose 'cpu' or 'cuda' if available
    model_type="seq2seq",
    verbose=True,
)

  0%|                                                   | 0/102 [00:00<?, ?it/s]


RuntimeError: expected scalar type Double but found Float

## Draw detection curve

In [None]:
# Detection curve: mean detection delay against mean time to false alarm.
# `.values()` shows both results are mappings; presumably keyed by threshold —
# TODO confirm against metrics.evaluation_pipeline.
# Materialise the dict views as lists so plotting does not depend on how the
# installed matplotlib version treats dict view objects.
mean_time_to_false_alarm = list(fp_delay_list.values())
mean_detection_delay = list(delay_list.values())

plt.figure(figsize=(12, 12))
plt.plot(mean_time_to_false_alarm, mean_detection_delay, '-o', markersize=8, label="InDiD")
plt.xlabel('Mean Time to False Alarm', fontsize=28)
plt.ylabel('Mean Detection Delay', fontsize=28)
plt.xticks(fontsize=24)
plt.yticks(fontsize=24)
plt.legend(loc='upper left', fontsize=26)
plt.show()

## The same experiment with BCE loss

In [None]:
# define core model for an experiment with 'bce' loss
core_model_bce = core_models.BaseRnn(
    input_size=28,
    hidden_dim=8,
    n_layers=1,
    drop_prob=0.25
)

# define CPDModel with 'bce' loss
# (the loss type must be "bce" here; passing "indid" would silently repeat the
# InDiD experiment under the BCE name)
bce_model = cpd_models.CPDModel(
    loss_type="bce",
    args=args,
    model=core_model_bce,
    train_dataset=train_dataset,
    test_dataset=test_dataset
)

# use logger
model_name = f'seq2seq_bce_seed_{SEED}'
logger = TensorBoardLogger(save_dir=f'logs/{experiments_name}', name=model_name)

# use early stopping on the monitored "val_loss" metric
# NOTE(review): patience equals max_epochs below, so with one validation check
# per epoch early stopping is unlikely to trigger before the epoch limit is
# reached — confirm this is intended.
early_stopping = EarlyStopping(monitor="val_loss", min_delta=0, patience=10)

# define trainer with custom parameters
trainer = pl.Trainer(
    max_epochs=10,
    # train on CPU; `accelerator` replaces the deprecated `gpus=0` flag,
    # which newer pytorch_lightning releases no longer accept
    accelerator="cpu",
    benchmark=True,
    check_val_every_n_epoch=1,
    gradient_clip_val=0.,
    logger=logger,
    callbacks=[early_stopping],
)

trainer.fit(bce_model)

# Evaluate the BCE model over every threshold in `threshold_list`.
bce_val_loader = bce_model.val_dataloader()
(metrics_local, delay_list, fp_delay_list) = metrics.evaluation_pipeline(
    bce_model,
    bce_val_loader,
    threshold_list,
    device="cpu",  # choose 'cpu' or 'cuda' if available
    model_type="seq2seq",
    verbose=True,
)

# Detection curve: mean detection delay against mean time to false alarm.
# `.values()` shows both results are mappings; presumably keyed by threshold —
# TODO confirm against metrics.evaluation_pipeline.
# Materialise the dict views as lists so plotting does not depend on how the
# installed matplotlib version treats dict view objects.
mean_time_to_false_alarm = list(fp_delay_list.values())
mean_detection_delay = list(delay_list.values())

plt.figure(figsize=(12, 12))
plt.plot(mean_time_to_false_alarm, mean_detection_delay, '-o', markersize=8, label="BCE")
plt.xlabel('Mean Time to False Alarm', fontsize=28)
plt.ylabel('Mean Detection Delay', fontsize=28)
plt.xticks(fontsize=24)
plt.yticks(fontsize=24)
plt.legend(loc='upper left', fontsize=26)
plt.show()

## The same experiment with 'Combined' loss

In [None]:
# Core network for the experiment with the 'combined' loss.
combined_rnn_params = {
    "input_size": 28,
    "hidden_dim": 8,
    "n_layers": 1,
    "drop_prob": 0.25,
}
core_model_combined = core_models.BaseRnn(**combined_rnn_params)

# Two CPDModels are needed for this experiment. Everything except the loss
# type is identical, and both wrap the very same core model instance.
combined_shared_kwargs = {
    "args": args,
    "model": core_model_combined,
    "train_dataset": train_dataset,
    "test_dataset": test_dataset,
}
combined_model_1 = cpd_models.CPDModel(loss_type="bce", **combined_shared_kwargs)
combined_model_2 = cpd_models.CPDModel(loss_type="indid", **combined_shared_kwargs)

# use logger
model_name = f'seq2seq_combined_seed_{SEED}'
logger = TensorBoardLogger(save_dir=f'logs/{experiments_name}', name=model_name)


def make_combined_stage_trainer():
    """Return a new Trainer, with its own EarlyStopping callback, for one stage.

    Each training stage gets a fresh Trainer and a fresh EarlyStopping instance.
    NOTE(review): a Trainer and its callbacks keep their progress/state after
    `fit` returns, so reusing one instance for the second stage risks starting
    it with the epoch budget already spent — confirm against the installed
    pytorch_lightning version.
    """
    # use early stopping on the monitored "val_loss" metric
    # NOTE(review): patience equals max_epochs, so early stopping is unlikely
    # to trigger before the epoch limit is reached — confirm this is intended.
    early_stopping = EarlyStopping(monitor="val_loss", min_delta=0, patience=10)

    # define trainer with custom parameters
    return pl.Trainer(
        max_epochs=10,
        # train on CPU; `accelerator` replaces the deprecated `gpus=0` flag,
        # which newer pytorch_lightning releases no longer accept
        accelerator="cpu",
        benchmark=True,
        check_val_every_n_epoch=1,
        gradient_clip_val=0.,
        logger=logger,  # both stages log to the same TensorBoard run
        callbacks=[early_stopping],
    )


# stage 1: train the shared core model through the 'bce' CPDModel
trainer = make_combined_stage_trainer()
trainer.fit(combined_model_1)

# stage 2: continue training the same core model through the 'indid' CPDModel
trainer = make_combined_stage_trainer()
trainer.fit(combined_model_2)

# Evaluate the combined experiment; the second model is the one evaluated.
combined_val_loader = combined_model_2.val_dataloader()
(metrics_local, delay_list, fp_delay_list) = metrics.evaluation_pipeline(
    combined_model_2,
    combined_val_loader,
    threshold_list,
    device="cpu",  # choose 'cpu' or 'cuda' if available
    model_type="seq2seq",
    verbose=True,
)

# Detection curve: mean detection delay against mean time to false alarm.
# `.values()` shows both results are mappings; presumably keyed by threshold —
# TODO confirm against metrics.evaluation_pipeline.
# Materialise the dict views as lists so plotting does not depend on how the
# installed matplotlib version treats dict view objects.
mean_time_to_false_alarm = list(fp_delay_list.values())
mean_detection_delay = list(delay_list.values())

plt.figure(figsize=(12, 12))
plt.plot(mean_time_to_false_alarm, mean_detection_delay, '-o', markersize=8, label="Combined")
plt.xlabel('Mean Time to False Alarm', fontsize=28)
plt.ylabel('Mean Detection Delay', fontsize=28)
plt.xticks(fontsize=24)
plt.yticks(fontsize=24)
plt.legend(loc='upper left', fontsize=26)
plt.show()

## Experiments with KL-CPD baseline

### Initializing core models: Discriminator and Generator

## Experiments with TS-CP2 baseline