# Model evaluation

In this notebook, I will compare the accuracy metrics of models

In [71]:
%load_ext autoreload
%autoreload 2

import sys

sys.path.append("..")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [72]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [73]:
import scienceplots  # noqa # pylint: disable=unused-import

plt.style.use(["science", "ieee", "default"])
plt.rcParams.update({"axes.grid": True})

In [74]:
from tqdm.auto import tqdm

## Prepare dataloader

In [75]:
from pathlib import Path

ROOT_DATA_DIR = Path("../data/")

In [76]:
from collections import defaultdict

import torch
from pytorch_lightning import Trainer, seed_everything
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils.class_weight import compute_class_weight
from torch import Tensor, nn
from torch.utils.data import DataLoader, TensorDataset

from src.utils import get_metrics

In [77]:
torch.set_default_dtype(torch.float32)

take only 1 dataset, 1 concrete time series for now...

In [78]:
DATA_DIR = ROOT_DATA_DIR / "KPI-Anomaly-Detection/Preliminary_dataset"

In [79]:
df = pd.read_parquet(DATA_DIR / "train.parquet")
df.rename(columns={"label": "target"}, inplace=True)
df = df[df["KPI ID"] == "02e99bd4f6cfb33f"].copy()
df = df[df.index > 1.49 * 1e9].copy()
df = df.query("timestamp < 1496538120").copy()
df["time"] = df.index - df.index[0]
# df["value"] = df["value"].diff()
# df = df.query('stock==1 & day==0 & train==1')
# df.value = df.value.pct_change()
df.dropna(inplace=True)

In [80]:
def convert_to_windows(x, window_size):
    # convert input time series x to
    # time series of lags, first window_size observations are dropped
    windows = []
    for i in range(window_size, len(x)):
        w = x[i - window_size : i]
        windows.append(w)
    windows = np.array(windows)
    return windows

In [81]:
df["value_diff"] = df["value"].diff()
tr_cols = ["value", "value_diff"]
df = df.dropna()

In [82]:
# fig, axs = plt.subplots(1, 2, figsize=(8, 4))
# plt.subplot(1, 2, 1)
# plt.plot(df.value.diff())
# plt.subplot(1, 2, 2)
# plt.hist(df.value)
# plt.yscale("log")
# plt.tight_layout();

In [83]:
window_size = 8
train_proportion = 0.9
batch_size = 16
learning_rate = 1e-5
epochs = 30

In [84]:
x = df[tr_cols].values.copy()
scaler = MinMaxScaler()
x = scaler.fit_transform(x)
# x_scaled = x
x = convert_to_windows(x, window_size)
y = df["target"].values
y = y[window_size:]

# class_weights = compute_class_weight('balanced', classes=[0, 1], y=y.ravel())
# class_weights = class_weights / class_weights.sum()

x = torch.Tensor(x).float()
y = torch.Tensor(y).float()

train_idx = int(len(x) * train_proportion)
# train_idx = int(1.495 * 1e9)

tr, va = x[:train_idx], x[train_idx:]
# x = np.hstack([x, y.reshape(-1, 1)])
# y = y.reshape(-1, 1)
ytr, yva = y[:train_idx], y[train_idx:]
tr_dataset = TensorDataset(tr, ytr)
va_dataset = TensorDataset(va, yva)

In [85]:
# fig, axs = plt.subplots(1, 2, figsize=(8, 4))
# plt.sca(axs[0])
# plt.plot(tr[:, 0, 0])
# plt.sca(axs[1])
# plt.plot(tr[:, 0, 1])
# fig.tight_layout();

## Evaluate models

In [86]:
from src.transformer import TransformerAnomalyDetector, TransformerEncoder
import pytorch_lightning as pl

### Scaled Dot-Product

In [87]:
window_size = 8
train_proportion = 0.9
batch_size = 2 * 4096
epochs = 25

In [88]:
seed_everything(1)

tr_dl = DataLoader(tr_dataset, batch_size=batch_size, shuffle=False)
va_dl = DataLoader(va_dataset, batch_size=batch_size, shuffle=False)

Global seed set to 1


> **TODO**: add positional enocding

See the code for Anomaly BERT -> implement sequences windows for tensors -> unsqueeze before supplying to the model

In [94]:
input_dim = len(tr_cols)
out_dim = 1

num_layers = 2
block_input_dim = 16
block_embed_dim = block_input_dim  # implicitly set in the block
block_num_heads = 4
block_dim_feedforward = 2 * block_input_dim
block_args = {
    "input_dim": block_input_dim,
    "num_heads": block_num_heads,
    "dim_feedforward": block_dim_feedforward,
}

tf_enc = TransformerEncoder(num_layers=num_layers, **block_args)
# xx = torch.rand(batch_size, window_size, input_dim)

# front_linear = nn.Linear(input_dim, block_input_dim)
# last_linear = nn.Linear(block_input_dim, out_dim)
# xx = front_linear(xx)
# xx = tf_enc(xx)
# last_linear(xx).shape

In [95]:
seed_everything(1)

input_dim = len(tr_cols)

block_args = {
    "input_dim": 16,
    "num_heads": 4,
    "dim_feedforward": 2 * 16,
    "num_layers": 2,
    "enable_layer_norm": True,
}
positional_encoder_args = {
    "enable": False,
    "max_len": window_size,
}

loss_fn = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(5))

model_params = {
    "input_dim": input_dim,
    "block_input_dim": block_args["input_dim"],
    "block_args": block_args,
    "num_layers": block_args["num_layers"],
    "positional_encoder_args": positional_encoder_args,
    "learning_rate": 1e-1,
    "dropout": 0.0,
    "loss_fn": loss_fn,
}



model = TransformerAnomalyDetector(
    **model_params,
)

Global seed set to 1
positional encoding disabled


In [98]:
seed_everything(1)
trainer = pl.Trainer()
trainer.fit(model, tr_dl, va_dl)

Global seed set to 1
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

  | Name                | Type               | Params
-----------------------------------------------------------
0 | front_linear        | Linear             | 48    
1 | positional_encoder  | Identity           | 0     
2 | transformer_encoder | TransformerEncoder | 4.4 K 
3 | final_linear        | Linear             | 17    
4 | loss_fn             | BCEWithLogitsLoss  | 0     
-----------------------------------------------------------
4.4 K     Trainable params
0         Non-trainable params
4.4 K     Total params
0.018     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [None]:
# xx = torch.rand(batch_size, window_size, input_dim)
# print(xx.shape)
# model(xx)

In [None]:
# loss_fn = nn.BCELoss()

# optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [None]:
del optimizer

In [None]:
trainer = pl.Trainer()

In [None]:
seed_everything(1)

res_dict = {}
res_dict["tr_metrics"] = []
res_dict["va_metrics"] = []
verbose = True
for epoch in tqdm(range(epochs)):
    if verbose:
        print(epoch)
    _ = train_loop(
        dataloader=tr_dl,
        model=model,
        loss_fn=loss_fn,
        optimizer=optimizer,
    )
    curr_metrics = test_loop(dataloader=tr_dl, model=model, loss_fn=loss_fn)
    if verbose:
        print("train", curr_metrics)
    res_dict["tr_metrics"].append(curr_metrics)
    curr_metrics = test_loop(dataloader=va_dl, model=model, loss_fn=loss_fn)
    if verbose:
        print("val", curr_metrics)
    res_dict["va_metrics"].append(curr_metrics)

    if verbose:
        print()  # empty line

res_dict["tr_metrics"] = pd.DataFrame(res_dict["tr_metrics"])
res_dict["va_metrics"] = pd.DataFrame(res_dict["va_metrics"])

In [None]:
fig, axs = plt.subplots(2, 2, figsize=(8, 8))

plt.sca(axs[0, 0])
plt.plot(res_dict["tr_metrics"]["loss"], label="Train")
plt.plot(res_dict["va_metrics"]["loss"], label="Validation")
plt.title("Loss")
plt.xlabel("Epoch")
plt.legend()
plt.sca(axs[0, 1])
plt.plot(res_dict["tr_metrics"]["f1"], label="Train")
plt.plot(res_dict["va_metrics"]["f1"], label="Validation")
plt.title("F1")
plt.xlabel("Epoch")
plt.legend()
plt.sca(axs[1, 0])
plt.plot(res_dict["tr_metrics"]["precision"], label="Train")
plt.plot(res_dict["va_metrics"]["precision"], label="Validation")
plt.title("Precision")
plt.xlabel("Epoch")
plt.legend()
plt.sca(axs[1, 1])
plt.plot(res_dict["tr_metrics"]["recall"], label="Train")
plt.plot(res_dict["va_metrics"]["recall"], label="Validation")
plt.title("Recall")
plt.xlabel("Epoch")
plt.legend()
fig.tight_layout();

## Linear attention

implementation: https://github.com/idiap/fast-transformers

paper: https://arxiv.org/pdf/2006.16236.pdf

In [None]:
from src.transformer import LinearTransformer

In [None]:
window_size = 8
train_proportion = 0.9
batch_size = 16
learning_rate = 1e-3 / 2
epochs = 30

input_dim = len(tr_cols)
embed_dim = 8
num_heads = 1

In [None]:
seed_everything(1)

# TODO: sampling might be useful for some datasets
# sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, len(weights))
# tr_dl = DataLoader(tr_dataset, batch_size=batch_size, shuffle=False, sampler=sampler)

tr_dl = DataLoader(tr_dataset, batch_size=batch_size, shuffle=False)
va_dl = DataLoader(va_dataset, batch_size=batch_size, shuffle=True)

In [None]:
seed_everything(1)
model = LinearTransformer(input_dim=input_dim, num_heads=num_heads, embed_dim=embed_dim)

In [None]:
model

In [None]:
# loss_fn = nn.BCELoss()
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
seed_everything(1)

res_dict = {}
res_dict["tr_metrics"] = []
res_dict["va_metrics"] = []
verbose = True
for epoch in tqdm(range(epochs)):
    if verbose:
        print(epoch)
    _ = train_loop(
        dataloader=tr_dl,
        model=model,
        loss_fn=loss_fn,
        optimizer=optimizer,
    )
    curr_metrics = test_loop(dataloader=tr_dl, model=model, loss_fn=loss_fn)
    if verbose:
        print("train", curr_metrics)
    res_dict["tr_metrics"].append(curr_metrics)
    curr_metrics = test_loop(dataloader=va_dl, model=model, loss_fn=loss_fn)
    if verbose:
        print("val", curr_metrics)
    res_dict["va_metrics"].append(curr_metrics)

    if verbose:
        print()  # empty line

res_dict["tr_metrics"] = pd.DataFrame(res_dict["tr_metrics"])
res_dict["va_metrics"] = pd.DataFrame(res_dict["va_metrics"])

In [None]:
fig, axs = plt.subplots(2, 2, figsize=(8, 8))

plt.sca(axs[0, 0])
plt.plot(res_dict["tr_metrics"]["loss"], label="Train")
plt.plot(res_dict["va_metrics"]["loss"], label="Validation")
plt.title("Loss")
plt.xlabel("Epoch")
plt.legend()
plt.sca(axs[0, 1])
plt.plot(res_dict["tr_metrics"]["f1"], label="Train")
plt.plot(res_dict["va_metrics"]["f1"], label="Validation")
plt.title("F1")
plt.xlabel("Epoch")
plt.legend()
plt.sca(axs[1, 0])
plt.plot(res_dict["tr_metrics"]["precision"], label="Train")
plt.plot(res_dict["va_metrics"]["precision"], label="Validation")
plt.title("Precision")
plt.xlabel("Epoch")
plt.legend()
plt.sca(axs[1, 1])
plt.plot(res_dict["tr_metrics"]["recall"], label="Train")
plt.plot(res_dict["va_metrics"]["recall"], label="Validation")
plt.title("Recall")
plt.xlabel("Epoch")
plt.legend()
fig.tight_layout();