<a href="https://colab.research.google.com/github/phamquiluan/baro/blob/main/tutorials/reproduce_multivariate_bocpd.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Reproduction of Multivariate BOCPD

In this notebook, we reproduce the anomaly detection output of the Multivariate BOCPD module on the Online Boutique dataset.

## Install BARO and import packages



In [1]:
!pip install -q fse-baro

In [6]:
import os
import glob
import json
import warnings
from os.path import join, dirname, basename
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np

from baro.anomaly_detection import bocpd
from baro._bocpd import online_changepoint_detection, partial, constant_hazard, MultivariateT
from baro.utility import load_json, drop_constant, download_data

## Download Online Boutique dataset from Zenodo

In [3]:
import zipfile

# Fetch the Online Boutique archive from Zenodo and unpack it under ./data.
download_data("https://zenodo.org/records/11046533/files/fse-ob.zip?download=1", "fse-ob.zip")

# exist_ok=True keeps this cell re-runnable: without it a second execution
# fails with FileExistsError because ./data was created by the first run.
os.makedirs("data", exist_ok=True)
with zipfile.ZipFile("fse-ob.zip", 'r') as file:
    file.extractall("data")

Downloading fse-ob.zip..: 100%|██████████| 151M/151M [00:03<00:00, 41.0MiB/s]


## Run

In [7]:
def find_cps(maxes):
    """Locate positions where consecutive entries of ``maxes`` jump by more than 1.

    Each entry is compared with its predecessor. A step of exactly one
    (m_t = m_{t-1} + 1) or smaller is ignored; any larger absolute difference
    is reported.

    Args:
        maxes: indexable sequence of numbers supporting ``len()``.

    Returns:
        List of ``(index, abs_difference)`` tuples, in increasing index order,
        one for every index ``i >= 1`` with ``abs(maxes[i] - maxes[i-1]) > 1``.
    """
    # Pair every position (from the second one on) with its jump size.
    jumps = (
        (idx, abs(maxes[idx] - maxes[idx - 1]))
        for idx in range(1, len(maxes))
    )
    # Keep only the jumps that exceed the expected +1 step.
    return [(idx, delta) for idx, delta in jumps if delta > 1]


# For every fault-injection case: rebuild the detector input, run BOCPD, and
# print the detected change points next to the ones stored in naive_bocpd.json.
for data_path in glob.glob("./data/fse-ob/**/simple_data.csv", recursive=True):
    # Path layout is .../<service_metric>/<case_idx>/simple_data.csv.
    # service_metric and case_idx are not used further in this loop.
    service_metric = basename(dirname(dirname(data_path)))
    case_idx = basename(dirname(data_path))
    data_dir = dirname(data_path)

    # PREPARE DATA
    data = pd.read_csv(data_path)

    # Read the injection timestamp (first line of inject_time.txt) and keep at
    # most 300 rows before it and at most 300 rows from it onwards.
    with open(join(data_dir, "inject_time.txt")) as f:
        inject_time = int(f.readline().strip())
    normal_df = data[data["time"] < inject_time].tail(300)
    anomal_df = data[data["time"] >= inject_time].head(300)
    data = pd.concat([normal_df, anomal_df], ignore_index=True)

    # Keep only median-latency and error columns, skipping the queue-master
    # and rabbitmq metrics.
    selected_cols = []
    for c in data.columns:
        if 'queue-master' in c or 'rabbitmq_' in c:
            continue
        if "latency-50" in c or "_error" in c:
            selected_cols.append(c)
    data = data[selected_cols]

    # Handle missing values: forward-fill, then zero-fill whatever is left.
    # DataFrame.ffill() replaces the deprecated fillna(method="ffill").
    data = drop_constant(data)
    data = data.ffill()
    data = data.fillna(0)

    # Min-max scale every column to [0, 1].
    for c in data.columns:
        col_min = np.min(data[c])
        col_max = np.max(data[c])
        data[c] = (data[c] - col_min) / (col_max - col_min)

    # A column with max == min yields NaN (0/0) above; fill those again.
    data = data.ffill()
    data = data.fillna(0)
    data = data.to_numpy()

    # RUN BOCPD
    R, maxes = online_changepoint_detection(
            data,
            partial(constant_hazard, 50),
            MultivariateT(dims=data.shape[1])
    )
    # find_cps returns (index, jump) pairs; only the indices are reported.
    cps = find_cps(maxes)
    cps = [p[0] for p in cps]

    ############# LOAD SAVED ANOMALY DETECTION OUTPUT ###############
    saved_anomalies_path = join(data_dir, "naive_bocpd.json")
    print(saved_anomalies_path)
    saved_anomalies = load_json(saved_anomalies_path)
    # Each saved entry is indexable; its first element is what gets compared.
    saved_anomalies = [i[0] for i in saved_anomalies]
    # Only the first three saved entries are printed.
    print(f"Saved Anomalies: {saved_anomalies[:3]}")

    ############# REPORT FRESHLY DETECTED ANOMALIES ###############
    print(f"Anomalies Detected: {cps}")


./data/fse-ob/checkoutservice_delay/5/naive_bocpd.json
Saved Anomalies: [340, 600]
Anomalies Detected: [340, 600]
./data/fse-ob/checkoutservice_delay/4/naive_bocpd.json
Saved Anomalies: [347, 600]
Anomalies Detected: [347, 600]
./data/fse-ob/checkoutservice_delay/1/naive_bocpd.json
Saved Anomalies: [343, 600]
Anomalies Detected: [343, 600]
./data/fse-ob/checkoutservice_delay/3/naive_bocpd.json
Saved Anomalies: [231, 237, 349]
Anomalies Detected: [231, 237, 349, 600]
./data/fse-ob/checkoutservice_delay/2/naive_bocpd.json
Saved Anomalies: [351, 554, 558]
Anomalies Detected: [351, 554, 558, 600]
./data/fse-ob/checkoutservice_loss/5/naive_bocpd.json
Saved Anomalies: [281, 332, 348]
Anomalies Detected: [281, 332, 348, 354, 359, 457, 549, 577, 597, 600]
./data/fse-ob/checkoutservice_loss/4/naive_bocpd.json
Saved Anomalies: [326, 329, 512]
Anomalies Detected: [326, 329, 512, 569, 600]
./data/fse-ob/checkoutservice_loss/1/naive_bocpd.json
Saved Anomalies: [243, 260, 332]
Anomalies Detected: [2