# BARO's reproducibility

In this notebook, we reproduce BARO's effectiveness on the Online Boutique dataset, as reported in Table 3 (average Avg@5 of **0.86**).

In [12]:
import os
import glob
import json
import warnings
from os.path import join, dirname, basename
warnings.filterwarnings("ignore")

import pandas as pd 
import numpy as np

from baro.root_cause_analysis import robust_scorer
from baro.utility import load_json

In [19]:
# Evaluate BARO on every fault-injection case found under ./data/fse-ob and
# report Avg@5, i.e. the mean of the top-1 ... top-5 service-level accuracies.

# hit_counts[k - 1] = number of cases whose injected service is ranked in the top k
hit_counts = [0, 0, 0, 0, 0]
total_cnt = 0

# number of samples kept on each side of the injection time
data_length = 300

# sorted(): glob.glob does not guarantee an order, so sort the paths to make
# the processing order (and any diagnostics printed below) reproducible
for data_path in sorted(glob.glob("./data/fse-ob/**/simple_data.csv", recursive=True)):
    data = pd.read_csv(data_path)
    data_dir = dirname(data_path)

    # ground truth comes from the grandparent directory name, which must
    # split into exactly two parts on "_" (used here as service and metric)
    service, metric = basename(dirname(data_dir)).split("_")

    ############# PREPROCESSING ###############
    if "time.1" in data:
        data = data.drop(columns=["time.1"])

    # inf -> nan, then forward-fill; leading gaps that cannot be
    # forward-filled are set to 0
    data = data.replace([np.inf, -np.inf], np.nan).ffill().fillna(0)

    # sanity checks: nothing invalid should survive the cleaning above
    if data.isnull().values.any():
        print(f"{data_path=} has nan")

    if data.isin([np.inf, -np.inf]).values.any():
        print(f"{data_path=} has inf")

    # drop the "latency-50" columns and expose the "_latency-90" ones as "_latency"
    data = data.loc[:, ~data.columns.str.endswith("latency-50")]
    data = data.rename(
        columns={
            c: c.replace("_latency-90", "_latency")
            for c in data.columns
            if c.endswith("_latency-90")
        }
    )

    # cut data: keep at most `data_length` rows right before and right after
    # the injection time stored on the first line of inject_time.txt
    with open(join(data_dir, "inject_time.txt")) as f:
        inject_time = int(f.readline().strip())
    normal_df = data[data["time"] < inject_time].tail(data_length)
    anomal_df = data[data["time"] >= inject_time].head(data_length)
    data = pd.concat([normal_df, anomal_df], ignore_index=True)

    ############# READ ANOMALY DETECTION OUTPUT ###############
    # only the first element of each stored entry is passed on to the scorer
    anomalies = load_json(join(data_dir, "naive_bocpd.json"))
    anomalies = [i[0] for i in anomalies]

    ############# ROOT CAUSE ANALYSIS ###############
    ranks = robust_scorer(data, anomalies=anomalies)["ranks"]
    # reduce each ranked entry to the text before its first "_" (the service
    # part) and keep only the first occurrence of each service, in rank order
    service_ranks = list(dict.fromkeys(r.split("_")[0] for r in ranks))

    ############## EVALUATION ###############
    for k in range(1, len(hit_counts) + 1):
        if service in service_ranks[:k]:
            hit_counts[k - 1] += 1
    total_cnt += 1

############## EVALUATION ###############
if total_cnt == 0:
    # fail with an actionable message instead of a ZeroDivisionError below
    raise FileNotFoundError(
        "No simple_data.csv found under ./data/fse-ob; "
        "check that the dataset is available at that path."
    )

top1_cnt, top2_cnt, top3_cnt, top4_cnt, top5_cnt = hit_counts
top1_accuracy = top1_cnt / total_cnt
top2_accuracy = top2_cnt / total_cnt
top3_accuracy = top3_cnt / total_cnt
top4_accuracy = top4_cnt / total_cnt
top5_accuracy = top5_cnt / total_cnt
avg5_accuracy = (top1_accuracy + top2_accuracy + top3_accuracy + top4_accuracy + top5_accuracy) / 5

print(f"Avg@5 Accuracy: {avg5_accuracy}")
    
    

Avg@5 Accuracy: 0.86
