In [None]:
import os
import numpy as np
import pandas as pd

from itertools import product

import seaborn as sns
import matplotlib
from matplotlib import pyplot as plt

import sys
import os
sys.path.append("../../code")

In [None]:
# Figure-wide text defaults: 18pt for general text and legend entries,
# and a legend handle length of 2.
matplotlib.rc("font", size=18)
matplotlib.rc("legend", fontsize=18, handlelength=2)

In [None]:
# Fold identifiers matched against the `fold` column when picking best rows.
folds = list(range(1, 6))

# Experiment tag (not referenced by the cells visible in this notebook).
experiment = "comp"

# Starts empty; the score-loading cell appends the model types it finds.
model_types = list()

# Largest cutoff: per-cutoff scores are computed for 1..k.
k = 51

# Internal model-type key -> name shown in the figures.
name_mapper = dict(CNN="Arvalus", GCN="D-Arvalus")

In [None]:
import dill

# Load the serialized synthesizer object; calculate_loc_score below reads its
# `__synthetic_dict__` attribute through this module-level name.
# NOTE(review): dill.load, like pickle.load, can execute arbitrary code embedded
# in the file -- only point this at a synthesizer.pkl you produced yourself.
synthesizer = None
with open("../../results/synthesizer.pkl", "rb") as file:
    synthesizer = dill.load(file)

def calculate_loc_score(row, k):
    """Compute localization scores for one row of the results table.

    Parameters
    ----------
    row : mapping / pandas.Series
        Must provide the string fields ``graph_true_labels``,
        ``graph_true_label_indices``, ``node_pred_labels``, ``sequence_ids``
        and ``file_ids``. Each is parsed as a Python literal and the five
        parsed sequences are iterated in parallel (one entry per graph).
    k : int
        Number of cutoffs; scores are produced for cutoffs ``1..k``.

    Returns
    -------
    pandas.Series
        ``2 * k + 3`` values: ``k`` scores for the binarised labels, ``k``
        scores for the original class labels, then three scenario scores
        (type ``"local"``, type containing ``"neighborhood"``, anything else).
        A score whose bucket received no entries is reported as ``0``.

    Notes
    -----
    Reads the module-level ``synthesizer`` object (its ``__synthetic_dict__``).
    """
    # Local import keeps the function usable without touching the import cell.
    import ast

    def _parse(field):
        # SECURITY: these strings come from a CSV file. The original used
        # eval(), which would execute any expression stored in the file;
        # literal_eval only accepts plain Python literals (lists, numbers, ...)
        # and raises ValueError for anything else.
        return ast.literal_eval(row[field])

    def _mean_or_zero(values):
        # Empty buckets are reported as 0 rather than raising on division.
        return (sum(values) / len(values)) if len(values) else 0

    graph_true_labels = _parse("graph_true_labels")
    graph_true_label_indices = _parse("graph_true_label_indices")
    node_pred_labels = _parse("node_pred_labels")
    sequence_ids = _parse("sequence_ids")
    file_ids = _parse("file_ids")

    type_dict = {
        "binary": [[] for _ in range(k)],
        "class": [[] for _ in range(k)],
        "scenarios": [[] for _ in range(3)]
    }
    scenario_list = type_dict["scenarios"]

    for (labels, indices, preds, sequence_id, file_id) in zip(
        graph_true_labels, graph_true_label_indices, node_pred_labels, sequence_ids, file_ids
    ):
        sequence_dict = synthesizer.__synthetic_dict__[f"sequence-{sequence_id}"]
        file_dict = synthesizer.__synthetic_dict__[f"file-{file_id:06d}"]
        target_node = int(sequence_dict["target_node"])
        # Set lookup replaces the repeated linear `i not in indices` scan.
        labelled_nodes = set(indices)

        for target in ["binary", "class"]:
            score_list = type_dict[target]

            # "binary" collapses every non-zero label/prediction to 1.
            t_preds = preds if target == "class" else [int(el > 0) for el in preds]
            t_labels = labels if target == "class" else [int(el > 0) for el in labels]

            for label, idx in zip(t_labels, indices):
                hit = label == t_preds[idx]

                if idx == target_node:
                    ttype = file_dict.get("type")
                    list_idx = 0 if ttype == "local" else (1 if "neighborhood" in ttype else 2)
                    # NOTE(review): this runs in both the "binary" and the
                    # "class" pass, so every target-node entry contributes two
                    # values to its scenario bucket -- confirm this is intended.
                    scenario_list[list_idx].append(1 if hit else 0)

                # Predictions at this node plus all nodes without a true label.
                f_preds = [p for (i, p) in enumerate(t_preds) if i == idx or i not in labelled_nodes]
                counts = f_preds.count(label)

                # A cutoff counts as a hit when the node is predicted correctly
                # and at most `temp_k` of the considered nodes carry that label.
                for index, temp_k in enumerate(range(1, k + 1)):
                    score_list[index].append(1 if (hit and counts <= temp_k) else 0)

    bc_list = [_mean_or_zero(sub_list) for sub_list in type_dict["binary"]]
    mc_list = [_mean_or_zero(sub_list) for sub_list in type_dict["class"]]
    det_list = [_mean_or_zero(sub_list) for sub_list in scenario_list]

    return pd.Series(bc_list + mc_list + det_list)

In [None]:
path = "../../results/calculated_scores.csv"

# Fail with a message that names the missing file. Previously a missing CSV
# left `df_list` empty and pd.concat raised "No objects to concatenate".
if not os.path.exists(path):
    raise FileNotFoundError(f"Score file not found: {path!r}")

# Column names for the values returned by calculate_loc_score, in its order:
# k binary scores, k class scores, three scenario scores.
bc_ac_cols = [f"bc_ac@{i}" for i in range(1, k + 1)]
mc_ac_cols = [f"mc_ac@{i}" for i in range(1, k + 1)]
det_cols = ["local", "neighborhood", "adversary"]

input_cols = ["graph_true_labels", "graph_true_label_indices", "node_pred_labels", "sequence_ids", "file_ids"]

temp_df = pd.read_csv(path)
temp_df[bc_ac_cols + mc_ac_cols + det_cols] = temp_df[input_cols].apply(
    lambda row: calculate_loc_score(row, k), axis=1
)

# Row-wise mean over all k cutoffs.
temp_df[f"bc_map@{k}"] = temp_df[bc_ac_cols].to_numpy().mean(axis=1)
temp_df[f"mc_map@{k}"] = temp_df[mc_ac_cols].to_numpy().mean(axis=1)

df_list = [temp_df]

# Register the model types only once so that re-running this cell does not
# duplicate them (duplicates would repeat rows in the best-row selection).
model_types.extend([m for m in ("CNN", "GCN") if m not in model_types])

df = pd.concat(df_list, axis=0, ignore_index=True)

# Placeholder; the plotting cell assigns the actual figure.
fig = None

In [None]:
# For every (fold, model type) pair keep the row with the highest `bc_f1_avg`.
# Index labels are collected first and selected in one step:
# DataFrame.append was removed in pandas 2.0 and growing a frame row by row
# copies it on every iteration.
best_indices = []

for fold, model_type in product(folds, model_types):
    df_ = df.loc[(df['fold'] == fold) & (df['model_type'] == model_type)]
    if df_.empty:
        # idxmax on an empty selection raises an unhelpful ValueError;
        # say which combination is missing instead.
        raise ValueError(f"No rows for fold={fold!r}, model_type={model_type!r}")
    best_indices.append(df_["bc_f1_avg"].idxmax())

best_df = df.loc[best_indices].reset_index(drop=True)
print(len(best_df))

In [None]:
# Show every row and every column when a frame is displayed (None = no limit).
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

In [None]:
palette = "muted"

# Scenario columns appear in the printed summary but not in the bar charts.
scenario_scores = ["local", "neighborhood", "adversary"]
# Bar order on the x-axis, pinned explicitly so the position-matched tick
# labels below cannot drift if the row order of the data changes.
score_order = [f"AC@{i}" for i in range(1, 5)] + ["MAP"]
tick_labels = ["PR@1", "PR@2", "PR@3", "PR@4", "MAP"]


def build_score_table(source_df, prefix):
    """Return a long-format table (Model, score_type, score) for one score family.

    Parameters
    ----------
    source_df : pandas.DataFrame
        Needs the columns ``model_type``, ``fold``, the three scenario columns,
        ``{prefix}_map@{k}`` and ``{prefix}_ac@1`` .. ``{prefix}_ac@4``.
    prefix : str
        Score family prefix used in the column names (``"bc"`` or ``"mc"``).

    Returns
    -------
    pandas.DataFrame
        One row per (model, fold, score) holding the per-fold mean score;
        the ``fold`` column itself is dropped.
    """
    renamed_scores = {
        **{name: name for name in scenario_scores},
        f"{prefix}_map@{k}": "MAP",
        **{f"{prefix}_ac@{i}": f"AC@{i}" for i in range(1, 5)},
    }
    score_names = sorted(renamed_scores.values())

    wide = source_df.loc[:, ["model_type", "fold", *renamed_scores]].rename(columns=renamed_scores)
    # Plain dict lookup so an unknown model type fails loudly (KeyError).
    wide = wide.assign(model_type=wide["model_type"].apply(lambda val: name_mapper[val]))
    # Guard against object-typed score columns before averaging.
    wide[score_names] = wide[score_names].astype(float)

    per_fold = wide.groupby(["model_type", "fold"])[score_names].mean().reset_index()
    return (
        per_fold
        .melt(id_vars=["model_type", "fold"], value_vars=score_names,
              var_name="score_type", value_name="score")
        .drop(columns="fold")
        .rename(columns={"model_type": "Model"})
    )


def draw_score_panel(ax, source_df, prefix, title):
    """Print the mean scores per model and draw the bar chart for one family.

    Returns the table that was plotted (scenario scores excluded).
    """
    scores = build_score_table(source_df, prefix)
    print(scores.groupby(["Model", "score_type"]).mean())

    plotted = scores.loc[~scores["score_type"].isin(scenario_scores), :]

    sns.barplot(x="score_type",
                y="score",
                hue="Model",
                data=plotted,
                order=score_order,
                ax=ax,
                capsize=.15,
                ci="sd",
                palette=palette)
    ax.set_title(title)
    ax.get_legend().remove()
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_xticklabels(tick_labels)
    return plotted


fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(12, 3))

# The original cell read `df_plot` before any cell defined it (NameError on a
# fresh kernel), and the second panel reused the already reshaped table, which
# no longer has a `model_type` column (KeyError). Both panels now start from
# the same source frame.
# NOTE(review): `best_df` (best row per fold and model type) is assumed to be
# the intended source -- confirm.
draw_score_panel(ax1, best_df, "bc", "Anomaly Localization")
# Keep the name `df_plot` bound to the last plotted table, as the original did.
df_plot = draw_score_panel(ax2, best_df, "mc", "Anomaly Type Localization")

# The y-axis is shared, so the limit applies to both panels.
ax2.set_ylim(0, 1.05)
# One common legend, anchored relative to the right-hand axes so that it sits
# above and between the two panels.
ax2.legend(loc='upper center', ncol=2, fancybox=True, shadow=True, bbox_to_anchor=(-0.115, 1.42))

plot_path = "../../results/plots/localization/overall_results.pdf"
# savefig does not create missing directories.
os.makedirs(os.path.dirname(plot_path), exist_ok=True)
fig.savefig(plot_path, bbox_inches='tight')