In [None]:
import sys

import pandas as pd
import numpy as np

# Extend the import search path so that `utils.etl_utils` can be imported below.
# NOTE(review): this is a hard-coded absolute path (presumably the repository root);
# the import only resolves on machines where the project lives at this location.
sys.path.append("/workspace/multivariate-correlation-anomaly-detection/")
from utils.etl_utils import find_anomalies, mix_report_n_class_report_conf_mat

# Lift pandas' display limits so DataFrames are rendered with every column and row.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

In [None]:
# Build the class report and confusion matrix for each dataset ("random", "above",
# "below") from three saved model weights on the "val" split.
# NOTE(review): only the "random" call passes num_classes=5; the other two rely on
# whatever default mix_report_n_class_report_conf_mat uses — confirm this is intended.
rand_class_report, rand_conf_mat = mix_report_n_class_report_conf_mat(
    model_name="ATTNONEDIMGRURESMAPCORRCLASS",
    model_weights_name_list=[
        "epoch_649-20240109130804",
        "epoch_1178-20240110023126",
        "epoch_1448-20240109230650",
    ],
    data_sp_mode="val",
    dataset_name="random",
    num_classes=5,
)
above_class_report, above_conf_mat = mix_report_n_class_report_conf_mat(
    model_name="ATTNONEDIMGRURESMAPCORRCLASS",
    model_weights_name_list=[
        "epoch_1151-20240110080037",
        "epoch_1043-20240110142418",
        "epoch_1330-20240110024812",
    ],
    data_sp_mode="val",
    dataset_name="above",
)
below_class_report, below_conf_mat = mix_report_n_class_report_conf_mat(
    model_name="ATTNONEDIMGRURESMAPCORRCLASS",
    model_weights_name_list=[
        "epoch_1222-20240110135256",
        "epoch_1369-20240111090436",
        "epoch_1245-20240109043855",
    ],
    data_sp_mode="val",
    dataset_name="below",
)

# Render the three confusion matrices one after another.
display(rand_conf_mat, above_conf_mat, below_conf_mat)

In [None]:
# Load the saved test-split report of one model weight; rows are keyed by
# (pair_name, data_category).
report_df = pd.read_csv(
    "./model_result_csvs/ATTNONEDIMGRURESMAPCORRCLASS/epoch_1043-20240110142418/report_preds_err_degree-test.csv",
    index_col=['pair_name', 'data_category'],
)

# Keep only the label / prediction / error-degree rows of every pair and order
# the result by the first index level (pair_name).
display_report_df = (
    report_df
    .loc[pd.IndexSlice[:, ["new_labels", "preds", "preds_err_degree"]], :]
    .sort_index(axis=0, level=0)
)
display(display_report_df)

# Run the project's anomaly search for the same weights on the test split,
# asking it to save its report.
find_anomalies(
    model_name="ATTNONEDIMGRURESMAPCORRCLASS",
    model_weights_name="epoch_1043-20240110142418",
    data_sp_mode="test",
    save_report=True,
)

In [None]:
from pathlib import Path
import matplotlib.pyplot as plt

def compute_mts_anomaly_percentage_per_day(model_name: str, model_weights_name_list: list, data_sp_mode: str, dataset_name: str, save_report: bool = False) -> pd.Series:
    """Compute, per report column, the fraction of rows with a positive prediction error.

    For every entry of ``model_weights_name_list`` the file
    ``./model_result_csvs/{model_name}/{model_weights_name}/report_preds_err_degree-{data_sp_mode}.csv``
    (relative to the current working directory) is read, the rows whose
    ``data_category`` index level equals ``"preds_err_degree"`` are selected, and
    every value greater than 0 is replaced by 1. The selected rows of all reports
    are stacked; for each column the column sum is divided by the number of
    stacked rows.

    Despite the function name, the result is a fraction (sum / row count) and is
    not multiplied by 100.

    Args:
        model_name: Directory name of the model under ``./model_result_csvs``.
        model_weights_name_list: Names of the weight sub-directories whose
            reports are pooled together. Must not be empty.
        data_sp_mode: Suffix of the report file name (the notebook uses
            ``"val"`` and ``"test"``).
        dataset_name: Label used only as the first element of the returned
            Series' name.
        save_report: Currently unused; kept so existing callers keep working.

    Returns:
        A ``pd.Series`` indexed by the report columns (presumably dates — TODO
        confirm against the CSV schema) and named
        ``(dataset_name, "err_percentage")``.

    Raises:
        ValueError: If ``model_weights_name_list`` is empty.
    """
    if not model_weights_name_list:
        raise ValueError("model_weights_name_list must contain at least one model weights name")

    # Collect one binarised frame per weight and concatenate once at the end,
    # instead of repeatedly concatenating onto an initially empty DataFrame.
    binarized_frames = []
    for model_weights_name in model_weights_name_list:
        report_df_dir = Path(f"./model_result_csvs/{model_name}/{model_weights_name}/")
        report_df_path = report_df_dir/f"report_preds_err_degree-{data_sp_mode}.csv"
        report_df = pd.read_csv(report_df_path, index_col=['pair_name', 'data_category'])
        err_degree_df = report_df.loc[(slice(None), ["preds_err_degree"]), :]
        # mask() returns a new frame, so no value is assigned into a slice of
        # report_df (which triggered SettingWithCopyWarning). Only values > 0
        # are replaced; zero, negative or missing entries are left untouched.
        # NOTE(review): assumes error degrees are non-negative — confirm.
        binarized_frames.append(err_degree_df.mask(err_degree_df > 0, 1))
    preds_err_degree_df = pd.concat(binarized_frames)

    # Compute the summary directly rather than appending it as an extra row to
    # the data frame and reading it back.
    err_percentage = preds_err_degree_df.sum(axis=0)/len(preds_err_degree_df)
    err_percentage.name = (dataset_name, "err_percentage")

    print(f"These input model_weights_list contain {len(preds_err_degree_df)} corr_ser")
    print(f"average accuracy of all corr_ser: {1-err_percentage.mean()}")

    return err_percentage


# Per-column error fraction of each dataset, pooled over three saved weights
# evaluated on the "val" split.
random_err_percentage = compute_mts_anomaly_percentage_per_day(
    model_name="ATTNONEDIMGRURESMAPCORRCLASS",
    model_weights_name_list=[
        "epoch_649-20240109130804",
        "epoch_1178-20240110023126",
        "epoch_1448-20240109230650",
    ],
    data_sp_mode="val",
    dataset_name="random",
)
above_err_percentage = compute_mts_anomaly_percentage_per_day(
    model_name="ATTNONEDIMGRURESMAPCORRCLASS",
    model_weights_name_list=[
        "epoch_1151-20240110080037",
        "epoch_1043-20240110142418",
        "epoch_1330-20240110024812",
    ],
    data_sp_mode="val",
    dataset_name="above",
)
below_err_percentage = compute_mts_anomaly_percentage_per_day(
    model_name="ATTNONEDIMGRURESMAPCORRCLASS",
    model_weights_name_list=[
        "epoch_1222-20240110135256",
        "epoch_1369-20240111090436",
        "epoch_1245-20240109043855",
    ],
    data_sp_mode="val",
    dataset_name="below",
)

# Put the three series side by side (one column per dataset) and show the table.
all_dataset_preds_err_percentage = pd.concat(
    [random_err_percentage, above_err_percentage, below_err_percentage],
    axis=1,
)
display(all_dataset_preds_err_percentage)

# Line plot of the three series; note that the "above" series is labelled
# "strong" and the "below" series is labelled "weak" in the legend.
plt.figure(figsize=(24, 12))
plt.plot(random_err_percentage, label="random")
plt.plot(above_err_percentage, label="strong")
plt.plot(below_err_percentage, label="weak")
plt.xticks(rotation=60, fontsize=18)
plt.xlabel("Date", fontsize=24)
plt.ylabel("Prediction error percentage", fontsize=24)
plt.legend(fontsize=24)
plt.show()
plt.close()