In [7]:
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score, f1_score
from functools import partial

In [2]:
# Select, for every dataset, the classifier tuning run with the highest accuracy.
df = pd.read_json("../reports/tune_clf.jsonl", orient='records', lines=True)
hparams = ['dataset', 'lr', 'n_hidden_layers', "n_hidden_units"]


def _run_accuracy(row):
    """Fraction of positions where a run's 'preds' entry equals its 'labels' entry."""
    predicted = np.array(row['preds'])
    expected = np.array(row['labels'])
    return (predicted == expected).mean()


df['accuracy'] = df.apply(_run_accuracy, axis=1)

# Index label of the most accurate run within each dataset group.
idcs_best = df.groupby('dataset')['accuracy'].idxmax()
df.loc[idcs_best, hparams + ['accuracy']]

Unnamed: 0,dataset,lr,n_hidden_layers,n_hidden_units,accuracy
95,Covertype,0.03125,1,1024,0.7096
238,Insects abrupt,0.0625,1,1024,0.9726
203,Rotated MNIST,0.03125,1,1024,0.9192


In [3]:
# Merge the two autoencoder tuning reports into the single file that the
# hyperparameter-selection cell reads back in.
df = pd.read_json("../reports/ae_tune_online.jsonl", orient="records", lines=True)
# NOTE(review): this cell used to evaluate a filter
# (n_hidden_layers == 1 and online_finetuning == False) and throw the result
# away, so `df_pre` has always been the unfiltered frame. The dead expression
# is removed; the n_hidden_layers restriction is applied downstream when the
# best runs are selected. Confirm no online_finetuning filter was intended.
df_pre = df

df_online = pd.read_json("../reports/ae_tune_online_v2.jsonl", orient="records", lines=True)
df_all = pd.concat([df_pre, df_online])
# orient="records" never serialises the index, so no `index` argument is
# needed (older pandas releases reject index=False for this orient).
df_all.to_json("../reports/ae_tune.jsonl", orient="records", lines=True)

In [5]:
# Select the best autoencoder tuning run (by ROC AUC) for every
# (dataset, online_finetuning, epochs) combination.
df = pd.read_json("../reports/ae_tune.jsonl", orient="records", lines=True)
hparams = ["dataset", "epochs", "lr_online", "online_finetuning"]

# Store the per-run score and label sequences as NumPy arrays.
for array_col in ("anom_scores", "is_anom"):
    df[array_col] = df[array_col].apply(np.array)


def _run_roc_auc(row):
    """ROC AUC of a run's 'anom_scores' against its 'is_anom' labels."""
    return roc_auc_score(row["is_anom"], row["anom_scores"])


df["roc_auc"] = df.apply(_run_roc_auc, axis=1)

# Only runs with a single hidden layer take part in the selection.
single_layer_runs = df[df["n_hidden_layers"] == 1]
idcs_max = single_layer_runs.groupby(
    ["dataset", "online_finetuning", "epochs"]
)["roc_auc"].idxmax()
df.loc[idcs_max, hparams + ["roc_auc"]]

Unnamed: 0,dataset,epochs,lr_online,online_finetuning,roc_auc
578,Covertype,8,1.0,False,1.0
506,Covertype,0,1.0,True,0.914958
2,Covertype,8,0.5,True,1.0
696,Insects abrupt,8,1.0,False,0.795461
540,Insects abrupt,0,1.0,True,0.671406
831,Insects abrupt,8,0.25,True,0.795278
577,Rotated MNIST,8,1.0,False,0.71689
865,Rotated MNIST,0,1.0,True,0.491437
892,Rotated MNIST,8,0.125,True,0.753113


In [6]:
def get_f1_score(row, quantile, window_size=1000):
    """Score one run's anomaly detection with a rolling-quantile threshold.

    A sample is flagged as anomalous when its score is strictly greater than
    the ``quantile`` of the scores in the rolling window ending at that
    sample. ``min_periods=1`` means the first samples are thresholded on
    however many scores are available so far.

    Args:
        row: Mapping/Series providing "is_anom" (ground-truth labels) and
            "anom_scores" (per-sample anomaly scores).
        quantile: Quantile passed to the rolling ``quantile`` computation.
        window_size: Length of the rolling window.

    Returns:
        The value of ``f1_score`` for the ground-truth labels versus the
        thresholded predictions.
    """
    labels = row["is_anom"]
    scores = row["anom_scores"]
    rolling_scores = pd.Series(scores).rolling(window_size, min_periods=1)
    flagged = scores > rolling_scores.quantile(quantile)
    return f1_score(labels, flagged)

In [8]:
# F1 score of every run for several rolling-window lengths, at a fixed
# threshold quantile of 0.96; one "f1@<window>" column per window length.
window_sizes = [200, 400, 600, 800, 1000]
f1_columns = [f"f1@{size}" for size in window_sizes]

for window_size, f1_column in zip(window_sizes, f1_columns):
    score_run = partial(get_f1_score, quantile=0.96, window_size=window_size)
    df[f1_column] = df.apply(score_run, axis=1)

# Show the F1 columns for the previously selected best runs only.
df.loc[idcs_max, hparams + f1_columns]

Unnamed: 0,dataset,epochs,lr_online,online_finetuning,f1@200,f1@400,f1@600,f1@800,f1@1000
578,Covertype,8,1.0,False,0.826087,0.869565,0.869565,0.869565,0.869565
506,Covertype,0,1.0,True,0.333333,0.222222,0.08,0.08,0.08
2,Covertype,8,0.5,True,0.697674,0.75,0.75,0.75,0.75
696,Insects abrupt,8,1.0,False,0.28,0.269231,0.269231,0.269231,0.269231
540,Insects abrupt,0,1.0,True,0.046512,0.047619,0.04878,0.04878,0.04878
831,Insects abrupt,8,0.25,True,0.170213,0.156863,0.156863,0.156863,0.156863
577,Rotated MNIST,8,1.0,False,0.081633,0.081633,0.081633,0.081633,0.081633
865,Rotated MNIST,0,1.0,True,0.011905,0.01227,0.013245,0.013889,0.014925
892,Rotated MNIST,8,0.125,True,0.047904,0.047619,0.047904,0.048193,0.04908
