In [1]:
%load_ext autoreload
%autoreload 2

import os
import pickle
import pandas as pd
import matplotlib.pyplot as plt
import concurrent.futures
import numpy as np

import warnings
from typing import Self
from datetime import datetime
from functools import reduce
from sklearn.metrics import precision_recall_curve, auc, roc_curve
from concurrent.futures import ThreadPoolExecutor, wait, FIRST_COMPLETED

import src.utils.anomalydetectors as m
import src.utils.globals as g
import src.utils.aggregators as agg

from src.utils.experiment import Experiment
from src.utils.noise import NoiseFactory
from src.utils.plotting import plot_rpcurves

In [2]:
def run_experiment(df : pd.DataFrame, models, columns, name):
    """Run one experiment, attach ground-truth labels, compute metrics and pickle it.

    A boolean ``anomalous`` column is derived from ``anomaly_syn_type`` (rows
    whose value differs from the empty string are flagged), and a per-``seqid``
    label frame is built that is True when at least one row of the sequence is
    flagged.

    Args:
        df: Input frame; must contain the ``anomaly_syn_type`` and ``seqid``
            columns. The caller's frame is left untouched: the ``anomalous``
            column is added to a copy, and that copy is what the experiment
            receives.
        models: Passed through to ``Experiment.run`` and
            ``Experiment.calculate_metrics``.
        columns: Passed through to ``Experiment.run``.
        name: Passed to the ``Experiment`` constructor.

    Returns:
        The ``Experiment`` instance, after ``calculate_metrics`` and
        ``pickle()`` have been called on it.
    """
    # Derive the row-level flag on a copy so repeated runs on the same input
    # frame do not accumulate state (and no SettingWithCopyWarning is raised
    # when ``df`` is itself a filtered slice).
    # NOTE(review): in an object-dtype column, missing values also compare
    # unequal to "" and would be flagged — confirm this is intended.
    labelled = df.assign(anomalous=df['anomaly_syn_type'] != "")

    # "any" uses the built-in groupby reduction instead of calling the Python
    # builtin once per group; the result for a boolean column is the same.
    df_anomalies = labelled.groupby('seqid').agg({'anomalous' : 'any'})

    experiment = Experiment(name)
    experiment.run(labelled, models, columns, verbose=True)
    experiment.set_anomalies(df_anomalies)
    experiment.calculate_metrics(models=models, aggrfunc=agg.aggr_sum)

    experiment.pickle()

    return experiment

def run_gaussian_experiment(df : pd.DataFrame, models, columns, ratio, intensity, robotids = None):
    """Build a Gaussian-noise variant of ``df`` and run an experiment on it.

    The experiment name is ``gaussian_{ratio}_{intensity}``; when ``robotids``
    is given, the frame is first restricted to rows whose ``robotid`` is in
    that collection and the stripped ids are appended to the name, joined by
    underscores.

    Args:
        df: Input frame; needs a ``robotid`` column when ``robotids`` is given.
        models: Passed through to ``run_experiment``.
        columns: Passed through to ``run_experiment``. Only ``columns[0]`` is
            handed to ``NoiseFactory.gaussian``.
        ratio: Forwarded to ``NoiseFactory.gaussian`` and used in the name.
        intensity: Forwarded to ``NoiseFactory.gaussian`` and used in the name.
        robotids: Optional collection of robot ids to keep. ``None`` or an
            empty collection means no filtering.

    Returns:
        Whatever ``run_experiment`` returns for the noisy frame.
    """
    name = f"gaussian_{ratio}_{intensity}"

    if robotids:
        df = df[df['robotid'].isin(robotids)]
        # str() keeps the name suffix working for non-string ids (e.g. ints).
        name = name + f"_{'_'.join(str(item).strip() for item in robotids)}"

    df_syn = NoiseFactory.gaussian(df, columns[0], ratio, intensity)
    return run_experiment(df_syn, models, columns, name)

In [None]:
# Load the input frame from the parquet file at the project-configured path.
df_in = pd.read_parquet(g.path_imu)

# Detectors used in the experiments, keyed by a short name.
models = {
    "z": m.ZScore(),
    "mz": m.MZScore(),
    "lof": m.LOF(),
    "if": m.IF(),
}

# Compute the distinct sequence ids once, then show them with their count.
seqids = df_in['seqid'].unique()
print(seqids, len(seqids))