# Ablations for the proposed post-hoc system

In [None]:
import pandas as pd

# Per-frame label table. Later cells read its 'vid', 'Frame_ID' and 'Anomaly'
# columns ('Anomaly' is used as y_true when scoring).
labels = pd.read_csv('./data/HR-Avenue-Labels.csv')

In [None]:
def custom_fill(group):
    """Return a copy of *group* with gaps in 'AnomalyScore' filled.

    If the first row's score is missing it is set to 1; every remaining
    missing score then takes the value of the closest preceding row
    (forward fill). The input frame is left untouched.

    Intended to be applied to one video's rows at a time, already in
    frame order.
    """
    filled = group.copy()
    score_col = filled.columns.get_loc('AnomalyScore')

    # Seed the forward fill: a leading NaN has no earlier value to inherit.
    if pd.isna(filled.iloc[0, score_col]):
        filled.iloc[0, score_col] = 1

    filled['AnomalyScore'] = filled['AnomalyScore'].ffill()
    return filled

def process_preds_to_comparison(results, labels, window_size=100):
    """Align per-frame predictions with the label table and smooth them.

    Steps:
      1. Derive an integer ``vid`` from the first two characters of
         ``results['video']`` and cast ``labels['vid']`` to int.
      2. Keep, per (vid, frameID), the minimum ``AnomalyScore`` together with
         the ``AnomalyThreshold`` of the row(s) carrying that minimum.
      3. Right-join onto ``labels`` (``frameID`` == ``Frame_ID``) so every
         labelled frame is present, sorted by (vid, Frame_ID).
      4. Fill missing scores per video: a missing first frame becomes 1, the
         rest are forward-filled (same rule as ``custom_fill``).
      5. Invert the score (``1 - score``) and replace it with a centred
         rolling mean over ``window_size`` frames within each video.
      6. Set ``SmoothedHDAnomaly`` to 1 where the smoothed score exceeds
         ``1 - AnomalyThreshold``, else 0.

    Args:
        results: DataFrame with columns 'video', 'frameID', 'AnomalyScore'
            and 'AnomalyThreshold'. Not modified.
        labels: DataFrame with at least 'vid' and 'Frame_ID'. Not modified.
        window_size: Width, in frames, of the centred rolling mean.

    Returns:
        DataFrame with one row per labelled frame (more if a frame's minimum
        score is tied across different thresholds), sorted by
        (vid, Frame_ID), with the smoothed 'AnomalyScore' and the binary
        'SmoothedHDAnomaly' column added.
    """
    # Work on derived frames so the caller's DataFrames are never mutated.
    results = results.assign(vid=results['video'].str[:2].astype(int))
    labels = labels.assign(vid=labels['vid'].astype(int))

    anomaly_preds = results.groupby(['vid', 'frameID']).agg({
        'AnomalyScore': 'min',
    }).reset_index()

    # Re-attach the threshold belonging to the minimum-score row(s).
    anomaly_preds = anomaly_preds.merge(
        results[['vid', 'frameID', 'AnomalyScore', 'AnomalyThreshold']],
        on=['vid', 'frameID', 'AnomalyScore'],
        how='left',
    ).drop_duplicates()

    comparison = anomaly_preds.merge(
        labels,
        how='right',
        left_on=['vid', 'frameID'],
        right_on=['vid', 'Frame_ID'],
    )

    # Frame_ID (from the labels) is the only frame index present on every row;
    # frameID is NaN wherever a labelled frame had no prediction.
    comparison = comparison.sort_values(['vid', 'Frame_ID'])

    # Per-video gap filling, vectorised so it does not depend on
    # groupby.apply handing the grouping column to the applied function
    # (deprecated since pandas 2.2).
    first_in_video = comparison.groupby('vid').cumcount() == 0
    missing_score = comparison['AnomalyScore'].isna()
    comparison.loc[first_in_video & missing_score, 'AnomalyScore'] = 1
    comparison['AnomalyScore'] = comparison.groupby('vid')['AnomalyScore'].ffill()

    comparison['AnomalyScore'] = 1 - comparison['AnomalyScore']

    # Rows are already in (vid, Frame_ID) order, so the rolling window runs
    # over consecutive frames. Ordering by frameID here would push every
    # gap-filled row (NaN frameID) to the end of its video.
    comparison['AnomalyScore'] = (
        comparison
        .groupby('vid')['AnomalyScore']
        .transform(lambda s: s.rolling(window=window_size, center=True, min_periods=1).mean())
    )

    # NOTE(review): AnomalyThreshold is not filled for frames without a
    # prediction, so the comparison below is False there and those frames are
    # always classified 0 - confirm this is intended.
    comparison['SmoothedHDAnomaly'] = (
        comparison['AnomalyScore'] > 1 - comparison['AnomalyThreshold']
    ).astype(int)

    return comparison

In [None]:
from glob import glob
import re
from sklearn.metrics import roc_auc_score

# Result-file prefix -> display name. The insertion order here is also the
# order in which the ablations appear in `aucs`.
ABLATION_NAMES = {
    'results_posthocnotemp': 'No Temporal Encoding',
    'results_posthocnoodhd': 'No ODHD',
    'results_posthocmorebin': 'Five Bins',
    'results_posthocmoretemp': 'Increased temporal steps',
    'results_posthocsystem': 'Proposed post-hoc system',
}

# One list of per-run AUCs for each ablation.
aucs = {name: [] for name in ABLATION_NAMES.values()}

for r in sorted(glob('data/results/*.csv')):
    # Filter on the name first so unrelated CSVs are never parsed.
    if 'results_posthoc' not in r:
        continue

    # Files are named <prefix>_<run number>.csv; accept either path separator.
    match = re.search(r'([^/\\]+)_\d+\.csv$', r)
    if match is None:
        print(f"Skipping {r}: name does not match <prefix>_<run>.csv")
        continue

    # Unknown prefixes are kept under their raw name rather than raising.
    ablation = ABLATION_NAMES.get(match.group(1), match.group(1))

    result = pd.read_csv(r)
    comparison = process_preds_to_comparison(result, labels, 25)

    auc = roc_auc_score(comparison['Anomaly'], comparison['AnomalyScore'])
    print(f"Iteration: {ablation} AUC: {auc}")

    aucs.setdefault(ablation, []).append(auc)

In [None]:
# `import scipy` alone does not guarantee the `stats` submodule is loaded.
import scipy.stats
import numpy as np

CONFIDENCE = 0.95

# Build one summary row per ablation: the mean AUC over runs and the
# half-width of the two-sided Student-t confidence interval of that mean.
recs = []
for ab, auc in aucs.items():
    n_runs = len(auc)
    if n_runs >= 2:
        t_crit = scipy.stats.t.ppf((1 + CONFIDENCE) / 2., n_runs - 1)
        h = scipy.stats.sem(auc) * t_crit
    else:
        # The interval is undefined for fewer than two runs.
        h = np.nan
    rec = {
        'Ablation': ab,
        'Mean AUC': np.mean(auc) if n_runs else np.nan,
        # Despite the column name this is the 95% CI half-width, not the
        # standard error; the name is kept because the plotting cell reads it.
        'STDError': h,
    }
    recs.append(rec)

mean_aucs = pd.DataFrame(recs)

In [None]:
# Show the per-ablation summary table.
mean_aucs

In [None]:
# Bar chart of the mean AUC per ablation; the 'STDError' column supplies the
# error-bar lengths.
import matplotlib.pyplot as plt

# Global look (these settings persist for later figures in the session).
plt.style.use("seaborn-v0_8-whitegrid")
plt.rcParams.update({"font.family": "Arial", "font.size": 14})

fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(
    mean_aucs['Ablation'],
    mean_aucs['Mean AUC'],
    yerr=mean_aucs['STDError'],
    capsize=5,
    color="#003D6B",
)
ax.set_title('Ablations: Post-hoc System')
ax.set_xlabel('Ablation')
ax.set_ylabel('Mean AUC')
ax.tick_params(axis='x', labelrotation=45)  # long category names overlap otherwise
ax.set_ylim(0, 1)  # AUC lives in [0, 1]
fig.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import accuracy_score

# Frame-level accuracy of the thresholded, smoothed prediction for run 1 of
# the proposed post-hoc system (25-frame smoothing window).
confusion_mat_data = process_preds_to_comparison(
    results=pd.read_csv('./data/results/results_posthocsystem_1.csv'),
    labels=labels,
    window_size=25,
)
accuracy_score(
    y_true=confusion_mat_data['Anomaly'],
    y_pred=confusion_mat_data['SmoothedHDAnomaly'],
)

In [None]:
# Same accuracy check for run 1 of the "No ODHD" ablation.
confusion_mat_data = process_preds_to_comparison(
    results=pd.read_csv('./data/results/results_posthocnoodhd_1.csv'),
    labels=labels,
    window_size=25,
)
accuracy_score(
    y_true=confusion_mat_data['Anomaly'],
    y_pred=confusion_mat_data['SmoothedHDAnomaly'],
)