In [None]:
import numpy as np
from sklearn.metrics import auc
from sklearn.metrics import roc_curve
from tqdm.notebook import tqdm

%matplotlib inline
from matplotlib import pyplot as plt

In [None]:
# Signal catalogue: channel key ('1', '2a', '2b', '3') -> {integer signal id: signal name}.
# The processing loop in this notebook iterates the channel keys and the signal ids
# and uses both as fields of the score-file names; the name strings themselves are
# not read by the code visible here (presumably kept for labelling -- confirm).
# Note that the same id can refer to different signals in different channels.
SIG_NAMES = {
    '1': {
        1: "stop2b1000_neutralino300",
        2: "glgl1400_neutralino1100",
        28: "monojet_Zp2000.0_DM_50.0",
        29: "glgl1600_neutralino800",
        30: "monotop_200_A",
        31: "stlp_st1000",
        32: "sqsq_sq1800_neut800",
        33: "sqsq1_sq1400_neut800",
        999: "Secret"
    },
    '2a': {
        25: "chaneut_cha250_neut150",
        26: "chaneut_cha400_neut200",
        27: "pp24mt_50",
        28: "pp23mt_50",
        29: "gluino_1000.0_neutralino_1.0",
        30: "chaneut_cha200_neut50",
        31: "chaneut_cha300_neut100",
        999: "Secret"
    },
    '2b': {
        1: "pp24mt_50",
        2: "chaneut_cha200_neut50",
        3: "stlp_st1000",
        4: "chacha_cha600_neut200",
        5: "pp23mt_50",
        6: "chaneut_cha250_neut150",
        7: "chacha_cha400_neut60",
        34: "gluino_1000.0_neutralino_1.0",
        35: "chacha_cha300_neut140",
        999: "Secret"
    },
    '3': {
        1: "glgl1600_neutralino800",
        2: "monojet_Zp2000.0_DM_50.0",
        3: "gluino_1000.0_neutralino_1.0",
        4: "stop2b1000_neutralino300",
        5: "sqsq1_sq1400_neut800",
        6: "monotop_200_A",
        7: "monoV_Zp2000.0_DM_1.0",
        8: "stlp_st1000",
        34: "sqsq_sq1800_neut800",
        35: "glgl1400_neutralino1100",
        999: "Secret"
    }
}
# Values substituted into the "z" field of the score-file names
# (presumably the latent-space dimensions of the trained models -- confirm).
dim_z = [5,8,13,21,34,55,89,144,233]
# Values substituted into the "MSE<target>" field of the score-file names.
target_vals = [0,1,2,3,4,10,25]
# Run index; not referenced by the code visible in this notebook section.
run = 0

In [None]:
def performance(bkg_events, sig_events):
    """Measure how well anomaly scores separate signal from background.

    Parameters
    ----------
    bkg_events : 1D array of anomaly scores for the background dataset
        (labelled 0 when building the ROC curve).
    sig_events : 1D array of anomaly scores for the signal dataset
        (labelled 1 when building the ROC curve).

    Returns
    -------
    tuple
        ``(AUC, epsilon1, epsilon2, epsilon3)``: the area under the ROC curve,
        followed by the signal efficiency (TPR) read at the first ROC point
        whose background efficiency (FPR) is at least 10^-2, 10^-3 and 10^-4
        respectively. An efficiency is left at 0.0 when no ROC point reaches
        its target.
    """
    # Background first, then signal, with matching 0/1 truth labels.
    scores = np.concatenate((np.ravel(bkg_events), np.ravel(sig_events)))
    truth = np.concatenate((np.zeros(len(bkg_events)), np.ones(len(sig_events))))

    # ROC curve via sklearn, and the area underneath it.
    fpr, tpr, _ = roc_curve(truth, scores)
    area = auc(fpr, tpr)

    def signal_eff_at(bkg_target):
        # First ROC point at or beyond the requested background efficiency.
        reached = np.flatnonzero(fpr >= bkg_target)
        return tpr[reached[0]] if reached.size else 0.0

    return (area,) + tuple(signal_eff_at(10.0 ** -power) for power in (2, 3, 4))


In [None]:
# Score-file naming scheme:
#   <scores_dir>/radius-<type>-MSE<target>-run<run>_<channel>_<z>-<signal>
# e.g. best_scores/radius-bg-MSE0-run0_2b_89-1
def load_model(target_val, channel, z, sig, run=0, scores_dir='best_scores'):
    """Load the background and signal score files of one model configuration.

    Parameters
    ----------
    target_val : value placed in the ``MSE<target>`` field of the file name.
    channel : value placed in the channel field (e.g. ``'2b'``).
    z : value placed in the z field.
    sig : value placed in the trailing signal field.
    run : value placed in the ``run<run>`` field; defaults to 0, the value
        that used to be hard-coded.
    scores_dir : directory holding the score files; defaults to
        ``'best_scores'``, the previously hard-coded location.

    Returns
    -------
    tuple
        ``(bg, sig)`` arrays as returned by ``np.loadtxt`` for the ``bg`` and
        ``sig`` files respectively.

    Raises
    ------
    Whatever ``np.loadtxt`` raises when a file is missing or unparsable.
    """
    # Everything after the "radius-<type>" prefix is shared by both files.
    suffix = '-MSE{}-run{}_{}_{}-{}'.format(target_val, run, channel, z, sig)
    # Separate result names so the `sig` parameter is never overwritten.
    bg_scores = np.loadtxt('{}/radius-bg{}'.format(scores_dir, suffix))
    sig_scores = np.loadtxt('{}/radius-sig{}'.format(scores_dir, suffix))
    return bg_scores, sig_scores



In [None]:
# For every channel/signal pair, merge the anomaly scores of all
# (target value, z) models into one score per event using several reductions,
# save each combined score array, and print the AUC it achieves.
#
# np.prod is used for the product: the np.product alias was deprecated in
# NumPy 1.25 and removed in NumPy 2.0.
COMBINERS = (
    ('max', np.max),
    ('min', np.min),
    ('avg', np.average),
    ('prod', np.prod),
)

for channel, signals in SIG_NAMES.items():
    for sig_val in signals:
        # One (bg, sig) pair per model, in target-major / z-minor order.
        loaded = [
            load_model(target, channel, z, sig_val)
            for target in target_vals
            for z in dim_z
        ]
        # Rows = models, columns = events. np.stack raises ValueError if the
        # score files of different models do not all have the same length.
        tot_bg = np.stack([bg for bg, _ in loaded])
        tot_sig = np.stack([sig for _, sig in loaded])

        for tag, combine in COMBINERS:
            # Collapse the model axis so each event gets a single combined score.
            combined_bg = combine(tot_bg, axis=0)
            combined_sig = combine(tot_sig, axis=0)
            np.savetxt('fixed_target_combined_scores/' + tag + '-bg-' + channel + '-' + str(sig_val), combined_bg)
            np.savetxt('fixed_target_combined_scores/' + tag + '-sig-' + channel + '-' + str(sig_val), combined_sig)
            # Only the AUC (first element of the performance tuple) is reported.
            print(channel, sig_val, 'all', tag, performance(combined_bg, combined_sig)[0])