In [1]:
import numpy as np
from tqdm import tqdm

In [2]:
# Efficiencies handed to calc_and_apply_threshold by make_arrays / make_arrays_shifted;
# each one selects the (1 - efficiency) quantile of the samples scores as threshold.
BH_percentiles = [1e-2, 1e-3, 1e-4]
# Fixed score thresholds handed to apply_fixed_cut (as eps) by the same two functions.
fixed_cut = [0.51, 0.53, 0.55]

def calc_and_apply_threshold(samples_preds, data_preds, efficiency):
    """
    Returns number of samples and data events before and after cut

    The threshold is the (1 - efficiency) quantile of the samples classifier
    scores (numpy "nearest" method, i.e. an actual sample value). The same
    inclusive cut `score >= threshold` is then applied to the samples scores
    and to the data scores. For efficiency == 1 the threshold is set to 0.

    Parameters
    ----------
    samples_preds : array_like
        Classifier scores of the samples; they define the threshold.
    data_preds : array_like
        Classifier scores of the data.
    efficiency : float
        Fraction of the samples that should pass the cut.

    Returns
    -------
    tuple of int
        (N_samples_after, N_samples, N_after, N): passing and total number of
        samples, then passing and total number of data events. Totals are the
        len() of the inputs.
    """
    samples_preds = np.asarray(samples_preds)
    data_preds = np.asarray(data_preds)
    if efficiency == 1:
        # No quantile needed; assumes scores are non-negative so that a
        # threshold of 0 keeps every event -- TODO confirm for new classifiers.
        eps = 0.
    else:
        eps = np.quantile(samples_preds, 1-efficiency, method="nearest")
    # count_nonzero counts the True entries directly; np.size(np.where(mask))
    # would count every passing entry once per array dimension.
    N_samples_after = np.count_nonzero(samples_preds >= eps)
    N_samples = len(samples_preds)
    # Inclusive comparison, identical to the one used for the samples, so that
    # a data score equal to the threshold is treated like a sample score.
    N_after = np.count_nonzero(data_preds >= eps)
    N = len(data_preds)
    return N_samples_after, N_samples, N_after, N

def apply_fixed_cut(samples_preds, data_preds, eps):
    """
    Returns number of samples and data events before and after a fixed cut

    Counts the entries of the samples classifier scores and of the data
    classifier scores that are greater than or equal to the fixed threshold
    eps. NaN scores are replaced by 0 before the comparison.

    Parameters
    ----------
    samples_preds : array_like
        Classifier scores of the samples.
    data_preds : array_like
        Classifier scores of the data.
    eps : float
        Threshold; an entry passes when score >= eps.

    Returns
    -------
    tuple of int
        (N_samples_after, N_samples, N_after, N): passing and total number of
        samples, then passing and total number of data events. Totals are the
        len() of the inputs.
    """
    # NaN never compares equal to anything (not even itself), so `x == np.nan`
    # is always False; np.isnan is required to actually find the NaN entries.
    samples_preds = np.where(np.isnan(samples_preds), 0, samples_preds)
    data_preds = np.where(np.isnan(data_preds), 0, data_preds)
    # count_nonzero counts the True entries directly; np.size(np.where(mask))
    # would count every passing entry once per array dimension.
    N_samples_after = np.count_nonzero(samples_preds >= eps)
    N_samples = len(samples_preds)
    N_after = np.count_nonzero(data_preds >= eps)
    N = len(data_preds)
    return N_samples_after, N_samples, N_after, N

def make_arrays(folder, start_runs=0, runs=2100):
    """
    Count passing/total events for every run and cut and save the results in folder

    For each run index r in range(start_runs, runs), column 1 of
    folder + "run<r>/BT_preds.npy" (samples scores) and of
    folder + "run<r>/data_preds.npy" (data scores) is loaded. The counts from
    calc_and_apply_threshold are stored for every entry of BH_percentiles and
    the counts from apply_fixed_cut for every entry of fixed_cut.

    Parameters
    ----------
    folder : str
        Directory containing the run<r>/ subdirectories. It is concatenated
        directly with the file names, so it has to end with a path separator.
    start_runs : int
        First run index to process; rows below start_runs stay zero.
    runs : int
        One past the last run index; also the number of rows of every array.

    Files written to folder
    -----------------------
    N_samples_after.npy, N_samples.npy, N_after.npy, N.npy
        shape (runs, len(BH_percentiles))
    N_samples_after_fixed.npy, N_after_fixed.npy
        shape (runs, len(fixed_cut))
    """
    quantile_shape = (runs, len(BH_percentiles))
    # The fixed-cut results are indexed by position in fixed_cut, so they must
    # be sized by that list and not by BH_percentiles.
    fixed_shape = (runs, len(fixed_cut))
    N_samples_after = np.zeros(quantile_shape)
    N_samples_after_fixed = np.zeros(fixed_shape)
    N_samples = np.zeros(quantile_shape)
    N_after = np.zeros(quantile_shape)
    N_after_fixed = np.zeros(fixed_shape)
    N = np.zeros(quantile_shape)

    for r in tqdm(range(start_runs, runs)):
        f = folder+"run"+str(r)+"/"
        # Only column 1 of the stored predictions is used as the score.
        samples_preds = np.load(f+"BT_preds.npy")[:,1]
        data_preds = np.load(f+"data_preds.npy")[:,1]
        for j, perc in enumerate(BH_percentiles):
            N_samples_after[r,j], N_samples[r,j], N_after[r,j], N[r,j] = calc_and_apply_threshold(samples_preds, data_preds, perc)
        for j, cut in enumerate(fixed_cut):
            # Totals are independent of the cut and already stored above.
            N_samples_after_fixed[r,j], _, N_after_fixed[r,j], _ = apply_fixed_cut(samples_preds, data_preds, cut)
    np.save(folder+"N_samples_after.npy", N_samples_after)
    np.save(folder+"N_samples_after_fixed.npy", N_samples_after_fixed)
    np.save(folder+"N_samples.npy", N_samples)
    np.save(folder+"N_after.npy", N_after)
    np.save(folder+"N_after_fixed.npy", N_after_fixed)
    np.save(folder+"N.npy", N)


In [3]:
def make_arrays_shifted(folder, start_runs=0, runs=2100, split_index=10000):
    """
    Same counting as make_arrays, but with part of the data moved into the samples

    For each run index r in range(start_runs, runs), folder + "run<r>/BT_preds.npy"
    and folder + "run<r>/data_preds.npy" are loaded. The first split_index rows
    of data_preds.npy are used as data; the remaining rows are appended to the
    samples (np.append without axis flattens both inputs). Counts from
    calc_and_apply_threshold (per entry of BH_percentiles) and apply_fixed_cut
    (per entry of fixed_cut) are saved in folder with a "shifted_" prefix.

    Parameters
    ----------
    folder : str
        Directory containing the run<r>/ subdirectories. It is concatenated
        directly with the file names, so it has to end with a path separator.
    start_runs : int
        First run index to process; rows below start_runs stay zero.
    runs : int
        One past the last run index; also the number of rows of every array.
    split_index : int
        Row of data_preds.npy at which the data is split (default 10000).

    Files written to folder
    -----------------------
    shifted_N_samples_after.npy, shifted_N_samples.npy, shifted_N_after.npy, shifted_N.npy
        shape (runs, len(BH_percentiles))
    shifted_N_samples_after_fixed.npy, shifted_N_after_fixed.npy
        shape (runs, len(fixed_cut))
    """
    quantile_shape = (runs, len(BH_percentiles))
    # The fixed-cut results are indexed by position in fixed_cut, so they must
    # be sized by that list and not by BH_percentiles.
    fixed_shape = (runs, len(fixed_cut))
    N_samples_after = np.zeros(quantile_shape)
    N_samples_after_fixed = np.zeros(fixed_shape)
    N_samples = np.zeros(quantile_shape)
    N_after = np.zeros(quantile_shape)
    N_after_fixed = np.zeros(fixed_shape)
    N = np.zeros(quantile_shape)

    for r in tqdm(range(start_runs, runs)):
        f = folder+"run"+str(r)+"/"
        # NOTE(review): make_arrays selects column 1 of both files, here no
        # column is selected -- confirm the arrays stored for this study are 1-D.
        samples_preds = np.load(f+"BT_preds.npy")
        d = np.load(f+"data_preds.npy")
        data_preds = d[:split_index]
        samples_preds = np.append(samples_preds, d[split_index:])
        for j, perc in enumerate(BH_percentiles):
            N_samples_after[r,j], N_samples[r,j], N_after[r,j], N[r,j] = calc_and_apply_threshold(samples_preds, data_preds, perc)
        for j, cut in enumerate(fixed_cut):
            # Totals are independent of the cut and already stored above.
            N_samples_after_fixed[r,j], _, N_after_fixed[r,j], _ = apply_fixed_cut(samples_preds, data_preds, cut)
    np.save(folder+"shifted_N_samples_after.npy", N_samples_after)
    np.save(folder+"shifted_N_samples_after_fixed.npy", N_samples_after_fixed)
    np.save(folder+"shifted_N_samples.npy", N_samples)
    np.save(folder+"shifted_N_after.npy", N_after)
    np.save(folder+"shifted_N_after_fixed.npy", N_after_fixed)
    np.save(folder+"shifted_N.npy", N)


In [5]:
# Process run0 ... run9999 of the NN calibration folder and write the count arrays into it.
make_arrays("/hpcwork/zu992399/look_elsewhere/NN_calibration/", runs=10000)
# Shifted variant for the same folder, currently disabled:
#make_arrays_shifted("/hpcwork/zu992399/look_elsewhere/NN_calibration/")

  0%|          | 0/10000 [00:00<?, ?it/s]

100%|██████████| 10000/10000 [08:35<00:00, 19.40it/s]


In [4]:
# Process run0 ... run9999 of the BDT_calibration_bins255 folder and write the count arrays into it.
make_arrays("/hpcwork/zu992399/look_elsewhere/BDT_calibration_bins255/", runs=10000)
# Disabled call; note that it points at the NN_calibration folder, not at the BDT folder used above.
#make_arrays_shifted("/hpcwork/zu992399/look_elsewhere/NN_calibration/")

100%|██████████| 10000/10000 [09:42<00:00, 17.17it/s]


In [5]:
# Process run0 ... run9999 of the BDT_calibration_bins31 folder and write the count arrays into it.
make_arrays("/hpcwork/zu992399/look_elsewhere/BDT_calibration_bins31/", runs=10000)

100%|██████████| 10000/10000 [09:50<00:00, 16.94it/s] 


In [6]:
# Process run0 ... run9999 of the BDT_calibration_bins127 folder and write the count arrays into it.
make_arrays("/hpcwork/zu992399/look_elsewhere/BDT_calibration_bins127/", runs=10000)

100%|██████████| 10000/10000 [10:08<00:00, 16.43it/s] 
