In [1]:
import os

import numpy as np
from scipy import stats
from tqdm import tqdm

In [2]:
# Efficiencies scanned by make_arrays: each value is passed as `efficiency` to
# calc_and_apply_threshold, which cuts at the (1 - efficiency) quantile of the
# samples scores.
BH_percentiles = [1e-1, 1e-2, 1e-3, 1e-4]
# NOTE(review): fixed_cut is not referenced in the visible cells - presumably
# fixed classifier-score thresholds used elsewhere; confirm or remove.
fixed_cut = [0.51, 0.53, 0.55]

def calc_and_apply_threshold(samples_preds, data_preds, efficiency):
    """
    Returns number of samples and data events before and after cut

    The threshold is the (1 - efficiency) quantile of the samples classifier
    scores, taken with method="nearest" so that it is one of the observed
    sample scores. The same threshold is then applied to the data classifier
    scores. For ``efficiency == 1`` the threshold is fixed to 0 instead.

    Parameters
    ----------
    samples_preds : array_like
        Classifier scores of the samples. Any shape is accepted; every element
        is counted as one event.
    data_preds : array_like
        Classifier scores of the data, cut at the same threshold.
    efficiency : float
        Value in [0, 1]; the cut is placed at the (1 - efficiency) quantile of
        ``samples_preds``.

    Returns
    -------
    tuple of int
        ``(N_samples_after, N_samples, N_after, N)``: samples above threshold
        plus one, total samples, data events above threshold, total data
        events.
    """
    samples_preds = np.asarray(samples_preds)
    data_preds = np.asarray(data_preds)

    if efficiency == 1:
        # No quantile is needed here: the threshold is pinned to zero.
        # NOTE(review): with eps = 0 the "+1" below can make N_samples_after
        # exceed N_samples by one - confirm this is intended.
        eps = 0.
    else:
        eps = np.quantile(samples_preds, 1 - efficiency, method="nearest")

    # np.count_nonzero counts matching elements for any dimensionality, whereas
    # np.size(np.where(mask)) yields ndim * count for inputs that are not 1-D
    # (e.g. scores stored with shape (n, 1)).
    # The "+1" presumably accounts for the threshold sample itself, which the
    # strict ">" comparison excludes.
    N_samples_after = int(np.count_nonzero(samples_preds > eps)) + 1
    N_samples = samples_preds.size
    N_after = int(np.count_nonzero(data_preds > eps))
    N = data_preds.size
    return N_samples_after, N_samples, N_after, N

def make_arrays(folder, start_runs=0, runs=2100, folds=5, twodims=True):
    """
    Aggregate per-run cut counts over all folds and save them under ``folder``.

    For every run ``r`` in ``range(start_runs, runs)`` the arrays
    ``run<r>/BT_preds.npy`` (samples scores) and ``run<r>/data_preds.npy``
    (data scores) are loaded from ``folder``; both are indexed by fold along
    their first axis. For each fold and each efficiency in ``BH_percentiles``
    the four counts returned by ``calc_and_apply_threshold`` are stored, and
    finally summed over folds and written to ``N_samples_after.npy``,
    ``N_samples.npy``, ``N_after.npy`` and ``N.npy`` inside ``folder``.

    Parameters
    ----------
    folder : str
        Directory containing the ``run<r>`` sub-directories; also the output
        directory. A trailing path separator is optional.
    start_runs : int
        First run index to process. Rows of the saved arrays below this index
        are left at zero.
    runs : int
        One past the last run index; also the length of the run axis of the
        saved arrays.
    folds : int
        Number of folds read from each prediction array.
    twodims : bool
        Currently unused; kept so that existing callers keep working.

    Returns
    -------
    None
        Results are written to disk; each saved array has shape
        ``(runs, len(BH_percentiles))``.
    """
    names = ("N_samples_after", "N_samples", "N_after", "N")
    # counts[k, fold, run, cut] holds the k-th value returned by
    # calc_and_apply_threshold, in the order given by `names`.
    counts = np.zeros((len(names), folds, runs, len(BH_percentiles)))

    for r in tqdm(range(start_runs, runs)):
        # os.path.join keeps the paths valid whether or not `folder` ends with
        # a separator.
        run_dir = os.path.join(folder, "run" + str(r))
        samples_preds = np.load(os.path.join(run_dir, "BT_preds.npy"))
        data_preds = np.load(os.path.join(run_dir, "data_preds.npy"))
        for fold in range(folds):
            for j, perc in enumerate(BH_percentiles):
                counts[:, fold, r, j] = calc_and_apply_threshold(
                    samples_preds[fold], data_preds[fold], perc
                )

    # Sum over the fold axis before saving, one file per count.
    for name, per_fold in zip(names, counts):
        np.save(os.path.join(folder, name + ".npy"), np.sum(per_fold, axis=0))


In [4]:
# Aggregate cut counts for runs 0-9999 under the NN_kfold_calibration folder;
# the N*.npy result files are written into that same folder.
# NOTE(review): the absolute cluster path is hard-coded - consider a
# configurable base directory.
make_arrays("/hpcwork/zu992399/look_elsewhere/NN_kfold_calibration/", runs=10000)
# NOTE(review): make_arrays_shifted is not defined in the visible cells; the
# call below is kept disabled.
#make_arrays_shifted("/hpcwork/zu992399/look_elsewhere/NN_calibration/")

  0%|          | 0/10000 [00:00<?, ?it/s]

100%|██████████| 10000/10000 [09:37<00:00, 17.33it/s]


In [9]:
# Aggregate cut counts for runs 0-9999 under the BDT_calibration_kfold_niter2 folder.
make_arrays("/hpcwork/zu992399/look_elsewhere/BDT_calibration_kfold_niter2/", runs=10000)

100%|██████████| 10000/10000 [09:50<00:00, 16.94it/s]


In [4]:
# Aggregate cut counts for runs 0-999 under the BDT_calibration_kfold_niter1_leaves2 folder.
make_arrays("/hpcwork/zu992399/look_elsewhere/BDT_calibration_kfold_niter1_leaves2/", runs=1000)

100%|██████████| 1000/1000 [00:53<00:00, 18.52it/s]


In [3]:
# Aggregate cut counts for runs 0-9999 under the BDT_calibration_kfold_niter5 folder.
make_arrays("/hpcwork/zu992399/look_elsewhere/BDT_calibration_kfold_niter5/", runs=10000)

100%|██████████| 10000/10000 [09:55<00:00, 16.78it/s]


In [4]:
# Aggregate cut counts for runs 0-2999 under the BDT_calibration_kfold_niter10 folder.
make_arrays("/hpcwork/zu992399/look_elsewhere/BDT_calibration_kfold_niter10/", runs=3000)

100%|██████████| 3000/3000 [02:59<00:00, 16.70it/s]


In [6]:
# Aggregate cut counts for runs 0-968 under the BDT_calibration_kfold_niter20 folder.
make_arrays("/hpcwork/zu992399/look_elsewhere/BDT_calibration_kfold_niter20/", runs=969)

100%|██████████| 969/969 [00:59<00:00, 16.37it/s]
