In [1]:
import os
import json

import numpy as np
import tensorflow as tf

import utils_CNN as utils

# Expose only CUDA device 0 to this process.
# NOTE(review): both variables are assigned after `import tensorflow` above;
# presumably they are only read when the GPU / XLA is first initialised, but
# setting them before the import would remove any doubt -- confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# solve the problem of "libdevice not found at ./libdevice.10.bc"
# NOTE(review): this is an absolute, user-specific conda path; it has to be
# adapted when the notebook runs on another machine or account.
os.environ['XLA_FLAGS'] = '--xla_gpu_cuda_data_dir=/home/r10222035/.conda/envs/tf2'

2024-06-06 11:31:07.667617: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-06-06 11:31:07.749095: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
def get_data_model_nB(config_file):
    """Load the training sample and bookkeeping values described by a JSON config.

    Parameters
    ----------
    config_file : str or path-like
        JSON file providing the keys 'train_npy_paths', 'seed', 'sensitivity'
        and 'model_name'.

    Returns
    -------
    tuple
        ``((X_train_SR, X_train_SB), (y_train_SR, y_train_SB), model_name, n_SR_B)``
        where ``n_SR_B`` is the value returned by
        ``utils.compute_nevent_in_SR_SB`` (it is NOT scaled by the training
        fraction).
    """
    with open(config_file) as fp:
        cfg = json.load(fp)

    # Read every required key up front, in a fixed order, so that a missing key
    # fails before any sample is loaded.
    npy_paths, seed, sensitivity, model_name = (
        cfg[key] for key in ('train_npy_paths', 'seed', 'sensitivity', 'model_name')
    )

    # Training / validation splitting ratio; only the training part is used here.
    train_fraction, _val_fraction = 0.8, 0.2

    n_SR_S, n_SR_B, n_SB_S, n_SB_B = utils.compute_nevent_in_SR_SB(sensitivity=sensitivity)

    # Event counts passed to the sampler, scaled to the training fraction and
    # truncated to integers. The ordering (SR_S, SB_S, SR_B, SB_B) is presumably
    # the one expected by utils.get_SR_SB_sample_from_npy -- TODO confirm.
    counts = np.array([n_SR_S, n_SB_S, n_SR_B, n_SB_B])
    train_nevents = (counts * train_fraction).astype(int)

    X_SR, y_SR, X_SB, y_SB = utils.get_SR_SB_sample_from_npy(npy_paths, train_nevents, seed=seed)
    return (X_SR, X_SB), (y_SR, y_SB), model_name, n_SR_B

In [3]:
def sculpting_sensitivity_and_uncertainty(SR_eff, SB_eff, B, N_SR):
    """Fake significance caused by background sculpting, with its uncertainty.

    Parameters
    ----------
    SR_eff : background efficiency in the signal region
    SB_eff : background efficiency in the sideband region
    B      : number of background events in the signal region
    N_SR   : number of events used to calculate SR_eff

    Returns
    -------
    (sigma, uncertainty)
        sigma       = B * (SR_eff - SB_eff) / sqrt(B * SB_eff)
        uncertainty = sqrt(B / SB_eff) * sqrt(SR_eff * (1 - SR_eff) / N_SR),
        i.e. the binomial error on SR_eff propagated through sigma.
    """
    # Excess of selected events relative to the sideband expectation.
    excess = B * (SR_eff - SB_eff)
    # Background yield expected if SR behaved like SB.
    expected_bkg = B * SB_eff
    sigma = excess / expected_bkg**0.5

    # d(sigma)/d(SR_eff) and the binomial standard error of SR_eff.
    slope = (B / SB_eff)**0.5
    SR_eff_err = (SR_eff * (1 - SR_eff) / N_SR)**0.5
    uncertainty = slope * SR_eff_err

    return sigma, uncertainty

In [4]:
def get_SRfpr_from_SBfpr(X_SRSB, y_SRSB, model_name, bkg_effs=[0.1]):
    """Map sideband background efficiencies to signal-region ones.

    fpr = false positive rate = background efficiency.

    For every requested sideband fpr, the score threshold reaching that fpr on
    the sideband sample is looked up, and the signal-region fpr at the same
    threshold is returned.

    Parameters
    ----------
    X_SRSB : pair ``(X_SR, X_SB)`` of model inputs
    y_SRSB : pair ``(y_SR, y_SB)`` of labels; entries equal to 1 are handed to
        ``utils.get_fpr_thresholds`` as the positive class
    model_name : suffix of the saved-model directory under ``./CNN_models/``
    bkg_effs : iterable of sideband background efficiencies

    Returns
    -------
    list
        One signal-region fpr per entry of ``bkg_effs``, in the same order.
    """
    model_dir = f'./CNN_models/last_model_CWoLa_hunting_{model_name}/'
    classifier = tf.keras.models.load_model(model_dir)

    X_SR, X_SB = X_SRSB
    y_SR, y_SB = y_SRSB

    # Sideband scores and their fpr / threshold curve.
    scores_SB = classifier.predict(X_SB, batch_size=1024)
    fpr_SB, th_SB = utils.get_fpr_thresholds(y_SB == 1, scores_SB)

    # Signal-region scores and their fpr / threshold curve.
    scores_SR = classifier.predict(X_SR, batch_size=1024)
    fpr_SR, th_SR = utils.get_fpr_thresholds(y_SR == 1, scores_SR)

    def _fpr_SR_at(bkg_eff):
        # Threshold that yields `bkg_eff` in the sideband.
        cut = utils.get_threshold_from_fpr(fpr_SB, th_SB, bkg_eff)
        # Count of SR thresholds below the cut, used as an index into fpr_SR.
        # NOTE(review): this presumably relies on th_SR being sorted in
        # ascending order, and would index out of range if every SR threshold
        # were below the cut -- confirm against utils.get_fpr_thresholds.
        n_below = (th_SR < cut).sum()
        return fpr_SR[n_below]

    return [_fpr_SR_at(eff) for eff in bkg_effs]

In [5]:
def print_SBfpr_SRfpr_fake_sensitivity(X, y, model_name, n_SR_B, SB_effs=[0.1, 0.01, 0.001]):
    """Print LaTeX table rows: SB efficiency, SR efficiency, fake significance.

    Parameters
    ----------
    X : pair ``(X_SR, X_SB)`` of model inputs
    y : pair ``(y_SR, y_SB)`` of labels
    model_name : name of the saved model, forwarded to ``get_SRfpr_from_SBfpr``
    n_SR_B : number of background events in the signal region, used to scale
        the significance
    SB_effs : sideband background efficiencies to evaluate

    Each printed row has the form
    ``<SB_eff>\\% & <SR_eff>\\% & $<sigma> \\pm <uncertainty>$``
    with the efficiencies given in percent.
    """
    y_SR, _ = y

    SR_effs = get_SRfpr_from_SBfpr(X, y, model_name, SB_effs)

    # Number of label-0 events in the SR sample; this is the sample size used
    # for the binomial error on SR_eff. It does not depend on the loop variable.
    n_bkg_SR = (y_SR == 0).sum()

    for SB_eff, SR_eff in zip(SB_effs, SR_effs):
        sigma, uncertainty = sculpting_sensitivity_and_uncertainty(SR_eff, SB_eff, n_SR_B, n_bkg_SR)
        # Raw f-string: `\%` and `\p` are not valid escape sequences in a
        # normal string literal (warning today, error in a future Python), while
        # the printed text must keep the literal backslashes for LaTeX.
        print(rf'{SB_eff * 100:.2f}\% & {SR_eff * 100:.2f}\% & ${sigma:.1f} \pm {uncertainty:.1f}$')

In [6]:
# Sideband background efficiencies (10%, 1%, 0.1%) passed as `SB_effs` to
# print_SBfpr_SRfpr_fake_sensitivity in the evaluation cells below.
SB_effs = [0.1, 0.01, 0.001]

# Resolution: $25 \times 25$

In [7]:
# Image resolution; interpolated into the sample paths here and into the
# config file names in the following cells.
res = 25
# Test samples for the signal region (SR) and the sideband (SB, "-SB" file).
X_test_SR, y_test_SR = utils.load_samples(f'../Sample/HVmodel/data/split_val/{res}x{res}/mix_sample_test.npy')
X_test_SB, y_test_SB = utils.load_samples(f'../Sample/HVmodel/data/split_val/{res}x{res}/mix_sample_test-SB.npy')

In [8]:
# Configs to evaluate at the current resolution `res`.
config_files = [
    f'config_files/origin_{res}x{res}_config_01.json',
    f'config_files/jet_aug_3_{res}x{res}_config_01.json',
    f'config_files/pt_jet_aug_3_{res}x{res}_config_01.json',
]

for config_file in config_files:
    print(config_file)
    # Training sample, model name and SR background count for this config.
    X_train, y_train, model_name, n_SR_B = get_data_model_nB(config_file)
    # First table block: the model evaluated on its own training sample.
    print_SBfpr_SRfpr_fake_sensitivity(X_train, y_train, model_name, n_SR_B, SB_effs=SB_effs)
    # Second table block: the same model evaluated on the test sample loaded above.
    print_SBfpr_SRfpr_fake_sensitivity((X_test_SR, X_test_SB), (y_test_SR, y_test_SB), model_name, n_SR_B, SB_effs=SB_effs)
    print()

config_files/origin_25x25_config_01.json
Background cross section, SR: 136.13 fb, SB: 145.57 fb
Background sample size: SR: 18922.4, SB: 20234.0
Signal sample size: SR: 0.0, SB: 0.0
Preparing dataset from ['../Sample/HVmodel/data/origin/25x25']


2024-06-06 11:31:15.528449: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-06-06 11:31:16.130205: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 46699 MB memory:  -> device: 0, name: NVIDIA RTX A6000, pci bus id: 0000:3b:00.0, compute capability: 8.6
2024-06-06 11:31:18.978944: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8401




2024-06-06 11:31:20.622670: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:630] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


10.00\% & 12.37\% & $10.3 \pm 1.2$
1.00\% & 2.02\% & $14.1 \pm 1.6$
0.10\% & 0.22\% & $5.4 \pm 1.7$
10.00\% & 10.71\% & $3.1 \pm 1.3$
1.00\% & 1.71\% & $9.8 \pm 1.8$
0.10\% & 0.06\% & $-1.7 \pm 1.1$

config_files/jet_aug_3_25x25_config_01.json
Background cross section, SR: 136.13 fb, SB: 145.57 fb
Background sample size: SR: 18922.4, SB: 20234.0
Signal sample size: SR: 0.0, SB: 0.0
Preparing dataset from ['../Sample/HVmodel/data/origin/25x25', '../Sample/HVmodel/data/jet_rotation/25x25/01', '../Sample/HVmodel/data/jet_rotation/25x25/02', '../Sample/HVmodel/data/jet_rotation/25x25/03']
10.00\% & 14.21\% & $18.3 \pm 0.6$
1.00\% & 2.68\% & $23.1 \pm 0.9$
0.10\% & 0.11\% & $0.5 \pm 0.6$
10.00\% & 11.18\% & $5.1 \pm 1.4$
1.00\% & 2.16\% & $16.0 \pm 2.0$
0.10\% & 0.03\% & $-3.0 \pm 0.8$

config_files/pt_jet_aug_3_25x25_config_01.json
Background cross section, SR: 136.13 fb, SB: 145.57 fb
Background sample size: SR: 18922.4, SB: 20234.0
Signal sample size: SR: 0.0, SB: 0.0
Preparing dataset f

In [9]:
# Configs whose file names start with "only_" (presumably models trained on
# augmented samples only -- confirm against the config contents).
config_files = [
    f'config_files/only_jet_aug_4_{res}x{res}_config_01.json',
    f'config_files/only_pt_jet_aug_4_{res}x{res}_config_02.json',
]

for config_file in config_files:
    print(config_file)
    # Training sample, model name and SR background count for this config.
    X_train, y_train, model_name, n_SR_B = get_data_model_nB(config_file)
    # First table block: the model on its own training sample.
    print_SBfpr_SRfpr_fake_sensitivity(X_train, y_train, model_name, n_SR_B, SB_effs=SB_effs)
    # Second table block: the same model on the training sample of the "origin"
    # config. Only X/y are taken from it; `model_name` and `n_SR_B` stay those
    # of `config_file`.
    # NOTE(review): this reload does not depend on the loop variable, so it is
    # repeated (and its log lines re-printed) on every iteration.
    X_train, y_train, _, _ = get_data_model_nB(f'config_files/origin_{res}x{res}_config_01.json')
    print_SBfpr_SRfpr_fake_sensitivity(X_train, y_train, model_name, n_SR_B, SB_effs=SB_effs)
    # Third table block: the same model on the test sample.
    print_SBfpr_SRfpr_fake_sensitivity((X_test_SR, X_test_SB), (y_test_SR, y_test_SB), model_name, n_SR_B, SB_effs=SB_effs)
    print()

config_files/only_jet_aug_4_25x25_config_01.json
Background cross section, SR: 136.13 fb, SB: 145.57 fb
Background sample size: SR: 18922.4, SB: 20234.0
Signal sample size: SR: 0.0, SB: 0.0
Preparing dataset from ['../Sample/HVmodel/data/jet_rotation/25x25/01', '../Sample/HVmodel/data/jet_rotation/25x25/02', '../Sample/HVmodel/data/jet_rotation/25x25/03', '../Sample/HVmodel/data/jet_rotation/25x25/04']
10.00\% & 14.12\% & $17.9 \pm 0.6$
1.00\% & 2.29\% & $17.7 \pm 0.8$
0.10\% & 0.31\% & $8.9 \pm 1.0$
Background cross section, SR: 136.13 fb, SB: 145.57 fb
Background sample size: SR: 18922.4, SB: 20234.0
Signal sample size: SR: 0.0, SB: 0.0
Preparing dataset from ['../Sample/HVmodel/data/origin/25x25']
10.00\% & 13.05\% & $13.3 \pm 1.2$
1.00\% & 2.29\% & $17.7 \pm 1.7$
0.10\% & 0.19\% & $4.0 \pm 1.5$
10.00\% & 11.02\% & $4.4 \pm 1.4$
1.00\% & 1.93\% & $12.8 \pm 1.9$
0.10\% & 0.07\% & $-1.3 \pm 1.2$

config_files/only_pt_jet_aug_4_25x25_config_02.json
Background cross section, SR: 136.13 

# Resolution: $75 \times 75$

In [10]:
# Switch to the 75x75 resolution; this rebinds `res` and the test-sample
# variables that the following cells read.
res = 75
# Test samples for the signal region (SR) and the sideband (SB, "-SB" file).
X_test_SR, y_test_SR = utils.load_samples(f'../Sample/HVmodel/data/split_val/{res}x{res}/mix_sample_test.npy')
X_test_SB, y_test_SB = utils.load_samples(f'../Sample/HVmodel/data/split_val/{res}x{res}/mix_sample_test-SB.npy')

In [11]:
# Configs to evaluate at the current resolution `res`.
config_files = [
    f'config_files/origin_{res}x{res}_config_01.json',
    f'config_files/jet_aug_3_{res}x{res}_config_01.json',
    f'config_files/pt_jet_aug_3_{res}x{res}_config_01.json',
]

for config_file in config_files:
    print(config_file)
    # Training sample, model name and SR background count for this config.
    X_train, y_train, model_name, n_SR_B = get_data_model_nB(config_file)
    # First table block: the model evaluated on its own training sample.
    print_SBfpr_SRfpr_fake_sensitivity(X_train, y_train, model_name, n_SR_B, SB_effs=SB_effs)
    # Second table block: the same model evaluated on the test sample loaded above.
    print_SBfpr_SRfpr_fake_sensitivity((X_test_SR, X_test_SB), (y_test_SR, y_test_SB), model_name, n_SR_B, SB_effs=SB_effs)
    print()

config_files/origin_75x75_config_01.json
Background cross section, SR: 136.13 fb, SB: 145.57 fb
Background sample size: SR: 18922.4, SB: 20234.0
Signal sample size: SR: 0.0, SB: 0.0
Preparing dataset from ['../Sample/HVmodel/data/origin/75x75']
10.00\% & 12.81\% & $12.2 \pm 1.2$
1.00\% & 2.17\% & $16.1 \pm 1.6$
0.10\% & 0.52\% & $18.4 \pm 2.5$
10.00\% & 10.63\% & $2.7 \pm 1.3$
1.00\% & 1.62\% & $8.5 \pm 1.7$
0.10\% & 0.16\% & $2.6 \pm 1.7$

config_files/jet_aug_3_75x75_config_01.json
Background cross section, SR: 136.13 fb, SB: 145.57 fb
Background sample size: SR: 18922.4, SB: 20234.0
Signal sample size: SR: 0.0, SB: 0.0
Preparing dataset from ['../Sample/HVmodel/data/origin/75x75', '../Sample/HVmodel/data/jet_rotation/75x75/01', '../Sample/HVmodel/data/jet_rotation/75x75/02', '../Sample/HVmodel/data/jet_rotation/75x75/03']
10.00\% & 13.76\% & $16.4 \pm 0.6$
1.00\% & 2.18\% & $16.3 \pm 0.8$
0.10\% & 0.40\% & $13.2 \pm 1.1$
10.00\% & 10.66\% & $2.9 \pm 1.3$
1.00\% & 1.39\% & $5.4 \pm 1