In [1]:
import os
import json

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from pathlib import Path
from sklearn.metrics import roc_curve, roc_auc_score, accuracy_score

# Expose only CUDA device 2 to this kernel, and point XLA at the conda
# environment's CUDA data directory, which solves the problem of
# "libdevice not found at ./libdevice.10.bc".
os.environ.update({
    'CUDA_VISIBLE_DEVICES': '2',
    'XLA_FLAGS': '--xla_gpu_cuda_data_dir=/home/r10222035/.conda/envs/tf2',
})

2025-05-23 15:51:37.178739: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-05-23 15:51:37.295463: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


# Sampling datasets

In [2]:
def create_mix_sample_from(npy_dirs: list, nevents: tuple, ratios=(0.8, 0.2), seed=0, n_test=10000):
    """Build mixed SR-vs-BR training/validation sets and a VBF-vs-GGF test set.

    Every directory in ``npy_dirs`` must contain ``VBF_in_SR-data.npy``,
    ``VBF_in_BR-data.npy``, ``GGF_in_SR-data.npy`` and ``GGF_in_BR-data.npy``.
    Event indices are drawn once, from the array lengths found in
    ``npy_dirs[0]``, and the same indices are applied to every directory, so
    all directories must hold at least as many events per file.

    Args:
        npy_dirs: list of npy directories; training and validation events are
            taken from each of them and stacked.
        nevents: tuple ``(n_VBF_SR, n_GGF_SR, n_VBF_BR, n_GGF_BR)`` giving the
            number of train+validation events per category (this is the order
            returned by ``compute_nevent_in_SR_BR``).
        ratios: tuple ``(r_train, r_val)``; only ``r_train`` is used, the
            validation set is whatever remains of each category's ``n``.
        seed: seed passed to ``np.random.seed`` (global NumPy RNG state).
        n_test: number of VBF test events and, separately, of GGF test events;
            each is split between SR and BR in proportion to the available
            events.

    Returns:
        ``(data_tr, data_vl, data_te, label_tr, label_vl, label_te)``.
        Training/validation labels are 1 for signal-region events and 0 for
        background-region events. Test labels are 1 for the first ``n_test``
        rows (the VBF events) and 0 for the rest (the GGF events).

    Raises:
        IndexError: if ``npy_dirs`` is empty.
        ValueError: from ``np.random.choice`` when more events are requested
            than a file contains.
    """
    region_files = {
        'VBF_SR': 'VBF_in_SR-data.npy',
        'VBF_BR': 'VBF_in_BR-data.npy',
        'GGF_SR': 'GGF_in_SR-data.npy',
        'GGF_BR': 'GGF_in_BR-data.npy',
    }
    # Row order inside the stacked arrays; the label slices below rely on it.
    train_order = ('VBF_SR', 'GGF_SR', 'VBF_BR', 'GGF_BR')
    test_order = ('VBF_SR', 'VBF_BR', 'GGF_SR', 'GGF_BR')

    def load_regions(npy_dir):
        # Load the four per-category arrays of one directory.
        npy_dir = Path(npy_dir)
        return {key: np.load(npy_dir / fname) for key, fname in region_files.items()}

    first_regions = load_regions(npy_dirs[0])
    n_avail = {key: arr.shape[0] for key, arr in first_regions.items()}

    n_VBF_SR, n_GGF_SR, n_VBF_BR, n_GGF_BR = nevents
    n_req = {'VBF_SR': n_VBF_SR, 'VBF_BR': n_VBF_BR, 'GGF_SR': n_GGF_SR, 'GGF_BR': n_GGF_BR}

    # Split the test quota of each process between SR and BR according to the
    # fraction of available events falling in each region.
    n_VBF_SR_test = int(n_avail['VBF_SR'] / (n_avail['VBF_SR'] + n_avail['VBF_BR']) * n_test)
    n_GGF_SR_test = int(n_avail['GGF_SR'] / (n_avail['GGF_SR'] + n_avail['GGF_BR']) * n_test)
    n_te = {
        'VBF_SR': n_VBF_SR_test,
        'VBF_BR': n_test - n_VBF_SR_test,
        'GGF_SR': n_GGF_SR_test,
        'GGF_BR': n_test - n_GGF_SR_test,
    }

    r_tr, _ = ratios

    np.random.seed(seed)
    print(n_avail['GGF_SR'], n_avail['GGF_BR'], n_avail['VBF_SR'], n_avail['VBF_BR'])

    # The draw order (VBF_SR, VBF_BR, GGF_SR, GGF_BR) must not change: it fixes
    # which events are selected for a given seed.
    idx_tr, idx_vl, idx_te = {}, {}, {}
    for key in ('VBF_SR', 'VBF_BR', 'GGF_SR', 'GGF_BR'):
        idx = np.random.choice(n_avail[key], n_req[key] + n_te[key], replace=False)
        n_train = int(n_req[key] * r_tr)
        idx_tr[key] = idx[:n_train]
        idx_vl[key] = idx[n_train:n_req[key]]
        idx_te[key] = idx[n_req[key]:]

    n_tr_rows = sum(idx_tr[key].shape[0] for key in train_order)
    n_vl_rows = sum(idx_vl[key].shape[0] for key in train_order)
    n_tr_sr = idx_tr['VBF_SR'].shape[0] + idx_tr['GGF_SR'].shape[0]
    n_vl_sr = idx_vl['VBF_SR'].shape[0] + idx_vl['GGF_SR'].shape[0]

    print(f'Preparing dataset from {npy_dirs}')
    tr_parts, vl_parts, tr_labels, vl_labels = [], [], [], []
    regions = first_regions
    for k, npy_dir in enumerate(npy_dirs):
        if k > 0:
            # The first directory is already in memory; only load the others.
            regions = load_regions(npy_dir)

        tr_parts.extend([regions[key][idx_tr[key]] for key in train_order])
        vl_parts.extend([regions[key][idx_vl[key]] for key in train_order])

        # SR events come first in train_order, so they get label 1.
        new_label_tr = np.zeros(n_tr_rows)
        new_label_tr[:n_tr_sr] = 1
        new_label_vl = np.zeros(n_vl_rows)
        new_label_vl[:n_vl_sr] = 1
        tr_labels.append(new_label_tr)
        vl_labels.append(new_label_vl)

    # Concatenate once instead of growing the arrays inside the loop.
    data_tr = np.concatenate(tr_parts, axis=0)
    data_vl = np.concatenate(vl_parts, axis=0)
    label_tr = np.concatenate(tr_labels)
    label_vl = np.concatenate(vl_labels)

    # NOTE(review): the test set is taken from the LAST directory in npy_dirs,
    # whereas create_test_sample_from reads npy_dirs[0] — confirm this is intended
    # when several directories are passed.
    data_te = np.concatenate([regions[key][idx_te[key]] for key in test_order], axis=0)

    # VBF rows (SR then BR) occupy the first n_test rows of the test set.
    label_te = np.zeros(data_te.shape[0])
    label_te[:n_test] = 1

    return data_tr, data_vl, data_te, label_tr, label_vl, label_te

def create_test_sample_from(npy_dirs: list, nevents: tuple, ratios=(0.8, 0.2), seed=0, n_test=10000):
    """Rebuild only the VBF-vs-GGF test set for a given seed.

    The function seeds the global NumPy RNG and performs the same four
    ``np.random.choice`` draws as ``create_mix_sample_from``; the first ``n``
    indices of each draw are skipped and only the trailing test indices are
    used.

    Args:
        npy_dirs: list of npy directories; only ``npy_dirs[0]`` is read.
        nevents: tuple ``(n_VBF_SR, n_GGF_SR, n_VBF_BR, n_GGF_BR)`` giving the
            number of leading (non-test) indices to skip per category (this is
            the order returned by ``compute_nevent_in_SR_BR``).
        ratios: tuple ``(r_train, r_val)``; accepted for signature parity with
            ``create_mix_sample_from`` but not used here.
        seed: seed passed to ``np.random.seed`` (global NumPy RNG state).
        n_test: number of VBF test events and, separately, of GGF test events;
            each is split between SR and BR in proportion to the available
            events.

    Returns:
        ``(data_te, label_te)``. Rows are ordered VBF_SR, VBF_BR, GGF_SR,
        GGF_BR; the first ``n_test`` rows (VBF) are labelled 1, the rest 0.

    Raises:
        IndexError: if ``npy_dirs`` is empty.
        ValueError: from ``np.random.choice`` when more events are requested
            than a file contains.
    """
    npy_dir0 = Path(npy_dirs[0])

    # Insertion order is also the row order of the returned test set.
    regions = {
        'VBF_SR': np.load(npy_dir0 / 'VBF_in_SR-data.npy'),
        'VBF_BR': np.load(npy_dir0 / 'VBF_in_BR-data.npy'),
        'GGF_SR': np.load(npy_dir0 / 'GGF_in_SR-data.npy'),
        'GGF_BR': np.load(npy_dir0 / 'GGF_in_BR-data.npy'),
    }
    n_avail = {key: arr.shape[0] for key, arr in regions.items()}

    n_VBF_SR, n_GGF_SR, n_VBF_BR, n_GGF_BR = nevents
    n_skip = {'VBF_SR': n_VBF_SR, 'VBF_BR': n_VBF_BR, 'GGF_SR': n_GGF_SR, 'GGF_BR': n_GGF_BR}

    # Split the test quota of each process between SR and BR according to the
    # fraction of available events falling in each region.
    n_VBF_SR_test = int(n_avail['VBF_SR'] / (n_avail['VBF_SR'] + n_avail['VBF_BR']) * n_test)
    n_GGF_SR_test = int(n_avail['GGF_SR'] / (n_avail['GGF_SR'] + n_avail['GGF_BR']) * n_test)
    n_te = {
        'VBF_SR': n_VBF_SR_test,
        'VBF_BR': n_test - n_VBF_SR_test,
        'GGF_SR': n_GGF_SR_test,
        'GGF_BR': n_test - n_GGF_SR_test,
    }

    np.random.seed(seed)
    print(n_avail['GGF_SR'], n_avail['GGF_BR'], n_avail['VBF_SR'], n_avail['VBF_BR'])

    # The draw order (VBF_SR, VBF_BR, GGF_SR, GGF_BR) must not change: it fixes
    # which events are selected for a given seed.
    test_parts = []
    for key in ('VBF_SR', 'VBF_BR', 'GGF_SR', 'GGF_BR'):
        idx = np.random.choice(n_avail[key], n_skip[key] + n_te[key], replace=False)
        test_parts.append(regions[key][idx[n_skip[key]:]])

    print(f'Preparing dataset from {npy_dirs}')

    data_te = np.concatenate(test_parts, axis=0)

    # VBF rows (SR then BR) occupy the first n_test rows.
    label_te = np.zeros(data_te.shape[0])
    label_te[:n_test] = 1

    return data_te, label_te

In [3]:
def compute_nevent_in_SR_BR(GGF_cutflow_file='../Sample/selection_results_GGF_300_3.1.npy', VBF_cutflow_file='../Sample/selection_results_VBF_300_3.1.npy', L=300, cut_type='mjj'):
    """Compute the expected GGF/VBF event yields in the signal and background regions.

    Each yield is ``cross_section * N_region / N_total * BR_Haa * L``, where the
    counts are read from the ``'cutflow_number'`` dictionary stored in the
    corresponding cutflow file.

    Args:
        GGF_cutflow_file: path to the pickled ``.npy`` cutflow dictionary of the GGF sample.
        VBF_cutflow_file: path to the pickled ``.npy`` cutflow dictionary of the VBF sample.
        L: integrated luminosity multiplying the yields
            (presumably in fb^-1, matching the cross sections below — TODO confirm).
        cut_type: name of the SR/BR definition; one of ``'mjj'``, ``'deta'``,
            ``'mjj, deta'``, ``'gluon_jet_2'``, ``'gluon_jet_1'``,
            ``'quark_jet_2'``, ``'quark_jet_1'``, ``'quark_gluon_jet_2'``.

    Returns:
        Tuple ``(n_VBF_SR, n_GGF_SR, n_VBF_BR, n_GGF_BR)`` of floats.

    Raises:
        ValueError: if ``cut_type`` is not one of the supported names.
    """
    # cut_type -> (cutflow key of the signal region, cutflow key of the background region)
    region_keys = {
        'mjj': ('mjj: sig region', 'mjj: bkg region'),
        'deta': ('deta: sig region', 'deta: bkg region'),
        'mjj, deta': ('mjj, deta: sig region', 'mjj, deta: bkg region'),
        'gluon_jet_2': ('two gluon jet: sig region', 'two gluon jet: bkg region'),
        'gluon_jet_1': ('one gluon jet: sig region', 'one gluon jet: bkg region'),
        'quark_jet_2': ('two quark jet: sig region', 'two quark jet: bkg region'),
        'quark_jet_1': ('one quark jet: sig region', 'one quark jet: bkg region'),
        # Mixed definition: SR from the two-quark-jet cut, BR from the two-gluon-jet cut.
        'quark_gluon_jet_2': ('two quark jet: sig region', 'two gluon jet: bkg region'),
    }
    # Validate before touching the disk so a typo fails fast with a useful message.
    if not isinstance(cut_type, str) or cut_type not in region_keys:
        raise ValueError(f'cut_type must be one of {sorted(region_keys)}, got {cut_type!r}')
    sig_key, bkg_key = region_keys[cut_type]

    # https://twiki.cern.ch/twiki/bin/view/LHCPhysics/CERNYellowReportPageAt14TeV
    # (the factor 1000 is presumably a pb -> fb conversion — TODO confirm)
    cross_section_GGF = 54.67 * 1000
    cross_section_VBF = 4.278 * 1000
    # https://twiki.cern.ch/twiki/bin/view/LHCPhysics/CERNYellowReportPageBR
    BR_Haa = 0.00227

    # NOTE: allow_pickle=True executes pickle on load; only use trusted cutflow files.
    GGF_cutflow = np.load(GGF_cutflow_file, allow_pickle=True).item()['cutflow_number']
    VBF_cutflow = np.load(VBF_cutflow_file, allow_pickle=True).item()['cutflow_number']

    def expected_events(cross_section, cutflow, key):
        # cross section x selection efficiency x branching ratio x luminosity
        return cross_section * cutflow[key] / cutflow['Total'] * BR_Haa * L

    n_GGF_SR = expected_events(cross_section_GGF, GGF_cutflow, sig_key)
    n_GGF_BR = expected_events(cross_section_GGF, GGF_cutflow, bkg_key)
    n_VBF_SR = expected_events(cross_section_VBF, VBF_cutflow, sig_key)
    n_VBF_BR = expected_events(cross_section_VBF, VBF_cutflow, bkg_key)
    return n_VBF_SR, n_GGF_SR, n_VBF_BR, n_GGF_BR

In [4]:
def get_sample_size(y):
    """Count and print the number of class-1 and class-0 entries in ``y``.

    A 1-D ``y`` is read as class labels directly; any other rank is reduced
    with ``argmax`` along axis 1 first (e.g. one-hot rows).

    Returns:
        Tuple ``(ns, nb)``: number of entries equal to 1 and equal to 0.
    """
    classes = y if len(y.shape) == 1 else y.argmax(axis=1)
    ns = (classes == 1).sum()
    nb = (classes == 0).sum()
    print(ns, nb)
    return ns, nb

In [5]:
def pt_normalization(X):
    """Standardise every image channel to zero mean and unit standard deviation.

    The mean and standard deviation are taken over axes 1 and 2 separately for
    each sample and each remaining axis; the expected input shape is
    ``(n, res, res, 3)``. Standard deviations below ``1e-8`` are replaced by
    ``1e-8`` so constant images map to zeros instead of dividing by zero.
    """
    spatial_axes = (1, 2)
    floor = 1e-8
    centre = np.mean(X, axis=spatial_axes, keepdims=True)
    spread = np.std(X, axis=spatial_axes, keepdims=True)
    safe_spread = np.where(spread < floor, floor, spread)
    return (X - centre) / safe_spread

# Training results

In [6]:
def get_highest_accuracy(y_true, y_pred):
    """Return the highest accuracy obtained by thresholding ``y_pred``.

    Candidate thresholds are those returned by ``roc_curve``; when there are
    more than 1000 of them they are replaced by their 1001 evenly spaced
    percentiles (0 to 100). A sample is predicted positive when its score is
    strictly greater than the threshold.
    """
    candidates = np.array(roc_curve(y_true, y_pred)[2])
    if len(candidates) > 1000:
        # Cap the number of accuracy evaluations.
        candidates = np.percentile(candidates, np.linspace(0, 100, 1001))

    scores = np.array([accuracy_score(y_true, y_pred > cut) for cut in candidates])
    return scores.max()

In [7]:
# Training parameters: read once from params.json and exposed as notebook-level names.
with open('params.json', 'r') as f:
    params = json.load(f)

BATCH_SIZE, EPOCHS = params['BATCH_SIZE'], params['EPOCHS']
patience, min_delta = params['patience'], params['min_delta']
learning_rate = params['learning_rate']

## Original

In [8]:
# Evaluate the 10 trained models of this configuration on two truth-labelled
# test sets: the pre-processed images and the images without pre-processing.
ACC_preprocess, AUC_preprocess = [], []
ACC_wo_preprocess, AUC_wo_preprocess = [], []

# (test-set directory, ACC accumulator, AUC accumulator)
test_sets = [
    ('../Sample/data/quark_jet_2_cut/pre-processing/40x40/', ACC_preprocess, AUC_preprocess),
    ('../Sample/data/quark_jet_2_cut/40x40/', ACC_wo_preprocess, AUC_wo_preprocess),
]

for i in range(1, 11):
    config_path = f'./config_files/quark_jet_2_cut_L_3000_pT_norm_config_{i:02}.json'
    # Read config file
    with open(config_path, 'r') as f:
        config = json.load(f)

    seed = config['seed']
    luminosity = config['luminosity']
    cut_type = config['cut_type']
    model_name = config['model_name']

    GGF_cutflow_file = config['GGF_cutflow_file']
    VBF_cutflow_file = config['VBF_cutflow_file']

    # Sampling dataset
    r_train, r_val = 0.8, 0.2
    n_SR_VBF, n_SR_GGF, n_BR_VBF, n_BR_GGF = compute_nevent_in_SR_BR(GGF_cutflow_file, VBF_cutflow_file, luminosity, cut_type)
    n_events = (int(n_SR_VBF), int(n_SR_GGF), int(n_BR_VBF), int(n_BR_GGF))

    # The same saved model scores both test sets, so load it only once per config.
    save_model_name = f'./CNN_models/last_model_GGF_VBF_CWoLa_{model_name}/'
    loaded_model = tf.keras.models.load_model(save_model_name)

    for npy_dir, acc_scores, auc_scores in test_sets:
        npy_paths = [npy_dir]
        X_test, y_test = create_test_sample_from(npy_paths, n_events, (r_train, r_val), seed=seed)
        X_test = pt_normalization(X_test)

        # Compute ACC & AUC
        y_pred = loaded_model.predict(X_test, batch_size=BATCH_SIZE)
        acc_scores.append(get_highest_accuracy(y_test, y_pred))
        auc_scores.append(roc_auc_score(y_test, y_pred))

KeyboardInterrupt: 

In [None]:
# Report mean ± std of ACC/AUC over the evaluated models and assemble the
# matching LaTeX table cells (this cell starts the table row, so no leading ' & ').
ACC = np.array(ACC_preprocess)
AUC = np.array(AUC_preprocess)

latex_txt = ''

print('Pre-process ACC and AUC')
print(f'ACC: {ACC.mean():.3f} ± {ACC.std():.3f}')
print(f'AUC: {AUC.mean():.3f} ± {AUC.std():.3f}')
# '\\pm' emits the LaTeX macro \pm without relying on the invalid '\p' escape sequence.
latex_txt += f'${ACC.mean():.3f} \\pm {ACC.std():.3f}$ & ${AUC.mean():.3f} \\pm {AUC.std():.3f}$'

ACC = np.array(ACC_wo_preprocess)
AUC = np.array(AUC_wo_preprocess)

print('Without pre-process ACC and AUC')
print(f'ACC: {ACC.mean():.3f} ± {ACC.std():.3f}')
print(f'AUC: {AUC.mean():.3f} ± {AUC.std():.3f}')
latex_txt += f' & ${ACC.mean():.3f} \\pm {ACC.std():.3f}$ & ${AUC.mean():.3f} \\pm {AUC.std():.3f}$'
print(latex_txt)

Pre-process ACC and AUC
ACC: 0.637 ± 0.007
AUC: 0.686 ± 0.008
Without pre-process ACC and AUC
ACC: 0.625 ± 0.006
AUC: 0.669 ± 0.008
$0.637 \pm 0.007$ & $0.686 \pm 0.008$ & $0.625 \pm 0.006$ & $0.669 \pm 0.008$


## $\phi$-shifting: +5, +10, +15

In [None]:
# Evaluate the 10 models trained with the 'phi_aug_5' configuration on two
# truth-labelled test sets: pre-processed images and images without pre-processing.
ACC_preprocess, AUC_preprocess = [], []
ACC_wo_preprocess, AUC_wo_preprocess = [], []

# (test-set directory, ACC accumulator, AUC accumulator)
test_sets = [
    ('../Sample/data/quark_jet_2_cut/pre-processing/40x40/', ACC_preprocess, AUC_preprocess),
    ('../Sample/data/quark_jet_2_cut/40x40/', ACC_wo_preprocess, AUC_wo_preprocess),
]

for i in range(1, 11):
    config_path = f'./config_files/quark_jet_2_cut_L_3000_pT_norm_phi_aug_5_config_{i:02}.json'
    # Read config file
    with open(config_path, 'r') as f:
        config = json.load(f)

    seed = config['seed']
    luminosity = config['luminosity']
    cut_type = config['cut_type']
    model_name = config['model_name']

    GGF_cutflow_file = config['GGF_cutflow_file']
    VBF_cutflow_file = config['VBF_cutflow_file']

    # Sampling dataset
    r_train, r_val = 0.8, 0.2
    n_SR_VBF, n_SR_GGF, n_BR_VBF, n_BR_GGF = compute_nevent_in_SR_BR(GGF_cutflow_file, VBF_cutflow_file, luminosity, cut_type)
    n_events = (int(n_SR_VBF), int(n_SR_GGF), int(n_BR_VBF), int(n_BR_GGF))

    # The same saved model scores both test sets, so load it only once per config.
    save_model_name = f'./CNN_models/last_model_GGF_VBF_CWoLa_{model_name}/'
    loaded_model = tf.keras.models.load_model(save_model_name)

    for npy_dir, acc_scores, auc_scores in test_sets:
        npy_paths = [npy_dir]
        X_test, y_test = create_test_sample_from(npy_paths, n_events, (r_train, r_val), seed=seed)
        X_test = pt_normalization(X_test)

        # Compute ACC & AUC
        y_pred = loaded_model.predict(X_test, batch_size=BATCH_SIZE)
        acc_scores.append(get_highest_accuracy(y_test, y_pred))
        auc_scores.append(roc_auc_score(y_test, y_pred))

78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436


In [None]:
# Report mean ± std of ACC/AUC over the evaluated models and assemble the
# matching LaTeX table cells (each cell is prefixed with ' & ' to continue a row).
ACC = np.array(ACC_preprocess)
AUC = np.array(AUC_preprocess)

latex_txt = ''

print('Pre-process ACC and AUC')
print(f'ACC: {ACC.mean():.3f} ± {ACC.std():.3f}')
print(f'AUC: {AUC.mean():.3f} ± {AUC.std():.3f}')
# '\\pm' emits the LaTeX macro \pm without relying on the invalid '\p' escape sequence.
latex_txt += f' & ${ACC.mean():.3f} \\pm {ACC.std():.3f}$ & ${AUC.mean():.3f} \\pm {AUC.std():.3f}$'

ACC = np.array(ACC_wo_preprocess)
AUC = np.array(AUC_wo_preprocess)

print('Without pre-process ACC and AUC')
print(f'ACC: {ACC.mean():.3f} ± {ACC.std():.3f}')
print(f'AUC: {AUC.mean():.3f} ± {AUC.std():.3f}')
latex_txt += f' & ${ACC.mean():.3f} \\pm {ACC.std():.3f}$ & ${AUC.mean():.3f} \\pm {AUC.std():.3f}$'
print(latex_txt)

Pre-process ACC and AUC
ACC: 0.682 ± 0.011
AUC: 0.735 ± 0.013
Without pre-process ACC and AUC
ACC: 0.669 ± 0.011
AUC: 0.720 ± 0.015
 & $0.682 \pm 0.011$ & $0.735 \pm 0.013$ & $0.669 \pm 0.011$ & $0.720 \pm 0.015$


In [None]:
# Evaluate the 10 models trained with the 'phi_aug_10' configuration on two
# truth-labelled test sets: pre-processed images and images without pre-processing.
ACC_preprocess, AUC_preprocess = [], []
ACC_wo_preprocess, AUC_wo_preprocess = [], []

# (test-set directory, ACC accumulator, AUC accumulator)
test_sets = [
    ('../Sample/data/quark_jet_2_cut/pre-processing/40x40/', ACC_preprocess, AUC_preprocess),
    ('../Sample/data/quark_jet_2_cut/40x40/', ACC_wo_preprocess, AUC_wo_preprocess),
]

for i in range(1, 11):
    config_path = f'./config_files/quark_jet_2_cut_L_3000_pT_norm_phi_aug_10_config_{i:02}.json'
    # Read config file
    with open(config_path, 'r') as f:
        config = json.load(f)

    seed = config['seed']
    luminosity = config['luminosity']
    cut_type = config['cut_type']
    model_name = config['model_name']

    GGF_cutflow_file = config['GGF_cutflow_file']
    VBF_cutflow_file = config['VBF_cutflow_file']

    # Sampling dataset
    r_train, r_val = 0.8, 0.2
    n_SR_VBF, n_SR_GGF, n_BR_VBF, n_BR_GGF = compute_nevent_in_SR_BR(GGF_cutflow_file, VBF_cutflow_file, luminosity, cut_type)
    n_events = (int(n_SR_VBF), int(n_SR_GGF), int(n_BR_VBF), int(n_BR_GGF))

    # The same saved model scores both test sets, so load it only once per config.
    save_model_name = f'./CNN_models/last_model_GGF_VBF_CWoLa_{model_name}/'
    loaded_model = tf.keras.models.load_model(save_model_name)

    for npy_dir, acc_scores, auc_scores in test_sets:
        npy_paths = [npy_dir]
        X_test, y_test = create_test_sample_from(npy_paths, n_events, (r_train, r_val), seed=seed)
        X_test = pt_normalization(X_test)

        # Compute ACC & AUC
        y_pred = loaded_model.predict(X_test, batch_size=BATCH_SIZE)
        acc_scores.append(get_highest_accuracy(y_test, y_pred))
        auc_scores.append(roc_auc_score(y_test, y_pred))

78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436


In [None]:
# Report mean ± std of ACC/AUC over the evaluated models and assemble the
# matching LaTeX table cells (each cell is prefixed with ' & ' to continue a row).
ACC = np.array(ACC_preprocess)
AUC = np.array(AUC_preprocess)

latex_txt = ''

print('Pre-process ACC and AUC')
print(f'ACC: {ACC.mean():.3f} ± {ACC.std():.3f}')
print(f'AUC: {AUC.mean():.3f} ± {AUC.std():.3f}')
# '\\pm' emits the LaTeX macro \pm without relying on the invalid '\p' escape sequence.
latex_txt += f' & ${ACC.mean():.3f} \\pm {ACC.std():.3f}$ & ${AUC.mean():.3f} \\pm {AUC.std():.3f}$'

ACC = np.array(ACC_wo_preprocess)
AUC = np.array(AUC_wo_preprocess)

print('Without pre-process ACC and AUC')
print(f'ACC: {ACC.mean():.3f} ± {ACC.std():.3f}')
print(f'AUC: {AUC.mean():.3f} ± {AUC.std():.3f}')
latex_txt += f' & ${ACC.mean():.3f} \\pm {ACC.std():.3f}$ & ${AUC.mean():.3f} \\pm {AUC.std():.3f}$'
print(latex_txt)

Pre-process ACC and AUC
ACC: 0.685 ± 0.008
AUC: 0.739 ± 0.010
Without pre-process ACC and AUC
ACC: 0.673 ± 0.008
AUC: 0.726 ± 0.010
 & $0.685 \pm 0.008$ & $0.739 \pm 0.010$ & $0.673 \pm 0.008$ & $0.726 \pm 0.010$


In [None]:
# Evaluate the 10 models trained with the 'phi_aug_15' configuration on two
# truth-labelled test sets: pre-processed images and images without pre-processing.
ACC_preprocess, AUC_preprocess = [], []
ACC_wo_preprocess, AUC_wo_preprocess = [], []

# (test-set directory, ACC accumulator, AUC accumulator)
test_sets = [
    ('../Sample/data/quark_jet_2_cut/pre-processing/40x40/', ACC_preprocess, AUC_preprocess),
    ('../Sample/data/quark_jet_2_cut/40x40/', ACC_wo_preprocess, AUC_wo_preprocess),
]

for i in range(1, 11):
    config_path = f'./config_files/quark_jet_2_cut_L_3000_pT_norm_phi_aug_15_config_{i:02}.json'
    # Read config file
    with open(config_path, 'r') as f:
        config = json.load(f)

    seed = config['seed']
    luminosity = config['luminosity']
    cut_type = config['cut_type']
    model_name = config['model_name']

    GGF_cutflow_file = config['GGF_cutflow_file']
    VBF_cutflow_file = config['VBF_cutflow_file']

    # Sampling dataset
    r_train, r_val = 0.8, 0.2
    n_SR_VBF, n_SR_GGF, n_BR_VBF, n_BR_GGF = compute_nevent_in_SR_BR(GGF_cutflow_file, VBF_cutflow_file, luminosity, cut_type)
    n_events = (int(n_SR_VBF), int(n_SR_GGF), int(n_BR_VBF), int(n_BR_GGF))

    # The same saved model scores both test sets, so load it only once per config.
    save_model_name = f'./CNN_models/last_model_GGF_VBF_CWoLa_{model_name}/'
    loaded_model = tf.keras.models.load_model(save_model_name)

    for npy_dir, acc_scores, auc_scores in test_sets:
        npy_paths = [npy_dir]
        X_test, y_test = create_test_sample_from(npy_paths, n_events, (r_train, r_val), seed=seed)
        X_test = pt_normalization(X_test)

        # Compute ACC & AUC
        y_pred = loaded_model.predict(X_test, batch_size=BATCH_SIZE)
        acc_scores.append(get_highest_accuracy(y_test, y_pred))
        auc_scores.append(roc_auc_score(y_test, y_pred))

78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436


In [None]:
# Report mean ± std of ACC/AUC over the evaluated models and assemble the
# matching LaTeX table cells (each cell is prefixed with ' & ' to continue a row).
ACC = np.array(ACC_preprocess)
AUC = np.array(AUC_preprocess)

latex_txt = ''

print('Pre-process ACC and AUC')
print(f'ACC: {ACC.mean():.3f} ± {ACC.std():.3f}')
print(f'AUC: {AUC.mean():.3f} ± {AUC.std():.3f}')
# '\\pm' emits the LaTeX macro \pm without relying on the invalid '\p' escape sequence.
latex_txt += f' & ${ACC.mean():.3f} \\pm {ACC.std():.3f}$ & ${AUC.mean():.3f} \\pm {AUC.std():.3f}$'

ACC = np.array(ACC_wo_preprocess)
AUC = np.array(AUC_wo_preprocess)

print('Without pre-process ACC and AUC')
print(f'ACC: {ACC.mean():.3f} ± {ACC.std():.3f}')
print(f'AUC: {AUC.mean():.3f} ± {AUC.std():.3f}')
latex_txt += f' & ${ACC.mean():.3f} \\pm {ACC.std():.3f}$ & ${AUC.mean():.3f} \\pm {AUC.std():.3f}$'
print(latex_txt)

Pre-process ACC and AUC
ACC: 0.688 ± 0.007
AUC: 0.743 ± 0.009
Without pre-process ACC and AUC
ACC: 0.674 ± 0.008
AUC: 0.726 ± 0.008
 & $0.688 \pm 0.007$ & $0.743 \pm 0.009$ & $0.674 \pm 0.008$ & $0.726 \pm 0.008$


## Only $\phi$-shifting: +5, +10, +15

In [8]:
# Evaluate the 10 models trained with the 'only_phi_aug_5' configuration on two
# truth-labelled test sets: pre-processed images and images without pre-processing.
ACC_preprocess, AUC_preprocess = [], []
ACC_wo_preprocess, AUC_wo_preprocess = [], []

# (test-set directory, ACC accumulator, AUC accumulator)
test_sets = [
    ('../Sample/data/quark_jet_2_cut/pre-processing/40x40/', ACC_preprocess, AUC_preprocess),
    ('../Sample/data/quark_jet_2_cut/40x40/', ACC_wo_preprocess, AUC_wo_preprocess),
]

for i in range(1, 11):
    config_path = f'./config_files/quark_jet_2_cut_L_3000_pT_norm_only_phi_aug_5_config_{i:02}.json'
    # Read config file
    with open(config_path, 'r') as f:
        config = json.load(f)

    seed = config['seed']
    luminosity = config['luminosity']
    cut_type = config['cut_type']
    model_name = config['model_name']

    GGF_cutflow_file = config['GGF_cutflow_file']
    VBF_cutflow_file = config['VBF_cutflow_file']

    # Sampling dataset
    r_train, r_val = 0.8, 0.2
    n_SR_VBF, n_SR_GGF, n_BR_VBF, n_BR_GGF = compute_nevent_in_SR_BR(GGF_cutflow_file, VBF_cutflow_file, luminosity, cut_type)
    n_events = (int(n_SR_VBF), int(n_SR_GGF), int(n_BR_VBF), int(n_BR_GGF))

    # The same saved model scores both test sets, so load it only once per config.
    save_model_name = f'./CNN_models/last_model_GGF_VBF_CWoLa_{model_name}/'
    loaded_model = tf.keras.models.load_model(save_model_name)

    for npy_dir, acc_scores, auc_scores in test_sets:
        npy_paths = [npy_dir]
        X_test, y_test = create_test_sample_from(npy_paths, n_events, (r_train, r_val), seed=seed)
        X_test = pt_normalization(X_test)

        # Compute ACC & AUC
        y_pred = loaded_model.predict(X_test, batch_size=BATCH_SIZE)
        acc_scores.append(get_highest_accuracy(y_test, y_pred))
        auc_scores.append(roc_auc_score(y_test, y_pred))

78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']


2025-05-23 15:52:26.674635: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-05-23 15:52:28.731861: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22288 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:d8:00.0, compute capability: 8.6
2025-05-23 15:52:34.952302: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8401




2025-05-23 15:52:40.680402: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:630] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436


In [9]:
# Summarise the per-run scores collected by the preceding evaluation cell:
# print mean ± std (np.std default, ddof=0) of ACC and AUC for each of the two
# test sets and accumulate the same numbers as a LaTeX table-row fragment.
latex_txt = ''

for title, acc_scores, auc_scores in (
    ('Pre-process ACC and AUC', ACC_preprocess, AUC_preprocess),
    ('Without pre-process ACC and AUC', ACC_wo_preprocess, AUC_wo_preprocess),
):
    ACC = np.array(acc_scores)
    AUC = np.array(auc_scores)

    print(title)
    print(f'ACC: {ACC.mean():.3f} ± {ACC.std():.3f}')
    print(f'AUC: {AUC.mean():.3f} ± {AUC.std():.3f}')
    # Raw f-string: '\pm' has to reach LaTeX verbatim, and '\p' is an invalid
    # escape sequence in a non-raw literal (warning today, error in future Pythons).
    latex_txt += rf' & ${ACC.mean():.3f} \pm {ACC.std():.3f}$ & ${AUC.mean():.3f} \pm {AUC.std():.3f}$'

print(latex_txt)

Pre-process ACC and AUC
ACC: 0.682 ± 0.007
AUC: 0.736 ± 0.009
Without pre-process ACC and AUC
ACC: 0.668 ± 0.007
AUC: 0.718 ± 0.010
 & $0.682 \pm 0.007$ & $0.736 \pm 0.009$ & $0.668 \pm 0.007$ & $0.718 \pm 0.010$


In [10]:
# Evaluate the models described by the ten `..._phi_aug_10_config_XX.json`
# files. Each model is scored on two test samples: one read from the
# `pre-processing/40x40/` directory and one read from the `40x40/` directory.
# The best accuracy and the AUC of every run are appended to the lists below.
ACC_preprocess, AUC_preprocess = [], []
ACC_wo_preprocess, AUC_wo_preprocess = [], []
for i in range(1, 11):
    config_path = f'./config_files/quark_jet_2_cut_L_3000_pT_norm_only_phi_aug_10_config_{i:02}.json'
    # Read config file
    with open(config_path, 'r') as f:
        config = json.load(f)

    seed = config['seed']
    luminosity = config['luminosity']
    cut_type = config['cut_type']
    model_name = config['model_name']

    GGF_cutflow_file = config['GGF_cutflow_file']
    VBF_cutflow_file = config['VBF_cutflow_file']

    # Sampling dataset
    r_train, r_val = 0.8, 0.2
    n_SR_VBF, n_SR_GGF, n_BR_VBF, n_BR_GGF = compute_nevent_in_SR_BR(GGF_cutflow_file, VBF_cutflow_file, luminosity, cut_type)
    n_events = (int(n_SR_VBF), int(n_SR_GGF), int(n_BR_VBF), int(n_BR_GGF))

    # Both test samples are scored by the same saved model, so it is loaded
    # once per config instead of once per sample.
    save_model_name = f'./CNN_models/last_model_GGF_VBF_CWoLa_{model_name}/'
    loaded_model = tf.keras.models.load_model(save_model_name)

    # (data directory list, ACC accumulator, AUC accumulator) for each test sample
    for npy_paths, acc_scores, auc_scores in (
        (['../Sample/data/quark_jet_2_cut/pre-processing/40x40/'], ACC_preprocess, AUC_preprocess),
        (['../Sample/data/quark_jet_2_cut/40x40/'], ACC_wo_preprocess, AUC_wo_preprocess),
    ):
        X_test, y_test = create_test_sample_from(npy_paths, n_events, (r_train, r_val), seed=seed)
        X_test = pt_normalization(X_test)

        # Compute ACC & AUC
        y_pred = loaded_model.predict(X_test, batch_size=BATCH_SIZE)

        acc_scores.append(get_highest_accuracy(y_test, y_pred))
        auc_scores.append(roc_auc_score(y_test, y_pred))

78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436


In [11]:
# Summarise the per-run scores collected by the preceding evaluation cell:
# print mean ± std (np.std default, ddof=0) of ACC and AUC for each of the two
# test sets and accumulate the same numbers as a LaTeX table-row fragment.
latex_txt = ''

for title, acc_scores, auc_scores in (
    ('Pre-process ACC and AUC', ACC_preprocess, AUC_preprocess),
    ('Without pre-process ACC and AUC', ACC_wo_preprocess, AUC_wo_preprocess),
):
    ACC = np.array(acc_scores)
    AUC = np.array(auc_scores)

    print(title)
    print(f'ACC: {ACC.mean():.3f} ± {ACC.std():.3f}')
    print(f'AUC: {AUC.mean():.3f} ± {AUC.std():.3f}')
    # Raw f-string: '\pm' has to reach LaTeX verbatim, and '\p' is an invalid
    # escape sequence in a non-raw literal (warning today, error in future Pythons).
    latex_txt += rf' & ${ACC.mean():.3f} \pm {ACC.std():.3f}$ & ${AUC.mean():.3f} \pm {AUC.std():.3f}$'

print(latex_txt)

Pre-process ACC and AUC
ACC: 0.687 ± 0.010
AUC: 0.740 ± 0.012
Without pre-process ACC and AUC
ACC: 0.675 ± 0.009
AUC: 0.726 ± 0.011
 & $0.687 \pm 0.010$ & $0.740 \pm 0.012$ & $0.675 \pm 0.009$ & $0.726 \pm 0.011$


In [12]:
# Evaluate the models described by the ten `..._phi_aug_15_config_XX.json`
# files. Each model is scored on two test samples: one read from the
# `pre-processing/40x40/` directory and one read from the `40x40/` directory.
# The best accuracy and the AUC of every run are appended to the lists below.
ACC_preprocess, AUC_preprocess = [], []
ACC_wo_preprocess, AUC_wo_preprocess = [], []
for i in range(1, 11):
    config_path = f'./config_files/quark_jet_2_cut_L_3000_pT_norm_only_phi_aug_15_config_{i:02}.json'
    # Read config file
    with open(config_path, 'r') as f:
        config = json.load(f)

    seed = config['seed']
    luminosity = config['luminosity']
    cut_type = config['cut_type']
    model_name = config['model_name']

    GGF_cutflow_file = config['GGF_cutflow_file']
    VBF_cutflow_file = config['VBF_cutflow_file']

    # Sampling dataset
    r_train, r_val = 0.8, 0.2
    n_SR_VBF, n_SR_GGF, n_BR_VBF, n_BR_GGF = compute_nevent_in_SR_BR(GGF_cutflow_file, VBF_cutflow_file, luminosity, cut_type)
    n_events = (int(n_SR_VBF), int(n_SR_GGF), int(n_BR_VBF), int(n_BR_GGF))

    # Both test samples are scored by the same saved model, so it is loaded
    # once per config instead of once per sample.
    save_model_name = f'./CNN_models/last_model_GGF_VBF_CWoLa_{model_name}/'
    loaded_model = tf.keras.models.load_model(save_model_name)

    # (data directory list, ACC accumulator, AUC accumulator) for each test sample
    for npy_paths, acc_scores, auc_scores in (
        (['../Sample/data/quark_jet_2_cut/pre-processing/40x40/'], ACC_preprocess, AUC_preprocess),
        (['../Sample/data/quark_jet_2_cut/40x40/'], ACC_wo_preprocess, AUC_wo_preprocess),
    ):
        X_test, y_test = create_test_sample_from(npy_paths, n_events, (r_train, r_val), seed=seed)
        X_test = pt_normalization(X_test)

        # Compute ACC & AUC
        y_pred = loaded_model.predict(X_test, batch_size=BATCH_SIZE)

        acc_scores.append(get_highest_accuracy(y_test, y_pred))
        auc_scores.append(roc_auc_score(y_test, y_pred))

78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/pre-processing/40x40/']
78945 83384 286751 45436
Preparing dataset from ['../Sample/data/quark_jet_2_cut/40x40/']
78945 83384 286751 45436


In [13]:
# Summarise the per-run scores collected by the preceding evaluation cell:
# print mean ± std (np.std default, ddof=0) of ACC and AUC for each of the two
# test sets and accumulate the same numbers as a LaTeX table-row fragment.
latex_txt = ''

for title, acc_scores, auc_scores in (
    ('Pre-process ACC and AUC', ACC_preprocess, AUC_preprocess),
    ('Without pre-process ACC and AUC', ACC_wo_preprocess, AUC_wo_preprocess),
):
    ACC = np.array(acc_scores)
    AUC = np.array(auc_scores)

    print(title)
    print(f'ACC: {ACC.mean():.3f} ± {ACC.std():.3f}')
    print(f'AUC: {AUC.mean():.3f} ± {AUC.std():.3f}')
    # Raw f-string: '\pm' has to reach LaTeX verbatim, and '\p' is an invalid
    # escape sequence in a non-raw literal (warning today, error in future Pythons).
    latex_txt += rf' & ${ACC.mean():.3f} \pm {ACC.std():.3f}$ & ${AUC.mean():.3f} \pm {AUC.std():.3f}$'

print(latex_txt)

Pre-process ACC and AUC
ACC: 0.687 ± 0.007
AUC: 0.741 ± 0.010
Without pre-process ACC and AUC
ACC: 0.672 ± 0.009
AUC: 0.725 ± 0.012
 & $0.687 \pm 0.007$ & $0.741 \pm 0.010$ & $0.672 \pm 0.009$ & $0.725 \pm 0.012$
