In [17]:
import os
import sys
import json

import numpy as np
sys.path.append('../CNN')
import utils_CNN as utils


from pathlib import Path

In [3]:
def create_mix_sample_from(npy_dirs: list, nevents: tuple, seed=0):
    # npy_dirs: list of npy directories
    # nevents: tuple of (n_sig_SR, n_sig_SB, n_bkg_SR, n_bkg_SB)
    data = None
    label = None

    data_sig_SR = np.load(os.path.join(npy_dirs[0], 'sig_in_SR-data.npy'))
    data_sig_SB = np.load(os.path.join(npy_dirs[0], 'sig_in_SB-data.npy'))
    data_bkg_SR = np.load(os.path.join(npy_dirs[0], 'bkg_in_SR-data.npy'))
    data_bkg_SB = np.load(os.path.join(npy_dirs[0], 'bkg_in_SB-data.npy'))

    n_sig_SR, n_sig_SB, n_bkg_SR, n_bkg_SB = nevents

    np.random.seed(seed)
    idx_sig_SR = np.random.choice(data_sig_SR.shape[0], n_sig_SR, replace=False)
    idx_sig_SB = np.random.choice(data_sig_SB.shape[0], n_sig_SB, replace=False)
    idx_bkg_SR = np.random.choice(data_bkg_SR.shape[0], n_bkg_SR, replace=False)
    idx_bkg_SB = np.random.choice(data_bkg_SB.shape[0], n_bkg_SB, replace=False)

    print(f'Preparing dataset from {npy_dirs}')
    for npy_dir in npy_dirs:

        data_sig_SR = np.load(os.path.join(npy_dir, 'sig_in_SR-data.npy'))
        data_sig_SB = np.load(os.path.join(npy_dir, 'sig_in_SB-data.npy'))
        data_bkg_SR = np.load(os.path.join(npy_dir, 'bkg_in_SR-data.npy'))
        data_bkg_SB = np.load(os.path.join(npy_dir, 'bkg_in_SB-data.npy'))

        new_data = np.concatenate([
            data_sig_SR[idx_sig_SR],
            data_bkg_SR[idx_bkg_SR],
            data_sig_SB[idx_sig_SB],
            data_bkg_SB[idx_bkg_SB]
        ], axis=0)

        if data is None:
            data = new_data
        else:
            data = np.concatenate([data, new_data], axis=0)

        new_label = np.zeros(sum(nevents))
        new_label[:n_sig_SR + n_bkg_SR] = 1

        if label is None:
            label = new_label
        else:
            label = np.concatenate([label, new_label])

    return data, label

In [19]:
npy_dir = Path('./SB_0_npy/')

In [7]:
config_path = '../CNN/config_files/origin_25x25_config_01.json'

In [15]:
# Read config file
with open(config_path, 'r') as f:
    config = json.load(f)

train_npy_paths = ["../Sample/HVmodel/data/origin/25x25"]
val_npy_paths = ["../Sample/HVmodel/data/origin/25x25/val"]

sensitivity = 0.0
luminosity = 139

true_label_path = "../Sample/HVmodel/data/split_val/25x25/mix_sample_test.npy"

In [27]:
for seed in range(123, 1100, 100):
    print(f'Processing seed {seed}')
    if not os.path.exists(npy_dir / f'{seed}'):
        os.makedirs(npy_dir / f'{seed}')

    # Sampling dataset
    r_train, r_val = 0.8, 0.2
    n_SR_S, n_SR_B, n_SB_S, n_SB_B = utils.compute_nevent_in_SR_SB(sensitivity=sensitivity, L=luminosity)

    train_nevents = (np.array([n_SR_S, n_SB_S, n_SR_B, n_SB_B]) * r_train).astype(int)
    X_train, y_train = create_mix_sample_from(train_npy_paths, train_nevents, seed=seed)

    # save the dataset
    np.save(npy_dir / f'{seed}/train-data.npy', X_train)
    np.save(npy_dir / f'{seed}/train-label.npy', y_train)

    val_nevents = (np.array([n_SR_S, n_SB_S, n_SR_B, n_SB_B]) * r_val).astype(int)
    X_val, y_val = create_mix_sample_from(val_npy_paths, val_nevents, seed=seed)

    # save the dataset
    np.save(npy_dir / f'{seed}/val-data.npy', X_val)
    np.save(npy_dir / f'{seed}/val-label.npy', y_val)

    # save the true label
    X_test, y_test = utils.load_samples(true_label_path)
    X_test_B = X_test[y_test == 0]

    np.save(npy_dir / f'{seed}/test-data.npy', X_test_B)

Processing seed 123
Background cross section, SR: 136.13 fb, SB: 145.57 fb
Background sample size: SR: 18922.4, SB: 20234.0
Signal sample size: SR: 0.0, SB: 0.0
Preparing dataset from ['../Sample/HVmodel/data/origin/25x25']
(31324, 25, 25, 2)
Preparing dataset from ['../Sample/HVmodel/data/origin/25x25/val']
Processing seed 223
Background cross section, SR: 136.13 fb, SB: 145.57 fb
Background sample size: SR: 18922.4, SB: 20234.0
Signal sample size: SR: 0.0, SB: 0.0


KeyboardInterrupt: 