In [1]:
import os
import h5py
import shutil
import numpy as np

from tqdm import tqdm

from utils import rotation, preprocess

# Split SR and SB samples 

In [2]:
def get_dataset_keys(f):
    keys = []
    f.visit(lambda key : keys.append(key) if isinstance(f[key], h5py.Dataset) else None)
    return keys

def create_dataset(f, nevent, MAX_JETS):

    f.create_dataset('J1/MASK', (nevent, MAX_JETS), maxshape=(None, MAX_JETS), dtype='|b1')
    f.create_dataset('J1/pt', (nevent, MAX_JETS), maxshape=(None, MAX_JETS), dtype='<f4')
    f.create_dataset('J1/eta', (nevent, MAX_JETS), maxshape=(None, MAX_JETS), dtype='<f4')
    f.create_dataset('J1/phi', (nevent, MAX_JETS), maxshape=(None, MAX_JETS), dtype='<f4')

    f.create_dataset('J2/MASK', (nevent, MAX_JETS), maxshape=(None, MAX_JETS), dtype='|b1')
    f.create_dataset('J2/pt', (nevent, MAX_JETS), maxshape=(None, MAX_JETS), dtype='<f4')
    f.create_dataset('J2/eta', (nevent, MAX_JETS), maxshape=(None, MAX_JETS), dtype='<f4')
    f.create_dataset('J2/phi', (nevent, MAX_JETS), maxshape=(None, MAX_JETS), dtype='<f4')

    f.create_dataset('EVENT/Mjj', (nevent,), maxshape=(None,), dtype='<f4')
    f.create_dataset('EVENT/signal', (nevent,), maxshape=(None,), dtype='<i8')

In [3]:
def split_SR_SB(h5_path, output_path, SRSB_region=[4400, 4700, 5500, 5800]):

    # read data
    with h5py.File(h5_path, 'r') as f:
        mjj = f['EVENT/Mjj'][:]

        SR_range = (mjj > SRSB_region[1]) & (mjj < SRSB_region[2])
        SB_range = ((mjj > SRSB_region[0]) & (mjj < SRSB_region[1])) | ((mjj > SRSB_region[2]) & (mjj < SRSB_region[3]))
        
        root, _ = os.path.splitext(output_path)
        SR_path = f'{root}_in_SR.h5'
        SB_path = f'{root}_in_SB.h5'

        with h5py.File(SR_path, 'w') as f_SR, h5py.File(SB_path, 'w') as f_SB:

            MAX_JETS = 300 
            create_dataset(f_SR, SR_range.sum(), MAX_JETS)
            create_dataset(f_SB, SB_range.sum(), MAX_JETS)

            keys = get_dataset_keys(f_SR)
            for key in keys:
                f_SR[key][:] = f[key][:][SR_range]
                f_SB[key][:] = f[key][:][SB_range]

In [4]:
h5_path = 'HVmodel/data/split_val/signal.h5'
output_path = 'HVmodel/data/split_val/sig.h5'
split_SR_SB(h5_path, output_path)

h5_path = 'HVmodel/data/split_val/background.h5'
output_path = 'HVmodel/data/split_val/bkg.h5'
split_SR_SB(h5_path, output_path)

# Origin dataset

In [5]:
def to_origin_h5(h5_path, out_h5):

    shutil.copyfile(h5_path, out_h5)

    with h5py.File(out_h5, 'a') as f_out:
        print(out_h5)
        _, eta1, phi1 = preprocess(f_out['J1/pt'][:], f_out['J1/eta'][:], f_out['J1/phi'][:])
        _, eta2, phi2 = preprocess(f_out['J2/pt'][:], f_out['J2/eta'][:], f_out['J2/phi'][:])

        f_out['J1/eta'][:] = eta1
        f_out['J1/phi'][:] = phi1
        f_out['J2/eta'][:] = eta2
        f_out['J2/phi'][:] = phi2


In [6]:
h5_path = 'HVmodel/data/split_val/sig_in_SR.h5'
out_h5 = 'HVmodel/data/origin/sig_in_SR.h5'
to_origin_h5(h5_path, out_h5)

h5_path = 'HVmodel/data/split_val/sig_in_SB.h5'
out_h5 = 'HVmodel/data/origin/sig_in_SB.h5'
to_origin_h5(h5_path, out_h5)

h5_path = 'HVmodel/data/split_val/bkg_in_SR.h5'
out_h5 = 'HVmodel/data/origin/bkg_in_SR.h5'
to_origin_h5(h5_path, out_h5)

h5_path = 'HVmodel/data/split_val/bkg_in_SB.h5'
out_h5 = 'HVmodel/data/origin/bkg_in_SB.h5'
to_origin_h5(h5_path, out_h5)

HVmodel/data/origin/sig_in_SR.h5
HVmodel/data/origin/sig_in_SB.h5
HVmodel/data/origin/bkg_in_SR.h5
HVmodel/data/origin/bkg_in_SB.h5


# Jet rotation

In [7]:
def jet_rotation(h5_path, out_h5, rot_range=np.pi):
    # after pre-process, rotate the jet image with random angle
    # save results in h5 file
    # rot_range: rotation range

    shutil.copyfile(h5_path, out_h5)

    # preprocess the eta and phi
    with h5py.File(out_h5, 'a') as f_out:
        print(out_h5)
        _, eta1, phi1 = preprocess(f_out['J1/pt'][:], f_out['J1/eta'][:], f_out['J1/phi'][:])
        _, eta2, phi2 = preprocess(f_out['J2/pt'][:], f_out['J2/eta'][:], f_out['J2/phi'][:])

        f_out['J1/eta'][:] = eta1
        f_out['J1/phi'][:] = phi1
        f_out['J2/eta'][:] = eta2
        f_out['J2/phi'][:] = phi2

    with h5py.File(out_h5, 'a') as f_out:
        nevent = f_out['EVENT/signal'].shape[0]

        # rotate eta and phi with random angle
        angle = np.random.uniform(-rot_range, rot_range, size=nevent)[:, None]
        eta1_rotat, phi1_rotat = rotation(eta1, phi1, angle)
        angle = np.random.uniform(-rot_range, rot_range, size=nevent)[:, None]
        eta2_rotat, phi2_rotat = rotation(eta2, phi2, angle)

        f_out['J1/eta'][:] = eta1_rotat
        f_out['J1/phi'][:] = phi1_rotat
        f_out['J2/eta'][:] = eta2_rotat
        f_out['J2/phi'][:] = phi2_rotat        

In [8]:
for n in range(1, 6):
    h5_dir = './HVmodel/data/split_val'
    out_dir = f'./HVmodel/data/jet_rotation/{n:02}'

    # create output directory
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    for name in ['sig_in_SR', 'sig_in_SB', 'bkg_in_SR', 'bkg_in_SB']:
        h5_path = os.path.join(h5_dir, f'{name}.h5')
        out_h5 = os.path.join(out_dir, f'{name}.h5')
        jet_rotation(h5_path, out_h5)

./HVmodel/data/jet_rotation/01/sig_in_SR.h5
./HVmodel/data/jet_rotation/01/sig_in_SB.h5
./HVmodel/data/jet_rotation/01/bkg_in_SR.h5
./HVmodel/data/jet_rotation/01/bkg_in_SB.h5
./HVmodel/data/jet_rotation/02/sig_in_SR.h5
./HVmodel/data/jet_rotation/02/sig_in_SB.h5
./HVmodel/data/jet_rotation/02/bkg_in_SR.h5
./HVmodel/data/jet_rotation/02/bkg_in_SB.h5
./HVmodel/data/jet_rotation/03/sig_in_SR.h5
./HVmodel/data/jet_rotation/03/sig_in_SB.h5
./HVmodel/data/jet_rotation/03/bkg_in_SR.h5
./HVmodel/data/jet_rotation/03/bkg_in_SB.h5
./HVmodel/data/jet_rotation/04/sig_in_SR.h5
./HVmodel/data/jet_rotation/04/sig_in_SB.h5
./HVmodel/data/jet_rotation/04/bkg_in_SR.h5
./HVmodel/data/jet_rotation/04/bkg_in_SB.h5
./HVmodel/data/jet_rotation/05/sig_in_SR.h5
./HVmodel/data/jet_rotation/05/sig_in_SB.h5
./HVmodel/data/jet_rotation/05/bkg_in_SR.h5
./HVmodel/data/jet_rotation/05/bkg_in_SB.h5


# Pixelization

In [9]:
res = 25
h5_dir = './HVmodel/data/origin'
npy_dir = f'./HVmodel/data/origin/{res}x{res}'

for name in ['sig_in_SR', 'sig_in_SB', 'bkg_in_SR', 'bkg_in_SB']:
    h5_path = os.path.join(h5_dir, f'{name}.h5')
    npy_path = os.path.join(npy_dir, f'{name}.npy')
    cmd = f'python pixelization.py {h5_path} {npy_path} {res} &'
    print(cmd)


python pixelization.py ./HVmodel/data/origin/sig_in_SR.h5 ./HVmodel/data/origin/25x25/sig_in_SR.npy 25 &
python pixelization.py ./HVmodel/data/origin/sig_in_SB.h5 ./HVmodel/data/origin/25x25/sig_in_SB.npy 25 &
python pixelization.py ./HVmodel/data/origin/bkg_in_SR.h5 ./HVmodel/data/origin/25x25/bkg_in_SR.npy 25 &
python pixelization.py ./HVmodel/data/origin/bkg_in_SB.h5 ./HVmodel/data/origin/25x25/bkg_in_SB.npy 25 &


In [10]:
res = 25
h5_dir = './HVmodel/data/jet_rotation'
npy_dir = f'./HVmodel/data/jet_rotation/{res}x{res}'

for name in ['sig_in_SR', 'sig_in_SB', 'bkg_in_SR', 'bkg_in_SB']:
    h5_path = os.path.join(h5_dir, f'{name}.h5')
    npy_path = os.path.join(npy_dir, f'{name}.npy')
    cmd = f'python pixelization.py {h5_path} {npy_path} {res} &'
    print(cmd)


python pixelization.py ./HVmodel/data/jet_rotation/sig_in_SR.h5 ./HVmodel/data/jet_rotation/25x25/sig_in_SR.npy 25 &
python pixelization.py ./HVmodel/data/jet_rotation/sig_in_SB.h5 ./HVmodel/data/jet_rotation/25x25/sig_in_SB.npy 25 &
python pixelization.py ./HVmodel/data/jet_rotation/bkg_in_SR.h5 ./HVmodel/data/jet_rotation/25x25/bkg_in_SR.npy 25 &
python pixelization.py ./HVmodel/data/jet_rotation/bkg_in_SB.h5 ./HVmodel/data/jet_rotation/25x25/bkg_in_SB.npy 25 &
