# Emulate v16

In [1]:
from kepler.pandas.menu       import ElectronSequence as Chain
from kepler.pandas.readers    import load_hdf
from kepler.pandas.decorators import RingerDecorator
from Gaugi import mkdir_p, progressbar
import numpy as np
import pandas as pd
import collections
import os
from pprint import pprint
from copy import deepcopy
import gc
%config InlineBackend.figure_format = 'retina'
%load_ext autoreload
%autoreload 2

Welcome to JupyROOT 6.16/00
Using all sub packages with ROOT dependence
INFO: Pandarallel will run on 40 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [2]:
# Directory that receives the emulation tables written by the to_hdf cells of this notebook.
output_path = 'output/emulation'
# Create the directory up front (Gaugi helper; presumably a no-op when it already exists — confirm).
mkdir_p(output_path)

## Decorator:

In [3]:
def generator( df ):
    """Build the input handed to the ringer decorators from a dataframe.

    The 100 columns ``trig_L2_cl_ring_0`` ... ``trig_L2_cl_ring_99`` are read
    as float32, each row is divided by the absolute value of its own sum
    (rows whose sum is zero are divided by 1, i.e. left untouched), and the
    100 values of every row are rearranged into a 10x10 grid following the
    spiral ("vortex") index table below.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the 100 ``trig_L2_cl_ring_%d`` columns.

    Returns
    -------
    list
        One-element list holding a float64 array of shape
        ``(len(df), 10, 10, 1)``.
    """

    # NOTE: Do not change this table unless you know exactly what you are doing.
    # Entry [i][j] is the ring index stored at grid cell (i, j); index 0 sits
    # at the centre and the following indices spiral outwards.
    vortex_frame = np.array([
        [72,73,74,75,76,77,78,79,80,81],
        [71,42,43,44,45,46,47,48,49,82],
        [70,41,20,21,22,23,24,25,50,83],
        [69,40,19,6 ,7 ,8 ,9 ,26,51,84],
        [68,39,18,5 ,0 ,1 ,10,27,52,85],
        [67,38,17,4 ,3 ,2 ,11,28,53,86],
        [66,37,16,15,14,13,12,29,54,87],
        [65,36,35,34,33,32,31,30,55,88],
        [64,63,62,61,60,59,58,57,56,89],
        [99,98,97,96,95,94,93,92,91,90],
    ])

    def scale_by_row_sum( values ):
        # Divide each row by |sum(row)|; a zero sum is replaced by 1 so the
        # row is returned unchanged instead of producing a division by zero.
        row_totals = np.abs( values.sum(axis=1) )
        row_totals[row_totals==0] = 1
        return values/row_totals[:,None]

    def arrange_as_vortex( flat_rings ):
        n_events, n_rings = flat_rings.shape
        # Right-pad with zeros up to 100 values per row. np.zeros is float64,
        # so the stacked array is float64 even when the input is float32.
        padding = np.zeros((n_events, 100-n_rings))
        padded = np.hstack([flat_rings, padding])
        # Indexing with the transposed table yields
        #   grid[n, a, b] == padded[n, vortex_frame[b][a]]
        # i.e. the grid is the transpose of the table as written above.
        grid = padded[:, vortex_frame.T]
        # Trailing axis of length 1 -> (n_events, 10, 10, 1).
        return grid[..., np.newaxis]

    ring_columns = ['trig_L2_cl_ring_%d'%i for i in range(100)]
    raw_rings = df[ring_columns].values.astype(np.float32)
    vortex_rings = arrange_as_vortex( scale_by_row_sum(raw_rings) )
    return [vortex_rings]

In [4]:
# Directory containing the ElectronRinger*TriggerConfig.conf files loaded by the decorators.
tpath = 'output/fitting'

In [5]:
# (decorator name, config file relative to tpath), listed from tight to very loose.
ringer_v16_configs = [
    ('ringer_v16_tight' , '/ElectronRingerTightTriggerConfig.conf'    ),
    ('ringer_v16_medium', '/ElectronRingerMediumTriggerConfig.conf'   ),
    ('ringer_v16_loose' , '/ElectronRingerLooseTriggerConfig.conf'    ),
    ('ringer_v16_vloose', '/ElectronRingerVeryLooseTriggerConfig.conf'),
]

# One RingerDecorator per config file; all of them share the same input generator.
decorators = [
    RingerDecorator(name, tpath + conf_file, generator)
    for name, conf_file in ringer_v16_configs
]

2022-03-06 01:30:19,133 | Py.RingerDecorator                      INFO Reading... output/fitting/ElectronRingerTightTriggerConfig.conf
2022-03-06 01:30:21,294 | Py.RingerDecorator                      INFO Reading... output/fitting/ElectronRingerMediumTriggerConfig.conf
2022-03-06 01:30:22,983 | Py.RingerDecorator                      INFO Reading... output/fitting/ElectronRingerLooseTriggerConfig.conf
2022-03-06 01:30:24,440 | Py.RingerDecorator                      INFO Reading... output/fitting/ElectronRingerVeryLooseTriggerConfig.conf


2022-03-06 01:30:19.244847: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1
2022-03-06 01:30:19.244883: E tensorflow/stream_executor/cuda/cuda_driver.cc:314] failed call to cuInit: UNKNOWN ERROR (-1)
2022-03-06 01:30:19.244900: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (caloba51): /proc/driver/nvidia/version does not exist
2022-03-06 01:30:19.245115: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-03-06 01:30:19.279589: I tensorflow/core/platform/profile_utils/cpu_utils.cc:104] CPU Frequency: 2400000000 Hz
2022-03-06 01:30:19.288939: I tensorflow/compiler/xla/service/service.c

In [6]:
def load_in_loop( paths, decorators ):
    """Decorate every file in ``paths`` and stack the decorated columns.

    Each file is loaded with ``load_hdf``, every decorator is applied to the
    loaded frame, and only the columns ``<decorator.column>`` and
    ``<decorator.column>_output`` are kept. The per-file selections are
    concatenated in the order of ``paths`` and re-indexed from zero.

    Parameters
    ----------
    paths : iterable of str
        Files to read, one at a time.
    decorators : iterable
        Objects exposing ``apply(df)`` and a ``column`` attribute.

    Returns
    -------
    pandas.DataFrame
        Two columns per decorator, rows from all files stacked.
    """
    per_file_tables = []
    for file_path in progressbar( paths, prefix='Reading files...'):
        frame = load_hdf( file_path )
        kept_columns = []
        for deco in decorators:
            # The return value is ignored: apply() presumably adds the two
            # columns selected below to `frame` in place — confirm in kepler.
            deco.apply(frame)
            kept_columns.extend([deco.column, deco.column + '_output'])

        # Keep only the decorated columns; the full frame is dropped when the
        # loop rebinds `frame` on the next file.
        per_file_tables.append(frame[kept_columns])

    stacked = pd.concat(per_file_tables)
    return stacked.reset_index(drop=True)

## Read all 2017 bins:

In [7]:
# Directory of the 2017 sample followed by the per-bin file name template.
dpath = (
    '/home/jodafons/public/cern_data/new_files/data17_13TeV.AllPeriods.sgn.probes_lhvloose_EGAM1.bkg.vprobes_vlhvloose_EGAM7.GRL_v97.25bins'
    '/data17_13TeV.AllPeriods.sgn.probes_lhvloose_EGAM1.bkg.vprobes_vlhvloose_EGAM7.GRL_v97.25bins_et{ET}_eta{ETA}.h5'
)
# One file per (et, eta) bin, et-major order: 5 x 5 = 25 paths.
paths = [
    dpath.format(ET=et,ETA=eta)
    for et in range(5)
    for eta in range(5)
]

In [8]:
# Decorate all 2017 bins and collect the decorated columns into one table.
my_df = load_in_loop( paths=paths, decorators=decorators )

Reading files...: 100%|██████████| 25/25 [41:29<00:00, 99.57s/it] 


In [9]:
# Persist the 2017 table; mode='w' overwrites any existing file.
my_df.to_hdf(f'{output_path}/data17_table_v16.h5', key='df', mode='w')

## Read all 2018 bins:

In [7]:
# Directory of the 2018 sample followed by the per-bin file name template.
dpath = (
    '/home/jodafons/public/cern_data/new_files/data18_13TeV.AllPeriods.sgn.probes_lhvloose_EGAM1.bkg.vprobes_vlhvloose_EGAM7.GRL_v97.25bins'
    '/data18_13TeV.AllPeriods.sgn.probes_lhvloose_EGAM1.bkg.vprobes_vlhvloose_EGAM7.GRL_v97.25bins_et{ET}_eta{ETA}.h5'
)
# One file per (et, eta) bin, et-major order: 5 x 5 = 25 paths.
paths = [
    dpath.format(ET=et,ETA=eta)
    for et in range(5)
    for eta in range(5)
]

In [None]:
# Decorate all 2018 bins and collect the decorated columns into one table.
my_df = load_in_loop( paths=paths, decorators=decorators )

Reading files...:  64%|██████▍   | 16/25 [20:05<19:59, 133.27s/it]

In [None]:
# Persist the 2018 table; mode='w' overwrites any existing file.
my_df.to_hdf(f'{output_path}/data18_table_v16.h5', key='df', mode='w')

## Read all mc16 boosted bins:

In [None]:
# Directory of the mc16 boosted sample followed by the per-bin file name template.
dpath = (
    '/home/jodafons/public/cern_data/new_files/mc16_13TeV.302236_309995_341330.sgn.boosted_probes.WZ_llqq_plus_radion_ZZ_llqq_plus_ggH3000.merge.25bins.v2'
    '/mc16_13TeV.302236_309995_341330.sgn.boosted_probes.WZ_llqq_plus_radion_ZZ_llqq_plus_ggH3000.merge.25bins.v2_et{ET}_eta{ETA}.h5'
)
# One file per (et, eta) bin, et-major order: 5 x 5 = 25 paths.
paths = [
    dpath.format(ET=et,ETA=eta)
    for et in range(5)
    for eta in range(5)
]

In [None]:
# Decorate all mc16 boosted bins and collect the decorated columns into one table.
my_df = load_in_loop( paths=paths, decorators=decorators )

In [None]:
# Persist the mc16 table; mode='w' overwrites any existing file.
my_df.to_hdf(f'{output_path}/mc16_table_v16.h5', key='df', mode='w')