# Neural Ringer Decision threshold fit

This notebook applies the linear correction to the neural network output with respect to `avgmu` (pileup) and exports the best models to the ONNX and Keras formats. The Keras versions are usually used in the prometheus framework, while the ONNX versions will be used in the athena framework.

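**NOTE**: ONNX (Open Neural Network Exchange) is an open format for representing trained models, originally introduced by Microsoft and Facebook; it is used here for inference.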

In [1]:
import os
#os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
from kolmov import crossval_table, get_color_fader, fit_table
#import saphyra
import numpy as np
import pandas as pd
import collections
import matplotlib
import matplotlib.pyplot as plt
from pprint import pprint
from copy import deepcopy
from tensorflow import keras
%config InlineBackend.figure_format = 'retina'

Welcome to JupyROOT 6.16/00
Using all sub packages with ROOT dependence


In [2]:
def create_op_dict(op):
    """Build the column-name -> storage-path mapping for one operating point.

    Parameters
    ----------
    op : str
        Operating point name (e.g. ``'tight'``); used both as the prefix of
        every key and inside the ``reference/<op>_cutbased/`` path.

    Returns
    -------
    dict
        21 entries, in insertion order: the pd/fa/sp entries for the
        ``ref``, ``val`` and ``op`` sets, followed by the passed/total count
        entries for pd and fa on the same sets.
    """
    key_prefix = op + '_'
    path_prefix = "reference/" + op + "_cutbased/"
    sets = ('ref', 'val', 'op')

    mapping = {}
    for set_name in sets:
        # pd and fa are read from item #0; sp has no item suffix.
        mapping[key_prefix + 'pd_' + set_name] = path_prefix + 'pd_' + set_name + '#0'
        mapping[key_prefix + 'fa_' + set_name] = path_prefix + 'fa_' + set_name + '#0'
        mapping[key_prefix + 'sp_' + set_name] = path_prefix + 'sp_' + set_name

    # Counts
    for set_name in sets:
        for count_name, item in (('passed', '#1'), ('total', '#2')):
            for rate in ('pd', 'fa'):
                column = key_prefix + rate + '_' + set_name + '_' + count_name
                mapping[column] = path_prefix + rate + '_' + set_name + item
    return mapping

# Column name -> storage path for every quantity read from the tuned files.
tuned_info = collections.OrderedDict([
    # validation
    ("max_sp_val",    'summary/max_sp_val'),
    ("max_sp_pd_val", 'summary/max_sp_pd_val#0'),
    ("max_sp_fa_val", 'summary/max_sp_fa_val#0'),
    # Operation
    ("max_sp_op",     'summary/max_sp_op'),
    ("max_sp_pd_op",  'summary/max_sp_pd_op#0'),
    ("max_sp_fa_op",  'summary/max_sp_fa_op#0'),
])

# Append the cut-based reference entries of each operating point, in this order.
tuned_info.update({
    column: location
    for point in ('tight', 'medium', 'loose', 'vloose')
    for column, location in create_op_dict(point).items()
})

In [3]:
# Bin edges handed to crossval_table / fit_table: 6 edges -> 5 bins on each axis.
etbins = [15, 20, 30, 40, 50, 1_000_000]
etabins = [0.0, 0.8, 1.37, 1.54, 2.37, 2.5]

## 1) Reading all tunings:


In [4]:
# Cross-validation table: reads the entries listed in tuned_info for the given Et/eta bin edges.
cv  = crossval_table( tuned_info, etbins = etbins , etabins = etabins )

In [5]:
# Build the tunings location from the user's home directory instead of a
# hard-coded /home/<user> prefix, matching how the data path is resolved later
# in this notebook with os.path.expanduser('~').
tunings_glob = os.path.join(os.path.expanduser('~'), 'workspace', 'ringer_tunings',
                            'versions', 'v20', 'r0', 'output', '**', '*.gz')
# Second argument is the tag stored in the table's train_tag column.
cv.fill(tunings_glob, 'v20')

2022-08-15 19:09:32,378 | Py.crossval_table                       INFO Reading file for v20 tag from /home/lbarranunes/workspace/ringer_tunings/versions/v20/r0/output/**/*.gz
2022-08-15 19:09:32,378 | Py.crossval_table                       INFO There are 2500 files for this task...
2022-08-15 19:09:32,378 | Py.crossval_table                       INFO Filling the table... 
2022-08-15 19:10:08,624 | Py.crossval_table                       INFO End of fill step, a pandas DataFrame was created...


In [6]:
# Inspect the DataFrame created by cv.fill.
cv.table()

Unnamed: 0,train_tag,et_bin,eta_bin,model_idx,sort,init,file_name,tuned_idx,max_sp_val,max_sp_pd_val,...,vloose_pd_ref_total,vloose_fa_ref_total,vloose_pd_val_passed,vloose_fa_val_passed,vloose_pd_val_total,vloose_fa_val_total,vloose_pd_op_passed,vloose_fa_op_passed,vloose_pd_op_total,vloose_fa_op_total
0,v20,3,1,0,0,0,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.991214,0.995535,...,2837736,112229,283698,432,283774,11223,2836979,3546,2837736,112229
1,v20,3,1,0,0,1,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.991146,0.996564,...,2837736,112229,283698,426,283774,11223,2836979,3544,2837736,112229
2,v20,3,1,0,0,2,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.990267,0.995785,...,2837736,112229,283698,538,283774,11223,2836979,4567,2837736,112229
3,v20,3,1,0,0,3,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.991193,0.995045,...,2837736,112229,283698,426,283774,11223,2836979,3474,2837736,112229
4,v20,3,1,0,0,4,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.991409,0.995659,...,2837736,112229,283698,432,283774,11223,2836980,3520,2837736,112229
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2495,v20,2,4,0,9,5,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.981181,0.990023,...,164380,24980,15912,51,16438,2498,158966,584,164380,24980
2496,v20,2,4,0,9,6,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.980942,0.991970,...,164380,24980,15919,50,16438,2498,158973,570,164380,24980
2497,v20,2,4,0,9,7,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.980528,0.990327,...,164380,24980,15942,50,16438,2498,158959,588,164380,24980
2498,v20,2,4,0,9,8,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.981323,0.991118,...,164380,24980,15856,52,16438,2498,158960,582,164380,24980


In [7]:
# List the available columns (bookkeeping columns plus every tuned_info key).
cv.table().columns

Index(['train_tag', 'et_bin', 'eta_bin', 'model_idx', 'sort', 'init',
       'file_name', 'tuned_idx', 'max_sp_val', 'max_sp_pd_val',
       'max_sp_fa_val', 'max_sp_op', 'max_sp_pd_op', 'max_sp_fa_op',
       'tight_pd_ref', 'tight_fa_ref', 'tight_sp_ref', 'tight_pd_val',
       'tight_fa_val', 'tight_sp_val', 'tight_pd_op', 'tight_fa_op',
       'tight_sp_op', 'tight_pd_ref_passed', 'tight_fa_ref_passed',
       'tight_pd_ref_total', 'tight_fa_ref_total', 'tight_pd_val_passed',
       'tight_fa_val_passed', 'tight_pd_val_total', 'tight_fa_val_total',
       'tight_pd_op_passed', 'tight_fa_op_passed', 'tight_pd_op_total',
       'tight_fa_op_total', 'medium_pd_ref', 'medium_fa_ref', 'medium_sp_ref',
       'medium_pd_val', 'medium_fa_val', 'medium_sp_val', 'medium_pd_op',
       'medium_fa_op', 'medium_sp_op', 'medium_pd_ref_passed',
       'medium_fa_ref_passed', 'medium_pd_ref_total', 'medium_fa_ref_total',
       'medium_pd_val_passed', 'medium_fa_val_passed', 'medium_pd_val_total'

### 1.1) Get best inits and sorts:

In [8]:
# Presumably keeps, for each sort, the init with the highest max_sp_val,
# then for each (et_bin, eta_bin) the sort with the highest max_sp_op -- confirm against kolmov.
best_inits = cv.filter_inits("max_sp_val")
best_sorts = cv.filter_sorts( best_inits , 'max_sp_op')

In [9]:
# Inspect the rows selected by filter_inits.
best_inits

Unnamed: 0,train_tag,et_bin,eta_bin,model_idx,sort,init,file_name,tuned_idx,max_sp_val,max_sp_pd_val,...,vloose_pd_ref_total,vloose_fa_ref_total,vloose_pd_val_passed,vloose_fa_val_passed,vloose_pd_val_total,vloose_fa_val_total,vloose_pd_op_passed,vloose_fa_op_passed,vloose_pd_op_total,vloose_fa_op_total
1504,v20,0,0,0,0,4,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.980932,0.985912,...,232819,187639,23017,525,23282,18764,230153,5309,232819,187639
1519,v20,0,0,0,1,9,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.979152,0.985396,...,232819,187639,23015,597,23282,18764,230153,5575,232819,187639
1527,v20,0,0,0,2,7,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.978237,0.985439,...,232819,187639,23015,641,23282,18764,230153,5402,232819,187639
1536,v20,0,0,0,3,6,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.978632,0.983764,...,232819,187639,23015,637,23282,18764,230153,5462,232819,187639
1544,v20,0,0,0,4,4,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.980430,0.986728,...,232819,187639,23015,531,23282,18764,230151,5351,232819,187639
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2150,v20,4,4,0,5,0,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.993277,0.997438,...,89786,93785,8625,44,8979,9378,86202,484,89786,93785
2161,v20,4,4,0,6,1,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.991556,0.996881,...,89786,93785,8625,56,8978,9379,86230,569,89786,93785
2179,v20,4,4,0,7,9,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.992174,0.995656,...,89786,93785,8621,45,8978,9379,86180,488,89786,93785
2184,v20,4,4,0,8,4,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.992653,0.998329,...,89786,93785,8612,48,8978,9379,86212,538,89786,93785


In [10]:
# Inspect the rows selected by filter_sorts.
best_sorts

Unnamed: 0,train_tag,et_bin,eta_bin,model_idx,sort,init,file_name,tuned_idx,max_sp_val,max_sp_pd_val,...,vloose_pd_ref_total,vloose_fa_ref_total,vloose_pd_val_passed,vloose_fa_val_passed,vloose_pd_val_total,vloose_fa_val_total,vloose_pd_op_passed,vloose_fa_op_passed,vloose_pd_op_total,vloose_fa_op_total
1570,v20,0,0,0,7,0,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.979604,0.983893,...,232819,187639,23016,582,23282,18764,230152,5249,232819,187639
1818,v20,0,1,0,1,8,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.971438,0.981773,...,141000,143657,13929,722,14100,14366,139283,6884,141000,143657
1644,v20,0,2,0,4,4,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.947048,0.951716,...,51566,30037,5033,426,5157,3003,50324,3880,51566,30037
1711,v20,0,3,0,1,1,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.970319,0.983638,...,227345,205792,22338,868,22735,20579,223378,7941,227345,205792
2005,v20,0,4,0,0,5,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.953923,0.958794,...,19893,15512,1862,57,1990,1551,18613,712,19893,15512
1078,v20,1,0,0,7,8,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.98746,0.991449,...,1499273,316581,149174,684,149927,31658,1491729,6471,1499273,316581
1474,v20,1,1,0,7,4,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.98157,0.988858,...,896615,227709,89322,956,89661,22771,893215,9189,896615,227709
1169,v20,1,2,0,6,9,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.968573,0.980116,...,283647,47550,27418,170,28365,4755,274277,1551,283647,47550
1313,v20,1,3,0,1,3,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.978887,0.987757,...,1132030,307253,112713,1313,113203,30726,1127149,12338,1132030,307253
2356,v20,1,4,0,5,6,/home/lbarranunes/workspace/ringer_tunings/ver...,0,0.970039,0.984533,...,84054,36522,8048,122,8405,3653,80473,1203,84054,36522


### 1.2) Get best models:

Get the best model for each (Et, eta) bin. 25 models are expected (5 Et bins × 5 eta bins).

In [11]:
# best_models is indexed as best_models[et_bin][eta_bin].
best_models = cv.get_best_models(best_sorts, remove_last=True)

# Report the number of models in each Et bin, then the grand total.
for eta_list in best_models:
    print(f'In this et we have: {len(eta_list)} models')
total_models = sum(len(eta_list) for eta_list in best_models)
print(f'Total models: {total_models}')

In this et we have: 5 models
In this et we have: 5 models
In this et we have: 5 models
In this et we have: 5 models
In this et we have: 5 models
Total models: 25


2022-08-15 19:10:08.868182: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1
2022-08-15 19:10:08.868217: E tensorflow/stream_executor/cuda/cuda_driver.cc:314] failed call to cuInit: UNKNOWN ERROR (-1)
2022-08-15 19:10:08.868233: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (caloba52): /proc/driver/nvidia/version does not exist
2022-08-15 19:10:08.868505: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-08-15 19:10:08.899299: I tensorflow/core/platform/profile_utils/cpu_utils.cc:104] CPU Frequency: 2400000000 Hz
2022-08-15 19:10:08.906720: I tensorflow/compiler/xla/service/service.c

In [12]:
# Split the last trained Et bin at ET_SPLIT: a new Et bin is appended whose
# models are independent clones of the last bin's models, so both Et ranges
# are served by the same tuning.
ET_SPLIT = 100      # new Et edge; the same value is inserted into fit_etbins
LAST_ET_IDX = 4     # index of the last Et bin returned by get_best_models
NEW_ET_IDX = 5      # index of the bin created here

# Guard: only extend once. Without it, re-running this cell appends another
# list, duplicates the clones and overwrites the etBin edges a second time.
if len(best_models) == NEW_ET_IDX:
    best_models.append(list())
    for eta in range(len(best_models[LAST_ET_IDX])):
        source = best_models[LAST_ET_IDX][eta]
        # Deep-copy every entry except the Keras model, which is cloned below.
        clone = {key: deepcopy(value) for key, value in source.items() if key != 'model'}
        # clone_model rebuilds the architecture only, so the weights are copied explicitly.
        cloned_model = keras.models.clone_model(source['model'])
        cloned_model.set_weights(source['model'].get_weights())
        clone['model'] = cloned_model
        clone['etBinIdx'] = NEW_ET_IDX
        clone['etBin'][0] = ET_SPLIT     # new bin starts at the split edge
        source['etBin'][-1] = ET_SPLIT   # original bin now ends at the split edge
        best_models[NEW_ET_IDX].append(clone)

In [13]:
# Re-count after the extra Et bin was appended: one line per Et bin, then the total.
for eta_list in best_models:
    print(f'In this et we have: {len(eta_list)} models')
total_models = sum(len(eta_list) for eta_list in best_models)
print(f'Total models: {total_models}')

In this et we have: 5 models
In this et we have: 5 models
In this et we have: 5 models
In this et we have: 5 models
In this et we have: 5 models
In this et we have: 5 models
Total models: 30


In [14]:
# Sanity check: number of eta entries held by each Et bin of best_models.
for et, eta_refs in enumerate(best_models):
    print(f'Et {et} has {len(eta_refs)} etas')

Et 0 has 5 etas
Et 1 has 5 etas
Et 2 has 5 etas
Et 3 has 5 etas
Et 4 has 5 etas
Et 5 has 5 etas


## 2) Linear correction:

Here we set all thresholds to operate at the same pd (detection probability) as the cut-based reference, using the pileup linear correction strategy. Since the classifier efficiency has some dependence on the pileup, we apply a linear adjustment to the neural network threshold as a function of the pileup in order to stabilize the trigger efficiency.

### 2.1) Get all PD/Fas values:

Read all reference values from the storage.

In [15]:
# calculate all pd/fa from reference file
from saphyra.core import ReferenceReader

homepath = os.path.expanduser('~')
dataset = 'data17_13TeV.AllPeriods.sgn.probes_lhmedium_EGAM1.bkg.VProbes_EGAM7.GRL_v97.30bins'
datapath = os.path.join(homepath, 'data', dataset)
refpath = os.path.join(datapath, 'references')
# One reference file per (et, eta) bin; ET/ETA are filled with the bin indices.
ref_path = os.path.join(refpath, dataset + '_et{ET}_eta{ETA}.ref.pic.gz')
ref_paths = [[ ref_path.format(ET=et,ETA=eta) for eta in range(5)] for et in range(6)]
# ref_matrix[et][eta][reference name] -> {'pd', 'fa', 'pd_epsilon'}
ref_matrix = [[ {} for eta in range(5)] for et in range(6)]
references = ['tight_cutbased', 'medium_cutbased' , 'loose_cutbased', 'vloose_cutbased']
for et_bin in range(6):
    for eta_bin in range(5):
        # Load the file once per bin: it does not depend on the reference
        # name, so reloading it inside the inner loop repeated the I/O 4 times.
        refObj = ReferenceReader().load(ref_paths[et_bin][eta_bin])
        for name in references:
            # Efficiencies are the passed/total ratios stored in the reference file.
            _pd = refObj.getSgnPassed(name)/refObj.getSgnTotal(name)
            fa = refObj.getBkgPassed(name)/refObj.getBkgTotal(name)
            ref_matrix[et_bin][eta_bin][name] = {'pd':_pd, 'fa':fa, 'pd_epsilon':0}

In [16]:
# Sanity check: number of eta entries held by each Et bin of ref_matrix.
for et, eta_refs in enumerate(ref_matrix):
    print(f'Et {et} has {len(eta_refs)} etas')

Et 0 has 5 etas
Et 1 has 5 etas
Et 2 has 5 etas
Et 3 has 5 etas
Et 4 has 5 etas
Et 5 has 5 etas


### 2.2) Create data generator:

Since the models of each tuning are fed with a different data organization, we need a generator that opens the data file, prepares the input matrix and applies some pre-processing (if needed).

In [17]:
def generator( path ):
    """Load one data file and return the model inputs, the targets and avgmu.

    Parameters
    ----------
    path : str
        File opened with ``Gaugi.load``. The loaded object must provide a
        2-D ``'data'`` array and a ``'target'`` entry.

    Returns
    -------
    tuple
        ``([data_rings], target, avgmu)`` where ``data_rings`` holds columns
        1..100 of ``'data'`` with each row divided by the absolute value of
        its sum, ``target`` is the ``'target'`` entry unchanged, and
        ``avgmu`` is column 0 of ``'data'``.
    """
    from Gaugi import load

    def norm1( data ):
        # Divide every row by |sum(row)|; rows whose sum is zero are left
        # unscaled to avoid a division by zero.
        norms = np.abs( data.sum(axis=1) )
        norms[norms==0] = 1
        return data/norms[:,None]

    d = load(path)
    # Read the matrix once and slice it, instead of indexing d['data'] repeatedly.
    samples = d['data']

    # extract rings
    data_rings = norm1(samples[:,1:101])
    target = d['target']
    avgmu = samples[:,0]

    return [data_rings], target, avgmu

In [18]:
filepath = os.path.join(datapath, dataset + '_et{ET}_eta{ETA}.npz')
# One data file per (et, eta) bin: 6 Et bins x 5 eta bins, same layout as ref_paths.
paths = [[ filepath.format(ET=et,ETA=eta) for eta in range(5)] for et in range(6)]

In [19]:
# Sanity check: number of eta entries held by each Et bin of paths.
for et, eta_refs in enumerate(paths):
    print(f'Et {et} has {len(eta_refs)} etas')

Et 0 has 5 etas
Et 1 has 5 etas
Et 2 has 5 etas
Et 3 has 5 etas
Et 4 has 5 etas
Et 5 has 5 etas


In [20]:
# Et edges used for the fit: the training edges plus an extra edge at 100
# placed just before the last one. etbins itself is left untouched.
fit_etbins = etbins[:-1] + [100] + etbins[-1:]

In [21]:
# create the table class
# NOTE(review): the positional values 0.02, 0.5, 16, 60 are presumably the
# histogram bin sizes and axis limits used by the fit -- confirm against the
# kolmov fit_table signature and consider passing them by keyword.
ct  = fit_table( generator, fit_etbins , etabins, 0.02, 0.5, 16, 60, xmin_percentage=0.05, xmax_percentage=99.95 )

### 2.3) Apply linear correction:

**NOTE**: This step is slow: the run recorded below took about one hour (3688 s).

In [22]:
# Fill it
# Inputs are the per-bin data paths, models and reference values (all indexed [et][eta]);
# the last argument is an output name -- presumably the directory for the fit plots, confirm.
ct.fill(paths, best_models, ref_matrix, 'correction_v20_data17_13TeV_EGAM1_probes_lhmedium_EGAM7_vetolhvloose')


Applying ATLAS style settings...
Fitting... |------------------------------------------------------------| 0/30

  "Even though the tf.config.experimental_run_functions_eagerly "


Fitting... |############################################################| 30/30
Fitting... ... finished task in 3688.557367s.


In [23]:
# Preview the fit table: reference, uncorrected and corrected efficiencies per bin.
ct.table().head()

Unnamed: 0,name,et_bin,eta_bin,reference_signal_passed,reference_signal_total,reference_signal_eff,reference_background_passed,reference_background_total,reference_background_eff,signal_passed,...,signal_eff,background_passed,background_total,background_eff,signal_corrected_passed,signal_corrected_total,signal_corrected_eff,background_corrected_passed,background_corrected_total,background_corrected_eff
0,tight_cutbased,0,0,227619,232819,0.977666,23318,187639,0.124271,227545,...,0.977347,3689,187639,0.01966,227541,232819,0.97733,3601,187639,0.019191
1,medium_cutbased,0,0,227780,232819,0.97836,24336,187639,0.129701,227723,...,0.978112,3748,187639,0.019975,227688,232819,0.977961,3662,187639,0.019516
2,loose_cutbased,0,0,229996,232819,0.987876,31867,187639,0.169837,229959,...,0.987716,5081,187639,0.027079,229971,232819,0.987767,4994,187639,0.026615
3,vloose_cutbased,0,0,230152,232819,0.988548,32748,187639,0.174527,230117,...,0.988394,5226,187639,0.027851,230140,232819,0.988493,5139,187639,0.027388
4,tight_cutbased,0,1,137861,141000,0.977742,31938,143657,0.222321,137816,...,0.977418,5018,143657,0.03493,137797,141000,0.977284,4891,143657,0.034046


In [24]:
# Persist the fit table to the current working directory.
ct.table().to_csv('ct_table.csv')

### 2.4) Create beamer report:

In [25]:
# Write the beamer (LaTeX) report; arguments are the table, the models, the title and the output file name.
ct.dump_beamer_table(ct.table(), best_models, 'data17_13TeV v20 tuning', 
                                              'correction_v20_data17_13TeV_EGAM1_probes_lhmedium_EGAM7_vetolhvloose.pdf')

2022-08-15 20:11:42,414 | Py.BeamerTexReportTemplate1             INFO Started creating beamer file correction_v20_data17_13TeV_EGAM1_probes_lhmedium_EGAM7_vetolhvloose.pdf latex code...


rm: cannot remove '*.aux': No such file or directory
rm: cannot remove '*.out': No such file or directory
rm: cannot remove '*.snm': No such file or directory
rm: cannot remove '*.toc': No such file or directory
rm: cannot remove '*.nav': No such file or directory


## 3) Export all tunings:

In [26]:
# {op} is filled here; the remaining %d placeholders are left in the string passed to ct.export.
model_name_format = 'data17_13TeV_EGAM1_probes_lhmedium_EGAM7_vetolhvloose.model_v20.electron{op}.et%d_eta%d'
config_name_format = 'ElectronRinger{op}TriggerConfig.conf'

# Operating point labels, listed in the same order as `references`.
operation_points = ['Tight','Medium','Loose','VeryLoose']
for op, reference_name in zip(operation_points, references):
    model_name = model_name_format.format(op=op)
    config_name = config_name_format.format(op=op)
    ct.export(best_models,
              model_name,
              config_name,
              reference_name,
              to_onnx='new')

Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
2022-08-15 20:11:51,713 | Py.fit_table                            INFO Export all tuning configuration to ElectronRingerTightTriggerConfig.conf.
2022-08-15 20:11:59,339 | Py.fit_table                            INFO Export all tuning configuration to ElectronRingerMediumTriggerConfig.conf.
2022-08-15 20:12:06,980 | Py.fit_table                            INFO Export all tuning configuration to ElectronRingerLooseTriggerConfig.conf.
2022-08-15 20:12:14,539 | Py.fit_table                            INFO Export all tuning configuration to ElectronRingerVeryLooseTriggerConfig.conf.


2022-08-15 20:11:43.950203: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2022-08-15 20:11:43.950406: I tensorflow/core/grappler/clusters/single_machine.cc:356] Starting new session
2022-08-15 20:11:43.956425: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:816] Optimization results for grappler item: graph_to_optimize
2022-08-15 20:11:43.956456: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   function_optimizer: function_optimizer did nothing. time = 0.005ms.
2022-08-15 20:11:43.956464: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   function_optimizer: function_optimizer did nothing. time = 0.003ms.
2022-08-15 20:11:44.037980: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2022-08-15 20:11:44.038141: I tensorflow/core/grappler/clusters/single_machine.cc:356] Starting new session
2022-08-15 20:11:44.044956: I tenso

In [27]:
# Drop a marker file in the working directory once every previous cell has run.
with open('finished.txt', 'w') as finish_file:
    finish_file.write('finished')