# Set references for v9 tuning and export to ONNX

This notebook is dedicated to apply the linear correction in the neural network output w.r.t the avgmu and export the v9 best models to ONNX/keras format. Usually, keras versions is used into the prometheus framework. The ONNX version will be used into the athena framework.

**NOTE**: ONNX is a Microsoft API for inference.

**NOTE**: We will export all tunings from v9 r0 derivation

In [1]:
from kolmov import crossval_table, get_color_fader, fit_table
import saphyra
import numpy as np
import pandas as pd
import collections
import os
import matplotlib
import matplotlib.pyplot as plt
from pprint import pprint
from copy import deepcopy
%config InlineBackend.figure_format = 'retina'
%load_ext autoreload
%autoreload 2

Welcome to JupyROOT 6.16/00
Using all sub packages with ROOT dependence

Applying ATLAS style settings...


### 2.3) Apply linear correction:

In [2]:
def create_op_dict(op):
    d = {
              op+'_pd_ref'    : "reference/"+op+"_cutbased/pd_ref#0",
              op+'_fa_ref'    : "reference/"+op+"_cutbased/fa_ref#0",
              op+'_sp_ref'    : "reference/"+op+"_cutbased/sp_ref",
              op+'_pd_val'    : "reference/"+op+"_cutbased/pd_val#0",
              op+'_fa_val'    : "reference/"+op+"_cutbased/fa_val#0",
              op+'_sp_val'    : "reference/"+op+"_cutbased/sp_val",
              op+'_pd_op'     : "reference/"+op+"_cutbased/pd_op#0",
              op+'_fa_op'     : "reference/"+op+"_cutbased/fa_op#0",
              op+'_sp_op'     : "reference/"+op+"_cutbased/sp_op",

              # Counts
              op+'_pd_ref_passed'    : "reference/"+op+"_cutbased/pd_ref#1",
              op+'_fa_ref_passed'    : "reference/"+op+"_cutbased/fa_ref#1",
              op+'_pd_ref_total'     : "reference/"+op+"_cutbased/pd_ref#2",
              op+'_fa_ref_total'     : "reference/"+op+"_cutbased/fa_ref#2",
              op+'_pd_val_passed'    : "reference/"+op+"_cutbased/pd_val#1",
              op+'_fa_val_passed'    : "reference/"+op+"_cutbased/fa_val#1",
              op+'_pd_val_total'     : "reference/"+op+"_cutbased/pd_val#2",
              op+'_fa_val_total'     : "reference/"+op+"_cutbased/fa_val#2",
              op+'_pd_op_passed'     : "reference/"+op+"_cutbased/pd_op#1",
              op+'_fa_op_passed'     : "reference/"+op+"_cutbased/fa_op#1",
              op+'_pd_op_total'      : "reference/"+op+"_cutbased/pd_op#2",
              op+'_fa_op_total'      : "reference/"+op+"_cutbased/fa_op#2",
    }
    return d

tuned_info = collections.OrderedDict( {
              # validation
              "max_sp_val"      : 'summary/max_sp_val',
              "max_sp_pd_val"   : 'summary/max_sp_pd_val#0',
              "max_sp_fa_val"   : 'summary/max_sp_fa_val#0',
              # Operation
              "max_sp_op"       : 'summary/max_sp_op',
              "max_sp_pd_op"    : 'summary/max_sp_pd_op#0',
              "max_sp_fa_op"    : 'summary/max_sp_fa_op#0',
              } )

tuned_info.update(create_op_dict('tight'))
tuned_info.update(create_op_dict('medium'))
tuned_info.update(create_op_dict('loose'))
tuned_info.update(create_op_dict('vloose'))

In [3]:
etbins = [15, 20, 30, 40, 50, 1000000]
etabins = [0.0, 0.8, 1.37, 1.54, 2.37, 2.50]

## 1) Reading all tunings:


In [4]:
cv  = crossval_table( tuned_info, etbins = etbins , etabins = etabins )

In [5]:
cv.fill(  '/home/jodafons/public/tuning_data/Zee/v9/r1/*/*/*.gz', 'v9')

Reading v9...: 100%|██████████| 2500/2500 [00:46<00:00, 53.66it/s] 


2021-11-14 13:14:13,288 | Py.crossval_table                       INFO Reading file for v9 tag from /home/jodafons/public/tuning_data/Zee/v9/r1/*/*/*.gz
2021-11-14 13:14:59,943 | Py.crossval_table                       INFO End of fill step, a pandas DataFrame was created...


### 1.1) Get best inits and sorts:

In [6]:
best_inits = cv.filter_inits("max_sp_val")
best_sorts = cv.filter_sorts( best_inits , 'max_sp_op')

## 2) Linear correction:

Here we will set all thresholds to operate as the same pd reference from cut-based using the pileup linear correction strategy. As the classifier efficiency has some "dependence" w.r.t the pileup we adopt the linear adjustment to "fix" the trigger efficiency. Here we will "fix" the neural network threshold w.r.t the pileup. 

In [7]:
etbins = [15, 20, 30, 40, 50, 1000000]
etabins = [0.0, 0.8, 1.37, 1.54, 2.37, 2.50]

In [8]:
best_models = cv.get_best_models(best_sorts, remove_last=True)

In [9]:
def my_generator( path , pidname):
    
    import pandas as pd
    from kepler import load
    df = load(path)
    
    # NOTE: Offline filter lhvloose -> lhmedium (as the training procedure)
    df = df.loc[ ((df[pidname]==True) & (df.target==1.0)) | (df.target==0) ]

    col_names= ['trig_L2_cl_ring_%d'%i for i in range(100)]
    rings = df[col_names].values.astype(np.float32)
    
    def norm1( data ):
        norms = np.abs( data.sum(axis=1) )
        norms[norms==0] = 1
        return data/norms[:,None]
    
    avgmu = df['avgmu'].values.astype(np.float32)
    target = df['target'].values.astype(np.int16)
    rings = norm1(rings)
    reta = df['trig_L2_cl_reta'].values.astype(np.float32) / 1.0
    eratio = df['trig_L2_cl_eratio'].values.astype(np.float) / 1.0
    f1 = df['trig_L2_cl_f1'].values.astype(np.float) / 0.6
    f3 = df['trig_L2_cl_f3'].values.astype(np.float) / 0.04
    weta2 = df['trig_L2_cl_weta2'].values.astype(np.float) / 0.02
    wstot = df['trig_L2_cl_wstot'].values.astype(np.float) / 1.0
    eratio[eratio>10.0]=0.0
    eratio[eratio>1.]=1.0
    wstot[wstot<-99]=0
    
    f1 = f1.reshape((-1,1))
    f3 = f3.reshape((-1,1))
    reta = reta.reshape((-1,1))
    eratio = eratio.reshape((-1,1))
    weta2 = weta2.reshape((-1,1))
    wstot = wstot.reshape((-1,1))
    showers = np.concatenate( (reta,eratio,f1,f3,weta2,wstot), axis=1)  
    return [rings, showers], target, avgmu

def my_medium_generator(path):
    return my_generator(path, 'el_lhmedium')

In [10]:
path = '/home/jodafons/public/cern_data/new_files/data17_13TeV.AllPeriods.sgn.probes_lhvloose_EGAM1.bkg.vprobes_vlhvloose_EGAM7.GRL_v97.25bins/'
path+= 'data17_13TeV.AllPeriods.sgn.probes_lhvloose_EGAM1.bkg.vprobes_vlhvloose_EGAM7.GRL_v97.25bins_et{ET}_eta{ETA}.npz'
paths = [[ path.format(ET=et,ETA=eta) for eta in range(5)] for et in range(5)]
generators = [[ my_generator for eta in range(5)] for et in range(5)]

In [11]:
# calculate all pd/fa from reference file
ref_path = '/home/jodafons/public/cern_data/new_files/data17_13TeV.AllPeriods.sgn.probes_lhvloose_EGAM1.bkg.vprobes_vlhvloose_EGAM7.GRL_v97.25bins/'
ref_path += 'references/data17_13TeV.AllPeriods.sgn.probes_lhmedium_EGAM1.bkg.VProbes_EGAM7.GRL_v97_et{ET}_eta{ETA}.ref.pic.gz'
ref_paths = [[ ref_path.format(ET=et,ETA=eta) for eta in range(5)] for et in range(5)]
ref_matrix = [[ {} for eta in range(5)] for et in range(5)]
references = [ 'tight_cutbased', 'medium_cutbased', 'loose_cutbased', 'vloose_cutbased' ]

from saphyra.core import ReferenceReader
for et_bin in range(5):
    for eta_bin in range(5):
        for name in references:
            refObj = ReferenceReader().load(ref_paths[et_bin][eta_bin])
            pd = refObj.getSgnPassed(name)/refObj.getSgnTotal(name)
            fa = refObj.getBkgPassed(name)/refObj.getBkgTotal(name)
            ref_matrix[et_bin][eta_bin][name] = {'pd':pd, 'fa':fa, 'pd_epsilon':0.0, 'label':name.replace('_cutbased','')}

In [12]:
# create the table class
ct  = fit_table(etbins , etabins)

### 2.2) Fill histograms:

**NOTE**: Do not run this cell if you have the histogram files ready in your workspace.

In [15]:
ct.fill(my_medium_generator, paths, best_models, 'histograms.root' , 0.05, 0.5, 16, 60, 
        xmin_percentage=0.05, 
        xmax_percentage=99.95, 
        verbose=False)


Applying ATLAS style settings...


Filling... : 100%|████████████████████| 25/25 [03:22<00:00,  8.10s/it]


In [16]:
table = ct.calculate( 'histograms.root', best_models, ref_matrix, 'output')

Fitting... : 100%|████████████████████| 25/25 [00:42<00:00,  1.71s/it]


### 2.4) Create beamer report:

In [17]:
ct.dump_beamer_table(table, best_models, 'data17_13TeV v9 tuning', 
                     'correction_v9_data17_13TeV_EGAM1_probes_lhmedium_EGAM7_vetolhvloose_25bins.pdf')

2021-11-14 13:50:18,489 | Py.BeamerTexReportTemplate1             INFO Started creating beamer file correction_v9_data17_13TeV_EGAM1_probes_lhmedium_EGAM7_vetolhvloose_25bins.pdf latex code...


## 3) Export all tunings:

In [18]:
model_name_format = 'data17_13TeV_EGAM1_probes_lhmedium_EGAM7_vetolhvloose.model_v9.electron{op}.et%d_eta%d'
config_name_format = 'ElectronRinger{op}TriggerConfig.conf'
for idx, op in enumerate(['Tight','Medium','Loose','VeryLoose']):
    ct.export(best_models, 
              model_name_format.format(op=op), 
              config_name_format.format(op=op), 
              references[idx], 
              to_onnx=True)

Saving ONNX file as models/data17_13TeV_EGAM1_probes_lhmedium_EGAM7_vetolhvloose.model_v9.electronTight.et0_eta0.onnx
Saving ONNX file as models/data17_13TeV_EGAM1_probes_lhmedium_EGAM7_vetolhvloose.model_v9.electronTight.et0_eta1.onnx
Saving ONNX file as models/data17_13TeV_EGAM1_probes_lhmedium_EGAM7_vetolhvloose.model_v9.electronTight.et0_eta2.onnx
Saving ONNX file as models/data17_13TeV_EGAM1_probes_lhmedium_EGAM7_vetolhvloose.model_v9.electronTight.et0_eta3.onnx
Saving ONNX file as models/data17_13TeV_EGAM1_probes_lhmedium_EGAM7_vetolhvloose.model_v9.electronTight.et0_eta4.onnx
Saving ONNX file as models/data17_13TeV_EGAM1_probes_lhmedium_EGAM7_vetolhvloose.model_v9.electronTight.et1_eta0.onnx
Saving ONNX file as models/data17_13TeV_EGAM1_probes_lhmedium_EGAM7_vetolhvloose.model_v9.electronTight.et1_eta1.onnx
Saving ONNX file as models/data17_13TeV_EGAM1_probes_lhmedium_EGAM7_vetolhvloose.model_v9.electronTight.et1_eta2.onnx
Saving ONNX file as models/data17_13TeV_EGAM1_probes_lhm

tf executing eager_mode: True
tf.keras model eager_mode: False
The ONNX operator number change on the optimization: 19 -> 12
The maximum opset needed by this model is only 11.
tf executing eager_mode: True
tf.keras model eager_mode: False
The ONNX operator number change on the optimization: 19 -> 12
The maximum opset needed by this model is only 11.
tf executing eager_mode: True
tf.keras model eager_mode: False
The ONNX operator number change on the optimization: 19 -> 12
The maximum opset needed by this model is only 11.
tf executing eager_mode: True
tf.keras model eager_mode: False
The ONNX operator number change on the optimization: 19 -> 12
The maximum opset needed by this model is only 11.
tf executing eager_mode: True
tf.keras model eager_mode: False
The ONNX operator number change on the optimization: 19 -> 12
The maximum opset needed by this model is only 11.
tf executing eager_mode: True
tf.keras model eager_mode: False
The ONNX operator number change on the optimization: 19 -

In [17]:
import tensorflow as tf
model = best_models[0][0]['model']
tf.keras.utils.plot_model(model, to_file='model_v9_plot.pdf', 
                          show_shapes=True, 
                          show_layer_names=True,
                          show_dtype=False,
                          rankdir='TB',
                          expand_nested=True,
                          dpi=200)

# v9 rg
from saphyra import *
import tensorflow as tf
from tensorflow.keras import layers
input  = layers.Input(shape=(100,), name = 'Input')
dense  = layers.Dense(5, activation='relu', name='dense_layer')(input)
dense  = layers.Dense(1,activation='linear', name='output_for_inference')(dense)
output = layers.Activation('sigmoid', name='output_for_training')(dense)
model = tf.keras.Model(input, output, name = "model")
tf.keras.utils.plot_model(model, to_file='model_v9_rg_plot.pdf', 
                          show_shapes=True, 
                          show_layer_names=True,
                          show_dtype=False,
                          rankdir='TB',
                          expand_nested=True,
                          dpi=200)

input  = layers.Input(shape=(6,), name = 'Input')
dense  = layers.Dense(5, activation='relu', name='dense_layer')(input)
dense  = layers.Dense(1,activation='linear', name='output_for_inference')(dense)
output = layers.Activation('sigmoid', name='output_for_training')(dense)
model = tf.keras.Model(input, output, name = "model")
tf.keras.utils.plot_model(model, to_file='model_v9_ss_plot.pdf', 
                          show_shapes=True, 
                          show_layer_names=True,
                          show_dtype=False,
                          rankdir='TB',
                          expand_nested=True,
                          dpi=200)



TypeError: plot_model() got an unexpected keyword argument 'show_dtype'