# Set references for v1 tuning using the linear function

This notebook applies a linear correction to the neural network output with respect to the pileup (avgmu).

**NOTE**: This is an extra study.

**NOTE**: Get all models with 2 neurons.

In [1]:
from kolmov import crossval_table, get_color_fader, fit_table
import saphyra
import numpy as np
import pandas as pd
import collections
import os
import matplotlib
import matplotlib.pyplot as plt
from pprint import pprint
%config InlineBackend.figure_format = 'retina'
%load_ext autoreload
%autoreload 2

Welcome to JupyROOT 6.25/01
Using all sub packages with ROOT dependence


In [2]:
# Bin edges shared by every table in this notebook:
# 4 Et edges -> 3 Et bins, 6 eta edges -> 5 eta bins.
etbins = [
    0.0, 7.0, 10.0, 15.0,
]
etabins = [
    0.0, 0.8, 1.37, 1.54, 2.37, 2.50,
]

## 1) Reading all tunings:

Since I don't have v6 tuning files available, I will reload the production files and get the models for each bin.

In [3]:
def create_op_dict(op):
    """Build the column-name -> storage-key mapping for one operation point.

    Parameters
    ----------
    op : str
        Operation point prefix (the notebook uses 'tight', 'medium',
        'loose' and 'vloose').

    Returns
    -------
    dict
        21 entries. For each of the 'ref', 'val' and 'op' stages there are
        pd/fa/sp entries (pd and fa read slot '#0', sp has no slot),
        followed by the pd/fa counters: '<...>_passed' reads slot '#1' and
        '<...>_total' reads slot '#2'. Insertion order is the same as the
        hand-written table this replaces.
    """
    base = "reference/" + op + "_cutbased/"
    stages = ('ref', 'val', 'op')
    table = {}

    # Efficiencies and SP index, one triplet per stage.
    for stage in stages:
        table[op + '_pd_' + stage] = base + 'pd_' + stage + '#0'
        table[op + '_fa_' + stage] = base + 'fa_' + stage + '#0'
        table[op + '_sp_' + stage] = base + 'sp_' + stage

    # Counts
    for stage in stages:
        for suffix, slot in (('passed', '#1'), ('total', '#2')):
            for rate in ('pd', 'fa'):
                key = op + '_' + rate + '_' + stage + '_' + suffix
                table[key] = base + rate + '_' + stage + slot

    return table

# Column name -> storage key for everything read from each tuning file.
# Entries are inserted in the same order as before: summary values first,
# then one block per operation point.
tuned_info = collections.OrderedDict()

# validation
tuned_info["max_sp_val"] = 'summary/max_sp_val'
tuned_info["max_sp_pd_val"] = 'summary/max_sp_pd_val#0'
tuned_info["max_sp_fa_val"] = 'summary/max_sp_fa_val#0'

# Operation
tuned_info["max_sp_op"] = 'summary/max_sp_op'
tuned_info["max_sp_pd_op"] = 'summary/max_sp_pd_op#0'
tuned_info["max_sp_fa_op"] = 'summary/max_sp_fa_op#0'

# Cut-based reference entries for each operation point.
for operation_point in ('tight', 'medium', 'loose', 'vloose'):
    tuned_info.update(create_op_dict(operation_point))

In [4]:
# Read every v1 tuning file into the cross-validation table.
cv_v1 = crossval_table(tuned_info, etbins=etbins, etabins=etabins)
cv_v1.fill('/Volumes/castor/tuning_data/Jpsi/v1/r0/*/*/*.gz', 'v1')

# Select inits by the validation SP, then sorts by the operation SP.
best_inits_v1 = cv_v1.filter_inits("max_sp_val")
# Only rows with model_idx == 0 are kept
# (presumably the 2-neuron models mentioned in the header -- TODO confirm).
first_model_inits = best_inits_v1.loc[best_inits_v1.model_idx == 0]
best_sorts_v1 = cv_v1.filter_sorts(first_model_inits, 'max_sp_op')

best_models = cv_v1.get_best_models(best_sorts_v1, remove_last=True)

2021-07-03 13:04:21,146 | Py.crossval_table                       INFO Reading file for v1 tag from /Volumes/castor/tuning_data/Jpsi/v1/r0/*/*/*.gz
2021-07-03 13:04:21,146 | Py.crossval_table                       INFO There are 1500 files for this task...
2021-07-03 13:04:21,146 | Py.crossval_table                       INFO Filling the table... 
2021-07-03 13:04:45,353 | Py.crossval_table                       INFO End of fill step, a pandas DataFrame was created...


## 2) Linear correction:

Here we set all thresholds to operate at the same pd as the cut-based reference, using the pileup linear correction strategy. Since the classifier efficiency has some dependence on the pileup, we adopt a linear adjustment to "fix" the trigger efficiency: the neural network threshold is corrected as a function of the pileup.

### 2.1) Get all PD/FA values:

Read all reference values from the storage.

In [5]:
# Calculate all pd/fa from the reference files, one entry per (et, eta) bin
# and per cut-based reference name.
from saphyra.core import ReferenceReader

ref_path = '/Volumes/castor/cern_data/files/Jpsiee/data17_13TeV.AllPeriods.sgn.probes_lhmedium_EGAM2.bkg.VProbes_EGAM7.GRL_v97/references/'
ref_path+= 'data17_13TeV.AllPeriods.sgn.probes_lhmedium_EGAM2.bkg.VProbes_EGAM7.GRL_v97_et{ET}_eta{ETA}.ref.pic.gz'

# Fraction of the inefficiency (1 - pd) that is added back to the reference pd.
relax = 0.4

# Number of bins follows from the bin edges defined at the top of the notebook
# (4 Et edges -> 3 bins, 6 eta edges -> 5 bins).
n_et_bins = len(etbins) - 1
n_eta_bins = len(etabins) - 1

ref_paths = [[ ref_path.format(ET=et,ETA=eta) for eta in range(n_eta_bins)] for et in range(n_et_bins) ]
ref_matrix = [[ {} for eta in range(n_eta_bins)] for et in range(n_et_bins)]
references = ['tight_cutbased', 'medium_cutbased' , 'loose_cutbased', 'vloose_cutbased']

for et_bin in range(n_et_bins):
    for eta_bin in range(n_eta_bins):
        # The file depends only on the bin, so load it once and reuse it
        # for every reference name.
        refObj = ReferenceReader().load(ref_paths[et_bin][eta_bin])
        for name in references:
            # Do NOT call these locals `pd`: that would rebind the pandas
            # alias imported at the top of the notebook.
            sgn_eff = refObj.getSgnPassed(name)/refObj.getSgnTotal(name)
            bkg_eff = refObj.getBkgPassed(name)/refObj.getBkgTotal(name)
            # Relax the signal reference towards 1 by `relax`.
            sgn_eff = (1-sgn_eff)*relax + sgn_eff
            ref_matrix[et_bin][eta_bin][name] = {'pd':sgn_eff, 'fa':bkg_eff}

### 2.2) Create data generator:

Since each tuning model is fed by a different data organization, we need to create a generator that opens the data file, prepares the input matrix and applies some pre-processing (if needed).

In [6]:
def generator( path ):
    """Load one data file and build the inputs used by the fit.

    Parameters
    ----------
    path : str
        File handed to ``Gaugi.load``. The loaded object must provide a
        2-D ``'data'`` array and a ``'target'`` entry.

    Returns
    -------
    tuple
        ``([rings], target, avgmu)`` where

        * ``rings`` is ``data[:, 1:101]`` with each row divided by the
          absolute value of its sum (rows summing to zero are divided by 1),
        * ``target`` is the ``'target'`` entry unchanged,
        * ``avgmu`` is column 0 of ``'data'``.
    """
    # Kept local, as in the original cell, so the notebook's import cell
    # does not need to change.
    from Gaugi import load

    def norm1( data ):
        # Row-wise normalisation by |sum|; zero sums are replaced by 1
        # so that empty rows stay at zero instead of producing NaN/inf.
        norms = np.abs( data.sum(axis=1) )
        norms[norms==0] = 1
        return data/norms[:,None]

    d = load(path)

    # extract rings (columns 1..100) and the pileup column (column 0)
    data_rings = norm1(d['data'][:,1:101])
    target = d['target']
    avgmu = d['data'][:,0]

    return [data_rings], target, avgmu

In [7]:
# Template for the per-bin data files; {ET} and {ETA} are filled with the bin indices.
path = (
    '/Volumes/castor/cern_data/files/Jpsiee/data17_13TeV.AllPeriods.sgn.probes_lhmedium_EGAM2.bkg.VProbes_EGAM7.GRL_v97/'
    'data17_13TeV.AllPeriods.sgn.probes_lhmedium_EGAM2.bkg.VProbes_EGAM7.GRL_v97_et{ET}_eta{ETA}.npz'
)

# paths[et_bin][eta_bin] -> file for that bin (3 Et bins x 5 eta bins).
paths = [
    [path.format(ET=et_idx, ETA=eta_idx) for eta_idx in range(5)]
    for et_idx in range(3)
]

In [8]:
# create the table class
# NOTE(review): this import sits outside the notebook's import cell; kBlackBody is
# only used as the `palette` argument below.
from ROOT import kBlackBody
# `generator` is the per-file loader defined above; etbins/etabins are the bin edges.
# NOTE(review): the meaning of the positional values 0.001, 1.5, 16.5, 45.5 is not
# visible in this notebook -- confirm against the kolmov `fit_table` signature.
ct  = fit_table( generator, etbins , etabins, 0.001, 1.5, 16.5, 45.5, 
                 xmin_percentage=0.05, xmax_percentage=99.95, palette=kBlackBody )

### 2.3) Apply linear correction:

**NOTE**: Takes about 25 minutes.

In [9]:
# Fill it
# Inputs: the per-bin data file paths, the selected best models, the per-bin
# reference pd/fa matrix and the tag string 'correction_v1_...'.
# except_these_bins holds (et_bin, eta_bin) pairs: eta bins 2 and 4 for every Et bin.
# NOTE(review): presumably these bins are excluded from the fit -- confirm in kolmov.
ct.fill(paths, best_models, ref_matrix,
        'correction_v1_probes_lhmedium_EGAM2_vetoProbes_EGAM7',
         except_these_bins = [(0,2),(0,4), (1,2),(1,4),(2,2),(2,4)])


Applying ATLAS style settings...
Fitting... |############################################################| 15/15
Fitting... ... finished task in 180.574907s.


2021-07-03 13:04:49.004181: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)


In [10]:
# Preview the first rows of the table held by `ct` after the fill step.
ct.table().head()

Unnamed: 0,name,et_bin,eta_bin,reference_signal_passed,reference_signal_total,reference_signal_eff,reference_background_passed,reference_background_total,reference_background_eff,signal_passed,...,signal_eff,background_passed,background_total,background_eff,signal_corrected_passed,signal_corrected_total,signal_corrected_eff,background_corrected_passed,background_corrected_total,background_corrected_eff
0,tight_cutbased,0,0,28174,28455,0.990132,69344,215903,0.321181,28174,...,0.990125,38708,215903,0.179284,28181,28455,0.990371,42602,215903,0.19732
1,medium_cutbased,0,0,28174,28455,0.990132,69344,215903,0.321181,28174,...,0.990125,38708,215903,0.179284,28181,28455,0.990371,42602,215903,0.19732
2,loose_cutbased,0,0,28078,28455,0.986758,64940,215903,0.300783,28078,...,0.986751,32702,215903,0.151466,28079,28455,0.986786,34927,215903,0.161772
3,vloose_cutbased,0,0,28271,28455,0.993548,72834,215903,0.337346,28271,...,0.993534,48898,215903,0.226481,28292,28455,0.994272,61289,215903,0.283873
4,tight_cutbased,0,1,8512,8736,0.974382,86022,179082,0.48035,8512,...,0.974359,28256,179082,0.157782,8512,8736,0.974359,30129,179082,0.168241


### 2.4) Create beamer report:

In [11]:
# Write the beamer report from the filled table and the selected models.
# The third argument is the report title; the fourth is the output name
# (the log line below reports it as the "beamer file").
ct.dump_beamer_table(ct.table(), best_models, 'data17_13TeV v1 tuning (Jpsiee)', 
                     'correction_v1_data17_13TeV_EGAM2_probes_lhmedium_EGAM7_vetoProbes.pdf')

2021-07-03 13:07:48,014 | Py.BeamerTexReportTemplate1             INFO Started creating beamer file correction_v1_data17_13TeV_EGAM2_probes_lhmedium_EGAM7_vetoProbes.pdf latex code...


## Export tunings:

In [12]:

# Name templates: {op} is filled here with the operation point; the %d
# placeholders in the model name are left in place for ct.export.
model_name_format = 'data17_13TeV_EGAM2_probes_lhmedium_EGAM7_vetolhvloose.model_v1.electron{op}.et%d_eta%d'
config_name_format = 'ElectronJpsieeRinger{op}TriggerConfig_RingsOnly.conf'

# Operation points, listed in the same order as the entries of `references`.
operation_points = ['Tight', 'Medium', 'Loose', 'VeryLoose']

for position, op in enumerate(operation_points):
    model_name = model_name_format.format(op=op)
    config_name = config_name_format.format(op=op)
    reference_name = references[position]
    ct.export(best_models, model_name, config_name, reference_name, to_onnx=True)

Saving ONNX file as models/data17_13TeV_EGAM2_probes_lhmedium_EGAM7_vetolhvloose.model_v1.electronTight.et0_eta0.onnx
Saving ONNX file as models/data17_13TeV_EGAM2_probes_lhmedium_EGAM7_vetolhvloose.model_v1.electronTight.et0_eta1.onnx
Saving ONNX file as models/data17_13TeV_EGAM2_probes_lhmedium_EGAM7_vetolhvloose.model_v1.electronTight.et0_eta2.onnx
Saving ONNX file as models/data17_13TeV_EGAM2_probes_lhmedium_EGAM7_vetolhvloose.model_v1.electronTight.et0_eta3.onnx
Saving ONNX file as models/data17_13TeV_EGAM2_probes_lhmedium_EGAM7_vetolhvloose.model_v1.electronTight.et0_eta4.onnx
Saving ONNX file as models/data17_13TeV_EGAM2_probes_lhmedium_EGAM7_vetolhvloose.model_v1.electronTight.et1_eta0.onnx
Saving ONNX file as models/data17_13TeV_EGAM2_probes_lhmedium_EGAM7_vetolhvloose.model_v1.electronTight.et1_eta1.onnx
Saving ONNX file as models/data17_13TeV_EGAM2_probes_lhmedium_EGAM7_vetolhvloose.model_v1.electronTight.et1_eta2.onnx
Saving ONNX file as models/data17_13TeV_EGAM2_probes_lhm

Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
Instructions for updating