## Imports and Declarations

In [1]:
import os
import sys
import logging
from datetime import datetime
from kolmov import crossval_table, get_color_fader, fit_table
from saphyra.core import ReferenceReader
import numpy as np
import pandas as pd
import collections
import matplotlib
import matplotlib.pyplot as plt
from pprint import pprint
from copy import deepcopy
from argparse import ArgumentParser
from tensorflow import keras
from collections import OrderedDict

Welcome to JupyROOT 6.16/00
Using all sub packages with ROOT dependence


In [2]:
# Named groups of column indices, kept in insertion order. `generator` slices
# the normalised 100-column ring matrix with each list, producing one array
# per group in exactly this order.
RINGS_LAYERS = OrderedDict([
    ('presample', list(range(8))),
    ('em1', list(range(8, 72))),
    ('em2', list(range(72, 80))),
    ('em3', list(range(80, 88))),
    ('had1', list(range(88, 92))),
    ('had2', list(range(92, 96))),
    ('had3', list(range(96, 100))),
])

def create_op_dict(op):
    """Build the column-name -> storage-path mapping for one operating point.

    Parameters
    ----------
    op : str
        Operating point name (the notebook passes 'tight', 'medium', 'loose'
        and 'vloose'). It prefixes every key and selects the
        ``reference/<op>_cutbased/`` path.

    Returns
    -------
    dict
        21 entries in a fixed insertion order: first the ``#0`` pd/fa entries
        and the plain sp entry for each of ref/val/op, then the ``#1``
        (``*_passed``) and ``#2`` (``*_total``) entries for pd and fa.
    """
    base = f"reference/{op}_cutbased/"
    subsets = ('ref', 'val', 'op')
    mapping = {}

    # Values: pd and fa read slot #0, sp has no slot suffix.
    for subset in subsets:
        mapping[f"{op}_pd_{subset}"] = f"{base}pd_{subset}#0"
        mapping[f"{op}_fa_{subset}"] = f"{base}fa_{subset}#0"
        mapping[f"{op}_sp_{subset}"] = f"{base}sp_{subset}"

    # Counts: slot #1 is exposed as "<...>_passed", slot #2 as "<...>_total".
    for subset in subsets:
        for suffix, slot in (('passed', 1), ('total', 2)):
            for rate in ('pd', 'fa'):
                mapping[f"{op}_{rate}_{subset}_{suffix}"] = f"{base}{rate}_{subset}#{slot}"

    return mapping

In [3]:
def generator(path):
    """Load one data file and split its normalised ring columns by layer group.

    Parameters
    ----------
    path : str
        File handed to ``Gaugi.load``. The loaded object must expose a 2-D
        ``'data'`` array and a ``'target'`` entry.

    Returns
    -------
    tuple
        ``(data_rings, target, avgmu)`` where ``data_rings`` is a list with one
        array per ``RINGS_LAYERS`` entry (in that order), ``target`` is
        ``d['target']`` unchanged and ``avgmu`` is column 0 of ``d['data']``.
    """
    # Imports are kept local to the function, as in the original cell.
    from Gaugi import load
    import numpy as np

    def norm1(rings):
        # Divide every row by the absolute value of its sum; rows summing to
        # zero are divided by 1 so they stay unchanged instead of yielding NaN.
        scale = np.abs(rings.sum(axis=1))
        scale[scale == 0] = 1
        return rings / scale[:, None]

    raw = load(path)
    # Columns 1..100 hold the values to normalise; column 0 is returned as avgmu.
    normalised = norm1(raw['data'][:, 1:101])
    per_layer = [normalised[:, idxs] for idxs in RINGS_LAYERS.values()]

    return per_layer, raw['target'], raw['data'][:, 0]

def add_extra_bin_models(best_models: list, et_split=100):
    """Append one extra ET row to ``best_models`` by cloning the last row.

    The function mutates ``best_models`` in place: the current last ET row is
    duplicated (one entry per eta bin) and the boundary between the old last
    row and the new row is set to ``et_split``.

    Parameters
    ----------
    best_models : list
        ``best_models[et][eta]`` is a dict holding at least ``'model'``
        (an object accepted by ``keras.models.clone_model``), ``'etBinIdx'``
        and a mutable two-sided ``'etBin'`` sequence.
    et_split : number, optional
        ET edge separating the previous last row from the new one. Defaults to
        100, the value previously hard-coded here.

    Raises
    ------
    IndexError
        If ``best_models`` is empty (there is no row to clone).

    Notes
    -----
    The row indices used to be hard-coded (source row 4, new row 5, five eta
    bins). They are now derived from ``best_models`` itself, which gives the
    same result for the 5x5 grid and also works for other grid sizes.
    """
    source_idx = len(best_models) - 1
    source_row = best_models[source_idx]   # IndexError here if nothing to clone
    new_idx = source_idx + 1

    new_row = list()
    best_models.append(new_row)
    for source in source_row:
        # Copy every field except the model, which needs a Keras-level clone.
        entry = {key: deepcopy(value) for key, value in source.items() if key != 'model'}
        # clone_model builds a fresh model from the source one; the weights
        # are then copied over explicitly from the source model.
        cloned_model = keras.models.clone_model(source['model'])
        cloned_model.set_weights(source['model'].get_weights())
        entry['model'] = cloned_model
        entry['etBinIdx'] = new_idx
        # New row starts at et_split; the source row now ends there.
        entry['etBin'][0] = et_split
        source['etBin'][-1] = et_split
        new_row.append(entry)

## Arguments

In [4]:
# Dataset tag: used below to build the data/reference file names and the
# name of the fitted correction.
dataset = "data17_13TeV.AllPeriods.sgn.probes_lhmedium_EGAM1.bkg.VProbes_EGAM7.GRL_v97.30bins"
# Tag passed to the crossval table together with `model_path`.
model_version = "vInceptionPerLayer.100"
# When True, an additional ET row is appended to the models/references.
extra_bin = True
# NOTE(review): absolute, user-specific location — adjust before running on
# another account or machine.
model_path = "/home/lucas.nunes/workspace/ringer_tunings/versions/vInception/v2/jobs_dev01/output"
# Fitted tables are written next to the tuning output, in "<model_path>_fitted".
output_dir = model_path + "_fitted"

In [5]:
# ET bin edges; consecutive edges delimit one bin, so there is one bin fewer
# than edges. (Presumably GeV, with the last edge acting as "no upper limit" —
# TODO confirm.)
etbins = [15, 20, 30, 40, 50, 1000000]
n_ets = len(etbins)-1
# Eta bin edges, same convention as `etbins`.
etabins = [0.0, 0.8, 1.37, 1.54, 2.37, 2.50]
n_etas = len(etabins)-1
# exist_ok=True replaces the separate exists() check: it is a single call and
# does not fail if the directory appears between the check and the creation.
os.makedirs(output_dir, exist_ok=True)

In [6]:
# Log to both a timestamped file (in the current working directory) and stdout.
start_time = datetime.today().strftime('%Y_%m_%d_%H_%M_%S')
log_name = 'decision_threshold_fit_log'
logger = logging.getLogger(log_name)
logger.setLevel(logging.INFO)

# logging.getLogger returns the same object for the same name, so re-running
# this cell would otherwise stack two more handlers on it and every message
# would be emitted twice (then three times, ...). Drop and close whatever a
# previous run attached before adding the fresh pair.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

log_filename = f'{start_time}_{log_name}_pid_{os.getpid()}.log'
file_handler = logging.FileHandler(log_filename, mode='a+')
stdout_handler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s;%(message)s')
file_handler.setFormatter(formatter)
stdout_handler.setFormatter(formatter)
logger.addHandler(file_handler)
logger.addHandler(stdout_handler)

In [7]:
# Column name -> storage path for the summary quantities read into the
# crossval table. Insertion order is preserved.
tuned_info = collections.OrderedDict([
    # validation
    ("max_sp_val", 'summary/max_sp_val'),
    ("max_sp_pd_val", 'summary/max_sp_pd_val#0'),
    ("max_sp_fa_val", 'summary/max_sp_fa_val#0'),
    # Operation
    ("max_sp_op", 'summary/max_sp_op'),
    ("max_sp_pd_op", 'summary/max_sp_pd_op#0'),
    ("max_sp_fa_op", 'summary/max_sp_fa_op#0'),
])

# Append the per-operating-point reference columns, tightest first.
for _working_point in ('tight', 'medium', 'loose', 'vloose'):
    tuned_info.update(create_op_dict(_working_point))

## Reading tunings and selecting the best ones

In [8]:
# Build the cross-validation table from the tuning files found under
# `model_path` for `model_version`, and dump each intermediate table to
# `output_dir` as CSV.
logger.info('Reading crossval table')
cv  = crossval_table( tuned_info, etbins = etbins , etabins = etabins )
cv.fill(model_path, model_version)
cv.table().to_csv(os.path.join(output_dir, 'cross_val_table.csv'))
logger.info('Dumped crossval table')
# Presumably keeps, for every fold, the best initialisation according to
# "max_sp_val" — confirm against kolmov's crossval_table.filter_inits.
best_inits = cv.filter_inits("max_sp_val")
best_inits.to_csv(os.path.join(output_dir, 'best_inits_table.csv'))
logger.info('Dumped best inits table')
# Presumably keeps the best fold among `best_inits` according to
# 'max_sp_op' — confirm against kolmov's crossval_table.filter_sorts.
best_sorts = cv.filter_sorts( best_inits , 'max_sp_op')
best_sorts.to_csv(os.path.join(output_dir, 'best_sorts.csv'))
logger.info('Dumped best sorts table')
# Loads the selected models; per the original note, remove_last=True strips
# the final activation layer.
best_models = cv.get_best_models(best_sorts, remove_last=True)
logger.info('Loaded best models')
if extra_bin:
    # Mutates `best_models` in place: one more ET row is appended.
    logger.info('Adding extra bin to models')
    add_extra_bin_models(best_models)
    logger.info('Added extra bin to models')

2023-03-05 17:15:06,274;Reading crossval table
2023-03-05 17:15:23,768;Dumped crossval table
2023-03-05 17:15:23,818;Dumped best inits table
2023-03-05 17:15:23,829;Dumped best sorts table
2023-03-05 17:15:29,140;Loaded best models
2023-03-05 17:15:29,143;Adding extra bin to models
2023-03-05 17:15:30,144;Added extra bin to models
2023-03-05 17:15:06,550 | Py.crossval_table                       INFO Reading file for vInceptionPerLayer.100 tag from /home/lucas.nunes/workspace/ringer_tunings/versions/vInception/v2/jobs_dev01/output
2023-03-05 17:15:06,550 | Py.crossval_table                       INFO There are 1250 files for this task...
2023-03-05 17:15:06,550 | Py.crossval_table                       INFO Filling the table... 
2023-03-05 17:15:23,654 | Py.crossval_table                       INFO End of fill step, a pandas DataFrame was created...


In [9]:
# Sanity check: number of entries held by each row of `best_models`.
for n_models_in_row in map(len, best_models):
    print(n_models_in_row)

5
5
5
5
5
5


## Loading reference files

In [10]:
# Reference files live under ~/data/<dataset>/references, one file per
# (ET bin, eta bin) pair.
homepath = os.path.expanduser('~')
datapath = os.path.join(homepath, 'data', dataset)
refpath = os.path.join(datapath, 'references')
ref_filepath = os.path.join(refpath, dataset + '_et{ET}_eta{ETA}.ref.pic.gz')
# One extra ET row is read when the extra bin is enabled.
ref_ets = n_ets+1 if extra_bin else n_ets
# ref_matrix[et][eta][reference name] -> {'pd', 'fa', 'pd_epsilon'}
ref_matrix = [[ {} for _ in range(n_etas)] for _ in range(ref_ets)]
references = ['tight_cutbased', 'medium_cutbased' , 'loose_cutbased', 'vloose_cutbased']
for et_bin in range(ref_ets):
    for eta_bin in range(n_etas):
        # The file depends only on (et_bin, eta_bin), so it is read once per
        # bin and shared by all reference names instead of being re-read for
        # each of them.
        refObj = ReferenceReader().load(ref_filepath.format(ET=et_bin,ETA=eta_bin))
        for name in references:
            logger.info(f'Loading reference for et bin:{et_bin} eta bin:{eta_bin} name {name}')
            # Detection probability and false-alarm rate as passed/total ratios.
            _pd = refObj.getSgnPassed(name)/refObj.getSgnTotal(name)
            fa = refObj.getBkgPassed(name)/refObj.getBkgTotal(name)
            ref_matrix[et_bin][eta_bin][name] = {'pd':_pd, 'fa':fa, 'pd_epsilon':0}

2023-03-05 17:15:30,173;Loading reference for et bin:0 eta bin:0 name tight_cutbased
2023-03-05 17:15:30,176;Loading reference for et bin:0 eta bin:0 name medium_cutbased
2023-03-05 17:15:30,179;Loading reference for et bin:0 eta bin:0 name loose_cutbased
2023-03-05 17:15:30,181;Loading reference for et bin:0 eta bin:0 name vloose_cutbased
2023-03-05 17:15:30,183;Loading reference for et bin:0 eta bin:1 name tight_cutbased
2023-03-05 17:15:30,186;Loading reference for et bin:0 eta bin:1 name medium_cutbased
2023-03-05 17:15:30,189;Loading reference for et bin:0 eta bin:1 name loose_cutbased
2023-03-05 17:15:30,191;Loading reference for et bin:0 eta bin:1 name vloose_cutbased
2023-03-05 17:15:30,194;Loading reference for et bin:0 eta bin:2 name tight_cutbased
2023-03-05 17:15:30,197;Loading reference for et bin:0 eta bin:2 name medium_cutbased
2023-03-05 17:15:30,198;Loading reference for et bin:0 eta bin:2 name loose_cutbased
2023-03-05 17:15:30,201;Loading reference for et bin:0 eta b

## Fitting thresholds

In [11]:
# Bin edges used by the fit. With the extra bin enabled, an edge at 100 is
# placed just before the last ET edge, matching the extra row appended to
# `best_models` and `ref_matrix`.
if extra_bin:
    fit_etbins = etbins[:-1] + [100] + etbins[-1:]
else:
    fit_etbins = list(etbins)
fit_etabins = list(etabins)

# paths[et][eta] -> data file of that bin.
data_filepath = os.path.join(datapath, dataset + '_et{ET}_eta{ETA}.npz')
paths = [
    [data_filepath.format(ET=et_idx, ETA=eta_idx) for eta_idx in range(len(fit_etabins) - 1)]
    for et_idx in range(len(fit_etbins) - 1)
]

# NOTE(review): the positional arguments 0.02, 0.5, 16, 60 are not named here;
# check their meaning against the kolmov fit_table signature.
ct = fit_table(
    generator,
    fit_etbins,
    fit_etabins,
    0.02,
    0.5,
    16,
    60,
    xmin_percentage=0.05,
    xmax_percentage=99.95,
)
fit_name = f'correction_{model_version}_{dataset}'
# Displayed as the cell output: the first three lengths should agree.
len(paths), len(best_models), len(ref_matrix), len(fit_name)

(6, 6, 6, 116)

In [None]:
# Run the threshold fit for every bin, then dump the resulting table as CSV
# and as a beamer PDF.
logger.info('Starting fitting thresholds')
ct.fill(paths, best_models, ref_matrix, fit_name)
logger.info('Finished fitting thresholds')
# Deliberately NOT named `fit_table`: that name is the kolmov class imported
# at the top of the notebook. Rebinding it to a table would make the cell that
# calls `fit_table(generator, ...)` fail when re-run in the same kernel.
threshold_table = ct.table()
threshold_table.to_csv(os.path.join(output_dir, 'threshold_table.csv'))
logger.info('Dumped fitting table')
ct.dump_beamer_table(threshold_table, best_models, f'{dataset} {model_version} tuning',
    fit_name + '.pdf')
logger.info('Dumped beamer table')

2023-03-05 17:15:30,577;Starting fitting thresholds

Applying ATLAS style settings...
Fitting... |------------------------------------------------------------| 0/30

  "Even though the tf.config.experimental_run_functions_eagerly "


Fitting... |####################################------------------------| 18/30

In [None]:
# Display the entry stored for the first row / first column of `best_models`.
best_models[0][0]

## Exporting

In [None]:
# Export one model set and one config file per operating point. The position
# in this list selects the matching entry of `references`
# (Tight -> references[0], ..., VeryLoose -> references[3]).
for idx, op in enumerate(['Tight','Medium','Loose','VeryLoose']):
    logger.info(f'Exporting model {op}')
    # The trailing '.et%d_eta%d' is left as a %-template, apparently filled in
    # per bin by ct.export — TODO confirm.
    model_name_format = f'data17_13TeV_EGAM1_probes_lhmedium_EGAM7_vetolhvloose.model_{model_version}.electron{op}' + '.et%d_eta%d'
    # Defined only here: the earlier assignment before the loop was overwritten
    # on every iteration and never read.
    config_name_format = f'ElectronRinger{op}TriggerConfig.conf'
    ct.export(best_models, 
              model_name_format, 
              config_name_format, 
              references[idx], 
              to_onnx='new')
logger.info('Finished execution')