# Compare v10: r1 vs r2

This notebook is dedicated to exporting the v10 model to ONNX and Keras versions. Usually, the Keras version is used in the prometheus framework. The ONNX version will be used in the athena framework.

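**NOTE**: ONNX (Open Neural Network Exchange) is an open format for representing machine-learning models; inference on ONNX models is typically run with an engine such as Microsoft's ONNX Runtime.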

**NOTE**: We will export only models from r2 derivation.

In [1]:
from saphyra import crossval_table, get_color_fader
import saphyra
import numpy as np
import pandas as pd
import collections
import os
import matplotlib
import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'retina'



In [2]:
def create_op_dict(op):
    """Build the column-name -> lookup-path mapping for one operation point.

    Parameters
    ----------
    op : str
        Operation point name (the notebook uses 'tight', 'medium', 'loose'
        and 'vloose'). It prefixes every key and is embedded in every path
        as ``reference/<op>_cutbased/...``.

    Returns
    -------
    dict
        21 entries, in this insertion order:

        * for each stage in (ref, val, op): ``<op>_pd_<stage>``,
          ``<op>_fa_<stage>`` (paths ending in ``#0``) and ``<op>_sp_<stage>``
          (path without suffix);
        * for each stage in (ref, val, op): the ``_passed`` counters for pd
          and fa (paths ending in ``#1``), then the ``_total`` counters for
          pd and fa (paths ending in ``#2``).

    NOTE(review): the ``#k`` suffix presumably selects element k of the
    stored entry when the table is filled -- confirm against saphyra.
    """
    prefix = "reference/" + op + "_cutbased/"
    stages = ("ref", "val", "op")
    mapping = {}

    # Values: pd / fa take element #0, sp is referenced without a suffix.
    for stage in stages:
        for quantity, suffix in (("pd", "#0"), ("fa", "#0"), ("sp", "")):
            name = quantity + "_" + stage
            mapping[op + "_" + name] = prefix + name + suffix

    # Counts: "passed" is element #1 and "total" is element #2.
    for stage in stages:
        for label, suffix in (("passed", "#1"), ("total", "#2")):
            for quantity in ("pd", "fa"):
                name = quantity + "_" + stage
                mapping[op + "_" + name + "_" + label] = prefix + name + suffix

    return mapping

# Mapping from table column name to the lookup path of the value inside the
# tuning files; it is handed to crossval_table further down the notebook.
tuned_info = collections.OrderedDict([
    # validation
    ("max_sp_val",    'summary/max_sp_val'),
    ("max_sp_pd_val", 'summary/max_sp_pd_val#0'),
    ("max_sp_fa_val", 'summary/max_sp_fa_val#0'),
    # Operation
    ("max_sp_op",     'summary/max_sp_op'),
    ("max_sp_pd_op",  'summary/max_sp_pd_op#0'),
    ("max_sp_fa_op",  'summary/max_sp_fa_op#0'),
])

# Append the reference columns of every operation point, tightest first.
# A generator is used so that no helper variable leaks into the notebook
# namespace.
tuned_info.update(
    (column, path)
    for point in ('tight', 'medium', 'loose', 'vloose')
    for column, path in create_op_dict(point).items()
)

In [3]:
# Bin edges handed to crossval_table below (6 edges each -> 5 bins per axis).
# NOTE(review): etbins is presumably transverse energy in GeV, with the last
# edge acting as an open upper bound -- confirm.
etbins = [
    15, 20, 30, 40, 50,
    1000000,
]
# NOTE(review): etabins is presumably |eta| of the cluster -- confirm.
etabins = [
    0.0, 0.8, 1.37, 1.54, 2.37,
    2.5,
]

## 1) Reading all tunings:

Let's select the latest tunings for the v10 version:

- r1: Selected topology used during the deep learning chain with seixas: Rings->Conv1D(16,relu)->Conv1D(32,relu)->Flatten()->Dense(32,relu)->Dense(1)->Sigmoid();

- r2: Selected topology after some studies to avoid overfitting: Rings->Conv1D(4,relu)->Conv1D(8,relu)->Flatten()->Dense(16,relu)->Dense(1)->Sigmoid();

**Note**: The r2 tag presents L2 regularization in the last layer.

In [10]:
# Cross-validation table for the r1 derivation: built from the tuned_info
# column map and the (et, eta) bin edges, then filled from every *.gz file
# matching the r1 glob pattern and labelled 'v10-r1'.
cv_r1 = crossval_table(
    tuned_info,
    etbins=etbins,
    etabins=etabins,
)
cv_r1.fill(
    '/Volumes/castor/tuning_data/Zee/v10/*.r1/*/*.gz',
    'v10-r1',
)

In [11]:
# Cross-validation table for the r2 derivation: built from the tuned_info
# column map and the (et, eta) bin edges, then filled from every *.gz file
# matching the r2 glob pattern and labelled 'v10-r2'.
cv_r2 = crossval_table(
    tuned_info,
    etbins=etbins,
    etabins=etabins,
)
cv_r2.fill(
    '/Volumes/castor/tuning_data/Zee/v10/*.r2/*/*.gz',
    'v10-r2',
)

## 2) Compare r1 and r2:

In [15]:
# Two-step selection on the r1 table: filter the initialisations by
# "max_sp_val", then filter the sorts of that result by 'max_sp_op'.
# NOTE(review): presumably each step keeps the row with the highest value of
# the given column -- confirm against the saphyra documentation.
best_inits_r1 = cv_r1.filter_inits("max_sp_val")
best_sorts_r1 = cv_r1.filter_sorts(
    best_inits_r1,
    'max_sp_op',
)
# Preview of the selected initialisations.
best_inits_r1.head()

Unnamed: 0,train_tag,et_bin,eta_bin,model_idx,sort,init,file_name,tuned_idx,max_sp_val,max_sp_pd_val,...,vloose_pd_ref_total,vloose_fa_ref_total,vloose_pd_val_passed,vloose_fa_val_passed,vloose_pd_val_total,vloose_fa_val_total,vloose_pd_op_passed,vloose_fa_op_passed,vloose_pd_op_total,vloose_fa_op_total
1,v10-r1,0,0,0,0,1,/Volumes/castor/tuning_data/Zee/v10/user.jodaf...,0,0.982008,0.984323,...,232819,187639,23016,480,23282,18764,230150,5144,232819,187639
12,v10-r1,0,0,0,1,0,/Volumes/castor/tuning_data/Zee/v10/user.jodaf...,0,0.981328,0.986599,...,232819,187639,23015,518,23282,18764,230153,4708,232819,187639
15,v10-r1,0,0,0,2,1,/Volumes/castor/tuning_data/Zee/v10/user.jodaf...,0,0.978441,0.985311,...,232819,187639,23016,644,23282,18764,230152,5032,232819,187639
16,v10-r1,0,0,0,3,0,/Volumes/castor/tuning_data/Zee/v10/user.jodaf...,0,0.979901,0.986685,...,232819,187639,23015,562,23282,18764,230152,4941,232819,187639
19,v10-r1,0,0,0,4,1,/Volumes/castor/tuning_data/Zee/v10/user.jodaf...,0,0.980906,0.986341,...,232819,187639,23016,520,23282,18764,230152,4947,232819,187639


In [16]:
# Two-step selection on the r2 table: filter the initialisations by
# "max_sp_val", then filter the sorts of that result by 'max_sp_op'.
# NOTE(review): presumably each step keeps the row with the highest value of
# the given column -- confirm against the saphyra documentation.
best_inits_r2 = cv_r2.filter_inits("max_sp_val")
best_sorts_r2 = cv_r2.filter_sorts(
    best_inits_r2,
    'max_sp_op',
)
# Preview of the selected initialisations.
best_inits_r2.head()

Unnamed: 0,train_tag,et_bin,eta_bin,model_idx,sort,init,file_name,tuned_idx,max_sp_val,max_sp_pd_val,...,vloose_pd_ref_total,vloose_fa_ref_total,vloose_pd_val_passed,vloose_fa_val_passed,vloose_pd_val_total,vloose_fa_val_total,vloose_pd_op_passed,vloose_fa_op_passed,vloose_pd_op_total,vloose_fa_op_total
23,v10-r2,0,0,0,0,3,/Volumes/castor/tuning_data/Zee/v10/user.jodaf...,0,0.9818,0.989047,...,232819,187639,23015,473,23282,18764,230153,5269,232819,187639
46,v10-r2,0,0,0,1,1,/Volumes/castor/tuning_data/Zee/v10/user.jodaf...,0,0.980456,0.985654,...,232819,187639,23015,553,23282,18764,230153,5637,232819,187639
2,v10-r2,0,0,0,2,0,/Volumes/castor/tuning_data/Zee/v10/user.jodaf...,0,0.977895,0.984537,...,232819,187639,23015,674,23282,18764,230152,5631,232819,187639
10,v10-r2,0,0,0,3,3,/Volumes/castor/tuning_data/Zee/v10/user.jodaf...,0,0.979387,0.98501,...,232819,187639,23015,584,23282,18764,230153,5457,232819,187639
13,v10-r2,0,0,0,4,0,/Volumes/castor/tuning_data/Zee/v10/user.jodaf...,0,0.980104,0.986341,...,232819,187639,23016,556,23282,18764,230153,5523,232819,187639


### 2.1) Make table:

In [19]:
# Stack the selected initialisations of both derivations into one frame so
# that a single table can show r1 and r2 side by side.
best_inits = pd.concat([best_inits_r1, best_inits_r2])

# One beamer table per operation point, comparing the two training tags.
# NOTE(review): the dump goes through cv_r1 although the frame also holds r2
# rows; presumably dump_beamer_table only uses the frame it is given --
# confirm.
for op in ['tight', 'medium', 'loose', 'vloose']:
    cv_r1.dump_beamer_table(
        best_inits,
        [op],
        'tuning_v10_r1_vs_r2_{}'.format(op),
        title='{} Tunings (v10): r1 vs r2'.format(op),
        tags=['v10-r1', 'v10-r2'],
    )