In [1]:
from __future__ import print_function

import re

import numpy as np
import numexpr as ne
import awkward
from awkward import JaggedArray
import numba

from numpy import max,sqrt,log

In [2]:
def numbaize(fstr,varlist):
    """Compile a formula string into a numba-jitted function.

    The names in ``varlist`` become the positional arguments, in order, of a
    lambda whose body is the expression ``fstr``; that lambda is then wrapped
    with ``numba.njit``.

    NOTE(review): the lambda source is passed to ``eval``, so ``fstr`` and
    ``varlist`` must come from a trusted file -- arbitrary Python in the
    formula would be executed.
    """
    signature = ",".join(varlist)
    source = "lambda {}: {}".format(signature, fstr)
    python_func = eval(source)
    return numba.njit(python_func)

In [3]:
#extract first line of JEC file, setup function template
# NOTE(review): absolute, machine-specific path -- point it at a local copy of the JEC text file.
jec_fname = '/Users/lagray/Downloads/Fall17_17Nov2017_V32_MC/Fall17_17Nov2017_V32_MC_L1FastJet_AK4Calo.txt'
# the context manager closes the handle even if reading the header fails
with open(jec_fname,'r') as jec_f:
    layoutstr = jec_f.readline().strip().strip('{}')

# file name without directory and extension
name = jec_fname.split('/')[-1].split('.')[0]
print(name)

# header tokens, as indexed below:
#   [nBinnedVars] [binned var names...] [nEvalVars] [eval var names...] [formula] ...
layout = layoutstr.split()
# an empty header line is rejected with the same error instead of an IndexError
if not layout or not layout[0].isdigit():
    raise Exception('First column of JEC descriptor must be a digit!')

nBinnedVars = int(layout[0])
nBinColumns = 2*nBinnedVars
nEvalVars   = int(layout[nBinnedVars+1])
formula     = layout[nBinnedVars+nEvalVars+2]

# rename the placeholders [0],[1],... to p0,p1,... and count them;
# counting stops at the first index that does not appear in the formula
nParms      = 0
while '[%i]'%nParms in formula:
    formula = formula.replace('[%i]'%nParms,'p%i'%nParms)
    nParms += 1

# map the single-letter template variables onto the evaluation variable names
# (zip stops at the shorter list, so only the first nEvalVars letters are used)
templatevars = ['x','y','z','w','t','s']
varnames = [layout[i+nBinnedVars+2] for i in range(nEvalVars)]
template_to_var = dict(zip(templatevars,varnames))
if template_to_var:
    # One pass, whole-word matches only: letters inside function names
    # (max, exp, sqrt, pow, ...) are left alone, and text that was just
    # substituted (e.g. the 't' in 'JetPt') is never substituted again.
    template_re = r'\b(%s)\b' % '|'.join(re.escape(v) for v in template_to_var)
    formula = re.sub(template_re, lambda m: template_to_var[m.group(1)], formula)

# per-row column counts that follow the binning columns: (min,max) per eval var + parameters
nFuncColumns = 2*nEvalVars + nParms
nTotColumns = nFuncColumns + 1

# compiled signature: parameters first (p0..pN-1), then the evaluation variables
func = numbaize(formula,['p%i'%i for i in range(nParms)]+list(varnames))

Fall17_17Nov2017_V32_MC_L1FastJet_AK4Calo


In [4]:
#parse the columns
# Column layout of each data row, as built below:
#   (Min,Max) per binned variable, NVars, (Min,Max) per evaluation variable, p0..pN-1
minMax = ['Min','Max']
columns = []
dtypes = []
offset = 1  # layout[0] is the binned-variable count; the names start right after it
for i in range(nBinnedVars):
    columns.extend(['%s%s'%(layout[i+offset],mm) for mm in minMax])
    dtypes.extend(['<f8','<f8'])
columns.append('NVars')
dtypes.append('<i8')
offset += nBinnedVars + 1  # skip the binned names and the evaluation-variable count
for i in range(nEvalVars):
    columns.extend(['%s%s'%(layout[i+offset],mm) for mm in minMax])
    dtypes.extend(['<f8','<f8'])
for i in range(nParms):
    columns.append('p%i'%i)
    dtypes.append('<f8')

# No unpack=True here: with a structured dtype, NumPy >= 1.20 would return a
# list of per-field arrays, which breaks pars.size and pars[column] below.
pars = np.genfromtxt(jec_fname,
                     dtype=tuple(dtypes),
                     names=tuple(columns),
                     skip_header=1,
                     encoding='ascii'
                     )
# a file with a single data row comes back as a 0-d record; keep it 1-d
pars = np.atleast_1d(pars)

print(pars.size)

82


In [5]:
#the first binning variable gets a single flat array of edges
#the next bins may vary in number, so they're jagged arrays... yay
bins = {}
offset_col = 0
offset_name = 1
bin_order = []
for i in range(nBinnedVars):
    binMins = None
    binMaxs = None
    if i == 0:
        # edges = union of every lower and upper edge found in the table
        binMins = np.unique(pars[columns[0]])
        binMaxs = np.unique(pars[columns[1]])
        bins[layout[i+offset_name]] = np.union1d(binMins,binMaxs)
    else:
        # builtin int instead of np.int: the alias was removed in NumPy 1.24
        counts = np.zeros(0,dtype=int)
        allBins = np.zeros(0,dtype=np.double)
        # one set of edges per bin of the FIRST binning variable
        # NOTE(review): rows are selected on the first variable only, even for
        # a third or later binning variable -- confirm this is intended.
        for binMin in bins[bin_order[0]][:-1]:
            # rows whose first-variable lower edge equals this bin's lower edge
            rowsInBin = pars[pars[columns[0]] == binMin]
            # i+offset_col == 2*i: the Min column of the i-th binned variable
            binMins = np.unique(rowsInBin[columns[i+offset_col]])
            binMaxs = np.unique(rowsInBin[columns[i+offset_col+1]])
            theBins = np.union1d(binMins,binMaxs)
            allBins = np.append(allBins,theBins)
            counts  = np.append(counts,theBins.size)
        bins[layout[i+offset_name]] = JaggedArray.fromcounts(counts,allBins)
    bin_order.append(layout[i+offset_name])
    offset_col += 1

In [6]:
#skip nvars to the variable columns
#the columns here define clamps for the variables defined in columns[]
# ----> clamps can be different from bins
# ----> if there is more than one binning variable this array is jagged
# ----> just make it jagged all the time
binshapes = tuple([bins[thebin].size-1 for thebin in bin_order])
clamp_mins = {}
clamp_maxs = {}
var_order = []
offset_col = 2*nBinnedVars+1   # first column after the bin (Min,Max) pairs and NVars
offset_name = nBinnedVars + 2  # first evaluation-variable name in the header
# one entry per bin of the first binning variable
# (builtin int instead of np.int: the alias was removed in NumPy 1.24)
jagged_counts = np.ones(bins[bin_order[0]].size-1,dtype=int)
if len(bin_order) > 1:
    jagged_counts = np.maximum(bins[bin_order[1]].counts - 1,0) #need counts-1 since we only care about Nbins
for i in range(nEvalVars):
    # i+offset_col advances by two per variable: its Min column, then its Max column
    clamp_mins[layout[i+offset_name]] = JaggedArray.fromcounts(jagged_counts,np.atleast_1d(pars[columns[i+offset_col]]))
    clamp_maxs[layout[i+offset_name]] = JaggedArray.fromcounts(jagged_counts,np.atleast_1d(pars[columns[i+offset_col+1]]))
    var_order.append(layout[i+offset_name])
    offset_col += 1

In [7]:
#now get the parameters, which we will look up with the clamps
# the parameter columns follow the bin (Min,Max) pairs, NVars and the clamp (Min,Max) pairs
offset_col = 2*nBinnedVars+1 + 2*nEvalVars
parm_order = []
parms = []
for i in range(nParms):
    parm_column = pars[columns[offset_col+i]]
    # same jagged layout as the clamp tables
    parms.append(JaggedArray.fromcounts(jagged_counts,parm_column))
    parm_order.append('p%i'%(i))

In [8]:
# Print everything parsed so far: bin edges, clamp tables, parameter tables
# and the final formula string.
report_sections = (
    ('bins list     :', bin_order,  'bins',           (bins,)),
    ('var list      :', var_order,  'var clamps LUT', (clamp_mins, clamp_maxs)),
    ('parameter list:', parm_order, 'parameters LUT', (parms,)),
)
for list_label, ordering, table_label, tables in report_sections:
    print(list_label,ordering)
    print(table_label)
    for table in tables:
        print(table)
    print()

print('formula       :',formula)

bins list     : ['JetEta']
bins
{'JetEta': array([-5.191, -4.889, -4.716, -4.538, -4.363, -4.191, -4.013, -3.839,
       -3.664, -3.489, -3.314, -3.139, -2.964, -2.853, -2.65 , -2.5  ,
       -2.322, -2.172, -2.043, -1.93 , -1.83 , -1.74 , -1.653, -1.566,
       -1.479, -1.392, -1.305, -1.218, -1.131, -1.044, -0.957, -0.879,
       -0.783, -0.696, -0.609, -0.522, -0.435, -0.348, -0.261, -0.174,
       -0.087,  0.   ,  0.087,  0.174,  0.261,  0.348,  0.435,  0.522,
        0.609,  0.696,  0.783,  0.879,  0.957,  1.044,  1.131,  1.218,
        1.305,  1.392,  1.479,  1.566,  1.653,  1.74 ,  1.83 ,  1.93 ,
        2.043,  2.172,  2.322,  2.5  ,  2.65 ,  2.853,  2.964,  3.139,
        3.314,  3.489,  3.664,  3.839,  4.013,  4.191,  4.363,  4.538,
        4.716,  4.889,  5.191])}

var list      : ['Rho', 'JetPt', 'JetA']
var clamps LUT
{'JetA': <JaggedArray [[0.] [0.] [0.] ... [0.] [0.] [0.]] at 000125c32710>, 'JetPt': <JaggedArray [[1.] [1.] [1.] ... [1.] [1.] [1.]] at 000125c32690>, 'Rho'