In [1]:
from __future__ import print_function
import numpy as np
import numba
import awkward
import uproot_methods

JaggedWithLorentz = awkward.Methods.mixin(uproot_methods.classes.TLorentzVector.ArrayMethods, awkward.JaggedArray)

np.random.seed(4)
nrows = 100000

def p4_pt(p4):
    # p4 is [n,4] numpy array
    # returns [n] numpy array
    if isinstance(p4, awkward.array.jagged.JaggedArray):
        outjagged = p4.flatten()
        outjagged = np.sqrt(outjagged[:,0]**2 + outjagged[:,1]**2)
        outjagged = awkward.array.jagged.JaggedArray.fromcounts(p4.counts,outjagged)
        return outjagged
    return np.sqrt(p4[:,0]**2 + p4[:,1]**2)

def p4_eta(p4):
    if isinstance(p4, awkward.array.jagged.JaggedArray):
        temp = p4.flatten()
        p42_temp = temp**2
        p3_temp = np.sqrt(np.sum(p42_temp[:,:3], axis=-1))
        outjagged = temp[:,2]/p3_temp
        outjagged = awkward.array.jagged.JaggedArray.fromcounts(p4.counts,outjagged)
        return outjagged
    
    p42 = p4**2
    p3 = np.sqrt(np.sum(p42[:,:3], axis=-1))
    return p4[:,2]/p3

In [2]:
def comb(counts, content):    
    out_counts = counts*(counts-1)//2
    out = np.empty((np.sum(out_counts), ) + content.shape[1:])
    i = 0
    n = 0
    for c in counts:
        for i1 in range(i, i+c):
            for i2 in range(i1+1, i+c):
                out[n] = content[i1]+content[i2]
                n += 1
        i += c
    return (out_counts, out)

comb_fast = numba.njit(comb)

def awk_comb(awk):
    c = awk.pairs(same=False)
    return c["0"] + c["1"]

def py_comb(awk):
    counts, content = comb(awk.counts, awk.content)
    return awkward.JaggedArray.fromcounts(counts, content)

def py_comb_fast(awk):
    counts, content = comb_fast(awk.counts, awk.content)
    return awkward.JaggedArray.fromcounts(counts, content)

In [3]:
nwide = 4
counts = np.minimum(np.random.exponential(2, size=nrows).astype(int), 20)
content = np.random.normal(size=np.sum(counts)*nwide).reshape((-1, nwide))
print(content.shape)

#awk_vector = JaggedWithLorentz.fromcounts(counts, uproot_methods.TLorentzVectorArray(content[:,0],
#                                                                                     content[:,1],
#                                                                                     content[:,2],
#                                                                                     content[:,3]))
awk_vector = awkward.JaggedArray.fromcounts(counts,content)


print(awk_vector)


(154681, 4)
[[[ 0.82966453 -0.6493784   0.4638781   1.08256688]
 [ 0.42353076 -0.3056094  -1.89587598 -1.06317631]
 [ 0.00435886 -0.25589477 -0.41163191  0.22379419]
 [-0.22135075  0.44120887 -0.61399284  0.2242599 ]
 [ 0.51644472  0.25011381  1.05840583 -0.31042362]
 [-1.36356791 -1.50411469  0.20415976  0.27091941]] [[-0.55307481  0.52722201  0.2501546  -0.23382642]] [[-0.94890447  0.54689286 -0.669688    1.4998046 ]
 [ 0.11349835  2.47502909 -0.48507223 -1.38820333]
 [-1.18270866 -0.08598209  0.18268521  0.55910591]
 [-0.57820059 -0.90292866 -0.0556341  -0.46287223]
 [-0.05197759 -0.16675332 -1.39198953  0.27531389]
 [ 0.09409506 -0.67674997  0.22688327  0.39173521]
 [ 0.38476455  2.45429354 -0.43685351  1.45636854]] ... [] [] [[-0.40646968 -3.61691334  2.33074252  0.33312046]
 [ 1.38970752  0.52335243 -0.71255895 -0.3382541 ]
 [-1.2427996   1.70273496 -0.22219555 -0.13674452]
 [-1.16540572 -1.41314466  0.1183768   0.74704469]
 [ 2.08417771 -1.23632849 -0.13528222 -0.16965481]
 [ 2.

In [4]:
print(awk_vector,'\n\n', awkward.array.jagged.JaggedArray)

[[[ 0.82966453 -0.6493784   0.4638781   1.08256688]
 [ 0.42353076 -0.3056094  -1.89587598 -1.06317631]
 [ 0.00435886 -0.25589477 -0.41163191  0.22379419]
 [-0.22135075  0.44120887 -0.61399284  0.2242599 ]
 [ 0.51644472  0.25011381  1.05840583 -0.31042362]
 [-1.36356791 -1.50411469  0.20415976  0.27091941]] [[-0.55307481  0.52722201  0.2501546  -0.23382642]] [[-0.94890447  0.54689286 -0.669688    1.4998046 ]
 [ 0.11349835  2.47502909 -0.48507223 -1.38820333]
 [-1.18270866 -0.08598209  0.18268521  0.55910591]
 [-0.57820059 -0.90292866 -0.0556341  -0.46287223]
 [-0.05197759 -0.16675332 -1.39198953  0.27531389]
 [ 0.09409506 -0.67674997  0.22688327  0.39173521]
 [ 0.38476455  2.45429354 -0.43685351  1.45636854]] ... [] [] [[-0.40646968 -3.61691334  2.33074252  0.33312046]
 [ 1.38970752  0.52335243 -0.71255895 -0.3382541 ]
 [-1.2427996   1.70273496 -0.22219555 -0.13674452]
 [-1.16540572 -1.41314466  0.1183768   0.74704469]
 [ 2.08417771 -1.23632849 -0.13528222 -0.16965481]
 [ 2.68699331  0.

In [5]:
# force pre-compilation of numba functions
_ = py_comb_fast(awk_vector)

#%timeit awk_comb(awk_vector)
#%timeit py_comb(awk_vector)
#%timeit py_comb_fast(awk_vector)
#%timeit comb_fast(awk_vector.counts, awk_vector.content)

In [6]:
import weights as weights
import pickle
import zlib
import numpy as np

# import a bunch of correction histograms
weightsext = weights.extractor()
correctionDescriptions = open("newCorrectionFiles.txt").readlines()
weightsext.add_weight_sets(correctionDescriptions)
weightsext.finalize()
weights_eval = weightsext.make_evaluator()

In [7]:
content = np.random.normal(size=np.sum(counts))
print(content.shape)

awk_scalar = awkward.JaggedArray.fromcounts(counts, content)

#now calculate the SFs
pts = p4_pt(awk_vector)
etas = p4_eta(awk_vector)
awk_scalefacts = weights_eval["phoScaleFactor_MediumId_POG"](etas,pts)

print(np.sum(awk_scalar.counts),' ', np.sum(awk_scalefacts.counts))

(154681,)
[<JaggedArray [[ 0.40295833 -0.96408658 -0.84923668 -0.77936412  0.87911131  0.10005716] [0.31113369] [-0.52166869 -0.19213312  0.1522606  -0.05181846 -0.99221919  0.31513985 -0.17319045] ... [] [] [ 0.5392747  -0.43261533 -0.10482289  0.06449198 -0.0557392   0.00185873]] at 00061543a390>, <JaggedArray [[1.05358224 0.52227905 0.25593189 0.49362072 0.57382233 2.03019173] [0.76410392] [1.09522212 2.47763009 1.18582995 1.07219219 0.17466637 0.68326012 2.48427062] ... [] [] [3.63968126 1.48498645 2.10804583 1.8317064  2.42328389 2.77460912]] at 0006141bce90>]

0 <class 'awkward.array.jagged.JaggedArray'>
<type 'numpy.ndarray'>
1 <class 'awkward.array.jagged.JaggedArray'>
<type 'numpy.ndarray'>
[-2.5   -2.    -1.566 -1.444 -0.8    0.     0.8    1.444  1.566  2.
  2.5  ] (5, 10)
[ 20.  35.  50.  90. 150. 500.] (5, 10)
[[0.96595746 0.9744624  0.9744624  0.9631728  0.97834915 0.96595746] [0.96595746] [0.9631728  0.9631728  0.96595746 0.9631728  0.9744624  0.96595746 0.9631728 ] ... [

In [8]:
# force pre-compilation of numba functions
_ = py_comb_fast(awk_scalar)

#%timeit awk_comb(awk_scalar)
#%timeit py_comb(awk_scalar)
#%timeit py_comb_fast(awk_scalar)
#%timeit comb_fast(awk_scalar.counts, awk_scalar.content)

In [9]:
pairs = awk_vector[awk_scalar>2].pairs(same=False)
psum = np.sum((pairs["0"]+pairs["1"]).flatten(), axis=1)
psum.shape

(120,)

In [10]:
awk_table = awkward.JaggedArray.fromcounts(counts, awkward.Table({'p4': awk_vector.content, 
                                                                  'mva': awk_scalar.content,
                                                                  'SF': awk_scalefacts.content}))

In [11]:
print(type(awk_table))

sel_muons1 = awk_table.filter(lambda e: e["mva"] > 0.1)
sel_muons2 = awk_table.filter(lambda e: e["mva"] > 0.1)
sel_muons3 = awk_table.filter(lambda e: e["mva"] > 0.1)

cross_test2 = sel_muons1.cross(sel_muons2)
cross_test2 = cross_test2.filter(lambda e: e['0'] != e['1'])

cross_test3 = sel_muons1.cross(sel_muons2).cross(sel_muons3)

print(sel_muons1,'\n',sel_muons2,'\n',sel_muons3)
print(cross_test2)
print(cross_test3)

<class 'awkward.array.jagged.JaggedArray'>


ValueError: operands could not be broadcast together with shapes (171936,) (171936,4) (171936,) 

In [None]:
cands = awk_table.filter(lambda e: e["mva"] > 0.1).pairs(same=False)
print(cands['0']['p4'])
print(cands['0']['SF'])
print(cands['1']['SF'])
print(cands['0']['SF']*cands['1']['SF'])

In [None]:
out = cands[cands['0']['mva'] > cands['1']['mva']].apply(lambda p: p['0']['p4'] + p['1']['p4'])
out_flat = out.flatten()

In [None]:
print(cands["0"]["p4"].flatten())
print(type(out_flat))
pts_flat = p4_pt(cands["0"]["p4"].flatten())
etas_firsts_flat = p4_eta(cands["0"]["p4"].flatten())
pts_seconds_flat = p4_pt(cands["1"]["p4"].flatten())
etas_seconds_flat = p4_eta(cands["1"]["p4"].flatten())
print('try flat eval')
weights_flat = weights_eval["phoScaleFactor_MediumId_POG"](etas_flat,pts_flat)
print('try jagged eval')

print('pts',type(pts),'\n',pts_flat,'\n',pts.counts,'\n',pts.flatten())
print('etas',type(etas),'\n',etas_flat,'\n',etas.counts,'\n',etas.flatten())
print('flat weights:\n',type(weights_flat),'\n',weights_flat)
print('jagged weights:\n',type(weights_jagged),'\n',weights_jagged.flatten())

In [None]:
physt.h1(t.array("muonp4")                          # get the muon 4-vectors
          .filter(lambda muon: abs(muon.eta) < 1)   # select central muons (select particles, not events)
          .pairs(same=False)                        # form all non-duplicate pairs
          .apply(lambda a, b: a + b)                # compute Z candidates from as 4-vector sums
          .maxby(lambda z: z.pt)                    # select one per event, the highest pT
          .flatten()                                # flatten [x] → x and [] → nothing (ignore empty events)
          .mass,                                    # compute the masses of what remains
         bins=100).plot()