In [20]:
from __future__ import print_function
import numpy as np
import numba
import awkward
import uproot_methods

JaggedWithLorentz = awkward.Methods.mixin(uproot_methods.classes.TLorentzVector.ArrayMethods, awkward.JaggedArray)

np.random.seed(4)
nrows = 100000

def p4_pt(p4):
    # p4 is [n,4] numpy array
    # returns [n] numpy array
    if isinstance(p4, awkward.array.jagged.JaggedArray):
        outjagged = p4.flatten()
        outjagged = np.sqrt(outjagged[:,0]**2 + outjagged[:,1]**2)
        outjagged = awkward.array.jagged.JaggedArray.fromcounts(p4.counts,outjagged)
        return outjagged
    return np.sqrt(p4[:,0]**2 + p4[:,1]**2)

def p4_eta(p4):
    if isinstance(p4, awkward.array.jagged.JaggedArray):
        temp = p4.flatten()
        p42_temp = temp**2
        p3_temp = np.sqrt(np.sum(p42_temp[:,:3], axis=-1))
        outjagged = temp[:,2]/p3_temp
        outjagged = awkward.array.jagged.JaggedArray.fromcounts(p4.counts,outjagged)
        return outjagged
    
    p42 = p4**2
    p3 = np.sqrt(np.sum(p42[:,:3], axis=-1))
    return p4[:,2]/p3

In [21]:
def comb(counts, content):    
    out_counts = counts*(counts-1)//2
    out = np.empty((np.sum(out_counts), ) + content.shape[1:])
    i = 0
    n = 0
    for c in counts:
        for i1 in range(i, i+c):
            for i2 in range(i1+1, i+c):
                out[n] = content[i1]+content[i2]
                n += 1
        i += c
    return (out_counts, out)

comb_fast = numba.njit(comb)

def awk_comb(awk):
    c = awk.pairs(same=False)
    return c["0"] + c["1"]

def py_comb(awk):
    counts, content = comb(awk.counts, awk.content)
    return awkward.JaggedArray.fromcounts(counts, content)

def py_comb_fast(awk):
    counts, content = comb_fast(awk.counts, awk.content)
    return awkward.JaggedArray.fromcounts(counts, content)

In [23]:
nwide = 4
counts = np.minimum(np.random.exponential(2, size=nrows).astype(int), 20)
content = np.random.normal(size=np.sum(counts)*nwide).reshape((-1, nwide))
print(content.shape)

#awk_vector = JaggedWithLorentz.fromcounts(counts, uproot_methods.TLorentzVectorArray(content[:,0],
#                                                                                     content[:,1],
#                                                                                     content[:,2],
#                                                                                     content[:,3]))
awk_vector = awkward.JaggedArray.fromcounts(counts,content)


print(awk_vector)


(155044, 4)
[[] [[ 1.04207277  0.1267892   0.92282871  0.09448219]
 [-0.29999851  0.27206072 -0.0161241  -0.57967003]
 [ 1.30875069 -0.05433927 -0.05991065  1.32216588]
 [-0.64690012  2.15030163 -0.01200628 -1.4149374 ]] [] ... [[-0.88807425  0.48696454 -0.85691186 -1.28675248]
 [-0.45752417  0.71110524 -0.98889041  1.74406337]
 [-0.06262356  0.02991623  0.64694375  1.55028643]] [[ 1.37035845  0.77993088  0.30737432 -0.44359613]] []]


In [24]:
print(awk_vector,'\n\n', awkward.array.jagged.JaggedArray)

[[] [[ 1.04207277  0.1267892   0.92282871  0.09448219]
 [-0.29999851  0.27206072 -0.0161241  -0.57967003]
 [ 1.30875069 -0.05433927 -0.05991065  1.32216588]
 [-0.64690012  2.15030163 -0.01200628 -1.4149374 ]] [] ... [[-0.88807425  0.48696454 -0.85691186 -1.28675248]
 [-0.45752417  0.71110524 -0.98889041  1.74406337]
 [-0.06262356  0.02991623  0.64694375  1.55028643]] [[ 1.37035845  0.77993088  0.30737432 -0.44359613]] []] 

 <class 'awkward.array.jagged.JaggedArray'>


In [25]:
# force pre-compilation of numba functions
_ = py_comb_fast(awk_vector)

#%timeit awk_comb(awk_vector)
#%timeit py_comb(awk_vector)
#%timeit py_comb_fast(awk_vector)
#%timeit comb_fast(awk_vector.counts, awk_vector.content)

In [26]:
import weights as weights
import pickle
import zlib
import numpy as np

# import a bunch of correction histograms
weightsext = weights.extractor()
correctionDescriptions = open("newCorrectionFiles.txt").readlines()
weightsext.add_weight_sets(correctionDescriptions)
weightsext.finalize()
weights_eval = weightsext.make_evaluator()

In [27]:
content = np.random.normal(size=np.sum(counts))
print(content.shape)

awk_scalar = awkward.JaggedArray.fromcounts(counts, content)

#now calculate the SFs
pts = p4_pt(awk_vector)
etas = p4_eta(awk_vector)
awk_scalefacts = weights_eval["phoScaleFactor_MediumId_POG"](etas,pts)

print(np.sum(awk_scalar.counts),' ', np.sum(awk_scalefacts.counts))

(155044,)
[<JaggedArray [[] [ 0.66024152 -0.03978214 -0.0456898  -0.00534674] [] ... [-0.64590175 -0.76003238  0.99429498] [0.19133867] []] at 000619800b90>, <JaggedArray [[] [1.04975767 0.40498907 1.30987829 2.24550148] [] ... [1.01282295 0.84557615 0.06940239] [1.57676075] []] at 000619800c50>]

0 <class 'awkward.array.jagged.JaggedArray'>
<type 'numpy.ndarray'>
1 <class 'awkward.array.jagged.JaggedArray'>
<type 'numpy.ndarray'>
[-2.5   -2.    -1.566 -1.444 -0.8    0.     0.8    1.444  1.566  2.
  2.5  ] (5, 10)
[ 20.  35.  50.  90. 150. 500.] (5, 10)
[[] [0.96595746 0.9631728  0.9631728  0.9631728 ] [] ... [0.9631728  0.9631728  0.97834915] [0.96595746] []]

155044   155044


In [8]:
# force pre-compilation of numba functions
_ = py_comb_fast(awk_scalar)

#%timeit awk_comb(awk_scalar)
#%timeit py_comb(awk_scalar)
#%timeit py_comb_fast(awk_scalar)
#%timeit comb_fast(awk_scalar.counts, awk_scalar.content)

In [9]:
pairs = awk_vector[awk_scalar>2].pairs(same=False)
psum = np.sum((pairs["0"]+pairs["1"]).flatten(), axis=1)
psum.shape

(70,)

In [13]:
awk_table = awkward.JaggedArray.fromcounts(counts, awkward.Table({'p4': awk_vector.content, 
                                                                  'mva': awk_scalar.content,
                                                                  'SF': awk_scalefacts.content}))
print(type(awk_table))

<class 'awkward.array.jagged.JaggedArray'>


In [19]:
cands = awk_table.filter(lambda e: e["mva"] > 0.1).pairs(same=False)
print(cands['0']['p4'])
print(cands['0']['SF'])
print(cands['1']['SF'])
print(cands['0']['SF']*cands['1']['SF'])

[[[ 0.08844541 -1.66709033 -0.39556349  1.08641194]] [] [] ... [] [] []]
[[0.9631728] [] [] ... [] [] []]
[[0.9744624] [] [] ... [] [] []]
[[0.9385757] [] [] ... [] [] []]


In [None]:
out = cands[cands['0']['mva'] > cands['1']['mva']].apply(lambda p: p['0']['p4'] + p['1']['p4'])
out_flat = out.flatten()

In [None]:
print(cands["0"]["p4"].flatten())
print(type(out_flat))
pts_flat = p4_pt(cands["0"]["p4"].flatten())
etas_firsts_flat = p4_eta(cands["0"]["p4"].flatten())
pts_seconds_flat = p4_pt(cands["1"]["p4"].flatten())
etas_seconds_flat = p4_eta(cands["1"]["p4"].flatten())
print('try flat eval')
weights_flat = weights_eval["phoScaleFactor_MediumId_POG"](etas_flat,pts_flat)
print('try jagged eval')

print('pts',type(pts),'\n',pts_flat,'\n',pts.counts,'\n',pts.flatten())
print('etas',type(etas),'\n',etas_flat,'\n',etas.counts,'\n',etas.flatten())
print('flat weights:\n',type(weights_flat),'\n',weights_flat)
print('jagged weights:\n',type(weights_jagged),'\n',weights_jagged.flatten())

In [None]:
physt.h1(t.array("muonp4")                          # get the muon 4-vectors
          .filter(lambda muon: abs(muon.eta) < 1)   # select central muons (select particles, not events)
          .pairs(same=False)                        # form all non-duplicate pairs
          .apply(lambda a, b: a + b)                # compute Z candidates from as 4-vector sums
          .maxby(lambda z: z.pt)                    # select one per event, the highest pT
          .flatten()                                # flatten [x] → x and [] → nothing (ignore empty events)
          .mass,                                    # compute the masses of what remains
         bins=100).plot()