In [1]:
import numpy as np
import awkward
import uproot_methods
# Display the installed awkward version so the timings recorded below can be tied to it.
awkward.version.version

'0.5.1'

In [2]:
# Size of the toy sample, with a fixed seed so every rerun draws the same events.
nevents = 50000
np.random.seed(42)
# Objects per event: exponential draw (scale 2), truncated to an integer, capped at 20.
counts = np.random.exponential(scale=2, size=nevents).astype(int).clip(max=20)

def fix_argpairs(a, s):
    """Shift both pair-index columns of `a` by the start offsets of `s`.

    Workaround for a bug in awkward; expected to become unnecessary once the
    library is fixed.

    Parameters
    ----------
    a : object exposing `_0` and `_1`
        Presumably the index pairs returned by an argpairs-style call
        (TODO confirm against the caller).
    s : object exposing `starts`
        Its `starts` are subtracted from both members of every pair.

    Returns
    -------
    awkward.Table
        Table with columns '0' and '1' holding the shifted values.
    """
    offsets = s.starts
    # Only members 0 and 1 are handled; the original author left an open
    # question ("... n?"), presumably about extending this to n-tuples.
    shifted = {'0': a._0 - offsets, '1': a._1 - offsets}
    return awkward.Table(shifted)


def make_jagged(a):
    """Wrap the flat array `a` in a JaggedArray whose sub-array sizes are the global `counts`."""
    return awkward.JaggedArray.fromcounts(counts, a)


# Total number of objects over all events.
nobj = counts.sum()

# Flat kinematics. The order of the random draws is kept fixed so the seeded
# sample stays reproducible.
px = np.random.normal(loc=0., scale=20., size=nobj)
py = np.random.normal(loc=0., scale=20., size=nobj)
# pz = sqrt(px^2 + py^2) * sinh(u), with u drawn uniformly between -2.5 and 2.5
pz = np.sinh(np.random.uniform(low=-2.5, high=2.5, size=nobj)) * np.sqrt(px**2+py**2)
# Energy from momentum plus a small mass term
# (0.000511 is presumably the electron mass in GeV -- TODO confirm).
pE = np.sqrt(px*px+py*py+pz*pz+0.000511**2)

# turn into jagged arrays
p4 = uproot_methods.TLorentzVectorArray.from_cartesian(*map(make_jagged, (px, py, pz, pE)))

# Per-object charge (+1 or -1) and a standard-normal identification variable.
q = make_jagged(2*np.random.randint(2, size=nobj) - 1)
idvar = make_jagged(np.random.normal(size=nobj))

# structured style: one table row per object, grouped into events
objects = make_jagged(
    awkward.Table({'p4': p4.content, 'q': q.content, 'idvar': idvar.content})
)

In [3]:
# object vs scalar fancy indexing
# Both lines select events with counts==2 and then apply a reducer (presumably per event):
#   - p4 is the Lorentz-vector array, reduced with sum()
#   - q is the plain jagged array of +1/-1 values, reduced with prod()
%timeit p4[p4.counts==2].sum()
%timeit q[q.counts==2].prod()

1000 loops, best of 3: 1.7 ms per loop
100 loops, best of 3: 16.5 ms per loop


In [4]:
%%timeit
# stack two categories
# cat1: events with counts==2 and charge product -1; cat2: same but charge product +1.
# NOTE(review): `(p4.pt>20.)` is a boolean mask with the same structure as p4, so its
# `.counts` presumably equals `p4.counts` whatever the pt values are. The pt>20 cut
# then looks like a no-op -- confirm whether counting True entries was intended.
cat1 = ((p4.pt>20.).counts==2) & (q.prod()==-1)
cat2 = ((p4.pt>20.).counts==2) & (q.prod()==1)
# Candidate = four-vector sum of the first and second object of each selected event.
cands1 = p4[cat1][:,0]+p4[cat1][:,1]
cands2 = p4[cat2][:,0]+p4[cat2][:,1]
# Concatenate the candidate masses of both categories into one flat array.
np.hstack([cands1.mass, cands2.mass])

10 loops, best of 3: 26.2 ms per loop


In [5]:
%%timeit
# create only leading candidates
# Event selection: counts==2 and charge product -1.
# NOTE(review): `(idvar>0.1).counts` presumably counts all entries of the boolean mask,
# not only the True ones, so the idvar>0.1 requirement may not be applied -- confirm.
cat = ((idvar>0.1).counts==2) & (q.prod()==-1)
# Candidate = four-vector sum of the first and second object of each selected event.
cands = p4[cat][:,0]+p4[cat][:,1]
cands.mass

100 loops, best of 3: 1.95 ms per loop


In [6]:
%%timeit
# create distincts
# Same event selection as a direct [:,0] + [:,1] sum, but the pair is built with distincts().
# NOTE(review): `(idvar>0.1).counts` presumably counts all entries of the boolean mask,
# not only the True ones, so the idvar>0.1 requirement may not be applied -- confirm.
cat = ((idvar>0.1).counts==2) & (q.prod()==-1)
# distincts() presumably yields the distinct object pairs within each event;
# at(0) / at(1) are then the two members of each pair.
p = p4[cat].distincts()
# Mass of the summed pair; [:,0] keeps the first pair of every event.
(p.at(0) + p.at(1)).mass[:,0]

100 loops, best of 3: 5.51 ms per loop


In [7]:
%%timeit
# create candidates via independent columnar operations
# Pairs are built separately for each column (four-vectors, charges, id variable).
p4_pairs = p4.distincts()
q_pairs = q.distincts()
idvar_pairs = idvar.distincts()
# Keep pairs where both members have idvar > 0.1 and the charges multiply to -1.
good = (idvar_pairs.at(0) > 0.1) & (idvar_pairs.at(1) > 0.1) & (q_pairs.at(0)*q_pairs.at(1) == -1)
# Four-vector sum of each pair; the mask is applied after the mass is computed for all pairs.
zp4 = p4_pairs.at(0) + p4_pairs.at(1)
zp4.mass[good]

10 loops, best of 3: 106 ms per loop


In [8]:
%%timeit
# create candidates from structure
# A single distincts() call on the structured `objects` array pairs all fields at once.
zcands = objects.distincts()
# Keep pairs where both members have idvar > 0.1 and the charges multiply to -1.
good = (zcands.at(0)['idvar'] > 0.1) & (zcands.at(1)['idvar'] > 0.1) & (zcands.at(0)['q']*zcands.at(1)['q'] == -1)
# Four-vector sum of each pair; the mask is applied after the mass is computed for all pairs.
zp4 = zcands.at(0)['p4'] + zcands.at(1)['p4']
zp4.mass[good]

10 loops, best of 3: 59.3 ms per loop
