In [1]:
from __future__ import print_function
import numpy as np
import numba
import awkward
import uproot_methods

# Seed NumPy's global random generator so the randomly generated inputs in the
# cells below come out the same on every fresh run.
np.random.seed(4)
# Number of rows in the jagged test arrays (one per-row count is drawn for each).
nrows = 50000

In [2]:
def comb(counts, content):
    """Sum every unordered pair of items inside each row of a jagged array.

    Parameters
    ----------
    counts : integer NumPy array
        Row lengths; row ``k`` owns the next ``counts[k]`` entries of
        ``content`` along axis 0.
    content : NumPy array
        Flat item storage. Any trailing axes are carried through unchanged.

    Returns
    -------
    tuple ``(pair_counts, pair_sums)``
        ``pair_counts[k]`` is ``counts[k] * (counts[k] - 1) // 2`` and
        ``pair_sums`` holds ``content[a] + content[b]`` for every ``a < b``
        within a row, rows in order.

    The body sticks to plain ``range`` loops so that the very same function
    can also be handed to ``numba.njit``.
    """
    pair_counts = counts * (counts - 1) // 2
    # Allocated with np.empty's default dtype, independent of content.dtype.
    pair_sums = np.empty((np.sum(pair_counts),) + content.shape[1:])
    row_start = 0
    filled = 0
    for row_len in counts:
        row_stop = row_start + row_len
        for first in range(row_start, row_stop):
            # Only partners after `first`, so each pair is produced once.
            for second in range(first + 1, row_stop):
                pair_sums[filled] = content[first] + content[second]
                filled += 1
        row_start = row_stop
    return (pair_counts, pair_sums)

# Numba nopython-mode (njit) wrapper around the very same `comb` function, so the
# timing cells below can compare it against the plain Python version.
comb_fast = numba.njit(comb)

def awk_comb(awk):
    """Pairwise sums computed with awkward's own ``pairs`` method.

    Calls ``awk.pairs(same=False)`` and adds the ``"0"`` and ``"1"`` fields
    of the result together.
    """
    pair_table = awk.pairs(same=False)
    left, right = pair_table["0"], pair_table["1"]
    return left + right

def py_comb(awk):
    """Pairwise sums of ``awk`` using the plain-Python ``comb`` loop.

    Passes ``awk.counts`` and ``awk.content`` to ``comb`` and rebuilds a
    jagged array from the returned (counts, content) pair.
    """
    return awkward.JaggedArray.fromcounts(*comb(awk.counts, awk.content))

def py_comb_fast(awk):
    """Pairwise sums of ``awk`` using the Numba-compiled ``comb_fast``.

    Same wrapping as the pure-Python variant: the (counts, content) pair
    returned by the kernel is turned back into a jagged array.
    """
    pair_counts, pair_sums = comb_fast(awk.counts, awk.content)
    return awkward.JaggedArray.fromcounts(pair_counts, pair_sums)

In [3]:
# Width of each item in the vector-valued test array.
nwide = 4

# Per-row item counts: exponential draws (scale 2) truncated to int, capped at 20.
raw_counts = np.random.exponential(2, size=nrows)
counts = np.minimum(raw_counts.astype(int), 20)

# One normal draw per component, produced directly as (total items, nwide).
content = np.random.normal(size=(np.sum(counts), nwide))
print(content.shape)

awk_vector = awkward.JaggedArray.fromcounts(counts, content)

(77794, 4)


In [5]:
# force pre-compilation of numba functions
# (one call up front, so the JIT compile for these argument types is not timed below)
_ = py_comb_fast(awk_vector)

# Vector-valued items, in order: awkward's pairs(), the pure-Python loop,
# the Numba loop, and the Numba loop alone without the JaggedArray wrapping.
%timeit awk_comb(awk_vector)
%timeit py_comb(awk_vector)
%timeit py_comb_fast(awk_vector)
%timeit comb_fast(awk_vector.counts, awk_vector.content)

10 loops, best of 3: 25.8 ms per loop
10 loops, best of 3: 149 ms per loop
100 loops, best of 3: 13.6 ms per loop
100 loops, best of 3: 13.1 ms per loop


In [4]:
# Scalar-valued items: one normal draw per item, sharing the row counts
# already used for the vector-valued array.
content = np.random.normal(size=counts.sum())
print(content.shape)

awk_scalar = awkward.JaggedArray.fromcounts(counts, content)

(77794,)


In [7]:
# force pre-compilation of numba functions
# (the 1-D content here is a different argument type from the 2-D vector case,
# so call once more before timing to keep compilation out of the measurement)
_ = py_comb_fast(awk_scalar)

# Scalar-valued items, same four variants as in the vector benchmark.
%timeit awk_comb(awk_scalar)
%timeit py_comb(awk_scalar)
%timeit py_comb_fast(awk_scalar)
%timeit comb_fast(awk_scalar.counts, awk_scalar.content)

100 loops, best of 3: 16.3 ms per loop
10 loops, best of 3: 80.3 ms per loop
1000 loops, best of 3: 728 µs per loop
1000 loops, best of 3: 573 µs per loop


In [5]:
# Keep the vector items whose matching scalar exceeds 2, pair them up within
# each row, then collapse every pair's summed vector to one number.
pairs = awk_vector[awk_scalar > 2].pairs(same=False)
summed_vectors = (pairs["0"] + pairs["1"]).flatten()
psum = np.sum(summed_vectors, axis=1)
psum.shape

(70,)

In [11]:
# Bundle the vector and scalar payloads into one table per item, keeping the
# same jagged row structure.
awk_table = awkward.JaggedArray.fromcounts(
    counts,
    awkward.Table({'p4': awk_vector.content, 'mva': awk_scalar.content}),
)

In [30]:
# Candidate pairs built only from items whose 'mva' value is above 0.1.
cands = (
    awk_table
    .filter(lambda item: item["mva"] > 0.1)
    .pairs(same=False)
)

In [40]:
# Of the candidate pairs, keep those whose first member has the larger 'mva',
# add the two 'p4' vectors of each kept pair, and flatten for display.
(
    cands[cands['0']['mva'] > cands['1']['mva']]
    .apply(lambda pair: pair['0']['p4'] + pair['1']['p4'])
    .flatten()
)

array([[-0.70347653, -1.86507534,  1.52660793, -0.30144823],
       [-0.30629157, -1.74131868,  2.56904999, -0.64787003],
       [-1.19569064,  0.08437995,  0.44131212,  0.84862772],
       ...,
       [ 2.82717337, -1.65248704, -2.56233534, -0.53740854],
       [ 1.86148622, -0.20414302, -1.52229017,  0.2067014 ],
       [ 1.97859094,  1.14343086,  2.03619349,  3.32994635]])

array([[-0.64924406, -0.19854086, -0.97910599,  1.17957954],
       [ 1.4007266 , -1.80694058, -0.45575892,  1.1740093 ],
       [-0.62125129, -1.55699383,  0.79410251, -0.8170691 ]])

In [None]:
# Sketch of the same filter -> pairs -> apply chain written as a single analysis expression.
# NOTE(review): `physt` and `t` are not imported or defined in the cells visible here,
# and this cell has no execution count — confirm it is meant to be runnable as-is.
physt.h1(t.array("muonp4")                          # get the muon 4-vectors
          .filter(lambda muon: abs(muon.eta) < 1)   # select central muons (select particles, not events)
          .pairs(same=False)                        # form all non-duplicate pairs
          .apply(lambda a, b: a + b)                # compute Z candidates as 4-vector sums
          .maxby(lambda z: z.pt)                    # select one per event, the highest pT
          .flatten()                                # flatten [x] → x and [] → nothing (ignore empty events)
          .mass,                                    # compute the masses of what remains
         bins=100).plot()