# Profiling the reader with a realistic example

In [None]:
import ctypes
import time
import glob

import numba
import ROOT
# Canvas on which the later cells draw the histogram.
c = ROOT.TCanvas()

The function we'll use as a test is going to make a histogram of Z mass for a large dataset. To keep the histogramming part from slowing down the I/O part, we need direct access to histogram filling.

In [None]:
# Integer-binned 1D histogram named "h": 1000 bins covering 0 to 500.
h = ROOT.TH1I("h", "", 1000, 0, 500)

# Declare a free C++ function that fills h, so that there is a plain function
# pointer to call (the call itself does not go through a PyROOT proxy).
ROOT.gInterpreter.Declare("void h_Fill(double x) { h->Fill(x); }")

# Store the function's address in a C++ integer variable, which is then read
# from Python as ROOT.ptr_h_Fill.
ROOT.gInterpreter.Declare("size_t ptr_h_Fill = reinterpret_cast<size_t>(h_Fill);")

# Use Python's ctypes to interpret that integer as a callable function taking
# one double and returning nothing (void).
h_Fill = ctypes.CFUNCTYPE(None, ctypes.c_double)(ROOT.ptr_h_Fill)

`fillmany_pyroot` is a Python function that calls a PyROOT proxy.

`fillmany_direct` is an LLVM-compiled function that directly calls `h_Fill`.

In [None]:
def fillmany_pyroot(howmany):
    """Fill histogram h with the value 250, howmany times, via its PyROOT proxy."""
    for _ in xrange(howmany):
        h.Fill(250)   # method call on the proxy object (dot)

@numba.njit
def fillmany_direct(howmany):
    """Fill histogram h with the value 250, howmany times, via the raw pointer.

    Compiled by Numba; each iteration calls the ctypes-wrapped C++ function
    h_Fill directly instead of going through a PyROOT proxy.
    """
    # range (not xrange) matches fillbatch and is the spelling Numba accepts
    # on both Python 2 and Python 3; in compiled code it does not build a list
    for _ in range(howmany):
        h_Fill(250)   # function-pointer call (underscore), no proxy

# Call it once with howmany=0 so that Numba compiles it now (instead of
# explicitly spelling out the signature); zero iterations leave h unfilled.
fillmany_direct(0)

Compiled code without proxies is faster.

In [None]:
# Time a single run of ten million fills through the PyROOT proxy.
%time fillmany_pyroot(10000000)

In [None]:
# Time a single run of ten million fills through the compiled direct call.
%time fillmany_direct(10000000)

It also has the desired effect of filling the histogram.

In [None]:
# Draw the filled histogram, then the canvas.
h.Draw(); c.Draw()

Okay! We're ready to run a sample analysis!

In [None]:
# First clear the contents filled by the timing cells, then redraw the
# (now empty) histogram and the canvas.
h.Reset(); h.Draw(); c.Draw()

We could do the mass calculation in compiled Python.

Then again, we could do it in ROOT.

In [None]:
# Referencing the class causes ROOT to dynamically load the library that
# provides TLorentzVector, so the C++ declaration below can use it.
ROOT.TLorentzVector

# Declare mass(): builds two four-vectors from (pt, eta, phi) with the mass
# argument set to 0 and returns the mass of their sum. v1 and v2 are C++
# globals that every call overwrites. ptr_mass holds the function's address
# as an integer so that Python can read it.
ROOT.gInterpreter.Declare("""
TLorentzVector v1, v2;
double mass(double pt1, double eta1, double phi1, double pt2, double eta2, double phi2) {
    v1.SetPtEtaPhiM(pt1, eta1, phi1, 0);
    v2.SetPtEtaPhiM(pt2, eta2, phi2, 0);
    return (v1 + v2).M();
}

size_t ptr_mass = reinterpret_cast<size_t>(mass);
""")

# Wrap the address as a ctypes callable: six doubles in, one double out.
mass = ctypes.CFUNCTYPE(ctypes.c_double,                                    # return type
                        ctypes.c_double, ctypes.c_double, ctypes.c_double,  # parameter types
                        ctypes.c_double, ctypes.c_double, ctypes.c_double
                       )(ROOT.ptr_mass)

Load the first file for testing.

In [None]:
# Open the first file of the dataset and fetch its "Events" object, which the
# cells below iterate over.
f = ROOT.TFile("/mnt/data/DYJetsToLL_M_50_HT_100to200_13TeV_2/DYJetsToLL_M_50_HT_100to200_13TeV_2_0.root")
t = f.Get("Events")

The following was built up by experimentation on the first file, then wrapped up as a function (with an explicit Numba signature).

In [None]:
# Explicit signature: returns nothing; takes one int32 array and three float32 arrays.
@numba.njit(numba.types.none(numba.types.int32[:],      # counter
                             numba.types.float32[:],    # pt
                             numba.types.float32[:],    # eta
                             numba.types.float32[:]))   # phi
def fillbatch(counter, pt, eta, phi):
    """Fill h with the mass of every distinct pair of muons within each entry.

    counter[k] is the number of muons in entry k; pt, eta and phi hold the
    muons of all entries back to back, in entry order.
    """
    begin = 0                                  # index of the current entry's first muon
    for count in counter:
        end = begin + count                    # one past the current entry's last muon

        # every unordered pair (mu1 < mu2) inside this entry; this kind of
        # nested, variable-length loop is too complex for Numpy or SQL
        for mu1 in range(begin, end):
            for mu2 in range(mu1 + 1, end):
                h_Fill(mass(pt[mu1], eta[mu1], phi[mu1], pt[mu2], eta[mu2], phi[mu2]))

        begin = end                            # next entry starts where this one stopped

It's pretty fast.

In [None]:
# Wall-clock timing of one pass over the test file. Each iteration yields a
# batch: start and end (their difference is used elsewhere in this notebook as
# the number of events in the batch) plus the counter, pt, eta and phi arrays,
# which go straight to the compiled fillbatch.
startTime = time.time()
for start, end, counter, pt, eta, phi in t.GetNumpyIterator("Muon_", "Muon.pt", "Muon.eta", "Muon.phi"):
    fillbatch(counter, pt, eta, phi)
print(time.time() - startTime)   # elapsed seconds

So we do a scan over all the files (21.4 GB). See next page for results.

In [None]:
# Start from an empty histogram so that it reflects only this scan.
h.Reset()

startTime = time.time()
totalEvents = 0
totalBytes = 0
for fname in sorted(glob.glob("/mnt/data/*/*.root")):
    f = ROOT.TFile(fname)
    t = f.Get("Events")

    for start, end, counter, pt, eta, phi in t.GetNumpyIterator("Muon_", "Muon.pt", "Muon.eta", "Muon.phi"):
        fillbatch(counter, pt, eta, phi)
        totalEvents += end - start
        # counter holds one int32 per entry (not per muon); pt, eta and phi
        # each hold one float32 per muon, and counter.sum() is the muon count
        totalBytes += counter.nbytes + counter.sum() * 3 * 4

# wall-clock seconds for the whole scan, including opening the files
totalTime = time.time() - startTime
ev_per_s = totalEvents/totalTime/1e6       # millions of events per second
mb_per_s = totalBytes/totalTime/1024**2    # megabytes (2**20 bytes) per second

In [None]:
# Report the totals and the single-threaded rates computed from the scan
# (ev_per_s is in millions of events per second, hence the "MHz" label).
print("""\nTotal events: {}, total time: {} s, total size: {} MB
Single-threaded rate: {:.1f} MHz, {:.1f} MB/s""".format(
    totalEvents, totalTime, totalBytes/1024**2, ev_per_s, mb_per_s))

# Switch the canvas to a logarithmic y axis, then draw the histogram and canvas.
c.SetLogy(); h.Draw(); c.Draw()