# Profiling on a fast disk

(This is why we're using an AWS instance; it's an `i2.2xlarge` with a large, fast SSD.)

In [None]:
import time
import glob

import numpy
import ROOT

Start with some large files.

In [None]:
# Input selection for the profiling run.
# Alternative input, kept for reference (the CRAB output directory):
# filelist = sorted(glob.glob("/mnt/data/SingleElectronRun2015D_16Dec2015_v1/SingleElectron/CRAB3/160418_185159/000*/*.root"))
filePattern = "/mnt/data/SingleElectron_uncompressed/000*/*.root"
filelist = sorted(glob.glob(filePattern))    # sorted so the file order is the same on every run

# Fail with a readable message instead of a bare IndexError from filelist[0]
# when the pattern matches nothing (e.g. the data volume is not mounted).
if not filelist:
    raise IOError("no ROOT files match {!r}".format(filePattern))

fname = filelist[0]                          # first file; used to inspect the branch layout

In [None]:
# Open the first file of the list and fetch the object stored under the key "Events".
# The following cells call GetBranch / GetNumpyIteratorInfo on `t` to choose branches.
f = ROOT.TFile(fname)
t = f.Get("Events")    # presumably a TTree — TODO confirm against the file contents

Take all 4-byte branches under `Electron` and `Info` (the data were triggered on electrons).

In [None]:
# Build the list of branch names to read: the seed entry "Electron_"
# (presumably the per-event electron counter — TODO confirm) plus every
# sub-branch of "Electron" and "Info" whose reported item size is 4 bytes.
branches = ["Electron_"]
for collection in ("Electron", "Info"):
    for subbranch in t.GetBranch(collection).GetListOfBranches():
        try:
            itemsize = t.GetNumpyIteratorInfo(subbranch)[0][1].itemsize
            if itemsize != 4:
                continue                                # not a 4-byte type: leave it out
            branches.append(subbranch.GetName())
        except ValueError:
            continue                                    # unhandled type (such as std::bitset): leave it out

In [None]:
# How many branch names were selected (the count includes the initial "Electron_" entry).
len(branches)

In [None]:
# Distinct second fields of the per-branch iterator info
# (presumably the numpy dtypes of the selected branches — TODO confirm).
{info[1] for info in t.GetNumpyIteratorInfo(*branches)}

You may want to exit presentation mode (click the X) to see the output better.

In [None]:
# Read the selected branches from every file and report the cumulative throughput.
totalEvents = 0
totalBytes = 0
startTime = time.time()
lastIndex = len(filelist) - 1

# The loop-local names (path, rootFile, events) are deliberately different from the
# fname / f / t handles created in earlier cells: this cell no longer rebinds them,
# and they are never left pointing at a file that has been closed here.
for findex, path in enumerate(filelist):
    rootFile = ROOT.TFile(path)
    try:
        events = rootFile.Get("Events")

        # Each item starts with an entry range (start, end); the remaining elements are the
        # data sequences (presumably one array per requested branch — TODO confirm).
        # NOTE(review): return_new_buffers=False presumably means the arrays are reused between
        # iterations, so nothing from `data` is kept beyond the current iteration — confirm.
        for stuff in events.GetNumpyIterator(*branches, return_new_buffers=False):
            start, end = stuff[:2]
            data = stuff[2:]
            totalEvents += end - start
            totalBytes += 4 * sum(len(x) for x in data)    # all the branches are 4 bytes wide
    finally:
        # Release the file handle now rather than whenever the object is garbage-collected.
        rootFile.Close()

    # Print every 10 files, and once more after the last file so the final totals are always shown.
    if findex % 10 == 0 or findex == lastIndex:
        elapsedTime = time.time() - startTime
        ev_per_s = totalEvents / elapsedTime
        mb_per_s = totalBytes / elapsedTime / 1024**2
        print("events: {}\ttime: {}\trate: {} kHz, {} MB/s".format(
            totalEvents, elapsedTime, ev_per_s*1e-3, mb_per_s))

   * If we do this over recently accessed data, we see the effect of Linux file-page caching (scaling up to 100 MB/s).
   * If we let it run for a while, the starting rate of 45 MB/s sinks to 15 MB/s or less.
   * This slow-down is not "cured" unless we re-start the process (Python+ROOT). However, I don't see any evidence of a memory leak in `htop`.
   * For comparison, the disk's raw sequential read rate is about 380 MB/s (measured below with `cat`), so the disk itself is not the limiting factor.

In [14]:
# Baseline: time a plain sequential read of every file in this directory, discarding the bytes.
!time cat /mnt/data/DYJetsToLL_M_50_HT_200to400_13TeV_2/* > /dev/null

0.00user 1.14system 0:09.44elapsed 12%CPU (0avgtext+0avgdata 1860maxresident)k
7265312inputs+0outputs (0major+138minor)pagefaults 0swaps


In [16]:
# Total size of the same directory: -s prints one summary line, -m reports it in megabyte units.
!du -ms /mnt/data/DYJetsToLL_M_50_HT_200to400_13TeV_2

3548	/mnt/data/DYJetsToLL_M_50_HT_200to400_13TeV_2


In [17]:
# Raw sequential read rate in MB/s: directory size from `du -ms` (3548)
# divided by the elapsed seconds that `time` reported for `cat` (0:09.44).
3548 / (9.44)

375.8474576271187