# Profiling on a fast disk

(This is why we're using an AWS instance; it's an `i2.2xlarge` with a large, fast SSD.)

In [None]:
import time
import glob

import numpy
import ROOT

Start with some large files.

In [None]:
# Alternative input location, kept for reference:
# filelist = sorted(glob.glob("/mnt/data/SingleElectronRun2015D_16Dec2015_v1/SingleElectron/CRAB3/160418_185159/000*/*.root"))
filelist = sorted(glob.glob("/mnt/data/SingleElectron_uncompressed/000*/*.root"))

# glob returns an empty list when nothing matches; fail here with a clear
# message instead of an opaque IndexError on filelist[0].
if not filelist:
    raise IOError("no ROOT files match /mnt/data/SingleElectron_uncompressed/000*/*.root")

fname = filelist[0]   # first file in sorted order; opened below to inspect its branches

In [None]:
# Open the first input file (fname) and fetch the object stored under the key
# "Events"; the following cells query it for its branches.
f = ROOT.TFile(fname)
t = f.Get("Events")

Take all 4-byte sub-branches of `Electron` (data were triggered on electrons) and of `Info`, in addition to the `Electron_` branch itself.

In [None]:
# Seed the list with "Electron_", then add every sub-branch of "Electron" and
# "Info" whose reported item size is exactly 4 bytes.
branches = ["Electron_"]
for base in ("Electron", "Info"):
    for b in t.GetBranch(base).GetListOfBranches():
        try:
            # Entry 0 of the info describes this branch; element 1 of that entry
            # exposes `.itemsize` (presumably a NumPy dtype -- TODO confirm).
            itemsize = t.GetNumpyIteratorInfo(b)[0][1].itemsize
        except ValueError:
            # The branch was rejected with ValueError: leave it out.
            continue
        if itemsize == 4:
            branches.append(b.GetName())

In [None]:
# Number of selected branch names (this count includes the initial "Electron_" entry).
len(branches)

In [None]:
# Distinct values found at position 1 of each per-branch info entry -- the same
# position whose `.itemsize` was compared against 4 when selecting the branches.
{info[1] for info in t.GetNumpyIteratorInfo(*branches)}

In [None]:
# Read every file once through the NumPy iterator, keeping running totals of
# events and bytes, and report the cumulative throughput every 10 files.
totalEvents, totalBytes = 0, 0
startTime = time.time()

for findex, fname in enumerate(filelist):
    f = ROOT.TFile(fname)
    t = f.Get("Events")

    for stuff in t.GetNumpyIterator(*branches, return_new_buffers=False):
        # The first two items are the start/end of the range just read; the
        # rest are the data (presumably one array per requested branch).
        start, end = stuff[0], stuff[1]
        data = stuff[2:]
        totalEvents += end - start
        totalBytes += 4 * sum(len(x) for x in data)        # all the branches are 4 bytes wide

    if findex % 10 != 0:                                   # only report on files 0, 10, 20, ...
        continue

    # Rates are averages over everything read since startTime, not per-file rates.
    elapsedTime = time.time() - startTime
    ev_per_s = totalEvents / elapsedTime
    mb_per_s = totalBytes / elapsedTime / 1024**2
    report = "events: {}\ttime: {}\trate: {} kHz, {} MB/s".format(
        totalEvents, elapsedTime, ev_per_s*1e-3, mb_per_s)
    print(report)

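This rate is oddly decreasing (not a memory leak: memory use stays low). Note that the printed rate is the cumulative average since the start of the loop, so a falling value means the later files are being read more slowly than the earlier ones.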

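Also, it's quite a bit less than the raw I/O rate of about 340 MB/s, which is measured below by streaming a directory of files (a different dataset on the same disk) through `cat`.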

In [None]:
# Baseline: time how long it takes to stream every file in this directory to /dev/null.
!time cat /mnt/data/DYJetsToLL_M_50_HT_100to200_13TeV_2/* > /dev/null

In [None]:
# Total size of the same directory, in megabytes (-m), as a single summary line (-s).
!du -ms /mnt/data/DYJetsToLL_M_50_HT_100to200_13TeV_2

In [None]:
# Raw read rate in MB/s: presumably the size in MB reported by `du` divided by the
# elapsed seconds reported by `time` -- TODO confirm. Both numbers are typed in by
# hand, so update them if the two cells above are re-run.
6140 / 17.96