In [1]:
# ---------- Time hipo/extensions/python wrapper ----------#
# Setup directions from your /work/clas12/users/$USER directory:
# mkdir test_gavalian_hipo
# cd test_gavalian_hipo
# git clone --recurse-submodules https://github.com/gavalian/hipo
# cd hipo
# make
# cd examples
# make
# ./writefile.exe
# cd ../extensions/python
# python3 readfile.py

# Add the hipo python API to your python path
import sys
import os
import getpass  # only needed for the fallback below when $USER is not set

# Root of the hipo checkout. NOTE: Set this to wherever you install hipo.
# $USER is used whenever it is set (unchanged behavior); getpass.getuser() is
# only consulted when the variable is missing or empty, which previously made
# this cell fail with a KeyError.
HIPODIR = "/work/clas12/users/" + (os.environ.get('USER') or getpass.getuser()) + "/test_gavalian_hipo/hipo/"

# Make the hipo python wrapper importable. The membership check keeps sys.path
# free of duplicate entries when this cell is re-run in the same kernel.
hipo_python_dir = HIPODIR + 'extensions/python/'
if hipo_python_dir not in sys.path:
    sys.path.append(hipo_python_dir)

# Set your input file name
filename  = HIPODIR+'examples/example_output.hipo' # 25K events
use_rec_banks = False
# filename  = '/path/to/nSidis_005521.hipo' # 2.88M Events
# use_rec_banks = True
filenames = [filename]

# Set maximum number of events to loop in case file is too long
max_events = 10**5

# Set step size for hipopy.hipopy.iterate batches
step = 1000

In [2]:
# Map each bank name to the list of entry names that will be read from it.
# Which map is used depends on the input file selected via use_rec_banks.
if use_rec_banks:
    # Bank/entry names used when use_rec_banks is True
    banks_to_entries = {
        'REC::Calorimeter': [
            'index', 'pindex', 'detector', 'sector', 'layer',
            'energy', 'time', 'path', 'chi2',
            'x', 'y', 'z',
            'hx', 'hy', 'hz',
            'lu', 'lv', 'lw',
            'du', 'dv', 'dw',
            'm2u', 'm2v', 'm2w',
            'm3u', 'm3v', 'm3w',
            'status',
        ],
        'REC::Cherenkov': [
            'index', 'pindex', 'detector', 'sector',
            'nphe', 'time', 'path', 'chi2',
            'x', 'y', 'z',
            'dtheta', 'dphi',
            'status',
        ],
        'REC::CovMat': [
            'index', 'pindex',
            'C11', 'C12', 'C13', 'C14', 'C15',
            'C22', 'C23', 'C24', 'C25',
            'C33', 'C34', 'C35',
            'C44', 'C45',
            'C55',
        ],
        'REC::Event': [
            'category', 'topology', 'beamCharge', 'liveTime', 'startTime',
            'RFTime', 'helicity', 'helicityRaw', 'procTime',
        ],
        'REC::Particle': [
            'pid',
            'px', 'py', 'pz',
            'vx', 'vy', 'vz', 'vt',
            'charge', 'beta', 'chi2pid', 'status',
        ],
        'REC::Scintillator': [
            'index', 'pindex', 'detector', 'sector', 'layer', 'component',
            'energy', 'time', 'path', 'chi2',
            'x', 'y', 'z',
            'hx', 'hy', 'hz',
            'status', 'dedx',
        ],
        'REC::Track': [
            'index', 'pindex', 'detector', 'sector',
            'status', 'q', 'chi2', 'NDF',
        ],
        'REC::Traj': [
            'pindex', 'index', 'detector', 'layer',
            'x', 'y', 'z',
            'cx', 'cy', 'cz',
            'path',
        ],
        'RICH::tdc': ['sector', 'layer', 'component', 'order', 'TDC'],
        'RUN::config': [
            'run', 'event', 'unixtime', 'trigger', 'timestamp',
            'type', 'mode', 'torus', 'solenoid',
        ],
    }
else:
    # Bank/entry names used when use_rec_banks is False (the example output file)
    banks_to_entries = {
        'event::detector': ['pindex', 'detectorid', 'x', 'y', 'z', 'time', 'energy'],
        'event::particle': ['px', 'py', 'pz', 'pid'],
    }


In [3]:
# example of reading hipo file from python
from hipolib import hreader

# Directory holding the shared libraries provided with hipo
pathToSLibs = HIPODIR + 'slib/'

# Module-level reader, opened on the input file; runLoop() below consumes it
reader = hreader(pathToSLibs)
reader.open(filename)

# Register every configured bank so it is read with each next() call
banknames = list(banks_to_entries)
print("banknames = ",banknames)
for bankname in banknames: reader.define(bankname)
    
#---------- Define functions to time ----------#

# Shared event loop used by both timed entry points below
def _read_events(reader, banks_to_entries, max_events):
    """Read every configured entry for up to ``max_events`` events.

    Parameters
    ----------
    reader : object
        Reader exposing ``next()`` and ``getEntry(bankname, entryname)``.
    banks_to_entries : dict
        Maps each bank name to the list of entry names to fetch per event.
    max_events : int
        Upper bound on the number of events to read.

    Returns
    -------
    int
        Number of events actually read.
    """
    n_events = 0
    # The cap is checked BEFORE advancing the reader, so a cap of zero (or
    # less) reads nothing and no event is consumed once the cap is reached.
    # The explicit `== True` comparison is kept from the original loop because
    # the return type of next() is not visible from this notebook.
    while n_events < max_events and reader.next() == True:
        n_events += 1
        for bankname, entrynames in banks_to_entries.items():
            for entryname in entrynames:
                # Fetched purely for timing; the returned value is discarded
                reader.getEntry(bankname, entryname)
    return n_events

# Loop the reader defined above up to max_events
def runLoop():
    """Time only the event loop, using the already opened module-level reader.

    Returns the number of events read. The shared reader is advanced by this
    call, so a second call continues from wherever the first one stopped.
    """
    return _read_events(reader, banks_to_entries, max_events)

# Open file AND loop up to max_events
def runFull():
    """Time opening the file, defining the banks, and looping up to max_events.

    A new reader is created on every call, so the module-level reader is left
    untouched. Returns the number of events read.

    NOTE(review): the reader created here is never closed; the recorded output
    of the timing cell shows the file open handle increasing on every call.
    Confirm whether hipolib offers a way to release it.
    """
    # the directory with shared libraries provided
    fresh_reader = hreader(pathToSLibs)
    fresh_reader.open(filename)

    # define banks that will be read with each next() call
    for bankname in banknames:
        fresh_reader.define(bankname)

    # Loop events
    return _read_events(fresh_reader, banks_to_entries, max_events)

file open handle =  1
banknames =  ['event::detector', 'event::particle']


In [4]:
%timeit -n 1 -r 1 runLoop()
# NOTE: runLoop() consumes the shared module-level reader, so it can only be timed ONCE (hence -n 1 -r 1).
# To time it again you would have to create another reader or set your reader back to the beginning of the file.

6.17 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [5]:
%timeit runFull()

file open handle =  2
file open handle =  3
file open handle =  4
file open handle =  5
file open handle =  6
file open handle =  7
file open handle =  8
file open handle =  9
6.05 s ± 62.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [6]:
#---------- Time hipopy batch iteration ----------#
# Setup directions:
# Use latest version `pip install hipopy==1.3.3`

import hipopy.hipopy as hp

# Banks to read: every bank configured in banks_to_entries
banks = list(banks_to_entries)
print("banks = ",banks)

# Build the shared batch iterator once; the hipopy runLoop() below consumes it
mychain = iter(hp.iterate(filenames, banks=banks, step=step, experimental=True))

#---------- Define functions to time ----------#

# Drain the shared iterator created above, one batch at a time
def runLoop():
    """Pull batches from the module-level ``mychain`` until it has no events left.

    Each batch is fetched and immediately discarded; only the read is timed.
    NOTE: this redefines the hipolib ``runLoop`` from the earlier cell, so the
    order in which the cells are run decides which version gets timed.
    NOTE(review): no max_events cap is applied here, unlike the hipolib loop;
    confirm the comparison is fair for files longer than max_events.
    """
    while mychain.has_events:
        next(mychain)

# Create a new iterator over the input files and exhaust it
def runFull():
    """Open the files with ``hp.iterate`` and step through every batch.

    A new iterator is built on each call, so this can be timed repeatedly.
    NOTE: this redefines the hipolib ``runFull`` from the earlier cell.
    """
    batches = hp.iterate(filenames, banks=banks, step=step, experimental=True)
    for _ in batches:
        continue

banks =  ['event::detector', 'event::particle']


In [7]:
%timeit -n 1 -r 1 runLoop()
# NOTE: runLoop() consumes the shared module-level iterator, so it can only be timed ONCE (hence -n 1 -r 1).
# To time it again you would have to create another iterator or set your iterator back to the beginning of the file.

211 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [8]:
%timeit runFull()

198 ms ± 510 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
