<a target="_blank" href="https://colab.research.google.com/github/mfmceneaney/hipopy/blob/main/tutorials/HipopyTutorial.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

In [1]:
# Install hipopy into the environment of the running kernel.
# %pip (instead of !pip) makes sure the package lands in the interpreter
# this notebook is actually using, not whichever pip is first on the shell PATH.
%pip install hipopy



In [2]:
import hipopy.hipopy as hp
import numpy as np
import awkward as ak

In [3]:
#----- Create a new file and declare its bank -----#
filename = "new.hipo"
bank     = "NEW::bank"
names    = ["px","py","pz","pid"]
dtypes   = ["D","D","D","I"] #NOTE: dtype can be "D": double, "F": float, "S": short, "I": int, "L": long

# Pair every entry name with its type code: {"px": "D", ..., "pid": "I"}
namesAndTypes = dict(zip(names, dtypes))

# Declare the bank first, then open the file for writing.
file = hp.create(filename)
file.newTree(bank, namesAndTypes)
file.open() # IMPORTANT: Open AFTER calling newTree, otherwise the banks will not be written.

In [4]:
#----- Fill the bank with toy events and write them out -----#
rows     = 7   # Rows per event (choose any number)
nbatches = 10  # Number of batches to write (choose any number)
step     = 5   # Events per batch (choose any number)
maxpid   = 200 # Exclusive upper bound for the random pids (choose any number)

# Write batches of events to file
for _ in range(nbatches):

    # Toy data: uniform random momenta for every entry but the last, random integers for the pid
    moms = np.random.random(size=(step, len(names)-1, rows))
    pids = np.random.randint(maxpid, size=(step, 1, rows))

    # Per event: the momentum rows followed by the pid row, as plain Python lists
    data = [moms[i].tolist() + [pids[i,0].tolist()] for i in range(step)]

    # Append the batch to the file
    file.extend({bank : data}) #NOTE: data must have shape (nEvents,nNames,nRows)

# Finish writing
file.close() # Can also use file.write()

# Shell escape: long-format listing (human-readable size) of the file that was just written.
!ls -lrth $filename

-rw-r--r--  1 mfm45  staff    11K Apr 17 14:08 new.hipo


In [5]:
#----- Declare a second bank for the existing file -----#
filename = "new.hipo" # Same file as written in the previous cells (in your $PWD)
bank     = "NEW::bank2"
names    = ["energy","mass"]
dtype    = "D" # One type code shared by every entry of this bank
#NOTE(review): this cell used to state that all entries of a bank must share one type,
#              but "NEW::bank" in this notebook mixes "D" and "I" entries - confirm
#              whether that restriction still applies when adding a bank.

# Map every entry name to the shared type code: {"energy": "D", "mass": "D"}
namesAndTypes = dict.fromkeys(names, dtype)

# Reopen the file, declare the new bank, then open for writing.
file = hp.recreate(filename)
file.newTree(bank, namesAndTypes)
file.open() # IMPORTANT!  Open AFTER calling newTree, otherwise the banks will not be written!

In [6]:
#----- Fill the second bank with toy events -----#
rows     = 7  # Rows per event (choose any number)
nbatches = 10 # Number of batches to write (choose any number)
step     = 5  # Events per batch (choose any number)

# Each batch is a random array of shape (step, len(names), rows)
for _ in range(nbatches):
    data = np.random.random(size=(step, len(names), rows))
    file.extend({bank : data})

# Finish writing
file.close() #IMPORTANT! ( Can also use file.write() )

# Shell escape: long-format listing (human-readable size) of the file after the second bank was added.
!ls -lrth $filename

-rw-r--r--  1 mfm45  staff    17K Apr 17 14:08 new.hipo


In [7]:
#----- Read a single file -----#
# Open the file written above in read mode, print its schemas, and select the bank to read.
filename = 'new.hipo'
bank = "NEW::bank"

file = hp.open(filename,mode="r")
file.show() # Prints the schema dictionary of the file (one line per bank).
file.showBank(bank) # Prints the schema of just this bank.
file.readBank(bank) #IMPORTANT! Call readBank BEFORE you loop through the file.

Dictionary :
	Schema : name = NEW::bank , schemaString = {NEW::bank/1/1}{px/D,py/D,pz/D,pid/I}
	Schema : name = NEW::bank2 , schemaString = {NEW::bank2/2/1}{energy/D,mass/D}

Schema : name = NEW::bank , schemaString = {NEW::bank/1/1}{px/D,py/D,pz/D,pid/I}



In [8]:
#----- Loop over events of the opened file -----#
item1 = "px"  # Entry declared as double ("D")
item2 = "pid" # Entry declared as int ("I")
nevents = 1   # Stop after this many events

# Each event exposes its data under "<bank>_<entry>" keys; the same values can
# also be fetched from the file object with the typed getters.
for counter, event in enumerate(file):
    data1 = file.getDoubles(bank, item1)
    data2 = file.getInts(bank, item2)

    print("counter         = ", counter)
    print("event.keys()    = ", event.keys())

    # Values as stored in the event
    print(f"event[{bank}_{item1}] = ", event[f"{bank}_{item1}"])
    print(f"event[{bank}_{item2}] = ", event[f"{bank}_{item2}"])

    # Python type of a single stored value
    print(f"type(event[{bank}_{item1}][0][0]) = ", type(event[f"{bank}_{item1}"][0][0]))
    print(f"type(event[{bank}_{item2}][0][0]) = ", type(event[f"{bank}_{item2}"][0][0]))

    # Same values through the typed getters
    print(f'file.getDoubles("{bank}","{item1}") = ', data1)
    print(f'file.getInts("{bank}","{item2}") = ', data2)

    # Leave the loop once nevents events have been shown
    if counter + 1 == nevents:
        break

counter         =  0
event.keys()    =  dict_keys(['NEW::bank_px', 'NEW::bank_py', 'NEW::bank_pz', 'NEW::bank_pid', 'NEW::bank2_energy', 'NEW::bank2_mass'])
event[NEW::bank_px] =  [[0.4435988302161542, 0.34144865816041237, 0.6448050690804019, 0.49493386861115085, 0.6244117275542378, 0.966861520475238, 0.22724438829627225]]
event[NEW::bank_pid] =  [[103, 197, 177, 132, 87, 25, 88]]
type(event[NEW::bank_px][0][0]) =  <class 'float'>
type(event[NEW::bank_pid][0][0]) =  <class 'int'>
file.getDoubles("NEW::bank","px") =  [0.4435988302161542, 0.34144865816041237, 0.6448050690804019, 0.49493386861115085, 0.6244117275542378, 0.966861520475238, 0.22724438829627225]
file.getInts("NEW::bank","pid") =  [103, 197, 177, 132, 87, 25, 88]


In [9]:
#----- Write a second file with the same bank layout -----#
filename2 = "new2.hipo"
bank      = "NEW::bank"
names     = ["px","py","pz","pid"]
dtypes    = ["D","D","D","I"] #NOTE: dtype can be "D": double, "F": float, "S": short, "I": int, "L": long
namesAndTypes = dict(zip(names, dtypes)) # {"px": "D", ..., "pid": "I"}

rows     = 7   # Rows per event (choose any number)
nbatches = 10  # Number of batches to write (choose any number)
step     = 5   # Events per batch (choose any number)
maxpid   = 200 # Exclusive upper bound for the random pids (choose any number)

# Declare the bank first, then open the file for writing.
file = hp.create(filename2)
file.newTree(bank, namesAndTypes)
file.open() # IMPORTANT:  Open AFTER calling newTree, otherwise the banks will not be written!

# Write batches of events to file
for _ in range(nbatches):

    # Toy data: uniform random momenta for every entry but the last, random integers for the pid
    moms = np.random.random(size=(step, len(names)-1, rows))
    pids = np.random.randint(maxpid, size=(step, 1, rows))

    # Per event: the momentum rows followed by the pid row, as plain Python lists
    data = [moms[i].tolist() + [pids[i,0].tolist()] for i in range(step)]

    # Append the batch to the file
    file.extend({bank : data}) #NOTE: data must have shape (nEvents,nNames,nRows)

# Finish writing
file.close() # Can also use file.write()

# Shell escape: long-format listing (human-readable sizes) of every .hipo file in the working directory.
!ls -lrth *.hipo

-rw-r--r--  1 mfm45  staff    17K Apr 17 14:08 new.hipo
-rw-r--r--  1 mfm45  staff    11K Apr 17 14:08 new2.hipo


In [10]:
#----- Iterate over several files in batches -----#
filenames = ["*.hipo"]    # Wildcard pattern for the input files
banks     = ["NEW::bank"] # If you don't specify banks, ALL banks will be read.
step      = 100           # Requested number of events per batch
counter   = 0             # Number of batches seen so far (stays 0 if there are none)

# Loop through batches of step # events in the chain.
for counter, batch in enumerate(hp.iterate(filenames, banks, step=step), start=1):
    print(batch.keys()) # Keys are <bank>_<entry>

    # Wrap one entry in an awkward array and show its contents and shape
    if "NEW::bank_px" in batch:
        data = ak.Array(batch["NEW::bank_px"])
        print(data)
        print(np.shape(data))

    # Progress message every `step` batches
    if counter % step == 0:
        print("counter = ", counter)

dict_keys(['NEW::bank_px', 'NEW::bank_py', 'NEW::bank_pz', 'NEW::bank_pid'])
[[0.444, 0.341, 0.645, 0.495, 0.624, 0.967, ... 0.874, 0.604, 0.222, 0.41, 0.582]]
(50, 7)
dict_keys(['NEW::bank_px', 'NEW::bank_py', 'NEW::bank_pz', 'NEW::bank_pid'])
[[0.74, 0.319, 0.607, 0.0102, 0.0968, 0.573, ... 0.0938, 0.264, 0.4, 0.17, 0.665]]
(50, 7)
