# Writing a hepfile from Scratch

This tutorial walks through the steps that the `hepfile.dict_tools.dictlike_to_hepfile` method performs under the hood. Writing a file this way may be preferable for users who want more control over the structure of their hepfile.

In [1]:
# imports
import time
import numpy as np
import sys

from hepfile import write as writer
from hepfile import load

Once hepfile.write is imported, we need to start by initializing the empty data structure.

In [2]:
# Record the wall-clock start time; a later cell subtracts it to report how long writing took
start = time.time()
# Create the empty hepfile data structure that groups and datasets are added to
data = writer.initialize()

Now that the empty data structure is initialized, we must set up groups and datasets. A group holds datasets, and each dataset holds data about its group. You can also create top-level datasets, called "singletons", that do not belong to any group.

In [3]:
# Declare each group together with the counter that records how many
# entries it holds in a given bucket
for group_name, counter_name in (('jet', 'njet'), ('muons', 'nmuon')):
    writer.create_group(data, group_name, counter=counter_name)

# Jet datasets: float kinematics, an integer algorithm flag, and a string label
writer.create_dataset(data, ['e', 'px', 'py', 'pz'], group='jet', dtype=float)
writer.create_dataset(data, ['algorithm'], group='jet', dtype=int)
writer.create_dataset(data, ['words'], group='jet', dtype=str)

# Muon datasets: float kinematics only
writer.create_dataset(data, ['e', 'px', 'py', 'pz'], group='muons', dtype=float)

# "Singleton" datasets: created without a group argument, so they are not
# attached to any group
writer.create_dataset(data, ['METpx', 'METpy'], dtype=float)

0

To start adding data to the groups and datasets we have to generate an empty bucket.

In [4]:
# Build an empty bucket from the structure declared in `data`; it is filled and packed in the loop below
bucket = writer.create_single_bucket(data)

Now that we have the hepfile structure set up, we can generate random data, fill the bucket with it, and pack each filled bucket into the data structure.

In [5]:
rando_words = ["hi", "bye", "ciao", "aloha"]

# Fill and pack 10000 buckets, each holding a fixed number of jets.
# The same `bucket` object is reused on every iteration; presumably pack()
# empties it by default so entries do not accumulate -- confirm against the
# hepfile documentation (EMPTY_OUT_BUCKET option).
for i in range(10000):

    njet = 17
    # The counter is set to the number of entries appended to every jet dataset below
    bucket['jet/njet'] = njet

    for n in range(njet):
        bucket['jet/e'].append(np.random.random())
        bucket['jet/px'].append(np.random.random())
        bucket['jet/py'].append(np.random.random())
        bucket['jet/pz'].append(np.random.random())

        # randint's upper bound is exclusive, so this draws from {-1, 0}
        bucket['jet/algorithm'].append(np.random.randint(-1,1))

        bucket['jet/words'].append(np.random.choice(rando_words))

    # Singleton datasets are assigned a single value rather than appended to
    bucket['METpx'] = np.random.random()
    bucket['METpy'] = np.random.random()

    # Pack the filled bucket into `data`; a nonzero return value signals a failure
    return_value = writer.pack(data,bucket,STRICT_CHECKING=True)
    if return_value != 0:
        # Raise instead of calling exit(): in a notebook exit() shuts the kernel
        # down without telling the reader which bucket failed or why.
        raise RuntimeError(
            f"writer.pack failed for bucket {i} (return value {return_value})"
        )

Finally, we are ready to write this random data to a file called `output_from_scratch.hdf5`!

In [6]:
print("Writing the file...")
# Write everything packed into `data` to disk with gzip compression
# (comp_opts=9 is presumably the gzip level -- confirm against hepfile docs).
# verbose=True makes the writer log each dataset as it is written.
hdfile = writer.write_to_file(
    'output_from_scratch.hdf5',
    data,
    comp_type='gzip',
    comp_opts=9,
    verbose=True,
)

Writing the file...
Writing _SINGLETONS_GROUP_/COUNTER to file
	Converting list to array...
	Writing to file...
Writing to file _SINGLETONS_GROUP_/COUNTER as type <class 'int'>
Writing METpx to file
	Converting list to array...
	Converting array to single precision...
	Writing to file...
Writing to file METpx as type <class 'numpy.float32'>
Writing METpy to file
	Converting list to array...
	Converting array to single precision...
	Writing to file...
Writing to file METpy as type <class 'numpy.float32'>
Writing jet/njet to file
	Converting list to array...
	Writing to file...
Writing to file jet/njet as type <class 'int'>
Writing jet/e to file
	Converting list to array...
	Converting array to single precision...
	Writing to file...
Writing to file jet/e as type <class 'numpy.float32'>
Writing jet/px to file
	Converting list to array...
	Converting array to single precision...
	Writing to file...
Writing to file jet/px as type <class 'numpy.float32'>
Writing jet/py to file
	Converting l

In [7]:
# Report the elapsed time since `start`, i.e. everything from initialization through writing the file
write_time = time.time()
print(f'Time it took to write: {write_time-start}')

Time it took to write: 2.295724868774414


In [8]:
# Read the file back in; as the last expression in the cell, the returned tuple is displayed as output
load('output_from_scratch.hdf5')

Building the indices...

Built the indices!
Data is read in and input file is closed.


({'_MAP_DATASETS_TO_COUNTERS_': {'_SINGLETONS_GROUP_': '_SINGLETONS_GROUP_/COUNTER',
   'jet': 'jet/njet',
   'muons': 'muons/nmuon',
   'jet/e': 'jet/njet',
   'jet/px': 'jet/njet',
   'jet/py': 'jet/njet',
   'jet/pz': 'jet/njet',
   'jet/algorithm': 'jet/njet',
   'jet/words': 'jet/njet',
   'muons/e': 'muons/nmuon',
   'muons/px': 'muons/nmuon',
   'muons/py': 'muons/nmuon',
   'muons/pz': 'muons/nmuon',
   'METpx': '_SINGLETONS_GROUP_/COUNTER',
   'METpy': '_SINGLETONS_GROUP_/COUNTER'},
  '_MAP_DATASETS_TO_INDEX_': {'_SINGLETONS_GROUP_': '_SINGLETONS_GROUP_/COUNTER_INDEX',
   'jet': 'jet/njet_INDEX',
   'muons': 'muons/nmuon_INDEX',
   'jet/e': 'jet/njet_INDEX',
   'jet/px': 'jet/njet_INDEX',
   'jet/py': 'jet/njet_INDEX',
   'jet/pz': 'jet/njet_INDEX',
   'jet/algorithm': 'jet/njet_INDEX',
   'jet/words': 'jet/njet_INDEX',
   'muons/e': 'muons/nmuon_INDEX',
   'muons/px': 'muons/nmuon_INDEX',
   'muons/py': 'muons/nmuon_INDEX',
   'muons/pz': 'muons/nmuon_INDEX',
   'METpx': '_SI

In [9]:
# Report the elapsed time since `write_time` was recorded, which covers the load() call above
read_time = time.time()
print(f'Time it took to read: {read_time-write_time}')

Time it took to read: 0.07789897918701172
