In [1]:
%load_ext autoreload
%autoreload 2

import h5py as h5
import hepfile as hf
from hepfile import dict_tools, awkward_tools

import numpy as np
import os

# Make the file and add some data

In [2]:
# Write some data to a file

def _make_event(momenta):
    """Build one event dict.

    The event has a 'jet' group and a 'muons' group, each with its own
    'px' and 'py' lists copied from ``momenta``, plus an 'nParticles'
    entry equal to the number of values in ``momenta``.
    """
    return {
        'jet': {'px': list(momenta), 'py': list(momenta)},
        'muons': {'px': list(momenta), 'py': list(momenta)},
        'nParticles': len(momenta),
    }


event1 = _make_event([1, 2, 3])
event2 = _make_event([3, 4, 6, 7])

# One dict per event, in the order they should be written.
to_write_to_hepfile = [event1, event2]

# Hand the list of event dicts to hepfile together with the output path.
# The value returned is kept in `data` for the cells that follow.
out_filename = 'output_from_dict.hdf5'
data = dict_tools.dictlike_to_hepfile(to_write_to_hepfile, out_filename)
# Print a short view of the returned object.
data.show()



Writing the hdf5 file from the awkward array...
Metadata added
[{jet: {px: [1, ..., 3], py: [...]}, muons: {px: ..., ...}, ...},
 {jet: {px: [3, ..., 7], py: [...]}, muons: {px: ..., ...}, ...}]


In [3]:
# In-memory conversion of `data`; write_hepfile=False is passed so this
# call is not asked to write a file.
d = awkward_tools.awkward_to_hepfile(data, write_hepfile=False)
ds = {*d.keys()}

# Load the file named by `out_filename`; the second returned value is discarded.
x, _ = hf.load(out_filename)
xs = {*x.keys()}

Building the indices...

Built the indices!
Data is read in and input file is closed.


In [4]:
filename = 'test2.h5'

# Clear out any file of this name left by an earlier run; report when
# there was nothing to remove.
if not os.path.exists(filename):
    print("The file does not exist")
else:
    os.remove(filename)

# Write the loaded data `x` to `filename`; verbose=True prints a line for
# each dataset as it is written.
hf.write_to_file(filename, x, verbose=True)

Writing nParticles to file
	Writing to file...
Writing to file nParticles as type int64
Writing jet/njet to file
	Writing to file...
Writing to file jet/njet as type int64
Writing jet/px to file
	Writing to file...
Writing to file jet/px as type int64
Writing jet/py to file
	Writing to file...
Writing to file jet/py as type int64
Writing muons/nmuons to file
	Writing to file...
Writing to file muons/nmuons as type int64
Writing muons/px to file
	Writing to file...
Writing to file muons/px as type int64
Writing muons/py to file
	Writing to file...
Writing to file muons/py as type int64
_SINGLETONS_GROUP_/COUNTER       has 2            entries
jet/njet                         has 2            entries
muons/nmuons                     has 2            entries
Metadata added


<Closed HDF5 file>

# Add a header

In [5]:
# Header with a single 'text' entry holding free-form lines.
header_dict = {
    'text': [
        'These are some words for the header',
        'These are more words',
        'These are very important words',
    ],
}

# Second name bound to the same dict object (not a copy).
mydict = header_dict

# The header is written through the file path. No separate h5py handle is
# opened in this cell: write_file_header is given `filename` and hands back
# a closed file, while an extra append-mode handle opened here would stay
# open for the rest of the session.
hf.write_file_header(filename, header_dict)

Header data added


<Closed HDF5 file>

# Read the header two different ways

In [6]:
# First way: the default return value, walked entry by entry.
header = hf.get_file_header(filename)
for key, value in header.items():
    print(key, value)

# Second way: ask for a dataframe and show it as the cell's result.
header = hf.get_file_header(filename, return_type='df')
header


text ['These are some words for the header' 'These are more words'
 'These are very important words']


Unnamed: 0,text
0,These are some words for the header
1,These are more words
2,These are very important words


In [7]:
# Print the formatted header for `filename`; the returned value is kept in `s`.
s = hf.print_file_header(filename)

################################################################
###                      Hepfile Header                      ###
################################################################
################################################################
text:			These are some words for the header
			These are more words
			These are very important words



# Add a header with a different structure

In [8]:
# Or a different approach

# Parallel lists: position i in each list describes the same header entry.
# The beam energy is given as the int 13 among strings.
header_dict = {
    'field': ['Name', 'beam energy', 'Date', 'beam type'],
    'values': ['Oppenheimer', 13, '1/5/2023', 'protons'],
    'comments': [
        'Name of person creating this file',
        'Beam energy in TeV',
        'Date of data collection',
        'Particle used in beams',
    ],
    'dtype': ['str', 'float', 'str', 'str'],
}

# The header is written through the file path. No separate h5py handle is
# opened in this cell: write_file_header is given `filename` and hands back
# a closed file, while an extra append-mode handle opened here would stay
# open for the rest of the session.
hf.write_file_header(filename, header_dict)

Header data added


<Closed HDF5 file>

# Read that header

In [9]:
# Default return value first, walked entry by entry.
header = hf.get_file_header(filename)
for key, value in header.items():
    print(key, value)

# Then the same request with return_type='df', shown as the cell's result.
header = hf.get_file_header(filename, return_type='df')
header


comments ['Name of person creating this file' 'Beam energy in TeV'
 'Date of data collection' 'Particle used in beams']
dtype ['str' 'float' 'str' 'str']
field ['Name' 'beam energy' 'Date' 'beam type']
values ['Oppenheimer' '13' '1/5/2023' 'protons']


Unnamed: 0,comments,dtype,field,values
0,Name of person creating this file,str,Name,Oppenheimer
1,Beam energy in TeV,float,beam energy,13
2,Date of data collection,str,Date,1/5/2023
3,Particle used in beams,str,beam type,protons


In [10]:
# Print the formatted header for `filename` and keep the returned text in `s`.
s = hf.print_file_header(filename)

################################################################
###                      Hepfile Header                      ###
################################################################
################################################################
comments:			Name of person creating this file
			Beam energy in TeV
			Date of data collection
			Particle used in beams
dtype:			str
			float
			str
			str
field:			Name
			beam energy
			Date
			beam type
values:			Oppenheimer
			13
			1/5/2023
			protons



In [11]:
# Show the value returned by print_file_header as a raw string, with the
# tab and newline escapes visible.
s

'################################################################\n###                      Hepfile Header                      ###\n################################################################\n################################################################\ncomments:\t\t\tName of person creating this file\n\t\t\tBeam energy in TeV\n\t\t\tDate of data collection\n\t\t\tParticle used in beams\ndtype:\t\t\tstr\n\t\t\tfloat\n\t\t\tstr\n\t\t\tstr\nfield:\t\t\tName\n\t\t\tbeam energy\n\t\t\tDate\n\t\t\tbeam type\nvalues:\t\t\tOppenheimer\n\t\t\t13\n\t\t\t1/5/2023\n\t\t\tprotons\n'