In [1]:
%load_ext autoreload
%autoreload 1

import warnings

import awkward as ak
import hepfile as hf

  if group == "_SINGLETONS_GROUP_" and dataset is not "COUNTER":


In [2]:
def awkward_to_hepfile(ak_array:ak.Record, outfile:str=None, write_hepfile:bool=True, **kwargs) -> dict:
    '''
    Converts an awkward array of records to the hepfile data layout and,
    optionally, writes the result to an hdf5 file.

    Each top-level field of `ak_array` is handled in one of two ways:
      * a field with no sub-fields is stored as a singleton dataset
      * a field with sub-fields becomes a hepfile group with a counter
        named `n<field>` and one dataset per sub-field

    Args:
        ak_array (Awkward Array): record array whose fields are written to a hepfile
        outfile (str): path to write output hdf5 file to
        write_hepfile (bool): if True, writes data to outfile. If False, just converts to hepfile format and returns
        **kwargs (None): Passed to `hepfile.write.write_to_file`

    Returns:
        Dictionary of hepfile data

    Raises:
        IOError: if `write_hepfile` is truthy but no `outfile` was given

    Warns:
        UserWarning: if `outfile` was given but `write_hepfile` is falsy
            (the path is ignored and the conversion still runs)
    '''

    # perform IO checks

    hf.awkward_tools._is_valid_awkward(ak_array)

    if write_hepfile and outfile is None:
        raise IOError('Please provide an outfile path if write_hepfile=True!')

    if not write_hepfile and outfile is not None:
        # An unused path is not fatal: warn and carry on with the conversion
        # instead of raising, so the caller still gets the data back.
        warnings.warn('You set write_hepfile to False but provided an output file path. This output file path will not be used!',
                      stacklevel=2)

    data = hf.initialize()

    for group in ak_array.fields:

        counter = f'n{group}'
        counter_key = f'{group}/{counter}'

        # A field without sub-fields is stored directly as a singleton dataset
        if len(ak_array[group].fields) == 0:
            dtype = hf.awkward_tools._get_awkward_type(ak_array[group])
            hf.create_dataset(data, group, dtype=dtype)

            data[group] = ak_array[group]
            continue

        hf.create_group(data, group, counter=counter)
        for ii, dataset in enumerate(ak_array[group].fields):

            dtype = hf.awkward_tools._get_awkward_type(ak_array[group][dataset])
            hf.create_dataset(data, dataset, group=group, dtype=dtype)

            # '/' separates group and dataset in the key, so swap any '/' in
            # the dataset name for '-' (a no-op when there is none)
            dataset_name = dataset.replace('/', '-')

            name = f'{group}/{dataset_name}'
            for data_subset in ak_array[group][dataset]:
                data[name].append(data_subset)
                # fill the group counter only once, using the first dataset
                if ii == 0:
                    data[counter_key].append(len(data_subset))

            data[name] = ak.flatten(ak.Array(data[name]))

        data[counter_key] = ak.Array(data[counter_key])

    # If any singleton was registered (more than one entry in the singletons
    # group), fill the singletons counter with one 1 per entry of the
    # dataset listed at index 1.
    singletons = data['_GROUPS_']['_SINGLETONS_GROUP_']
    if len(singletons) > 1:
        data['_SINGLETONS_GROUP_/COUNTER'] = [1]*len(data[singletons[1]])

    if write_hepfile:
        print("Writing the hdf5 file from the awkward array...")
        # forward the caller's options as the docstring promises
        hf.write_to_file(outfile, data, **kwargs)

    return data


In [3]:
# Test input: two records, each holding a 'jet' and a 'muons' group with
# 'px'/'py' lists plus a plain string field 'other'.
d = [
    {
        'jet': {
            'px': [1, 2, 3],
            'py': [1, 2, 3],
        },
        'muons': {
            'px': [1, 2, 3],
            'py': [1, 2, 3],
        },
        'other': 'this',
    },
    {
        'jet': {
            'px': [3, 4, 6, 7],
            'py': [3, 4, 6, 7],
        },
        'muons': {
            'px': [3, 4, 6, 7],
            'py': [3, 4, 6, 7],
        },
        'other': 'this',
    },
]

# Build the awkward array used by the conversion below
awk = ak.Array(d)

In [4]:
# Convert the test array and write it to an hdf5 file; the returned
# hepfile dictionary is shown as the cell output.
path = 'test.h5'
awkward_to_hepfile(awk, outfile=path)

Adding group [1mjet[0m
Adding a counter for [1mjet[0m as [1mnjet[0m
Adding dataset [1mpx[0m to the dictionary under group [1mjet[0m.
Adding dataset [1mpy[0m to the dictionary under group [1mjet[0m.
Adding group [1mmuons[0m
Adding a counter for [1mmuons[0m as [1mnmuons[0m
Adding dataset [1mpx[0m to the dictionary under group [1mmuons[0m.
Adding dataset [1mpy[0m to the dictionary under group [1mmuons[0m.
Adding dataset [1mother[0m to the dictionary as a SINGLETON.
Writing the hdf5 file from the awkward array...
{'_SINGLETONS_GROUP_/COUNTER': <class 'int'>, 'jet/njet': <class 'int'>, 'jet/px': <class 'numpy.int64'>, 'jet/py': <class 'numpy.int64'>, 'muons/nmuons': <class 'int'>, 'muons/px': <class 'numpy.int64'>, 'muons/py': <class 'numpy.int64'>, 'other': <class 'str'>}
_SINGLETONS_GROUP_/COUNTER       has 2            entries
jet/njet                         has 2            entries
muons/nmuons                     has 2            entries
Metadata added


{'_GROUPS_': {'_SINGLETONS_GROUP_': ['COUNTER', 'other'],
  'jet': ['njet', 'px', 'py'],
  'muons': ['nmuons', 'px', 'py']},
 '_MAP_DATASETS_TO_COUNTERS_': {'_SINGLETONS_GROUP_': '_SINGLETONS_GROUP_/COUNTER',
  'jet': 'jet/njet',
  'jet/px': 'jet/njet',
  'jet/py': 'jet/njet',
  'muons': 'muons/nmuons',
  'muons/px': 'muons/nmuons',
  'muons/py': 'muons/nmuons',
  'other': '_SINGLETONS_GROUP_/COUNTER'},
 '_LIST_OF_COUNTERS_': ['_SINGLETONS_GROUP_/COUNTER',
  'jet/njet',
  'muons/nmuons'],
 '_SINGLETONS_GROUP_/COUNTER': [1, 1],
 '_MAP_DATASETS_TO_DATA_TYPES_': {'_SINGLETONS_GROUP_/COUNTER': int,
  'jet/njet': int,
  'jet/px': numpy.int64,
  'jet/py': numpy.int64,
  'muons/nmuons': int,
  'muons/px': numpy.int64,
  'muons/py': numpy.int64,
  'other': str},
 '_PROTECTED_NAMES_': ['_PROTECTED_NAMES_',
  '_GROUPS_',
  '_MAP_DATASETS_TO_COUNTERS_',
  '_MAP_DATASETS_TO_DATA_TYPES__LIST_OF_COUNTERS_',
  '_SINGLETONS_GROUP_/COUNTER'],
 'jet/njet': <Array [3, 4] type='2 * int64'>,
 'jet/px': <Ar