In [2]:
%reload_ext autoreload
%autoreload 2

from collections import namedtuple
import os
import posixpath
import re

import h5py

from genNested import genNested

In [4]:
# Build a test fixture through the project helper genNested (presumably it
# writes a nested HDF5 file named after its first argument -- TODO confirm).
# Only one call is active; the commented-out calls are alternative fixtures.
# NOTE(review): the cells further down open 'nested.hdf5'. That file is
# presumably produced by the commented-out 'nested' call, not by the active
# one -- confirm, and re-enable it if this notebook must survive
# Restart Kernel -> Run All.
# genNested('nested', shape=(10,10))
genNested('nested_int_one_d', N=2, fillRange=True, shape=(1000,)*1)
# genNested('nested_int_high_d', N=2, fillRange=True, shape=(50,)*4)

# Get metadata as a list

In [19]:
# Immutable record for one HDF5 object: `kind` is 'group' or 'dataset',
# `name` is the object's key inside its parent group, and `uri` is the
# prefix joined with that key.
Meta = namedtuple('Meta', 'kind name uri')

def getMeta(group, prefix='/'):
    """Return metadata for the direct children of an HDF5 group.

    Args:
        group: Object whose ``items()`` yields ``(name, object)`` pairs,
            e.g. an open ``h5py.Group``.
        prefix: Path of ``group`` inside the file; each child's name is
            appended to it to build that child's ``uri``.

    Returns:
        A list with one ``Meta`` per child, in the order ``group.items()``
        yields them. Members that are ``h5py.Group`` instances are reported
        as ``'group'``; every other member is reported as ``'dataset'``.
    """
    # HDF5 object paths are always '/'-separated, so join with posixpath
    # instead of os.path, which would insert backslashes on Windows.
    return [
        Meta(
            'group' if isinstance(val, h5py.Group) else 'dataset',
            key,
            posixpath.join(prefix, key),
        )
        for key, val in group.items()
    ]

def getMetaAll(group, prefix='/', meta=None):
    """Recursively collect metadata for every object below an HDF5 group.

    The walk is depth-first and pre-order: a sub-group's own record is
    appended first, then the records of everything inside it, and only then
    does the walk continue with the sub-group's next sibling.

    Args:
        group: Object whose ``items()`` yields ``(name, object)`` pairs,
            e.g. an open ``h5py.File`` or ``h5py.Group``.
        prefix: Path of ``group`` inside the file, used as the base of every
            generated ``uri``.
        meta: Optional list to append records to. A fresh list is created
            when it is ``None``; the recursive calls pass the shared list.

    Returns:
        The list of ``Meta`` records (the same object as ``meta`` when one
        was supplied).
    """
    if meta is None:
        meta = []

    for key, val in group.items():
        # HDF5 object paths are always '/'-separated; os.path.join would
        # insert backslashes on Windows, so use posixpath explicitly.
        uri = posixpath.join(prefix, key)
        if isinstance(val, h5py.Group):
            meta.append(Meta('group', key, uri))
            getMetaAll(val, uri, meta)
        else:
            meta.append(Meta('dataset', key, uri))

    return meta

In [17]:
# List only the direct children of one nested group. The group's own path is
# passed as the prefix so the reported URIs start from the file root.
prefix = '/leaf01/leaf02'

with h5py.File('nested.hdf5', mode='r') as f:
    meta = getMeta(f[prefix], prefix=prefix)

print(meta)

[Meta(kind='dataset', name='data02', uri='/leaf01/leaf02/data02'), Meta(kind='group', name='leaf03', uri='/leaf01/leaf02/leaf03')]


In [20]:
# Walk the whole file, starting from its root, with the default '/' prefix.
with h5py.File('nested.hdf5', mode='r') as f:
    metaAll = getMetaAll(group=f)

print(metaAll)

[Meta(kind='dataset', name='data00', uri='/data00'), Meta(kind='group', name='leaf01', uri='/leaf01'), Meta(kind='dataset', name='data01', uri='/leaf01/data01'), Meta(kind='group', name='leaf02', uri='/leaf01/leaf02'), Meta(kind='dataset', name='data02', uri='/leaf01/leaf02/data02'), Meta(kind='group', name='leaf03', uri='/leaf01/leaf02/leaf03'), Meta(kind='dataset', name='data03', uri='/leaf01/leaf02/leaf03/data03'), Meta(kind='group', name='leaf04', uri='/leaf01/leaf02/leaf03/leaf04'), Meta(kind='dataset', name='data04', uri='/leaf01/leaf02/leaf03/leaf04/data04'), Meta(kind='group', name='leaf05', uri='/leaf01/leaf02/leaf03/leaf04/leaf05')]


# Get metadata lazily with generators

In [30]:
# Record produced by genMetaHdf / genMetaAllHdf, with the fields kind, name
# and uri. The typename passed to namedtuple is 'Meta', so instances print as
# Meta(...) rather than MetaHdf(...).
MetaHdf = namedtuple('Meta', 'kind name uri')

# Matches every run of two or more consecutive slashes. A plain '//' pattern
# only removes non-overlapping pairs, so '///' would still leave '//' behind.
_emptyUriRe = re.compile('/{2,}')
def uriJoin(*parts):
    """Join path segments with '/' and collapse repeated slashes.

    Args:
        *parts: String segments; each may carry leading or trailing slashes.

    Returns:
        The segments joined by '/', with every run of consecutive slashes
        reduced to a single '/'. Returns '' when no segments are given.
    """
    return _emptyUriRe.sub('/', '/'.join(parts))

def genMetaHdf(group, prefix='/'):
    """Lazily describe the direct children of an HDF5 group.

    Args:
        group: Object whose ``items()`` yields ``(name, object)`` pairs,
            e.g. an open ``h5py.Group``.
        prefix: Path of ``group`` inside the file; joined with each child's
            name through ``uriJoin`` to form the child's ``uri``.

    Returns:
        A generator of ``MetaHdf`` records, one per child. Records are built
        only as the generator is consumed, so it has to be consumed while
        ``group`` can still be read.
    """
    # Take the member view now; the records themselves are built lazily.
    members = group.items()
    return (
        MetaHdf(
            'dataset' if not isinstance(node, h5py.Group) else 'group',
            name,
            uriJoin(prefix, name),
        )
        for name, node in members
    )

def genMetaAllHdf(group, prefix='/'):
    """Lazily describe every object below an HDF5 group.

    All direct children of ``group`` are yielded first (through
    ``genMetaHdf``); afterwards each child that is an ``h5py.Group`` is
    walked recursively, in ``group.items()`` order.

    Args:
        group: Object whose ``items()`` yields ``(name, object)`` pairs,
            e.g. an open ``h5py.File`` or ``h5py.Group``.
        prefix: Path of ``group`` inside the file, used as the base of every
            generated ``uri``.

    Yields:
        The records produced by ``genMetaHdf`` for this group, followed by
        those of each nested group.
    """
    # Pass 1: everything directly inside this group.
    yield from genMetaHdf(group, prefix)

    # Pass 2: descend into the members that are themselves groups.
    subgroups = (
        (name, node)
        for name, node in group.items()
        if isinstance(node, h5py.Group)
    )
    for name, node in subgroups:
        yield from genMetaAllHdf(node, uriJoin(prefix, name))

In [27]:
prefix = '/leaf01/leaf02'

# genMetaHdf returns a lazy generator, so it is consumed inside the
# with-block while the file is still open.
with h5py.File('nested.hdf5', mode='r') as f:
    for m in genMetaHdf(f[prefix], prefix=prefix):
        print(m)

Meta(kind='dataset', name='data02', uri='/leaf01/leaf02/data02')
Meta(kind='group', name='leaf03', uri='/leaf01/leaf02/leaf03')


In [31]:
# Walk the whole file from its root. The generator is consumed inside the
# with-block because it reads from the file as it goes.
with h5py.File('nested.hdf5', mode='r') as f:
    for m in genMetaAllHdf(group=f):
        print(m)

Meta(kind='dataset', name='data00', uri='/data00')
Meta(kind='group', name='leaf01', uri='/leaf01')
Meta(kind='dataset', name='data01', uri='/leaf01/data01')
Meta(kind='group', name='leaf02', uri='/leaf01/leaf02')
Meta(kind='dataset', name='data02', uri='/leaf01/leaf02/data02')
Meta(kind='group', name='leaf03', uri='/leaf01/leaf02/leaf03')
Meta(kind='dataset', name='data03', uri='/leaf01/leaf02/leaf03/data03')
Meta(kind='group', name='leaf04', uri='/leaf01/leaf02/leaf03/leaf04')
Meta(kind='dataset', name='data04', uri='/leaf01/leaf02/leaf03/leaf04/data04')
Meta(kind='group', name='leaf05', uri='/leaf01/leaf02/leaf03/leaf04/leaf05')


# Miscellaneous experiments

In [5]:
# Print the file object's name, then each member of '/leaf01': first its
# key, then the repr of the h5py object stored under that key.
with h5py.File('nested.hdf5', mode='r') as f:
    print(f.name)
    for name, node in f['/leaf01'].items():
        print(name, node, sep='\n')

/
data01
<HDF5 dataset "data01": shape (20, 10), type "<i8">
leaf02
<HDF5 group "/leaf01/leaf02" (2 members)>


In [11]:
# namedtuple instances are immutable. Meta has three required fields, so a
# record must be built with values; assigning to a field afterwards raises
# AttributeError. Catch and display it so the cell does not end in a failure.
m = Meta('dataset', 'data00', '/data00')
try:
    m.kind = 'group'
except AttributeError as err:
    print('AttributeError:', err)

# _replace() returns a modified copy; rebinding `m` is the supported way to
# "change" a field.
m = m._replace(kind='group')
print(m)

AttributeError: can't set attribute