In [1]:
import aliases

In [200]:
###############################################################################
'''Read-only access to HDF5-backed frm files through a flat manifest of path-like keys.'''
###############################################################################


import ast as _ast
import pickle as _pickle
import os as _os
from collections import deque as _deque
from collections.abc import Collection as _Collection
from functools import partial as _partial #, lru_cache as _lru_cache
import pickle as _pickle
from abc import ABC as _ABC, abstractmethod as _abstractmethod
import fnmatch as _fnmatch

import h5py as _h5py

from everest.h5anchor import disk as _disk
from everest.utilities import classtools as _classtools
from everest.utilities import reseed as _reseed

_osjoin = _os.path.join


def resolve_eval(strn):
    '''Parse `strn` as a Python literal, resolving the members of sequences.

    The string is evaluated with `ast.literal_eval`. If the parsed value is
    a list, tuple or frozenset (or an instance of a subclass of one), every
    member is passed through `resolve` and the results are collected by
    calling the value's own type on them. Any other value is returned
    exactly as parsed.
    '''
    value = _ast.literal_eval(strn)
    container = type(value)
    if not issubclass(container, (list, tuple, frozenset)):
        return value
    # Rebuild the container from its individually resolved members.
    return container(resolve(member) for member in value)

# Dispatch table for tagged strings: maps a prefix to the callable that
# decodes whatever follows that prefix (consumed by resolve_str).
stringresmeths = dict(
    # The payload is the literal repr of a bytes object holding a pickle.
    # The modules are imported under the private aliases `_pickle`/`_ast`;
    # the bare names `pickle`/`ast` are not defined here.
    # NOTE(review): unpickling can execute arbitrary code - only use this on
    # files that come from a trusted source.
    _bytes_ = lambda x: _pickle.loads(_ast.literal_eval(x)),
    # The payload is a Python literal.
    _eval_ = resolve_eval,
    # The payload is the string itself.
    _string_ = lambda x: x,
    )

def resolve_str(strn, /):
    '''Decode a string according to the first matching prefix tag.

    The keys of `stringresmeths` are tried in order. For the first one that
    `strn` starts with, the remainder of the string (prefix removed) is
    handed to the associated callable and its result is returned. A string
    that carries none of the known prefixes is returned unchanged.
    '''
    prefix = next(
        (tag for tag in stringresmeths if strn.startswith(tag)),
        None,
        )
    if prefix is None:
        return strn
    return stringresmeths[prefix](strn[len(prefix):])

def resolve_attrs(attrs):
    '''Return a new dict holding each value of `attrs` passed through `resolve`.

    `attrs` only needs to provide `.items()`; the keys are kept as they are.
    '''
    resolved = {}
    for name, raw in attrs.items():
        resolved[name] = resolve(raw)
    return resolved

def resolve_dataset(dset):
    '''Return `dset` indexed with the empty tuple.

    For an h5py dataset this is the idiom for reading its full contents
    into memory.
    '''
    everything = ()
    return dset[everything]

def resolve_group(grp):
    '''Resolve the members and the attributes of `grp` into a single dict.

    Every member of the group is passed through `resolve`; the group's
    attributes are resolved with `resolve_attrs` and merged in afterwards,
    so an attribute takes precedence over a member with the same key.
    '''
    members = {}
    for name, child in grp.items():
        members[name] = resolve(child)
    attributes = resolve_attrs(grp.attrs)
    return {**members, **attributes}

# Type-based dispatch table used by resolve: each entry pairs a type with
# the function that resolves instances of that type. Entries are tried in
# the order given here.
resmeths = dict((
    (str, resolve_str),
    (_h5py.AttributeManager, resolve_attrs),
    (_h5py.Dataset, resolve_dataset),
    (_h5py.Group, resolve_group),
    ))

def resolve(obj):
    '''Convert `obj` using the first entry of `resmeths` whose type it matches.

    Objects that are not an instance of any registered type are returned
    unchanged.
    '''
    handler = next(
        (meth for typ, meth in resmeths.items() if isinstance(obj, typ)),
        None,
        )
    return obj if handler is None else handler(obj)


def process_query(strn):
    '''Yield the parts of a query string, one item per '/'-separated part.

    A string containing '/' is split and each piece is processed
    recursively, in order. A piece without '/' is yielded wrapped in `Reg`
    when `isreg` accepts it, wrapped in `FnMatch` when `isfnmatch` accepts
    it, and as the plain string otherwise.

    NOTE(review): `isreg`, `Reg`, `isfnmatch` and `FnMatch` are not defined
    in the visible part of this file - confirm they exist before use.
    '''
    parts = strn.split('/')
    if len(parts) > 1:
        for part in parts:
            yield from process_query(part)
        return
    if isreg(strn):
        item = Reg(strn)
    elif isfnmatch(strn):
        item = FnMatch(strn)
    else:
        item = strn
    yield item

def record_manifest_sub(h5grp, manifest, prename, name):
    '''Append the manifest entries for member `name` of `h5grp`, recursively.

    The member is looked up as `h5grp[name]`; if that raises KeyError
    nothing is recorded. Otherwise the following are added to `manifest`:

    - the member's full name, `prename` + '/' + `name`, where the name of
      anything that is not an h5py Group is first prefixed with '#';
    - one '<fullname>.<attribute>' entry per attribute of the member;
    - when the member is a Group, the entries of each of its own members,
      using the full name as their `prename`.
    '''
    try:
        node = h5grp[name]
    except KeyError:
        # The member cannot be looked up; leave it out of the manifest.
        return
    is_group = isinstance(node, _h5py.Group)
    label = name if is_group else '#' + name
    fullname = f"{prename}/{label}"
    manifest.append(fullname)
    manifest.extend(f"{fullname}.{attname}" for attname in node.attrs)
    if not is_group:
        return
    for child in node:
        record_manifest_sub(node, manifest, fullname, child)

def record_manifest(h5grp, manifest = None):
    '''Record the manifest of `h5grp` and everything beneath it.

    The root entry '/' is added first, followed by one '/.<attribute>'
    entry per attribute of `h5grp`, followed by the entries of every member
    as produced by `record_manifest_sub`.

    Parameters
    ----------
    h5grp
        The group to walk; it must be iterable over its member names and
        expose its attributes as `.attrs`.
    manifest
        Optional container with `append` and `extend` to fill in place.
        A new deque is created when omitted.

    Returns
    -------
    The manifest that was filled: the object passed in, or the newly
    created deque. Returning it is what makes the `manifest = None` form
    usable, since the caller has no other handle on the new deque.
    '''
    if manifest is None:
        manifest = _deque()
    manifest.append('/')
    manifest.extend(f"/.{attname}" for attname in h5grp.attrs)
    for i, name in enumerate(h5grp):
        record_manifest_sub(h5grp, manifest, '', name)
        if not i % 1000:
            # Coarse progress report: first member, then every 1000th.
            print(i, name)
    return manifest


class _Reader(_ABC):
    
    @property
    def manifest(self):
        try:
            return self._manifest
        except AttributeError:
            manifest = self._manifest = list(self.get_manifest())
            return manifest

    @_abstractmethod
    def read(self):
        '''Should return all the contents of the frm file.'''
        raise TypeError(
            "This method is abstract and should never be called."
            )

    @_abstractmethod
    def get_manifest(self):
        '''Should return a list of filepath-like strings from the frm file.'''
        raise TypeError(
            "This method is abstract and should never be called."
            )

    @_abstractmethod
    def getitem_path(self, key):
        '''Should return the target of a unique address in the frm file.'''
        raise TypeError(
            "This method is abstract and should never be called."
            )

    def getitem_pattern(self, key):
        return Pattern(self, key)

    def getitem_collection(self, coll):
        manifest = self.manifest
        selection = [key for key in coll if key in manifest]
        if len(selection) < len(coll):
            raise KeyError
        if isinstance(self, Selection):
            return Selection(self.reader, selection)
        return Selection(self, selection)

    def __getitem__(self, arg):
        if isinstance(arg, tuple):
            raise TypeError(type(arg))
        if isinstance(arg, str):
            if arg in self.manifest:
                return self.getitem_path(arg)
            return self.getitem_pattern(arg)
        if isinstance(arg, _Collection):
            return self.getitem_collection(arg)
        if isinstance(arg, (int, slice)):
            return self[self.manifest[arg]]
        raise TypeError(type(arg))

    def __contains__(self, key):
        return key in self.manifest

    def __len__(self):
        return len(self.manifest)

    @_abstractmethod
    def __repr__(self):
        '''Should return an unambiguous string representation of the object.'''
        raise TypeError(
            "This method is abstract and should never be called."
            )

    def __hash__(self):
        try:
            return self._hashint
        except AttributeError:
            hashint = self._hashint = _reseed.randint(repr(self))
            return hashint


class Reader(_Reader, _disk.H5Manager):
    '''Reader backed directly by an HDF5 file, opened in read mode.

    NOTE(review): `self.path`, `self.name`, `self.h5filename` and the
    context-manager behaviour come from `_disk.H5Manager`, which is not
    visible here; entering `self` presumably opens the file and yields the
    h5py file object - confirm.
    '''

    mode = 'r'

    def get_manifest(self):
        '''Return the manifest, using a pickle cache next to the file.

        The cache lives at `<self.path>/<self.name>.pkl`. If it exists its
        content is unpickled and returned. Otherwise the manifest is built
        by walking the file with `record_manifest`, written to the cache
        and returned.
        '''
        cachepath = _os.path.join(self.path, self.name + '.pkl')
        try:
            with open(cachepath, mode = 'rb') as file:
                return _pickle.load(file)
        except FileNotFoundError:
            pass
        # No cache yet: walk the file once and store the result.
        manifest = _deque()
        with self as h5file:
            record_manifest(h5file, manifest)
        with open(cachepath, mode = 'wb') as file:
            _pickle.dump(manifest, file)
        return manifest

    def read(self, manifest = None):
        '''Return a dict mapping each path of `manifest` to `self[path]`.

        The reader's own manifest is used when `manifest` is None.
        '''
        paths = self.manifest if manifest is None else manifest
        values = (self[path] for path in paths)
        return dict(zip(paths, values))

    def getitem_path(self, key):
        '''Fetch and resolve the object or attribute addressed by `key`.

        The manifest key is converted to an HDF5 address by removing every
        '#' and inserting a '/' before every '.'. If the address then
        contains a '.', its last component (with surrounding dots stripped)
        names an attribute of the object at the parent address; otherwise
        the address names the object itself. The result is passed through
        `resolve` while the file is still open.
        '''
        with self as h5file:
            address = key.replace('#', '').replace('.', '/.')
            if '.' not in address:
                return resolve(h5file[address])
            parent = _os.path.dirname(address)
            attrname = _os.path.basename(address).strip('.')
            return resolve(h5file[parent].attrs[attrname])

    def __repr__(self):
        '''Return '<class name>(<h5filename>)'.'''
        clsname = type(self).__name__
        return f"{clsname}({self.h5filename})"


class _Derived(_Reader):
    '''Base for readers that are views onto another reader.

    `source` is the object the view was created from. `reader` is the
    underlying non-derived reader at the bottom of the chain, and path
    lookups are forwarded to that reader's `getitem_path`.
    '''

    def __init__(self, source):
        '''Attach the view to `source` and locate the underlying reader.'''
        self.source = source
        chained = isinstance(source, _Derived)
        # A derived source has already located the bottom of the chain, so
        # reuse its lookup function and reader rather than stacking calls.
        self._getitem_path = (
            source._getitem_path if chained else source.getitem_path
            )
        self.reader = source.reader if chained else source

    def read(self):
        '''Read this view's manifest entries through the underlying reader.'''
        paths = self.manifest
        return self.reader.read(manifest = paths)

    def getitem_path(self, key):
        '''Fetch `key` using the underlying reader's path lookup.'''
        lookup = self._getitem_path
        return lookup(key)


class Pattern(_Derived):
    '''View of a reader restricted to manifest entries matching a pattern.

    The pattern uses `fnmatch` syntax and is applied to the manifest of the
    object the view was created from, so patterns applied one after another
    narrow the result progressively.
    '''

    def __init__(self, reader, pattern):
        '''Create a view of `reader` (any reader or view) for `pattern`.'''
        self.pattern = pattern
        super().__init__(reader)

    def get_manifest(self):
        '''Return the entries of the source's manifest matching the pattern.'''
        # Filter the immediate source, not the underlying root reader:
        # filtering the root would discard the restriction of any view this
        # pattern was created from. For a plain reader the two are the same.
        return _fnmatch.filter(self.source.manifest, self.pattern)

    def __repr__(self):
        '''Return '<repr of source>[<pattern>]'.'''
        # Built on the source so that chained views stay distinguishable
        # (the hash is derived from this string); no trailing parenthesis.
        return f"{repr(self.source)}[{self.pattern}]"


class Selection(_Derived):
    '''View of a reader restricted to an explicit list of manifest entries.'''

    def __init__(self, reader, selection):
        '''Create a view of `reader` containing exactly the keys in `selection`.'''
        self.selection = selection
        super().__init__(reader)

    def get_manifest(self):
        '''Return the selected keys as given at construction.'''
        return self.selection

    def __repr__(self):
        '''Return '<repr of reader>[<selection>]'.'''
        # The original format string ended in a stray ')' that left the
        # parentheses of the representation unbalanced.
        return f"{repr(self.reader)}[{self.selection}]"

# class Transform(_Derived):

#     def __init__(self, reader, operator):


###############################################################################
###############################################################################

In [201]:
reader = Reader('allout', aliases.datadir)

In [202]:
subreader = reader['/a*/*.H']

In [206]:
selection = subreader[:1000]

In [208]:
%%time
selection.read()

CPU times: user 2.52 s, sys: 653 ms, total: 3.17 s
Wall time: 3.37 s


{'/aabaesweuh-luubrbriups/inputs.H': 1.778,
 '/aabeowauj-psauthkrousn/inputs.H': 10.0,
 '/aabiaspiot-siangiasto/inputs.H': 0.01,
 '/aabichebr-iasteswosm/inputs.H': 5.623,
 '/aablaieakwee-kaekbotr/inputs.H': 0.01,
 '/aablatzuzh-thuarhtseav/inputs.H': 3.1622776601683795,
 '/aablauoal-dwaokrieseu/inputs.H': 1.0,
 '/aablauuilaa-bruobkiutr/inputs.H': 1.0,
 '/aabliakoadw-uolaoeobii/inputs.H': 0.316,
 '/aabliapeetr-choaprzhesl/inputs.H': 1.778,
 '/aabliiauvuu-eerhaflion/inputs.H': 0.316,
 '/aabliuchuuph-iashiiieblue/inputs.H': 0.01,
 '/aabliuoikwou-luupsweun/inputs.H': 0.562,
 '/aabloauuswai-stuelthoogr/inputs.H': 0.032,
 '/aabloighut-griitrlaodr/inputs.H': 1.778,
 '/aabloogau-sleaswuothuo/inputs.H': 10.0,
 '/aabloouihoo-thoatgoowr/inputs.H': 0.056,
 '/aabluekluph-wrewrakaa/inputs.H': 0.01,
 '/aablueuusnau-sciatwuichia/inputs.H': 3.162,
 '/aablugreb-ijaeayai/inputs.H': 0.0,
 '/aabluiouphue-shufriekri/inputs.H': 0.562,
 '/aabluoeekloo-iuscirhof/inputs.H': 0.01,
 '/aablutioh-spaureowreu/inputs.

In [163]:
selection.reader

Reader(/home/morpheus/workspace/mount/data/allout.frm)[/aab*/*.H])

In [98]:
subreader.manifest

['/aabaesweuh-luubrbriups/inputs.H',
 '/aabeowauj-psauthkrousn/inputs.H',
 '/aabiaspiot-siangiasto/inputs.H',
 '/aabichebr-iasteswosm/inputs.H',
 '/aablaieakwee-kaekbotr/inputs.H',
 '/aablatzuzh-thuarhtseav/inputs.H',
 '/aablauoal-dwaokrieseu/inputs.H',
 '/aablauuilaa-bruobkiutr/inputs.H',
 '/aabliakoadw-uolaoeobii/inputs.H',
 '/aabliapeetr-choaprzhesl/inputs.H',
 '/aabliiauvuu-eerhaflion/inputs.H',
 '/aabliuchuuph-iashiiieblue/inputs.H',
 '/aabliuoikwou-luupsweun/inputs.H',
 '/aabloauuswai-stuelthoogr/inputs.H',
 '/aabloighut-griitrlaodr/inputs.H',
 '/aabloogau-sleaswuothuo/inputs.H',
 '/aabloouihoo-thoatgoowr/inputs.H',
 '/aabluekluph-wrewrakaa/inputs.H',
 '/aablueuusnau-sciatwuichia/inputs.H',
 '/aablugreb-ijaeayai/inputs.H',
 '/aabluiouphue-shufriekri/inputs.H',
 '/aabluoeekloo-iuscirhof/inputs.H',
 '/aablutioh-spaureowreu/inputs.H',
 '/aaboibrousk-skaospchuagh/inputs.H',
 '/aabooceog-uibeekliach/inputs.H',
 '/aabreegluag-smoamtzoer/inputs.H',
 '/aabreilefl-ueklaiaqee/inputs.H',
 '

In [101]:
%%time
out = subreader.read()

CPU times: user 202 ms, sys: 15.5 ms, total: 217 ms
Wall time: 219 ms


In [100]:
out

{'/aabaesweuh-luubrbriups/inputs.H': 1.778,
 '/aabeowauj-psauthkrousn/inputs.H': 10.0,
 '/aabiaspiot-siangiasto/inputs.H': 0.01,
 '/aabichebr-iasteswosm/inputs.H': 5.623,
 '/aablaieakwee-kaekbotr/inputs.H': 0.01,
 '/aablatzuzh-thuarhtseav/inputs.H': 3.1622776601683795,
 '/aablauoal-dwaokrieseu/inputs.H': 1.0,
 '/aablauuilaa-bruobkiutr/inputs.H': 1.0,
 '/aabliakoadw-uolaoeobii/inputs.H': 0.316,
 '/aabliapeetr-choaprzhesl/inputs.H': 1.778,
 '/aabliiauvuu-eerhaflion/inputs.H': 0.316,
 '/aabliuchuuph-iashiiieblue/inputs.H': 0.01,
 '/aabliuoikwou-luupsweun/inputs.H': 0.562,
 '/aabloauuswai-stuelthoogr/inputs.H': 0.032,
 '/aabloighut-griitrlaodr/inputs.H': 1.778,
 '/aabloogau-sleaswuothuo/inputs.H': 10.0,
 '/aabloouihoo-thoatgoowr/inputs.H': 0.056,
 '/aabluekluph-wrewrakaa/inputs.H': 0.01,
 '/aablueuusnau-sciatwuichia/inputs.H': 3.162,
 '/aablugreb-ijaeayai/inputs.H': 0.0,
 '/aabluiouphue-shufriekri/inputs.H': 0.562,
 '/aabluoeekloo-iuscirhof/inputs.H': 0.01,
 '/aablutioh-spaureowreu/inputs.

In [58]:
%%time
subreader['/aabaesweuh-luubrbriups/inputs.H']

CPU times: user 2.88 ms, sys: 0 ns, total: 2.88 ms
Wall time: 2.87 ms


1.778

In [59]:
3 * 60

180

In [15]:
%%time
manifest = reader.manifest

CPU times: user 979 ms, sys: 606 ms, total: 1.59 s
Wall time: 1.59 s


In [18]:
import fnmatch as _fnmatch

In [24]:
out = _fnmatch.filter(reader.manifest, '*.H')

In [17]:
reader.manifest

('/',
 '/aabaesweuh-luubrbriups',
 '/aabaesweuh-luubrbriups.case',
 '/aabaesweuh-luubrbriups.hashID',
 '/aabaesweuh-luubrbriups.inputsHash',
 '/aabaesweuh-luubrbriups.instanceHash',
 '/aabaesweuh-luubrbriups.schema',
 '/aabaesweuh-luubrbriups.supertype',
 '/aabaesweuh-luubrbriups.type',
 '/aabaesweuh-luubrbriups.typeHash',
 '/aabaesweuh-luubrbriups/checkpoints',
 '/aabaesweuh-luubrbriups/checkpoints/#chron',
 '/aabaesweuh-luubrbriups/checkpoints/#chron.extendable',
 '/aabaesweuh-luubrbriups/checkpoints/#chron.indices',
 '/aabaesweuh-luubrbriups/checkpoints/#count',
 '/aabaesweuh-luubrbriups/checkpoints/#count.extendable',
 '/aabaesweuh-luubrbriups/checkpoints/#count.indices',
 '/aabaesweuh-luubrbriups/checkpoints/#temperatureDotField',
 '/aabaesweuh-luubrbriups/checkpoints/#temperatureDotField.extendable',
 '/aabaesweuh-luubrbriups/checkpoints/#temperatureDotField.indices',
 '/aabaesweuh-luubrbriups/checkpoints/#temperatureField',
 '/aabaesweuh-luubrbriups/checkpoints/#temperatureField

In [None]:
_pickle.dump

In [None]:
manifest = tuple(manifest)

In [None]:
from collections import deque as _deque
from functools import partial as _partial

# class Manifest(_deque):
#     _prename = ''
#     @property
#     def prename(self):
#         return self._prename
#     @prename.setter
#     def prename(self, val):
#         self._prename = val
#     def proc_val(self, val):
#         return f"{self.prename}/{val}"
#     def append(self, val):
#         super().append(self.proc_val(val))
#     def extend(self, vals):
#         super().extend(self.proc_val(val) for val in vals)
#     def __setitem__(self, name, val):
#         super().__setitem__(name, self.proc_val(val))



In [None]:
%%time
# Build the manifest of a single top-level group rather than the whole file,
# collecting the entries into a fresh deque (timed by the %%time magic above).
manifest = _deque()
with reader:  # presumably opens the file and exposes it as reader.h5file - TODO confirm
    record_manifest(reader.h5file['aabaesweuh-luubrbriups'], manifest)

In [None]:
manifest

In [None]:
# Fetch one object from the open file by its absolute HDF5 path.
# NOTE(review): `out` is bound inside the `with` block and outlives it;
# presumably the file is closed on exit, so confirm `out` is still usable.
with reader:
    out = reader.h5file['/velvisc/strainRate_outer_av']

In [None]:
14 * 60000 / 1000 / 60

In [None]:
manifest

In [None]:
import h5py
import os

# Create two small HDF5 files for experimenting with external links:
# test2.h5 holds the link targets, test1.h5 holds its own groups plus two
# external links pointing at the groups in test2.h5.
# Context managers make sure each file is closed even if a statement fails.
with h5py.File('test2.h5', mode = 'w') as myfile2:
    grp1 = myfile2.create_group('myfile2grp1')
    grp2 = myfile2.create_group('myfile2grp2')
    subgrp1 = grp1.create_group('foo')

with h5py.File('test1.h5', mode = 'w') as myfile1:
    grp1 = myfile1.create_group('myfile1grp1')
    grp2 = myfile1.create_group('myfile1grp2')
    subgrp1 = grp1.create_group('bah')
    # External links record a file name and a path inside that file.
    myfile1['link1'] = h5py.ExternalLink('test2.h5', 'myfile2grp1')
    myfile1['link2'] = h5py.ExternalLink('test2.h5', 'myfile2grp2')

In [None]:
# Collect whatever the low-level link iterator passes to its callback for
# the root group of test1.h5 into `mylist`; `out` keeps the value returned
# by the iterate call itself.
mylist = []
with h5py.File('test1.h5', mode = 'r') as h5file:
    out = h5file.id.links.iterate(mylist.append)