In [1]:
%pylab notebook


import sys
import sidpy
from sidpy.io.interface_utils import openfile_dialog
sys.path.append('../')

import SciFiReaders
print('SciFiReaders version: ', SciFiReaders.__version__)

print('sidpy version: ', sidpy.__version__)

Populating the interactive namespace from numpy and matplotlib
SciFiReaders version:  0.0.1
sidpy version:  0.0.5


In [81]:
import sys
from warnings import warn

import h5py
import sidpy
from pyNSID.io.hdf_utils import check_if_main, read_h5py_dataset

def get_all_nexus_groups(parent, verbose=False):
    """
    Simple function to recursively print the contents of an hdf5 group
    Parameters
    ----------
    parent : :class:`h5py.Group`
        HDF5 Group to search within
    verbose : bool, optional. Default = False
        If true, extra print statements (usually for debugging) are enabled
    Returns
    -------
    main_list : list of h5py.Dataset
        The datasets found in the file that meet the 'Main Data' criteria.
    """
    if not isinstance(parent, (h5py.Group, h5py.File)):
        raise TypeError('parent should be a h5py.File or h5py.Group object')

    group_list = []
    data_list = []

    def __check(name, obj):
        
        if isinstance(obj, h5py.Group):
            if 'NX_class' in obj.attrs.keys():
                if obj.attrs['NX_class']=='NXdata':
                    data_list.append(obj)
                    if verbose:
                        print(name, obj, ' is NXdata')
                else:
                    group_list.append(obj)
                    if verbose:
                        print(name, obj, ' is NXgroup')
            

    if verbose:
        print('Checking the group {} for `Main` datasets.'.format(parent.name))
    parent.visititems(__check)

    return group_list, data_list





class NeXusReader(sidpy.Reader):

    def __init__(self, file_path):
        """
        Creates an instance of NSIDReader which can read one or more HDF5
        datasets formatted according to NSID into sidpy.Dataset objects

        Parameters
        ----------
        file_path : str, h5py.File, or h5py.Group
            Path to a HDF5 file or a handle to an open HDF5 file or group
            object

        Notes
        -----
        Please consider using the ``self._h5_file`` object to get handles to
        specific datasets or sub-trees that need to be read instead of opening
        the file again outside the context of this Reader.
        """

        super(NeXusReader, self).__init__(file_path)

        # Let h5py raise an OS error if a non-HDF5 file was provided
        self._h5_file = h5py.File(file_path, mode='r+')
        

        self._groups, self._data = get_all_nexus_groups(self._h5_file, verbose=True)

        # DO NOT close HDF5 file. Dask array will fail if you do so.

    def can_read(self):
        """
        Checks whether or not this Reader can read the provided file

        Returns
        -------
        bool :
            True if this Reader can read the provided file and if this file
            contains at least one NSID-formatted main dataset. Else, False
        """
        return len(self._main_dsets) > 0

    def read(self, h5_object=None):
        """
        Reads all available NSID main datasets or the specified h5_object

        Parameters
        ----------
        h5_object : h5py.Dataset or h5py.Group
            HDF5 Dataset to read or the HDF5 group under which to read all
            datasets

        Returns
        -------
        sidpy.Dataset or list of sidpy.Dataset objects
            Datasets present in the provided file
        """
        if h5_object is None:
            return self.read_all(recursive=True)
        if not isinstance(h5_object, (h5py.Group, h5py.Dataset)):
            raise TypeError('Provided h5_object was not a h5py.Dataset or '
                            'h5py.Group object but was of type: {}'
                            ''.format(type(h5_object)))
        self.__validate_obj_in_same_file(h5_object)
        if isinstance(h5_object, h5py.Dataset):
            return read_h5py_dataset(h5_object)
        else:
            return self.read_all(parent=h5_object)

    def __validate_obj_in_same_file(self, h5_object):
        """
        Internal function that ensures that the provided HDF5 object is within
        the same file as that provided in __init__

        Parameters
        ----------
        h5_object : h5py.Dataset, h5py.Group
            HDF5 object

        Raises
        ------
        OSError - if the provded object is in a different HDF5 file.
        """
        if h5_object.file != self._h5_file:
            raise OSError('The file containing the provided h5_object: {} is '
                          'not the same as provided HDF5 file when '
                          'instantiating this object: {}'
                          ''.format(h5_object.file.filename,
                                    self._h5_file.filename))

    def read_all(self, recursive=True, parent=None):
        """
        Reads all HDF5 datasets formatted according to NSID specifications.

        Parameters
        ----------
        recursive : bool, default = True
            We might just remove this kwarg
        parent : h5py.Group, Default = None
            HDF5 group under which to read all available datasets.
            By default, all datasets within the HDF5 file are read.

        Returns
        -------
        sidpy.Dataset or list of sidpy.Dataset objects
            Datasets present in the provided file
        """

        if parent is None:
            h5_group = self._h5_file
        else:
            if not isinstance(parent, h5py.Group):
                raise TypeError('parent should be a h5py.Group object')
            self.__validate_obj_in_same_file(parent)
            h5_group = parent

        if recursive:
            list_of_main = self._data
        else:
            list_of_main = []
            for key in h5_group:
                if isinstance(h5_group[key], h5py.Group):
                    if 'NX_class' in h5_group[key].attrs.keys():
                        if  h5_group[key]['NX_class'] == 'NXdata':
                            list_of_main.append(h5_group[key])

        # Go through each of the identified
        list_of_datasets = []
        for dset in list_of_main:
            # list_of_datasets.append(read_h5py_dataset(dset))
            pass
        return list_of_datasets


In [83]:
file_name = "test17.hf5"
reader = NeXusReader(file_name)
# dataset = reader.read()

print(dataset)
dataset

Checking the group / for `Main` datasets.
Measurement_000/Channel_000 <HDF5 group "/Measurement_000/Channel_000" (2 members)>  is NXgroup
Measurement_000/Channel_000/random <HDF5 group "/Measurement_000/Channel_000/random" (7 members)>  is NXdata
instrument <HDF5 group "/instrument" (1 members)>  is NXgroup
instrument/detector <HDF5 group "/instrument/detector" (1 members)>  is NXgroup
[]


[]

In [73]:
for d in reader._main_dsets:
    print(d, d.attrs.keys())
    if 'NX_class' in d.attrs.keys():
        print( d.attrs['NX_class'])

<HDF5 group "/Measurement_000" (1 members)> <KeysViewHDF5 ['machine_id', 'platform', 'sidpy_version', 'timestamp']>
<HDF5 group "/Measurement_000/Channel_000" (2 members)> <KeysViewHDF5 ['NX_class', 'default']>
NXentry
<HDF5 group "/Measurement_000/Channel_000/random" (7 members)> <KeysViewHDF5 ['NX_class', 'axes', 'machine_id', 'platform', 'pyNSID_version', 'sidpy_version', 'signal', 'timestamp', 'x_indices', 'y_indices']>
NXdata
<HDF5 group "/Measurement_000/Channel_000/random/__dict__" (0 members)> <KeysViewHDF5 ['_axes-0', '_axes-1', '_data_type', '_h5_dataset', '_modality', '_original_metadata-NXinstrument-NXdetector-nx_dataset-attrs-units', '_original_metadata-NXinstrument-NXdetector-nx_dataset-data', '_original_metadata-NXinstrument-NXdetector-nx_dataset-name', '_quantity', '_source', '_title', '_units', 'a', 'b', 'dim_0', 'dim_1', 'ndim', 'npartitions', 'numblocks', 'shape', 'size', 'x', 'y']>
<HDF5 group "/Measurement_000/Channel_000/random/_axes" (0 members)> <KeysViewHDF5 ['