In [2]:
import os
from collections.abc import Iterable

import h5py
import numpy as np

class HDF5Dataset:
    """In-memory view of several HDF5 datasets stacked row-wise in one array.

    Each selected dataset is read from ``datapath``; 1-D datasets are promoted
    to a single row, and all blocks are concatenated along axis 0 into
    ``self.data``. The row range occupied by every dataset is recorded so a
    block can be retrieved by name (``ds['a']``, ``ds[['a', 'b']]``,
    ``ds['a', 3]``) in addition to regular numpy indexing.

    Parameters
    ----------
    datapath : str
        Path of the HDF5 file to read.
    datasets : list of str, optional
        Names of the datasets to load, in stacking order. When omitted, every
        top-level ``h5py.Dataset`` of the file is loaded (groups are skipped).

    Attributes
    ----------
    data : numpy.ndarray
        All loaded datasets concatenated along axis 0.
    datasets : list of str
        Names of the loaded datasets, in stacking order.
    start_indexes, end_indexes : dict
        For each dataset name, the first row and one-past-the-last row it
        occupies in ``data``.
    lenghts : list of int
        Number of rows contributed by each dataset (historical, misspelled
        name kept for backward compatibility).
    lengths : list of int
        Correctly spelled alias of ``lenghts`` (same list object).
    target : None
        Initialised to ``None``; not used by this class.
    """

    def __init__(self, datapath, datasets=None):
        self.datapath = datapath
        self.data = None
        self.target = None
        self._load_data(datasets)

    def _load_data(self, datasets: list = None):
        """Read the requested datasets and build the stacked array.

        Parameters
        ----------
        datasets : list of str, optional
            Names to load; ``None`` selects every top-level dataset.

        Raises
        ------
        ValueError
            If no dataset is selected, or if the blocks cannot be
            concatenated along axis 0 (raised by ``numpy.concatenate``).
        """
        with h5py.File(self.datapath, 'r') as f:
            if datasets is None:
                # Keep only top-level datasets; groups are skipped.
                datasets = [name for name in f.keys()
                            if isinstance(f[name], h5py.Dataset)]
            # Copy so that later changes to the caller's list cannot
            # desynchronise the row bookkeeping below.
            self.datasets = list(datasets)

            # Materialise the arrays while the file is still open.
            blocks = [f[name][:] for name in self.datasets]

        if not blocks:
            raise ValueError(
                f"no datasets selected from {self.datapath!r}")

        # A 1-D dataset becomes a single row so every block is at least 2-D.
        blocks = [block[None, :] if block.ndim == 1 else block
                  for block in blocks]

        self.lenghts = [block.shape[0] for block in blocks]
        self.lengths = self.lenghts  # correctly spelled alias

        # Record the half-open row range [start, end) of each dataset.
        self.start_indexes = {}
        self.end_indexes = {}
        offset = 0
        for name, n_rows in zip(self.datasets, self.lenghts):
            self.start_indexes[name] = offset
            offset += n_rows
            self.end_indexes[name] = offset

        # Concatenate all blocks into a single array.
        self.data = np.concatenate(blocks, axis=0)

    def _slice(self, index):
        """Return the row slice of ``data`` occupied by dataset ``index``.

        Raises ``KeyError`` if ``index`` is not a loaded dataset name.
        """
        return slice(self.start_indexes[index], self.end_indexes[index], None)

    def __getitem__(self, index):
        """Index the stacked array by dataset name and/or numpy indices.

        Supported forms:

        * ``ds['a']`` -- rows of dataset ``'a'``.
        * ``ds[['a', 'b']]`` -- rows of several datasets, concatenated along
          axis 0 in the order given.
        * ``ds['a', cols]`` / ``ds[['a', 'b'], cols]`` -- same, with further
          numpy indices applied to the remaining axes.
        * anything else (ints, slices, integer lists, numpy arrays, tuples of
          those) is forwarded to ``self.data``.

        A name found inside a tuple is replaced by its row slice. A list of
        names is only expanded in the first position; elsewhere lists are
        handed to numpy unchanged.
        """
        if isinstance(index, str):
            return self.data[self._slice(index)]

        if isinstance(index, list):
            # Treat a bare list as a row selection over all columns.
            index = (index, slice(None, None, None))
        elif isinstance(index, np.ndarray) or not isinstance(index, Iterable):
            # numpy arrays are iterable, but must reach numpy intact: turning
            # them into a tuple would change row selection into a
            # one-index-per-axis lookup.
            return self.data[index]

        parts = list(index)
        for pos, part in enumerate(parts):
            if isinstance(part, str):
                parts[pos] = self._slice(part)
            elif (pos == 0 and isinstance(part, list)
                  and any(isinstance(item, str) for item in part)):
                # Resolve each entry separately (keeping the trailing
                # indices) and stack the results along axis 0.
                rest = parts[1:]
                return np.concatenate(
                    [self[(item, *rest)] for item in part])
            # Any other list is left for numpy's own integer/boolean
            # indexing; expanding it here used to recurse without end.
        return self.data[tuple(parts)]


In [3]:
# Location of the Q1D HDF5 file. Set the Q1D_HDF5_PATH environment variable to
# run the notebook on another machine; otherwise the original path is used.
LF_DATAPATH = os.environ.get(
    'Q1D_HDF5_PATH',
    '/home/ppiper/Dropbox/local/ihtc_nozzle/data/doe_lhs_multirun_N200/Q1D.hdf5')

# Datasets to load, in the order they are stacked along axis 0.
LF_DATASETS = ['M.txt', 'T.txt', 'T0in', 'id', 'p.txt', 'p0in', 'thickness']

lf_dataset = HDF5Dataset(datapath=LF_DATAPATH, datasets=LF_DATASETS)


(637, 181)

In [4]:
# Rows of the three named datasets, concatenated along axis 0 in this order.
lf_dataset[['M.txt', 'T.txt', 'T0in']]

array([[7.64132306e-02, 7.66547023e-02, 7.65043356e-02, ...,
        7.67352330e-02, 7.63462537e-02, 7.62746180e-02],
       [7.64249854e-02, 7.66664509e-02, 7.65160881e-02, ...,
        7.67469795e-02, 7.63580102e-02, 7.62863762e-02],
       [7.64596955e-02, 7.67012942e-02, 7.65508484e-02, ...,
        7.67818672e-02, 7.63926834e-02, 7.63210099e-02],
       ...,
       [2.71581173e+02, 3.98954498e+02, 3.15566294e+02, ...,
        4.52974906e+02, 2.40809437e+02, 2.08521878e+02],
       [2.70379469e+02, 3.97309556e+02, 3.14205914e+02, ...,
        4.51152724e+02, 2.39723706e+02, 2.07563008e+02],
       [5.87101323e+02, 8.18718119e+02, 6.68921211e+02, ...,
        9.13588931e+02, 5.28150863e+02, 4.64457315e+02]])

In [5]:
# SVD of the stacked rows. np.linalg.svd returns (U, S, Vh), so `v` holds the
# already (conjugate-)transposed right singular vectors.
u, s, v = np.linalg.svd(lf_dataset[['M.txt', 'T.txt', 'T0in']])

In [6]:
# Shape of the left singular vector matrix returned by the SVD above.
u.shape

(423, 423)

(211, 423)