In [1]:
from pathlib import Path
import h5py
import numpy as np
import tables  # enables reading BLOSC compression

  from ._conv import register_converters as _register_converters


In [2]:
def load_split_eeg(root: Path, subject: str = "R1111M",
                   experiment: str = "FR1", session: int = 0) -> np.ndarray:
    """Load a full session of split EEG data.

    Every regular file found directly inside the session's ``noreref``
    directory is read as a flat buffer of ``int16`` samples. The per-file
    arrays are combined, in sorted path order, into a single array (2-D when
    all files hold the same number of samples).

    :param root: rhino root path
    :param subject: subject code used to build the directory path
    :param experiment: experiment name used to build the directory path
    :param session: session number used to build the directory path
    :returns: full session EEG data, one entry per file
    :raises FileNotFoundError: if the session directory is missing or
        contains no regular files

    """
    path = root.joinpath("protocols", "r1",
                         "subjects", subject,
                         "experiments", experiment,
                         "sessions", str(session),
                         "ephys", "current_processed", "noreref")

    # glob("*") also yields subdirectories, which np.fromfile cannot read,
    # so keep regular files only.
    files = sorted(infile for infile in path.glob("*") if infile.is_file())

    # Fail loudly instead of returning an empty array: an unmounted or wrong
    # root would otherwise look like an extremely fast (and meaningless) load.
    if not files:
        raise FileNotFoundError("no split EEG files found in {}".format(path))

    return np.array([np.fromfile(str(infile), dtype="int16") for infile in files])

In [3]:
def load_hdf5_eeg(path: Path) -> np.ndarray:
    """Read one session of EEG data out of an HDF5 file.

    The file is opened read-only and index 0 along the first axis of its
    ``"eeg"`` dataset is returned; all remaining axes are kept whole.

    :param path: path to HDF5 file
    :returns: full session EEG data

    """
    filename = str(path)
    with h5py.File(filename, "r") as handle:
        dataset = handle["eeg"]
        # Index while the file is still open; the selection is what is
        # handed back to the caller once the context manager closes it.
        first_entry = dataset[0, ...]
    return first_entry

In [4]:
%%timeit
# Baseline: split EEG files loaded through load_split_eeg.
# NOTE(review): the root looks like a local mount of rhino, so this timing
# presumably includes mount I/O overhead — confirm before comparing results.
data = load_split_eeg(Path("/Users/depalati/mnt/rhino"))

450 ms ± 1.15 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [5]:
%%timeit
# HDF5 file stored with GZIP compression level 9, shuffle not set.
# Times a full load_hdf5_eeg call: open the file, read the "eeg" data, close.
data = load_hdf5_eeg(Path("/Users/depalati/rhino_home/scratch/eeg_timeseries_gzip_9.h5"))

1.79 s ± 9.88 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [6]:
%%timeit
# HDF5 file stored with GZIP compression level 9, shuffle=True.
# Times a full load_hdf5_eeg call: open the file, read the "eeg" data, close.
data = load_hdf5_eeg(Path("/Users/depalati/rhino_home/scratch/eeg_timeseries_gzip_9_shuffle.h5"))

1.3 s ± 23.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [7]:
%%timeit
# HDF5 file stored with BLOSC compression (requires PyTables: the
# `import tables` in the import cell is what enables reading BLOSC data).
# Times a full load_hdf5_eeg call: open the file, read the "eeg" data, close.
data = load_hdf5_eeg(Path("/Users/depalati/rhino_home/scratch/eeg_timeseries_blosc.h5"))

367 ms ± 12.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
%%timeit
# HDF5 file stored with no compression.
# Times a full load_hdf5_eeg call: open the file, read the "eeg" data, close.
data = load_hdf5_eeg(Path("/Users/depalati/rhino_home/scratch/eeg_timeseries_no_compression.h5"))

293 ms ± 7.96 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
