In [11]:
from pathlib import Path
import h5py
import numpy as np
import tables  # enables reading BLOSC compression

## Full session loading

In [12]:
def load_split_eeg(root: Path) -> np.ndarray:
    """Load a full session of split EEG data.
    
    :param root: rhino root path
    :returns: full session EEG data
    
    """
    path = root.joinpath("protocols", "r1",
                         "subjects", "R1111M",
                         "experiments", "FR1",
                         "sessions", "0",
                         "ephys", "current_processed", "noreref")
    files = sorted(path.glob("*"))
    return np.array([np.fromfile(str(infile), dtype="int16") for infile in files])

In [13]:
def load_hdf5_eeg(path: Path) -> np.ndarray:
    """Load a full session of HDF5 EEG data.
    
    :param path: path to HDF5 file
    :returns: full session EEG data
    
    """
    with h5py.File(str(path), "r") as hfile:
        eeg = hfile["eeg"]
        arr = np.empty(eeg.shape, dtype=eeg.dtype)
        eeg.read_direct(arr)
        return arr
        # return hfile["eeg"][0, ...]

In [14]:
%%timeit
data = load_split_eeg(Path("/Users/depalati/mnt/rhino"))

The slowest run took 53.48 times longer than the fastest. This could mean that an intermediate result is being cached.
4.11 s ± 8.81 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [17]:
%%timeit
# BLOSC compression (requires PyTables)
data = load_hdf5_eeg(Path("/Users/depalati/rhino_home/scratch/eeg_timeseries_blosc.h5"))

The slowest run took 17.09 times longer than the fastest. This could mean that an intermediate result is being cached.
1.2 s ± 2.03 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [18]:
%%timeit
# no compression
data = load_hdf5_eeg(Path("/Users/depalati/rhino_home/scratch/eeg_timeseries_no_compression.h5"))

The slowest run took 13.83 times longer than the fastest. This could mean that an intermediate result is being cached.
815 ms ± 1.28 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [19]:
%%timeit
# no chunking
data = load_hdf5_eeg(Path("/Users/depalati/rhino_home/scratch/no_chunks.h5"))

186 ms ± 13.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Loading epochs

In [73]:
def load_split_eeg_epochs(root: Path) -> np.ndarray:
    """Load epochs from split EEG data.
    
    :param root:
    :returns: n_events x n_channels x time array
    
    """
    path = root.joinpath("protocols", "r1",
                         "subjects", "R1111M",
                         "experiments", "FR1",
                         "sessions", "0",
                         "ephys", "current_processed", "noreref")
    files = sorted(path.glob("*"))
    mmaps = [np.memmap(f, dtype="int16") for f in files]
    epochs = np.array([(start, start + 500) for start in range(0, 1623000, 1000)])
    n_epochs = epochs.shape[0]

    # shape: (channels, time, epochs)
    data = [
        [mmap[epochs[i, 0]:epochs[i, 1]] for i in range(n_epochs)]
        for mmap in mmaps
    ]
    return np.transpose(data, (2, 0, 1))

In [74]:
%%time
data = load_split_eeg_epochs(Path("/Users/depalati/mnt/rhino"))

CPU times: user 1.19 s, sys: 261 ms, total: 1.46 s
Wall time: 1.47 s


In [143]:
def load_hdf5_eeg_epochs(path) -> np.ndarray:
    """Load epochs from HDF5 EEG data.
    
    :param path: path to HDF5 file
    :returns: EEG data
    
    """
    def epochs_gen():
        for start in range(0, 1623000, 5000):
            yield range(start, start + 500)
    
    epochs = epochs_gen()
    
    with h5py.File(path, "r") as hfile:
        dset = hfile["eeg"]
        arr = np.empty((325, dset.shape[1], 500), dtype=dset.dtype)
        for i, epoch in enumerate(epochs):
            dset.read_direct(arr, np.s_[0, :, epoch], np.s_[i])
        return arr
        
        return [
            hfile["eeg"][0, :, epoch]
            for epoch in epochs
        ]

In [144]:
%%time
data = load_hdf5_eeg_epochs("/Users/depalati/rhino_home/scratch/eeg_timeseries_no_compression.h5")

CPU times: user 3.3 s, sys: 434 ms, total: 3.73 s
Wall time: 3.74 s


In [140]:
data.shape

(325, 100, 500)

In [105]:
%%time
data = load_hdf5_eeg_epochs("/Users/depalati/rhino_home/scratch/eeg_timeseries_blosc.h5")

CPU times: user 4.48 s, sys: 1.52 s, total: 6 s
Wall time: 26.7 s


In [106]:
%%time
data = load_hdf5_eeg_epochs("/Users/depalati/rhino_home/scratch/eeg_timeseries_gzip_9.h5")

CPU times: user 42.1 s, sys: 1.59 s, total: 43.7 s
Wall time: 1min 2s


In [110]:
hfile = h5py.File("/Users/depalati/rhino_home/scratch/eeg_timeseries_no_compression.h5", "r")

In [121]:
chunks = list([range(0, 100), range(200, 300)])

In [124]:
[hfile["eeg"][0, 0, chunk] for chunk in chunks]

[array([3596, 3595, 3596, 3599, 3601, 3596, 3597, 3601, 3605, 3614, 3624,
        3629, 3634, 3636, 3646, 3656, 3661, 3661, 3667, 3674, 3681, 3693,
        3695, 3701, 3708, 3710, 3723, 3732, 3736, 3734, 3730, 3728, 3727,
        3727, 3727, 3729, 3726, 3724, 3722, 3717, 3711, 3710, 3697, 3689,
        3683, 3677, 3670, 3670, 3666, 3662, 3659, 3658, 3653, 3649, 3635,
        3626, 3625, 3618, 3603, 3594, 3589, 3582, 3577, 3576, 3566, 3559,
        3552, 3548, 3550, 3550, 3554, 3559, 3563, 3569, 3566, 3573, 3580,
        3582, 3581, 3583, 3588, 3588, 3590, 3596, 3595, 3594, 3595, 3598,
        3615, 3622, 3631, 3629, 3629, 3633, 3637, 3634, 3623, 3611, 3607,
        3605], dtype=int16),
 array([3507, 3511, 3520, 3521, 3518, 3514, 3513, 3508, 3508, 3505, 3511,
        3504, 3498, 3497, 3493, 3487, 3480, 3476, 3478, 3479, 3479, 3489,
        3489, 3487, 3488, 3497, 3503, 3511, 3513, 3511, 3511, 3504, 3502,
        3498, 3498, 3498, 3504, 3503, 3506, 3498, 3498, 3497, 3500, 3502,
        3