# Loading MDA files into HDF5

Problem: I want to write an implementation of MUSIC in Julia, because I understand how to optimize Julia code much better than I understand how to optimize Python code.

Second problem: The raw data is stored in MDA files, a format specific to MountainSort, and I doubt there is a Julia library for reading that file format.

Solution: I'd like to convert the data to HDF5 to make analysis in Julia easier. I think one dataset per tetrode makes sense.

In [1]:
import pyms.mlpy as mlpy
import numpy as np
import h5py

In [2]:
# Recording session to convert. Every input path below is derived from this
# single name, so switching sessions only requires editing one line.
file_prefix = '20170920_remy_02_r1'
# Root folder holding the raw recordings (machine-specific absolute path;
# adjust it when running on another machine).
data_root = '/media/trevortknguyen/a8a15e97-b12c-4825-8482-0be8f89aac96'
# Directory containing the per-tetrode `<file_prefix>.nt<N>.mda` files.
mda_dir = f'{data_root}/{file_prefix}/{file_prefix}.mda'
# Destination HDF5 file (path is relative to the current working directory).
hdf5_fname = '../remy_raw.hdf5'

In [3]:
def get_tetrode_file(mda_dir, file_prefix, ntrodeid):
    """Build the path of the MDA file that holds one tetrode's recording.

    The path has the form ``<mda_dir>/<file_prefix>.nt<ntrodeid>.mda``.
    No filesystem access happens here; the path is only assembled.

    Parameters
    ----------
    mda_dir : directory that contains the per-tetrode MDA files.
    file_prefix : common prefix of the MDA file names.
    ntrodeid : tetrode identifier inserted after ``.nt``.

    Returns
    -------
    str
        The assembled file path.
    """
    tetrode_fname = '{}.nt{}.mda'.format(file_prefix, ntrodeid)
    return '{}/{}'.format(mda_dir, tetrode_fname)

def read_mda_file(mda_dir, filename):
    """Read the MDA file named ``filename`` from the directory ``mda_dir``.

    Parameters
    ----------
    mda_dir : directory that contains the file.
    filename : name of the MDA file inside ``mda_dir``.

    Returns
    -------
    Whatever ``mlpy.readmda`` returns for ``<mda_dir>/<filename>``.
    """
    return mlpy.readmda('{}/{}'.format(mda_dir, filename))

In [4]:
# Read a single tetrode (nt1) to inspect what the MDA reader returns before
# converting the whole session.
mda_fname = get_tetrode_file(mda_dir=mda_dir, file_prefix=file_prefix, ntrodeid=1)
neural_data = mlpy.readmda(mda_fname)

In [5]:
# Check the array layout returned by the reader. The recorded output shows
# 4 rows by ~37 million columns -- presumably (channels, samples); confirm.
neural_data.shape

(4, 36975610)

In [18]:
# Make sure the output file exists before any dataset is written to it.
# Mode 'a' creates the file when it is missing and leaves an existing file
# untouched, so re-running this cell (e.g. Restart & Run All) cannot wipe
# tetrodes that were already converted. To start from scratch, delete the
# file on disk instead.
with h5py.File(hdf5_fname, 'a') as f:
    # Prints nothing for a freshly created file; otherwise lists its entries.
    f.visit(print)

In [46]:
# Print the name of each object stored in the HDF5 file (opened read-only).
with h5py.File(hdf5_fname, mode='r') as h5_file:
    h5_file.visit(print)

tetrode_1
tetrode_10
tetrode_11
tetrode_12
tetrode_13
tetrode_14
tetrode_15
tetrode_16
tetrode_17
tetrode_18
tetrode_19
tetrode_2
tetrode_20
tetrode_21
tetrode_22
tetrode_23
tetrode_24
tetrode_25
tetrode_26
tetrode_27
tetrode_28
tetrode_29
tetrode_3
tetrode_30
tetrode_31
tetrode_32
tetrode_4
tetrode_5
tetrode_6
tetrode_7
tetrode_8
tetrode_9


In [37]:
def load_data(hdf5_fname, mda_dir, file_prefix, tetrode_id,
              dtype='i2', overwrite=False):
    """Copy one tetrode's MDA recording into an HDF5 dataset.

    The array returned by ``mlpy.readmda`` is transposed and written to the
    dataset ``tetrode_<tetrode_id>`` at the root of ``hdf5_fname``.

    Parameters
    ----------
    hdf5_fname : path of the HDF5 file; it is created if it does not exist.
    mda_dir : directory that contains the per-tetrode MDA files.
    file_prefix : common prefix of the MDA file names.
    tetrode_id : tetrode identifier, used in both the MDA file name and the
        dataset name.
    dtype : storage dtype of the dataset. Defaults to ``'i2'`` (16-bit
        signed integer), the value that used to be hard-coded.
    overwrite : when True, an existing dataset with the same name is
        replaced; when False (default) a ``ValueError`` is raised instead.

    Raises
    ------
    ValueError
        If the dataset already exists and ``overwrite`` is False. The check
        runs before the MDA file is read, so the failure is immediate.
    """
    ds_name = f'tetrode_{tetrode_id}'

    # Mode 'a' opens an existing file read/write and creates a missing one,
    # so this function does not rely on a separate "create the file" step.
    with h5py.File(hdf5_fname, 'a') as f:
        already_present = ds_name in f
        if already_present and not overwrite:
            raise ValueError(
                f"dataset '{ds_name}' already exists in '{hdf5_fname}'; "
                "pass overwrite=True to replace it"
            )

        mda_fname = get_tetrode_file(mda_dir, file_prefix, tetrode_id)
        # Store the transpose of what readmda returns.
        # NOTE(review): presumably so the layout is convenient on the Julia
        # side -- confirm.
        neural_data = mlpy.readmda(mda_fname).T

        # Drop the old dataset only after the new data was read successfully,
        # so a failed read never destroys previously converted data.
        if already_present:
            del f[ds_name]

        ds = f.create_dataset(ds_name, neural_data.shape, dtype=dtype)
        ds[:] = neural_data

In [44]:
# Convert every tetrode of the session. The file listing recorded in this
# notebook shows tetrode_1 ... tetrode_32, so all 32 are covered here rather
# than only the last partial range that happened to be run.
N_TETRODES = 32

# Collect what is already stored so the cell can be re-run safely: tetrodes
# that were converted earlier are skipped instead of raising "already exists".
# Mode 'a' also creates the file if it is missing.
with h5py.File(hdf5_fname, 'a') as f:
    already_converted = set(f.keys())

for tetrode_id in range(1, N_TETRODES + 1):
    if f'tetrode_{tetrode_id}' in already_converted:
        continue
    load_data(hdf5_fname, mda_dir, file_prefix, tetrode_id)