In [1]:
import numpy as np
import os
import h5py
from glob import glob

# Set to False when the notebook runs off-cluster and reaches the data
# through a local FUSE mount instead of the native file system.
run_on_rhino = True

# Prefix prepended to every data path; empty when paths resolve natively.
rhino_mount = '' if run_on_rhino else '/home/ctw/fusemounts/rhino'

# Experiment folder to process (the alternative used so far is 'RAM_FR').
expfolder = 'RAM_catFR'


In [2]:
# Root of the analysis output tree, relative to the (optional) mount prefix.
output_root = '/scratch/cweidema/bootcamp/tmpdat'

# Directory containing the per-session power files.
sesspow_path = ''.join([
    rhino_mount, output_root, '/RAM/', expfolder, '/', expfolder,
    '1_power/encoding/hdf5_files_sess/'])

# Directory containing the per-subject power files.
subjpow_path = ''.join([
    rhino_mount, output_root, '/RAM/', expfolder, '/', expfolder,
    '1_power/encoding/hdf5_files_subj/'])


In [7]:
# File extension identifying the power files inside `sesspow_path`.
pow_suffix = '.hdf5'

# Sorted array of every file in the session directory with that extension.
sess_pattern = os.path.join(sesspow_path, '*' + pow_suffix)
sesspaths = np.sort(glob(sess_pattern))

# Subjects already handled during this kernel session.
subj_done = []

In [8]:
# Merge the per-session power files of each subject into one per-subject
# HDF5 file.  The merged 'data' set concatenates all sessions along axis 2
# (labelled 'events' below); axes 0, 1 and 3 are labelled 'channels',
# 'freqs' and 'time'.  Subjects that already have an output file, or that
# were already handled in this kernel session, are skipped.

# Attributes of each session's 'data' set that must be identical across
# sessions; they are copied onto the merged 'data' set.
attrs_to_copy = ['buf', 'subject', 'samplerate', 'exp', 'period', 'mirror']
# Top-level datasets that must be identical across sessions ('time' is
# allowed a small numerical tolerance); they are copied once.
keys_to_copy = ['channels', 'channels_orig', 'time', 'freqs']

for sesspath in sesspaths:
    filename = os.path.split(sesspath)[-1]
    # File names look like 'pow_<subject>_<session>_allleads.hdf5'.
    subj = filename.split('_')[1]
    if subj in subj_done:
        continue
    subjpowfile = subjpow_path+subj+'_pow.hdf5'
    if os.path.exists(subjpowfile):
        continue
    print('Processing ', subj)
    subj_done.append(subj)
    subj_files = sorted(glob(os.path.join(
        sesspow_path, 'pow_'+subj+'_[0-9]_allleads.hdf5')))
    # there's at least one case of double digit session number ('10'):
    subj_files.extend(sorted(glob(os.path.join(
        sesspow_path, 'pow_'+subj+'_[0-9][0-9]_allleads.hdf5'))))
    if not subj_files:
        # Nothing matched the expected naming scheme; without this guard the
        # merged shape would be undefined (or taken from a previous subject).
        print('No session files found for', subj)
        continue

    # ---- Pass 1: validate the sessions and collect metadata in memory ----
    total_events = 0
    subjdat_attrs = {}
    subjdat_keys = {}
    merged_evs = {}
    data_shape = None
    good_files = True
    for s, subjsesspath in enumerate(subj_files):
        print(subjsesspath)
        with h5py.File(subjsesspath, 'r') as sessdat:
            try:
                total_events += len(sessdat['events/type'])
            except KeyError:
                # Leaving the `with` block closes the file before the
                # session files are removed below.
                good_files = False
                break
            sess_attrs = sessdat['data'].attrs
            data_shape = sessdat['data'].shape
            if s == 0:
                for attr in attrs_to_copy:
                    subjdat_attrs[attr] = sess_attrs[attr]
                subjdat_attrs['session'] = sess_attrs['session']
                # Read everything into memory (`[()]`) so that nothing
                # refers to the session file once it is closed.
                for key in keys_to_copy:
                    subjdat_keys[key] = sessdat[key][()]
                for evkey in sessdat['events']:
                    merged_evs[evkey] = sessdat['events'][evkey][()]
            else:
                for attr in attrs_to_copy:
                    if np.any(subjdat_attrs[attr] != sess_attrs[attr]):
                        raise ValueError(
                            'Attribute missmatch: '+str(subjdat_attrs[attr])+'\n'+
                            str(sess_attrs[attr])+'\n'+subjsesspath+'\n'+
                            str(s))
                # Record the session id once per session (not once per
                # attribute).  NOTE(review): the collected 'session' values
                # are not written to the merged file.
                subjdat_attrs['session'] = np.hstack(
                    [subjdat_attrs['session'], sess_attrs['session']])
                for key in keys_to_copy:
                    ref_val = subjdat_keys[key]
                    cur_val = sessdat[key][()]
                    if np.array_equal(ref_val, cur_val):
                        continue
                    # Time axes may differ by rounding error only.
                    if (key == 'time' and ref_val.shape == cur_val.shape
                            and np.allclose(ref_val, cur_val,
                                            rtol=0.001, atol=0.001)):
                        continue
                    raise ValueError(
                        'Key missmatch: '+str(ref_val)+'\n'+
                        str(cur_val)+'\n'+subjsesspath+'\n'+
                        str(s))
                for evkey in sessdat['events']:
                    if evkey not in merged_evs:
                        # Field absent from the first session (or dropped
                        # earlier): ignore it.
                        continue
                    try:
                        merged_evs[evkey] = np.hstack(
                            [merged_evs[evkey], sessdat['events'][evkey][()]])
                    except ValueError:
                        # 'stim_params' may not be stackable across
                        # sessions; drop it rather than fail.
                        if evkey == 'stim_params':
                            merged_evs.pop(evkey)
                        else:
                            raise
                # NOTE(review): an event field present in the first session
                # but missing from a later one is left shorter than
                # `total_events` -- confirm this cannot happen.
    if not good_files:
        # A session without 'events/type' invalidates the whole subject:
        # all of its session files are deleted and the subject is skipped.
        print('Error, deleting', subj_files)
        for sf in subj_files:
            os.remove(sf)
        continue

    # ---- Pass 2: write the merged per-subject file ----
    merged_shape = list(data_shape)
    merged_shape[2] = total_events
    # 'w-' refuses to overwrite an existing file.
    subjdat = h5py.File(subjpowfile, 'w-', libver='latest')
    try:
        with subjdat:
            # NOTE(review): no dtype is passed, so h5py's default dtype is
            # used -- confirm it matches the session data's dtype.
            subjdat_data = subjdat.create_dataset('data', merged_shape)
            for key in keys_to_copy:
                subjdat.create_dataset(key, data=subjdat_keys[key])
            ev_indx = 0
            for s, subjsesspath in enumerate(subj_files):
                with h5py.File(subjsesspath, 'r') as sessdat:
                    ev_len = len(sessdat['events/type'])
                    subjdat_data[:, :, ev_indx:ev_indx+ev_len, :] = (
                        sessdat['data'][...])
                    ev_indx += ev_len
                    if s == 0:
                        # Location data is taken from the first session.
                        if 'tal_struct' in sessdat:
                            subjdat.copy(sessdat['tal_struct'], subjdat)
                        elif 'h5info' in sessdat:
                            subjdat.copy(sessdat['h5info'], subjdat)
                        else:
                            raise ValueError('Missing location data')
            subjdat.create_group('events')
            for evkey in merged_evs:
                subjdat['events/'+evkey] = merged_evs[evkey]
            for attr in attrs_to_copy:
                subjdat_data.attrs[attr] = subjdat_attrs[attr]

            # Label the axes and attach the matching datasets as scales.
            for axis, label in ((0, 'channels'), (1, 'freqs'), (3, 'time')):
                subjdat_data.dims[axis].label = label
                subjdat_data.dims.create_scale(subjdat[label], label)
                subjdat_data.dims[axis].attach_scale(subjdat[label])
            subjdat_data.dims[2].label = 'events'
            for evkey in subjdat['events']:
                subjdat_data.dims.create_scale(subjdat['events/'+evkey], evkey)
                subjdat_data.dims[2].attach_scale(subjdat['events/'+evkey])
    except BaseException:
        # A partially written file would be mistaken for a finished subject
        # on the next run (see the os.path.exists check above), so remove it.
        if os.path.exists(subjpowfile):
            os.remove(subjpowfile)
        raise
    # make file read only to avoid accidental loss:
    os.chmod(subjpowfile, 0o444)



Processing  R1375C
/scratch/cweidema/bootcamp/tmpdat/RAM/RAM_catFR/RAM_catFR1_power/encoding/hdf5_files_sess/pow_R1375C_1_allleads.hdf5
/scratch/cweidema/bootcamp/tmpdat/RAM/RAM_catFR/RAM_catFR1_power/encoding/hdf5_files_sess/pow_R1375C_2_allleads.hdf5
/scratch/cweidema/bootcamp/tmpdat/RAM/RAM_catFR/RAM_catFR1_power/encoding/hdf5_files_sess/pow_R1375C_3_allleads.hdf5
/scratch/cweidema/bootcamp/tmpdat/RAM/RAM_catFR/RAM_catFR1_power/encoding/hdf5_files_sess/pow_R1375C_4_allleads.hdf5
