In [1]:
import numpy as np
import os
import pandas as pd
import h5py
from scipy.io import loadmat
import socket
import mne

hostname = socket.gethostname()
p = {}
if hostname == 'syndrome' or hostname == 'zod.psych.nyu.edu' or hostname == 'zod':
    p['datc'] = '/d/DATC/datc/MD_TMS_EEG'
else:
    p['datc'] = '/Users/mrugankdake/Documents/Clayspace/EEG_TMS/datc/MD_TMS_EEG'
p['data'] = p['datc'] + '/data'
p['analysis'] = p['datc'] + '/analysis'
p['EEGfiles'] = p['datc'] + '/EEGfiles'
p['meta'] = p['analysis'] + '/meta_analysis'
p['df_fname'] = os.path.join(p['meta'], 'calib_filtered.csv')
p['master_evoked'] = os.path.join(p['EEGfiles'], 'masterTFR_evoked.mat')
p['master_induced'] = os.path.join(p['EEGfiles'], 'masterTFR_induced.mat')
p['training_data'] = os.path.join(p['EEGfiles'], 'training_data.npy')
p['chan_names'] = os.path.join(p['EEGfiles'], 'chan_names.npy')
p['trl_matrix'] = os.path.join(p['EEGfiles'], 'trl_matrix.npy')

# Load up summary meta-data
summary_df = pd.read_csv(os.path.join(p['analysis'] + '/EEG_TMS_meta_Summary.csv'))
All_metadata = {row['Subject ID']: row for _, row in summary_df.iterrows()}

# Load up behavioral data
df_behav = pd.read_csv(p['df_fname'])

In [2]:
sub_list = [1]
day_list = [1, 2, 3]
conditions = ['pin', 'pout', 'ain', 'aout']

data_dict = {cond: {ss: {dd: [] for dd in day_list} for ss in sub_list} for cond in conditions}
trl_dict = {cond: {ss: {dd: [] for dd in day_list} for ss in sub_list} for cond in conditions}

subject_day_info = []
freq_band = (8, 12)
time_band = (-1, 4.5)
ch_count = None
time_points = None
tr_count = 0

tfr_type = 'evoked'

if os.path.exists(p['training_data']):
    data_matrix = np.load(p['training_data'])
    chan_list = np.load(p['chan_names'])
    trl_matrix = np.load(p['trl_matrix'])
else:
    for cond_idx, cond in enumerate(conditions):
        for ss in sub_list:
            for dd in day_list:
                this_fname = os.path.join(p['EEGfiles'], f'sub{ss:02}', f'day{dd:02}', f'sub{ss:02}_day{dd:02}_TFR_'+tfr_type+'.mat')
                trl_idx_fname = os.path.join(p['EEGfiles'], f'sub{ss:02}', f'day{dd:02}', f'sub{ss:02}_day{dd:02}_trl_idx.mat')
                trl_idx = loadmat(trl_idx_fname)['trl_idx'][0][0][cond_idx]
                trl_idx = np.asarray(trl_idx).T[0]
                trl_dict[cond][ss][dd] = trl_idx
                with h5py.File(this_fname, 'r') as f:
                    # Load up power-spectrum
                    powspctrm = np.array(f['POW'][cond]['powspctrm'])
                    # Load up channel labels
                    ch_refs = f['POW'][cond]['label'][0]
                    ch_labels = []
                    for ref in ch_refs:
                        label_data = f[ref]
                        label = ''.join(chr(c[0]) for c in label_data)
                        ch_labels.append(label)
                    # Load up time and frequency
                    time = np.array(f['POW'][cond]['time'])
                    freqs = np.array(f['POW'][cond]['freq'])
                    
                    # Create order of channel labels first time running this
                    if tr_count == 0:
                        chan_list = ch_labels
                    
                    # Reorder data for channel indices are different from the one in first dataset
                    channel_indices = [chan_list.index(ch) for ch in ch_labels]
                    powspctrm = powspctrm[:, :, channel_indices, :]

                    # Slice along the time of interest -1 to 4.5 seconds
                    time = np.array(f['POW'][cond]['time'])
                    time_band_indices = np.where((time >= time_band[0]) & (time <= time_band[1]))[0]
                    powspctrm = powspctrm[time_band_indices, :, :, :]

                    # Average over the alpha band
                    freqs = np.array(f['POW'][cond]['freq'])
                    freq_band_indices = np.where((freqs >= freq_band[0]) & (freqs <= freq_band[1]))[0]
                    powspctrm_avg = np.mean(powspctrm[:, freq_band_indices, :, :], axis=1)

                    # Reorder X_avg in the shape (trials, channels, time)
                    powspctrm_avg = np.transpose(powspctrm_avg, (2, 1, 0))

                    data_dict[cond][ss][dd] = powspctrm_avg

                    if ch_count is None:
                        ch_count = powspctrm_avg.shape[1]
                    if time_points is None:
                        time_points = powspctrm_avg.shape[2]
                    tr_count += powspctrm_avg.shape[0]

    data_matrix = np.zeros((len(conditions), len(sub_list), len(day_list), tr_count, ch_count, time_points))
    trl_matrix = np.zeros((len(conditions), len(sub_list), len(day_list), tr_count))
    current_trial_index = 0

    for cond_idx, cond in enumerate(conditions):
        for ss_idx, ss in enumerate(sub_list):
            for dd_idx, dd in enumerate(day_list):
                data = data_dict[cond][ss][dd]
                num_trials = data.shape[0]
                data_matrix[cond_idx, ss_idx, dd_idx, current_trial_index:current_trial_index+num_trials, :, :] = data
                current_trial_index += num_trials

                trl_mat = trl_dict[cond][ss][dd]
                data_matrix[cond_idx, ss_idx, dd_idx, :] = trl_mat


    np.save(p['training_data'], data_matrix)
    np.save(p['chan_names'], chan_list)
    np.save(p['trl_matrix'], trl_matrix)

ValueError: could not broadcast input array from shape (100,) into shape (1189,62,146)

In [17]:
data_matrix.shape

(4, 1, 1, 398, 62, 146)

In [14]:
trl_matrix.shape

100


In [None]:
chan_list.shape