In [5]:
import os, sys
import glob
import h5py
import numpy as np

sys.path.append('../utils/')

from config import *

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /dartfs-hpc/rc/home/w/f003rjw/.cache/huggingface/token
Login successful


In [7]:
# Path to the `src/` folder of the deniz-readinglistening dataset, built under
# DATASETS_DIR (not defined in this notebook; presumably provided by the
# `from config import *` star-import above — verify).
src_dir = os.path.join(DATASETS_DIR, 'deniz-readinglistening/src/')

In [119]:
dtypes = ['listening', 'reading']
split = ['trn', 'val']

# Quick sanity check on a single file: subject 01, first modality, first split.
response_pattern = os.path.join(
    src_dir,
    f'responses/subject01*{dtypes[0]}*{split[0]}*',
)
test_data = sorted(glob.glob(response_pattern))
fname = test_data[0]

In [121]:
key = 'story_01'

# Read either one named dataset or, when `key` is None, every dataset in the file.
func_data = {}
with h5py.File(fname) as hf:
    if key is not None:
        func_data[key] = hf[key][()]
    else:
        for k in hf.keys():
            print("{} will be loaded".format(k))
            func_data[k] = hf[k][()]

## Organize into BIDS format

In [191]:
import scipy
import scipy.sparse  # make sure the `sparse` submodule is actually loaded


def _read_csr(hf, prefix):
    """Build a CSR matrix from the `<prefix>_{data,indices,indptr,shape}` datasets of an open HDF5 file."""
    # `[()]` materialises each dataset as an in-memory array, so the returned
    # matrix does not hold on to handles of a file that is about to be closed.
    parts = tuple(
        hf['%s_%s' % (prefix, field)][()]
        for field in ('data', 'indices', 'indptr')
    )
    shape = tuple(int(dim) for dim in hf['%s_shape' % prefix][()])
    return scipy.sparse.csr_matrix(parts, shape=shape)


def load_sparse_array(fname, varname):
    """Load a numpy sparse array from an hdf file

    Parameters
    ----------
    fname: string
        file name containing array to be loaded
    varname: string
        name of variable to be loaded

    Returns
    -------
    sparsemat: scipy.sparse.csr_matrix
        The matrix stored under ``varname``. For
        ``varname == 'voxel_to_fsaverage'``, when no such variable is stored,
        the ``vox_to_fsavg_left`` and ``vox_to_fsavg_right`` matrices are
        loaded instead and stacked row-wise (left rows first).

    Raises
    ------
    KeyError
        If the datasets for ``varname`` (or, for ``'voxel_to_fsaverage'``,
        the per-hemisphere fallback datasets) are missing from the file.

    Notes
    -----
    This function relies on variables being stored with specific naming
    conventions (``<varname>_data``, ``<varname>_indices``,
    ``<varname>_indptr``, ``<varname>_shape``), so cannot be used to load
    arbitrary sparse arrays.

    By Mark Lescroart

    """
    with h5py.File(fname, 'r') as hf:
        try:
            return _read_csr(hf, varname)
        except KeyError:
            # Only the fsaverage mapper has a per-hemisphere fallback; any
            # other missing variable is a genuine error and must surface.
            if varname != 'voxel_to_fsaverage':
                raise

            lh_mat = _read_csr(hf, 'vox_to_fsavg_left')
            rh_mat = _read_csr(hf, 'vox_to_fsavg_right')

            # Same result as hstack([lh.T, rh.T]).T: left rows on top of right rows.
            return scipy.sparse.vstack([lh_mat, rh_mat], format='csr')

In [192]:
from itertools import product
from scipy import stats, sparse

src_dir = os.path.join(DATASETS_DIR, 'deniz-readinglistening/src/')
derivatives_dir = os.path.join(DATASETS_DIR, 'deniz-readinglistening/derivatives/dark-matter-preproc/')
sub_list = [i for i in range(1,9)]
splits = ['trn', 'val']
dtypes = ['listening', 'reading']

# Story names and the number of TRs kept for each; the two lists are parallel.
TASK_INFO = {
    'tasks': [
        'alternateithicatom', 'avatar', 'legacy', 'odetostepfather', 'souls',
        'howtodraw', 'myfirstdaywiththeyankees', 'naked', 'undertheinfluence', 'life',
        'exorcism', 'fromboyhoodtofatherhood', 'sloth', 'stagefright', 'tildeath',
        'adollshouse', 'adventuresinsayingyes', 'buck', 'haveyoumethimyet', 'inamoment', 'theclosetthatateeverything',
        'eyespy', 'hangtime', 'itsabox', 'swimmingwithastronauts', 'thatthingonmyarm', 'wheretheressmoke'
    ],
    'n_trs': [
        354, 378, 410, 414, 360,
        365, 368, 433, 314, 440,
        478, 357, 448, 304, 334,
        252, 402, 343, 507, 215, 325,
        389, 334, 365, 395, 444, 300
    ]
}

# HDF5 dataset key -> story name used in the output file names.
story_mappings = {
    'story_01': 'alternateithicatom',
    'story_02': 'avatar',
    'story_03': 'howtodraw',
    'story_04': 'legacy',
    'story_05': 'life',
    'story_06': 'myfirstdaywiththeyankees',
    'story_07': 'naked',
    'story_08': 'odetostepfather',
    'story_09': 'souls',
    'story_10': 'undertheinfluence',
    'story_11': 'wheretheressmoke',
}

# Number of TRs dropped from the start of every run before z-scoring.
trim_trs = 5

# The validation files hold repeated runs of this one story; it is skipped in
# the training files.
val_story_key = 'story_11'
val_story_name = story_mappings[val_story_key]


def _story_slice(story_name):
    """Return the TR slice for `story_name`: skip `trim_trs`, then keep its `n_trs` from TASK_INFO."""
    story_index = TASK_INFO['tasks'].index(story_name)
    n_trs = TASK_INFO['n_trs'][story_index]
    return slice(trim_trs, trim_trs + n_trs)


def _first_match(pattern):
    """Return the first (sorted) path matching `pattern`, or raise FileNotFoundError."""
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise FileNotFoundError(f'No file matches {pattern!r}')
    return matches[0]


for sub_num in sub_list:

    sub = f'sub-0{sub_num}'
    sub_func_dir = os.path.join(derivatives_dir, sub, 'func')
    sub_mapper_dir = os.path.join(derivatives_dir, sub, 'mappers')

    os.makedirs(sub_func_dir, exist_ok=True)
    os.makedirs(sub_mapper_dir, exist_ok=True)

    # load and save the mapper file to fsaverage
    sub_mapper_fn = _first_match(os.path.join(src_dir, f'mappers/subject0{sub_num}*'))
    sub_fsaverage_mapper = load_sparse_array(sub_mapper_fn, 'voxel_to_fsaverage')

    mapper_out_fn = os.path.join(sub_mapper_dir, f'{sub}_mapper-fsaverage.npz')
    sparse.save_npz(mapper_out_fn, sub_fsaverage_mapper)

    print (sub)

    # go through datatype and split
    for dtype, split in product(dtypes, splits):
        ds_fn = _first_match(os.path.join(src_dir, f'responses/subject0{sub_num}*{dtype}*{split}*'))

        # get the current file
        with h5py.File(ds_fn, 'r') as hf:

            if split == 'val':
                story_name = val_story_name
                ds = hf[val_story_key][()].squeeze()

                # Compute the slice for the validation story itself. Previously
                # this branch reused whatever `story_slice` the last training
                # story had left behind, i.e. the wrong number of TRs.
                story_slice = _story_slice(story_name)

                # one output file per repeated validation run
                for i, ds_run in enumerate(ds):

                    sub_fn = f'{sub}_ses-{dtype}_task-{story_name}_run-{i+1}_space-orig_desc-clean_trimmed-zscored.npy'
                    sub_fn = os.path.join(sub_func_dir, sub_fn)

                    # slice off the first `trim_trs` TRs, keep the story's TRs, then zscore over time
                    ds_run = ds_run[story_slice, :]
                    ds_run = stats.zscore(ds_run, axis=0)

                    np.save(sub_fn, ds_run)
            else:
                # go through each story
                for key, story_name in story_mappings.items():
                    # the validation story is handled by the 'val' split
                    if story_name == val_story_name:
                        continue

                    print (story_name)

                    # prepare the slice for zscoring
                    story_slice = _story_slice(story_name)

                    sub_fn = f'{sub}_ses-{dtype}_task-{story_name}_space-orig_desc-clean_trimmed-zscored.npy'
                    sub_fn = os.path.join(sub_func_dir, sub_fn)

                    # read the story once --> slice off the first `trim_trs` TRs and then zscore over time
                    ds = hf[key][()].squeeze()
                    ds = ds[story_slice, :]
                    ds = stats.zscore(ds, axis=0)

                    np.save(sub_fn, ds)

sub-01
alternateithicatom
avatar
howtodraw
legacy
life
myfirstdaywiththeyankees
naked
odetostepfather
souls
undertheinfluence
alternateithicatom
avatar
howtodraw
legacy
life
myfirstdaywiththeyankees
naked
odetostepfather
souls
undertheinfluence
sub-02
alternateithicatom
avatar
howtodraw
legacy
life
myfirstdaywiththeyankees
naked
odetostepfather
souls
undertheinfluence
alternateithicatom
avatar
howtodraw
legacy
life
myfirstdaywiththeyankees
naked
odetostepfather
souls
undertheinfluence
sub-03
alternateithicatom
avatar
howtodraw
legacy
life
myfirstdaywiththeyankees
naked
odetostepfather
souls
undertheinfluence
alternateithicatom
avatar
howtodraw
legacy
life
myfirstdaywiththeyankees
naked
odetostepfather
souls
undertheinfluence
sub-04
alternateithicatom
avatar
howtodraw
legacy
life
myfirstdaywiththeyankees
naked
odetostepfather
souls
undertheinfluence
alternateithicatom
avatar
howtodraw
legacy
life
myfirstdaywiththeyankees
naked
odetostepfather
souls
undertheinfluence
sub-05
alternateithi

In [196]:
# Story names in the insertion order of `story_mappings`.
stories = [story_title for story_title in story_mappings.values()]

In [235]:
# Draw 5 stories without replacement for training; everything else is test.
# NOTE(review): the draw uses NumPy's global RNG with no seed set in this cell,
# so re-running it yields a different split.
train_split = sorted(np.random.choice(stories, size=5, replace=False))
test_split = sorted(set(stories) - set(train_split))

print(train_split)
print(test_split)

['avatar', 'legacy', 'naked', 'souls', 'wheretheressmoke']
['alternateithicatom', 'howtodraw', 'life', 'myfirstdaywiththeyankees', 'odetostepfather', 'undertheinfluence']
