# ISC analysis with levels

Remark: this could be the most promising analysis.

Input data: `beta_series_glm25_subj*_nosmooth.mat` (one file per subject)

## Main goals

The BOLD signal contains noise. Here, we calculate correlations between subjects (inter-subject correlation, ISC) to reduce noise and estimate task-relevant signals. We want to find brain regions that display the same levels of activity across subjects. The key predictions are that, in the theory-encoding region, the ISC should:

1. Be highest for same levels, medium for same games, and lowest for random (shuffled) games
2. Increase over levels of the same game

---

## Outline

0. Load data in
1. Preprocess data
2. Reorder BOLD data based on names
3. ISC

In [1]:
import h5py
import warnings
import sys 
if not sys.warnoptions:
    warnings.simplefilter("ignore")
import os 
import glob
import time
from copy import deepcopy
import numpy as np
import pandas as pd 

from nilearn import datasets
from nilearn import surface
from nilearn import plotting
from nilearn.input_data import NiftiMasker, NiftiLabelsMasker
import nibabel as nib

from brainiak import image, io
from brainiak.isc import isc, isfc, permutation_isc
from brainiak.isc import compute_summary_statistic
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d 
import seaborn as sns 
import hdf5storage
import mat73
import pandas as pd

%autosave 30
%matplotlib inline
sns.set(style = 'white', context='talk', font_scale=1, rc={"lines.linewidth": 2})

Autosaving every 30 seconds


# 0. Load in data


In [5]:
data_dir = '/Users/Daphne/Desktop/beta_series/' # local directory that holds the per-subject .mat files
# specify filename (subject 1 only; used below to inspect a single file)
filename = 'beta_series_glm25_subj1_nosmooth.mat'

# sanity check: evaluates to True when the data directory is present on this machine
os.path.exists(data_dir) 

True

In [6]:
# each subject is a separate file, so open a single one to see what it contains
# NOTE: this handle is not closed here; `subject` stays open until it is reassigned or the kernel stops
subject = h5py.File(data_dir+filename,'r')

list(subject.keys()) # these are the variables in the data

['#refs#', 'B', 'Vmask', 'mask', 'names']

In [3]:
def decode_variable(file, item):

    '''
    Converts matlab cell array in the form "<HDF5 object reference>" to an array of strings.

    Every entry of the cell array is a reference to another dataset in the same
    file; that dataset holds the character codes of one string.

    IN

    file: the path + filename of the HDF5 (.mat) file
    item: the variable in the dataset that needs to be decoded

    RETURNS

    readable_data: np array of strings, one per reference, in the order they are stored
    '''

    readable_data = [] # store the decoded strings

    # The context manager closes the file even if decoding fails. Everything is
    # copied out into plain Python strings before the file is closed.
    with h5py.File(file, 'r') as myfile:
        variable = myfile[item] # get the requested variable (an array of references)

        for var in variable:
            for v in var: # dereference each entry to reach the dataset holding the text
                data = myfile[v][:]

                # Flatten first so a column-shaped array of codes is decoded
                # element by element rather than row by row.
                codes = np.asarray(data).ravel()

                # chr() returns the character for the given unicode code point
                readable_data.append(''.join(chr(int(code)) for code in codes))

    return np.array(readable_data)

In [8]:
num_subjects = 8

# The directory is the same for every subject, so it is set once, outside the loop.
data_dir = '/Users/Daphne/Desktop/beta_series/'

# One entry per subject, in subject order.
B_data = []
mask_data = []
Vmask_data = []
levels_data = []

for i in range(num_subjects):
    idx = i+1 # subject numbers in the filenames start at 1
    
    # Build the filename for this subject. An f-string inserts the number in
    # exactly one place, instead of replacing every 'k' in a template string.
    filename = f'beta_series_glm25_subj{idx}_nosmooth.mat'
    
    subject = h5py.File(data_dir+filename,'r') 
    print(f'Get data for subject {idx}')
    # load and save data for respective subject
    # `[()]` reads the whole dataset into memory; it replaces the `.value`
    # attribute, which is no longer available in current h5py releases.
    B = subject['B'][()]
    mask = subject['mask'][()]
    # Vmask is stored as a live h5py handle rather than a copy, so the file is
    # deliberately left open here; closing it would invalidate the handle.
    # NOTE(review): if Vmask is needed later, copy out the required fields and close the file.
    Vmask = subject['Vmask']
    
    # === decode level names ===
    names = decode_variable(data_dir+filename, 'names')
    
    # append to lists
    B_data.append(B)
    mask_data.append(mask)
    Vmask_data.append(Vmask)
    levels_data.append(names)

Get data for subject 1
Get data for subject 2
Get data for subject 3
Get data for subject 4
Get data for subject 5
Get data for subject 6
Get data for subject 7
Get data for subject 8


In [10]:
B_data[0].shape # first subject's beta matrix: voxels x levels

(179595, 54)

In [11]:
mask_data[0].shape # first subject's mask

(79, 95, 79)

In [15]:
levels_data[0].shape # first subject's decoded level names: 54 levels (18x3)

(54,)

In [24]:
levels_data[0][0:10] # peek at the first ten decoded level names of the first subject

array(['Sn(1) vgfmri3_chase_run_1_block_1_instance_1*bf(1)',
       'Sn(1) vgfmri3_chase_run_1_block_1_instance_2*bf(1)',
       'Sn(1) vgfmri3_chase_run_1_block_1_instance_3*bf(1)',
       'Sn(1) vgfmri3_lemmings_run_1_block_2_instance_1*bf(1)',
       'Sn(1) vgfmri3_lemmings_run_1_block_2_instance_2*bf(1)',
       'Sn(1) vgfmri3_lemmings_run_1_block_2_instance_3*bf(1)',
       'Sn(1) vgfmri3_bait_run_1_block_3_instance_1*bf(1)',
       'Sn(1) vgfmri3_bait_run_1_block_3_instance_2*bf(1)',
       'Sn(1) vgfmri3_bait_run_1_block_3_instance_3*bf(1)',
       'Sn(2) vgfmri3_plaqueAttack_run_2_block_1_instance_1*bf(1)'],
      dtype='<U57')

# 1. Preprocessing

1. Clean the level name strings

<font color=red> TODO: There is a mistake in the reordering. Figure out the meaning of the `Sn()` prefix in the level names and reimplement the reordering. </font>

2. Reorder the BOLD data


In [28]:
def cleanup_names(s):
    '''
    Strips the fixed boilerplate fragments from a level name so that the
    names are shorter and easier to rearrange.
    '''
    # The fragments are removed one after another, in this order.
    without_prefix = s.replace('vgfmri3_', '')
    without_suffix = without_prefix.replace('*bf(1)', '')
    return without_suffix.replace('Sn(', '')

In [29]:
def get_in_shape(B_s, names_s):
    
    '''
    Reorders the rows of one subject's data by cleaned-up level name, so that
    the matrices of different subjects can be stacked for the ISC.
    
    IN
    
    B_s: the bold data for subject s, one row per level: [levels, voxels]
    names_s: the level names for subject s, one per row of B_s
    
    OUT
    
    dfOrdered: the ordered df ('level' column first), just to sanity check the reordering
    B_ordered: the ordered B array [levels, voxels]
    '''
    
    # cleanup the level names first, remove stuff
    level_names = [cleanup_names(name) for name in names_s]
    
    # read in B as pandas df
    df = pd.DataFrame(B_s)
    df.insert(0, 'level', level_names) # insert level names as first col
    
    # Plain string sort on the cleaned name, so whatever comes first in the
    # cleaned string is the primary sort key.
    dfOrdered = df.sort_values(by='level')
    
    # Drop the name column by keyword: passing the axis positionally, as in
    # drop('level', 1), is rejected by pandas 2.x.
    dfBold = dfOrdered.drop(columns='level')
    
    B_ordered = dfBold.values # convert df to numpy array
    
    return dfOrdered, B_ordered

In [41]:
# Collect, per subject, the reordered beta matrix and the sorted dataframe
# (the dataframes are kept only for checking the reordering).
ISC_data, ordered_dfs = [], []

for s in range(num_subjects):
    print(f'Preprocess fMRI data for subject {s+1}')

    # level order and betas for this subject; the betas are transposed to [blocks, voxels]
    names_s = levels_data[s]
    B_s = B_data[s].T

    dfOrdered, B_ordered = get_in_shape(B_s, names_s)

    ordered_dfs.append(dfOrdered)
    ISC_data.append(B_ordered)

# stack the per-subject matrices along a new leading axis
ISC_arr = np.array(ISC_data)

Preprocess fMRI data for subject 1
Preprocess fMRI data for subject 2
Preprocess fMRI data for subject 3
Preprocess fMRI data for subject 4
Preprocess fMRI data for subject 5
Preprocess fMRI data for subject 6
Preprocess fMRI data for subject 7
Preprocess fMRI data for subject 8


In [42]:
ISC_arr.shape # [subjects, levels, voxels] at this point

(8, 54, 179595)

In [43]:
# need to get [TRs, voxels, subjects]: move the leading subject axis to the end in one step
# NOTE: ISC_arr is overwritten with its own permutation, so running this cell a second time permutes the axes again
ISC_arr = np.transpose(ISC_arr, (1, 2, 0))

ISC_arr.shape # [levels, voxels, subjects]

(54, 179595, 8)