# MDM imaging analysis

In [1]:
# load modules
from scipy.stats import stats
import nibabel as nib
import numpy as np
from matplotlib import pyplot as plt
import os
import glob
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from nilearn.input_data import NiftiMasker
import nilearn.plotting as plotting
import pickle
import pandas as pd
import seaborn as sns 

%matplotlib inline 
%autosave 5
sns.set(style = 'white', context='poster', rc={"lines.linewidth": 2.5})

Autosaving every 5 seconds


## Rename task number to run number.

In [10]:
# Root data directory; the cells below expect one 'sub-<id>' folder per subject inside it.
# NOTE(review): absolute path on the analysis machine -- adjust when running elsewhere.
data_root = '/home/rj299/project/mdm_analysis/data_rename'

In [12]:
# Switch the working directory to subject 2073's folder (used to prototype the file-name parsing below).
os.chdir(os.path.join(data_root, 'sub-2073'))

In [13]:
# Take the first entry of the current working directory as a sample file name
# (os.listdir returns entries in arbitrary order).
filename_test = os.listdir()[0]

In [14]:
# Extract the task number: the text between '_task-task' and the next '_' in the file name.
filename_test.split('_task-task')[1].split('_')[0]

'6'

In [101]:
# functions for changing file names

# rename all files by adding run number determined by session and task number during scanning
def addRunNum(directory):
    """ Add scan run numbers to file name

    Every file in ``directory`` whose name contains '_task-task<N>' is renamed
    so that this label becomes '_run-<R>', where R is the 1-based position of
    N in the sorted task numbers returned by getTaskNum. Files without the
    task label (for example files that were already renamed) are left
    untouched, so the function can be run again safely. Each new file name
    is printed.

    Parameters
    --------------
    directory: directory for a subject, contains data for all runs
               (absolute or relative path)

    Raises
    --------------
    ValueError: if the text after '_task-task' is not an integer, or if the
                task number is not among those returned by getTaskNum
    """
    # Resolve the path BEFORE changing directory; otherwise a relative path
    # would be looked up relative to itself in the calls below.
    directory = os.path.abspath(directory)

    # Side effect kept from the original: the working directory is switched
    # to the subject folder.
    os.chdir(directory)

    # get sorted task number from the directory
    task_num_all = getTaskNum(directory)

    marker = '_task-task'

    # add run number and rename
    for filename in os.listdir(directory):
        prefix, found, rest = filename.partition(marker)
        if not found:
            # no task label: unrelated or already-renamed file
            continue

        # get task number (text up to the next underscore)
        task_str, sep, suffix = rest.partition('_')
        task_num = int(task_str)

        # get the run number based on all the task number in the directory
        run_count = task_num_all.index(task_num) + 1

        # Rebuild the name from the parsed pieces rather than re-splitting on
        # the integer, which would miss zero-padded labels such as 'task03'.
        filename_new = prefix + '_run-%s' % run_count + sep + suffix

        os.rename(os.path.join(directory, filename),
                  os.path.join(directory, filename_new))
        print(filename_new)

# get all task numbers for ses one
def getTaskNum(directory):
    """ Get all the task numbers for session one

    Searches for files matching 'sub-*_ses-1_task*_bold.nii.gz' and parses
    the integer between '_task-task' and '_space' in each file name.

    Parameters
    -----------------
    directory: data directory for a subject; when None, the current working
               directory is searched instead

    Return
    -----------------
    task_num: sorted list of the unique task numbers (int) found
    """
    pattern = 'sub-*_ses-1_task*_bold.nii.gz'
    if directory is not None:
        # Search the requested folder instead of whatever the cwd happens to
        # be; escape it so glob metacharacters in the path are taken literally.
        pattern = os.path.join(glob.escape(directory), pattern)

    # A set keeps one entry per task even when several BOLD files exist for
    # the same task, so list positions map one-to-one onto run numbers.
    task_num = set()

    for path in glob.glob(pattern):
        # parse the file name only, so the folder part cannot interfere
        name = os.path.basename(path)
        task_id = name.split('_task-task')[1].split('_space')[0]
        task_num.add(int(task_id))

    return sorted(task_num)

In [107]:
# Collect every subject folder (named 'sub-<id>') directly under data_root.
# glob returns them in arbitrary order, so sub_num below is not sorted.
sub_fold = glob.glob(data_root + '/sub-*')

# Extract the integer subject id: the text that follows 'sub-' in each folder path.
# NOTE(review): assumes 'sub-' occurs only once in the path -- confirm if data_root changes.
sub_num = [] 
for fold in sub_fold:
    sub_id = fold.split('sub-')[1]
    sub_num.append(int(sub_id))
print('All subjects included in analysis:', sub_num)

All subjects included in analysis: [2597, 2662, 2583, 2596, 2659, 2594, 2653, 2600, 2598, 2657, 2593, 2652, 2073, 2656, 2592, 2651, 2655, 2650, 2588, 2550, 2666, 2582, 2665, 2660, 2664, 2585, 2624, 2663, 2584]


In [99]:
# Scratch check: is `fold` (the loop variable left over from the subject-folder loop)
# something other than the sub-2582 directory?
fold != '/home/rj299/project/mdm_analysis/data_rename/sub-2582'

False

In [108]:
# rename files and add run number in the file name
# needs running only ONCE

# for fold in sub_fold:
#     if fold != '/home/rj299/project/mdm_analysis/data_rename/sub-2582':
#         addRunNum(fold)

In [None]:
# rename files and add run number in the file name
# subject 2582 only, because the files are named in a different way
# rename all files by adding run number determined by session and task number during scanning
def addRunNum(directory):
    """ Add scan run numbers to file name

    Every file in ``directory`` whose name contains '_task-task<N>' is renamed
    so that this label becomes '_run-<R>', where R is the 1-based position of
    N in the sorted task numbers returned by getTaskNum. Files without the
    task label (for example files that were already renamed) are left
    untouched, so the function can be run again safely. Each new file name
    is printed.

    Parameters
    --------------
    directory: directory for a subject, contains data for all runs
               (absolute or relative path)

    Raises
    --------------
    ValueError: if the text after '_task-task' is not an integer, or if the
                task number is not among those returned by getTaskNum
    """
    # Resolve the path BEFORE changing directory; otherwise a relative path
    # would be looked up relative to itself in the calls below.
    directory = os.path.abspath(directory)

    # Side effect kept from the original: the working directory is switched
    # to the subject folder.
    os.chdir(directory)

    # get sorted task number from the directory
    task_num_all = getTaskNum(directory)

    marker = '_task-task'

    # add run number and rename
    for filename in os.listdir(directory):
        prefix, found, rest = filename.partition(marker)
        if not found:
            # no task label: unrelated or already-renamed file
            continue

        # get task number (text up to the next underscore)
        task_str, sep, suffix = rest.partition('_')
        task_num = int(task_str)

        # get the run number based on all the task number in the directory
        run_count = task_num_all.index(task_num) + 1

        # Rebuild the name from the parsed pieces rather than re-splitting on
        # the integer, which would miss zero-padded labels such as 'task03'.
        filename_new = prefix + '_run-%s' % run_count + sep + suffix

        os.rename(os.path.join(directory, filename),
                  os.path.join(directory, filename_new))
        print(filename_new)

# get all task numbers for ses one
def getTaskNum(directory):
    """ Get all the task numbers for session one

    Searches for files matching 'sub-*_ses-1_task*_bold.nii.gz' and parses
    the integer between '_task-task' and '_space' in each file name.

    Parameters
    -----------------
    directory: data directory for a subject; when None, the current working
               directory is searched instead

    Return
    -----------------
    task_num: sorted list of the unique task numbers (int) found
    """
    pattern = 'sub-*_ses-1_task*_bold.nii.gz'
    if directory is not None:
        # Search the requested folder instead of whatever the cwd happens to
        # be; escape it so glob metacharacters in the path are taken literally.
        pattern = os.path.join(glob.escape(directory), pattern)

    # A set keeps one entry per task even when several BOLD files exist for
    # the same task, so list positions map one-to-one onto run numbers.
    task_num = set()

    for path in glob.glob(pattern):
        # parse the file name only, so the folder part cannot interfere
        name = os.path.basename(path)
        task_id = name.split('_task-task')[1].split('_space')[0]
        task_num.add(int(task_id))

    return sorted(task_num)

In [56]:
# NOTE: os.chdir() returns None, so getTaskNum is called with directory=None here.
# The result still describes sub-2666 because the chdir side effect moves the
# working directory, which is where the glob inside getTaskNum searches.
getTaskNum(os.chdir(os.path.join(data_root, 'sub-2666')))

[3, 4, 5, 6, 7, 8, 9, 10]

In [91]:
# Rename the files of sub-2666 on disk; each new file name is printed.
addRunNum(os.path.join(data_root, 'sub-2666'))

sub-2666_ses-1_run-3_space-fsaverage5_hemi-L.func.gii
sub-2666_ses-1_run-3_desc-confounds_regressors.tsv
sub-2666_ses-1_run-5_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz
sub-2666_ses-1_run-8_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz
sub-2666_ses-1_run-2_space-MNI152NLin2009cAsym_boldref.nii.gz
sub-2666_ses-1_run-5_desc-confounds_regressors.tsv
sub-2666_ses-1_run-5_space-MNI152NLin2009cAsym_desc-brain_mask.nii.gz
sub-2666_ses-1_run-2_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz
sub-2666_ses-1_run-3_desc-confounds_regressors.json
sub-2666_ses-1_run-6_space-MNI152NLin2009cAsym_boldref.nii.gz
sub-2666_ses-1_run-5_space-fsaverage5_hemi-L.func.gii
sub-2666_ses-1_run-3_space-MNI152NLin2009cAsym_boldref.nii.gz
sub-2666_ses-1_run-1_desc-confounds_regressors.tsv
sub-2666_ses-1_run-2_space-MNI152NLin2009cAsym_desc-aparcaseg_dseg.nii.gz
sub-2666_ses-1_run-4_space-MNI152NLin2009cAsym_desc-brain_mask.json
sub-2666_ses-1_run-6_space-MNI152NLin2009cAsym_desc-brain_mask.nii.gz
sub

In [None]:
def load_epi_data(data_dir,sub,run):
    """Load the preprocessed BOLD image (NIfTI) of one run

    Parameters
    -----------
    data_dir: root data directory containing the 'sub-<id>' folders
    sub: subject number
    run: run number; runs below 5 are read from session 1, runs above 4
         from session 2

    Return
    -----------
    The object returned by nib.load for that run's file
    """
    # pick the session label that goes into the file name
    if run < 5:
        session = 1
    elif run > 4:
        session = 2

    bold_name = "sub-%s_ses-%s_run-%s_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz" % (sub, session, run)
    bold_path = os.path.join(data_dir, 'sub-%s' %sub, bold_name)

    image = nib.load(bold_path)
    print("Loading data from %s" % (bold_path))
    return image

In [None]:
def load_whb_mask(sub, data_root):
    """Load the whole-brain mask image of a subject

    The mask is read from the subject's session 1, run 1 brain-mask file.

    Parameters
    -----------
    sub: subject number
    data_root: root data directory

    Return
    -----------
    The object returned by nib.load for the mask file
    """
    subject_dir = os.path.join(data_root, 'sub-%s' %sub)
    mask_name = 'sub-%s_ses-1_run-1_space-MNI152NLin2009cAsym_desc-brain_mask.nii.gz' %sub
    return nib.load(os.path.join(subject_dir, mask_name))