# Functional MRI Preprocessing Pipeline

In [1]:
# import required packages
import os
import numpy as np
import pandas as pd

In [2]:
# function for viewing full dataframes

def print_full_df(x):
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', 2000)
    pd.set_option('display.float_format', '{:20,.2f}'.format)
    pd.set_option('display.max_colwidth', None)
    print(x)
    pd.reset_option('display.max_rows')
    pd.reset_option('display.max_columns')
    pd.reset_option('display.width')
    pd.reset_option('display.float_format')
    pd.reset_option('display.max_colwidth')


## Set up Directory Structures & Convert File Formats

In [3]:
# main directory for raw 7 T data (READ ONLY)
raw_data_dir = '/media/forest/wd_1/data_CLMS/0_7T_RAW'

# main directory for preprocessing fmri data
fmri_dir = '/media/forest/wd_1/data_CLMS/functional/7T_func'

# main directory for preprocessing anatomical
anat_dir = '/media/forest/wd_1/data_CLMS/anatomical/7T_T1_MPR_uniden' 

Create directory to store the raw minc files. 

In [4]:
# create directory to copy over raw minc files

# find all files in raw_data_dir containing string "fmri-FA40"
fp_fmri_minc = !find {raw_data_dir} ~+ -type f -name "*fmri-FA40*"

# create dataframe to store the list of file paths
df_preproc_filepaths = pd.DataFrame({'subject_id':"", 'fmri_minc_filepath':fp_fmri_minc})

# fill in subject ids from mnc filepaths, where ids are formatted w/regex
df_preproc_filepaths['subject_id'] = df_preproc_filepaths['fmri_minc_filepath'].str.extract(r'(\w{3}_\d{3}_m\d{2})')

# print_full_df(df_preproc_filepaths)

**Manual Cleanup:** delete rows 25, 42 and 45 & reset indices ~ double check if more data added/removed

In [5]:
df_preproc_filepaths = df_preproc_filepaths.drop([27, 44, 47])
df_preproc_filepaths = df_preproc_filepaths.reset_index()

print_full_df(df_preproc_filepaths)

    index   subject_id                                                                                               fmri_minc_filepath
0       0  bbb_052_m00           /media/forest/wd_1/data_CLMS/0_7T_RAW/bbb_052/m00/CLMS-LONDON-7_002-LON-7_bbb_052_m00_fmri-FA40.mnc.gz
1       1  aab_026_m00           /media/forest/wd_1/data_CLMS/0_7T_RAW/aab_026/m00/CLMS-LONDON-7_002-LON-7_aab_026_m00_fmri-FA40.mnc.gz
2       2  aac_027_m00           /media/forest/wd_1/data_CLMS/0_7T_RAW/aac_027/m00/CLMS-LONDON-7_002-LON-7_aac_027_m00_fmri-FA40.mnc.gz
3       3  aac_027_m24           /media/forest/wd_1/data_CLMS/0_7T_RAW/aac_027/m24/CLMS-LONDON-7_002-LON-7_aac_027_m24_fmri-FA40.mnc.gz
4       4  aad_028_m00           /media/forest/wd_1/data_CLMS/0_7T_RAW/aad_028/m00/CLMS-LONDON-7_002-LON-7_aad_028_m00_fmri-FA40.mnc.gz
5       5  aad_028_m24           /media/forest/wd_1/data_CLMS/0_7T_RAW/aad_028/m24/CLMS-LONDON-7_002-LON-7_aad_028_m24_fmri-FA40.mnc.gz
6       6  aae_029_m00           /media/forest/w

In [6]:
# find all MP2RAGE uni-den files
fp_uniden_minc = !find {raw_data_dir} ~+ -type f -name "*t1gMPR-uni-den.*"

# create dataframe to store list of anatomical uni-den file paths
df_anat_uniden_filepaths = pd.DataFrame({'subject_id':"",'anat_minc_filepath':fp_uniden_minc})

# fill in subject ids from filepaths with regex, like above
df_anat_uniden_filepaths['subject_id'] = df_anat_uniden_filepaths['anat_minc_filepath'].str.extract(r'(\w{3}_\d{3}_m\d{2})')

print(df_anat_uniden_filepaths)

     subject_id                                 anat_minc_filepath
0   bbb_052_m00  /media/forest/wd_1/data_CLMS/0_7T_RAW/bbb_052/...
1   aab_026_m00  /media/forest/wd_1/data_CLMS/0_7T_RAW/aab_026/...
2   aac_027_m00  /media/forest/wd_1/data_CLMS/0_7T_RAW/aac_027/...
3   aac_027_m24  /media/forest/wd_1/data_CLMS/0_7T_RAW/aac_027/...
4   aad_028_m00  /media/forest/wd_1/data_CLMS/0_7T_RAW/aad_028/...
..          ...                                                ...
70  uvw_021_m24  /media/forest/wd_1/data_CLMS/0_7T_RAW/uvw_021/...
71  vwx_022_m00  /media/forest/wd_1/data_CLMS/0_7T_RAW/vwx_022/...
72  wxy_023_m00  /media/forest/wd_1/data_CLMS/0_7T_RAW/wxy_023/...
73  xyz_024_m00  /media/forest/wd_1/data_CLMS/0_7T_RAW/xyz_024/...
74  xyz_024_m24  /media/forest/wd_1/data_CLMS/0_7T_RAW/xyz_024/...

[75 rows x 2 columns]


In [7]:
# Manual deletion: row 32
df_anat_uniden_filepaths = df_anat_uniden_filepaths.drop([32])
df_anat_uniden_filepaths = df_anat_uniden_filepaths.reset_index()

In [8]:
df_anat_uniden_filepaths.columns

Index(['index', 'subject_id', 'anat_minc_filepath'], dtype='object')

In [9]:
# check for duplicates in subject_id column
df_anat_uniden_filepaths.duplicated().sum()

0

- Merge the fmri and mp2rage dataframes based on subject_id
- use outer join method so that any files with unmatched fmri/anat will have an NA entry
- double check NA entries to ensure file is present or not--if not delete the row and proceed

In [10]:
df_filepaths = pd.merge(df_preproc_filepaths, df_anat_uniden_filepaths, on='subject_id', how='outer')

In [11]:
# Create new column for subject (minus timepoint code)
df_filepaths['subject'] = df_filepaths['subject_id'].str.extract(r'(\w{3}_\d{3})')

In [12]:
# delete rows with no fMRI data
df_filepaths = df_filepaths[df_filepaths['fmri_minc_filepath'].notna()]

In [13]:
# count number of unique subjects
df_filepaths['subject'].nunique()


45

Copy/convert anatomical files. 

In [14]:
# directory to store raw nifti files
raw_anat_dir = anat_dir + '/0_raw'

# create directory if it does not exist
!if [[ -d "$raw_anat_dir" ]]; \
    then echo "Directory Already Exists"; \
    else mkdir $raw_anat_dir && echo "Created Directory: {raw_anat_dir}"; \
    fi

Directory Already Exists


In [15]:
# convert mnc files to nifti and move to the new directory
for file, subject in zip(df_filepaths['anat_minc_filepath'], df_filepaths['subject_id']):

    output_filename = subject + '_7T_mprage_uniden_raw'

    # if file extension is .nii or .nii.gz, copy file to new directory
    if file.endswith('.nii') or file.endswith('.nii.gz'):
        print('Copying nifti file')
        !cp $file $raw_anat_dir/{output_filename}
        print('Complete.')
        print('...')


    # if file extension is .mnc or .mnc.gz, convert to nifti and move to new directory
    else:
        print('Converting minc file...')
        !mnc2nii $file $raw_anat_dir/{output_filename}.nii
        print('Complete.')
        print('...')

Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Copying nifti file
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Copying nifti file
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file..

Copy/Convert fMRI data.

In [None]:
# directory to store raw nifti files
raw_fmri_dir = fmri_dir + '/0_raw'

# create directory if it does not exist
!if [[ -d "$raw_fmri_dir" ]]; \
    then echo "Directory Already Exists"; \
    else mkdir $raw_anat_dir && echo "Created Directory: {raw_anat_dir}"; \
    fi

In [18]:
# convert mnc files to nifti and move to the new directory
for file, subject in zip(df_filepaths['fmri_minc_filepath'], df_filepaths['subject_id']):

    output_filename = subject + '_7T_FMRI_raw'

    # if file extension is .nii or .nii.gz, copy file to new directory
    if file.endswith('.nii') or file.endswith('.nii.gz'):
        print('Copying nifti file')
        !cp $file $raw_fmri_dir/{output_filename}.nii.gz
        print('Complete.')
        print('...')


    # if file extension is .mnc or .mnc.gz, convert to nifti and move to new directory
    else:
        print('Converting minc file...')
        !mnc2nii $file $raw_fmri
        print('...')

Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Copying nifti file
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Copying nifti file
Complete.
...
Converting minc file...
Complete.
...
Converting minc file...
Complete.
...
Converting minc file..

## Part 1: Functional Data Preprocessing

via: https://en.wikibooks.org/wiki/Neuroimaging_Data_Processing/Processing/Steps/Slice_Timing

- first steps: 
    - realignment
    - motion correction
    - slice timing correction via temporal derivatives
- notes: 
    - data characteristics: short TR, interleaved slice order, potentially severe head motion among SPMS group
    - slice acquisition order was Siemens interleaved
    - slice timing correction method chosen based on above characteristics of our data

In [None]:
# 1.1 realignment to middle volume (#239)



In [None]:
# 1.2 motion correction & identifying outliers



In [None]:
# 1.3 slice timing correction via temporal derivatives
 

## Part 3: Registration & Normalization
- registration of fmri to high resolution t1w mp2rage anatomical data via boundary-based registration
- followed by normalization to template (MNI152 2mm)

## Part 4: Smoothing