In [None]:
import os
import pandas as pd 
import math
import nibabel as nb
import numpy as np

import nilearn
from nilearn import image, signal

First, load in all the subjects' .nii files. Then, import the CSV file that specifies the timestamps at which internal monologue is present during each video. For each video, the subject's neural timeseries file will be segmented into 25 clips. This means that, altogether, each subject will have 50 clips.

In [None]:
# Root directory holding the per-subject folders and the timestamp CSV.
data_path = '/Volumes/Scraplab/fSEND/inarr_data/'

# Every directory entry whose name contains 'sub' (order is whatever os.listdir returns).
sublist = list(filter(lambda entry: 'sub' in entry, os.listdir(data_path)))

# Table of monologue timestamps; downstream cells filter it by its `movie` column.
timestamps = pd.read_csv(f'{data_path}monologue_timestamps.csv')

# Subject IDs mapped to version 1 and version 2 respectively by trim_image.
v1_subs = [f'sub-{n}' for n in (1, 10, 13, 14, 17, 18, 19, 20, 21, 24, 25, 26, 27)]
v2_subs = [f'sub-{n}' for n in (2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 15, 16, 22, 23, 28)]

This function takes in each .nii image and the subsetted dataframe, and uses the timestamps in the dataframe to determine where the .nii file must be trimmed. Because we're dealing with brain data, we shift each clip's time window by 6 seconds to account for the lag of the hemodynamic response function.

In [None]:
def trim_image(brain, df, snum, movie, tr=2.0, hrf_delay=6.0):
    """Cut a 4D image into one clip per timestamp row and save each clip to disk.

    Parameters
    ----------
    brain : image object
        4D image exposing a ``.slicer`` attribute that can be sliced along its
        fourth axis; each slice is written with ``nb.save``.
    df : pandas.DataFrame
        One row per clip with columns ``start``, ``stop``, ``type`` and
        ``version_silenced``. The index must be integer-valued: it is used as
        the clip number in the file name, and odd index values are treated as
        internal-monologue (IM) segments.
    snum : str
        Subject ID; must appear in ``v1_subs`` or ``v2_subs``.
    movie : str
        Movie label inserted into the output file names.
    tr : float, optional
        Seconds per volume used to convert timestamps to volume indices.
        Defaults to 2.0, matching the previously hard-coded ``/ 2``
        (assumes ``start``/``stop`` are in seconds -- TODO confirm).
    hrf_delay : float, optional
        Hemodynamic lag, in seconds, added to every timestamp. Defaults to 6.0.

    Returns
    -------
    None
        Clips are written to ``<data_path>/<snum>/segmented_files/``.

    Raises
    ------
    ValueError
        If ``snum`` is in neither ``v1_subs`` nor ``v2_subs``.
    """
    # The version depends only on the subject, so resolve it once and fail
    # loudly instead of hitting an unbound local inside the loop.
    if snum in v1_subs:
        base_version = 1
    elif snum in v2_subs:
        base_version = 2
    else:
        raise ValueError(f"{snum} is not listed in v1_subs or v2_subs")

    # nb.save does not create missing directories.
    out_dir = os.path.join(data_path, snum, "segmented_files")
    os.makedirs(out_dir, exist_ok=True)

    final_clip = df.index[-1] if len(df.index) else None

    # Iterate over the labels that actually exist so a non-contiguous index
    # does not raise KeyError in df.loc.
    for i in df.index:

        # Add the HRF delay in seconds *before* converting to volumes; adding 6
        # after dividing by the TR would shift by 6 volumes (12 s at TR = 2 s)
        # rather than the intended 6 s.
        start = math.floor((df.loc[i, "start"] + hrf_delay) / tr)
        stop = math.ceil((df.loc[i, "stop"] + hrf_delay) / tr)
        clip_type = df.loc[i, "type"]
        version_silenced = df.loc[i, "version_silenced"]

        # Odd clip numbers are IM segments; tag them when this subject's
        # version is the one listed in version_silenced for this row.
        if i % 2 == 1 and version_silenced == base_version:
            version = f"{base_version}_silenced"
        else:
            version = str(base_version)

        # The final clip runs to the end of the scan instead of stopping at `stop`.
        if i != final_clip:
            sub_trimmed = brain.slicer[:, :, :, start:stop]
        else:
            sub_trimmed = brain.slicer[:, :, :, start:]

        clip_name = f"{snum}_{movie}_clip-{i}_{clip_type}_version-{version}.nii.gz"
        nb.save(sub_trimmed, os.path.join(out_dir, clip_name))

    print("trimming complete.")
    return None

In [None]:
# Clean every BOLD run of every subject, then cut it into clips with trim_image.
# NOTE(review): sublist comes from os.listdir(), whose order is arbitrary, so
# sublist[1:] skips an unpredictable entry -- confirm whether the skip is intended.
for sub in sublist[1:]:
    print(sub)
    sub_dir = os.path.join(data_path, sub)
    sub_files = [x for x in os.listdir(sub_dir) if x.endswith('.nii.gz')]

    for f in sub_files:

        if 'physical' in f:
            movie = 'physical'
        elif 'stutterer' in f:
            movie = 'stutterer'
        else:
            # Not a run for either movie: skip it rather than reusing the
            # `movie` value left over from a previous iteration.
            continue

        #take this step to regress out confounds -- head motion, csf, white matter, etc
        conf_file = f'{sub}_task-{movie}_desc-confounds_timeseries.tsv'

        bold = nb.load(os.path.join(sub_dir, f))
        n_scans = bold.shape[-1]
        time = np.arange(n_scans)

        # Create polynomial trends (linear and quadratic)
        linear_trend = time
        quadratic_trend = time**2

        # Keep every confound column except those whose name contains
        # '_comp', 'cosine' or 'motion'.
        conf = pd.read_csv(os.path.join(sub_dir, conf_file), sep='\t', header=0)
        confounds = [
            c for c in conf.columns
            if '_comp' not in c and 'cosine' not in c and 'motion' not in c
        ]
        conf = conf.loc[:, confounds]

        poly_confounds = np.column_stack((linear_trend, quadratic_trend, conf))
        # Missing confound values are replaced with 0 before regression.
        poly_confounds[np.isnan(poly_confounds)] = 0

        # Time is the last image axis, so reshape(-1, n_scans) gives
        # (voxels, time); transpose to (time, voxels) for signal.clean, then
        # undo both steps to rebuild the 4D volume.
        cleaned_data = signal.clean(
            bold.get_fdata().reshape(-1, n_scans).T,
            confounds=poly_confounds,
            standardize='zscore_sample',
        )
        clean_bold = image.new_img_like(bold, cleaned_data.T.reshape(bold.shape))

        # Re-index from 0 for BOTH movies so clip numbers and the odd/even
        # parity used by trim_image do not depend on the row order of the CSV.
        movie_df = timestamps[timestamps.movie == movie].reset_index(drop=True)
        trim_image(clean_bold, movie_df, sub, movie)

