In [None]:
import os
import pandas as pd 
import math
import nibabel as nb
import numpy as np
pd.set_option('display.max_rows', 10)

First, load in all the subjects' .nii files. Then, import the CSV file that specifies the timestamps where internal monologue is present during each video. For each video, the subject's neural timeseries file is segmented into 25 clips, so altogether each subject will have 50 clips. 

In [None]:
# Root folder on the external drive; its entries are later opened as per-subject folders.
xhdpath = '/Volumes/LT/phd/inarr/'

# Every directory entry whose name contains 'sub', in the order os.listdir returns them.
sublist = list(filter(lambda entry: 'sub' in entry, os.listdir(xhdpath)))

# Timestamp table; later cells read its movie / start / stop / type / version columns.
timestamps = pd.read_csv('/Users/lindseytepfer/Documents/phd/inarr/monologue_timestamps.csv')

In [None]:
#stutterer shape: (77, 95, 82, 373)
#physical shape: (77, 95, 82, 302)

This function takes in each .nii image and the subsetted dataframe, and uses the timestamps inside of the dataframe to determine where the nii files must be trimmed. Because we're dealing with brain data, we shift the timecourse by 6 seconds to account for the hemodynamic response function. 

In [None]:
def trim_image(image, df, snum, movie):
    """Split a 4D image into one clip per row of ``df`` and save each clip.

    Parameters
    ----------
    image : 4D image exposing ``.slicer`` (it is also handed to ``nb.save``).
    df : DataFrame with "start", "stop", "type" and "version" columns. The
        index label of each row is used as the clip number in the file name.
    snum : str
        Subject folder name; clips go to ``xhdpath + snum + "/segmented_files/"``.
    movie : str
        Movie name embedded in the output file name.

    Every clip is written as
    ``<snum>_<movie>_clip-<i>_<type>_version-<version>.nii.gz``. The last row
    of ``df`` is cut from its start to the end of the image instead of to its
    own "stop" value.
    """
    out_dir = xhdpath + snum + "/segmented_files/"
    # nb.save does not create missing folders, so make sure the target exists.
    os.makedirs(out_dir, exist_ok=True)

    last_label = df.index[-1]

    # Iterate the actual index labels rather than range(min, max + 1) so a
    # non-contiguous index does not raise a KeyError on a missing label.
    for i in df.index:
        # NOTE(review): the timestamps are halved (presumably seconds -> volumes
        # at a 2 s TR) and 6 is added *after* the division, i.e. a shift of 6
        # volumes. The accompanying text describes a 6-second shift; confirm
        # which one is intended before changing it.
        start = math.floor((df.loc[i, "start"] / 2) + 6)
        stop = math.ceil((df.loc[i, "stop"] / 2) + 6)
        clip_type = df.loc[i, "type"]
        version = df.loc[i, "version"]

        if i != last_label:
            sub_trimmed = image.slicer[:, :, :, start:stop]
        else:
            # Final clip: keep everything from its start to the end of the run.
            sub_trimmed = image.slicer[:, :, :, start:]

        # One naming scheme for every clip. The final clip used to get a doubled
        # underscore ("<movie>__clip-N"), which broke split('_')-based parsing of
        # the file name in later cells.
        fname = f"{snum}_{movie}_clip-{i}_{clip_type}_version-{version}.nii.gz"
        nb.save(sub_trimmed, out_dir + fname)

In [None]:
# Segment every subject's movie runs into clips.
for sub in sublist:
    print(sub)
    sub_files = [x for x in os.listdir(xhdpath+sub) if '.nii.gz' in x]

    for f in sub_files:
        # Decide which movie this run belongs to from its file name
        # ('physical' is checked first, as before); skip anything else.
        movie = next((m for m in ('physical', 'stutterer') if m in f), None)
        if movie is None:
            continue

        sub_brain = nb.load(xhdpath+sub+os.sep+f)

        # Re-index each movie's rows from 0 so clip numbers do not depend on
        # where the movie's rows sit in the CSV. Previously only the
        # 'stutterer' rows were re-indexed, so 'physical' clip numbers were
        # the raw CSV row positions.
        movie_df = timestamps[timestamps.movie == movie].reset_index(drop=True)
        trim_image(sub_brain, movie_df, sub, movie)



## <font color='hotpink'> Univariate analysis </font>

Next, we mask each clip into 400 different sub-regions (parcels) of the brain. In doing so, we create a 400-row dataframe for each clip, where each row is a parcel, and the column lengths are variable dependent upon the size of a given parcel. 

Afterwards, for each participant, we take the parcel data related to the internal monologue segments, and compute the mean for each clip. 

We take only the internal-monologue clips (the non-internal-monologue moments are ignored for this analysis), whether silenced or unsilenced, and compute a mean for each of those clips, averaging both across the voxels within a parcel and across time. This gives 12 values per participant per parcel. 

In [None]:
import nilearn
from nilearn import datasets
import nilearn.image as image
from nilearn.maskers import NiftiMasker

In [None]:
# Fetch the Schaefer 2018 parcellation: 400 ROIs, 17 Yeo networks, 1 mm resolution.
schaefer_atlas = datasets.fetch_atlas_schaefer_2018(n_rois=400, yeo_networks=17, resolution_mm=1,
                                                    data_dir=None, base_url=None, resume=True, verbose=1)
'''
From the documentation:
The list of labels does not contain ‘Background’ by default. 
To have proper indexing, you should either manually add ‘Background’ to the list of labels:
'''

# Prepend "Background" so that label position i lines up with atlas value i.
# NOTE(review): confirm the installed nilearn version does not already include
# a 'Background' entry, otherwise this shifts the labels by one.
schaefer_atlas.labels = np.insert(schaefer_atlas.labels, 0, "Background")

In [None]:
# Build one fitted NiftiMasker per atlas parcel and keep them all in memory.
# mask_list[k] corresponds to the k-th non-zero atlas value (parcel k + 1 when
# the atlas values are 1..N), and later cells use that position as the parcel id.

# Load the atlas volume once instead of re-reading it for every parcel.
atlas_values = nilearn.image.get_data(schaefer_atlas.maps)

# Take the parcel ids from the atlas itself (0 is background) rather than a
# hard-coded range that ran one past the last parcel.
parcel_ids = [value for value in np.unique(atlas_values) if value != 0]

mask_list = []

for p in parcel_ids:
    # Binary image that is True only inside parcel p.
    parcel = nilearn.image.new_img_like(schaefer_atlas.maps, atlas_values == p)

    # Fit a masker on that binary image so it can later extract the parcel's voxels.
    masker = NiftiMasker()
    parcel_mask = masker.fit(parcel)

    # No try/except here: silently skipping a parcel would shift every later
    # list position and mislabel the per-parcel outputs, so let failures surface.
    mask_list.append(parcel_mask)

In [None]:
# For every parcel mask, extract the time-averaged voxel values of each
# internal-monologue ('_IM_') clip of each subject and write one CSV per parcel.
# NOTE(review): each clip is loaded and time-averaged once per mask, so the same
# mean image is recomputed for every parcel. Caching the mean images would avoid
# that at the cost of memory.
for ix, mask in enumerate(mask_list):
    print(ix)
    parcel_data = []
    subidlist, movielist, cliplist, versionlist, typelist = [], [], [], [], []

    for sub in sublist:
        print(sub)

        clip_dir = xhdpath+sub+'/segmented_files/'
        sub_clips = [x for x in os.listdir(clip_dir) if '_IM_' in x]
        sub_clips.sort()

        for clip in sub_clips:
            # File names look like <sub>_<movie>_clip-N_<type>_version-V.nii.gz.
            # Empty pieces are dropped so names containing a doubled underscore
            # still parse into the same fields.
            parts = [piece for piece in clip.split('_') if piece]
            movie = parts[1]
            clip_num = parts[2]
            clip_type = parts[3]  # avoids shadowing the builtin `type`
            version = parts[4].split('.')[0]

            clip_segment = nb.load(clip_dir+clip)
            # mean over time (the 4th dimension) -> a single 3D image
            clip_avg = image.mean_img(clip_segment)

            try:
                roi_data = mask.transform_single_imgs(clip_avg)
                version_num = version.split('-')[1]
            except Exception as err:
                # Best effort as before, but report what went wrong instead of
                # hiding it (and let KeyboardInterrupt through).
                print("index:", ix, sub, clip, repr(err))
                continue

            # Only record metadata for clips whose data were extracted, so the
            # lists stay aligned with parcel_data.
            parcel_data.append(roi_data[0])
            subidlist.append(sub)
            movielist.append(movie)
            cliplist.append(clip_num)
            versionlist.append(version_num)
            typelist.append(clip_type)

    # One row per (subject, clip); voxel columns first, metadata columns last.
    df = pd.DataFrame(parcel_data)
    df['sub'] = subidlist
    df['movie'] = movielist
    df['clip'] = cliplist
    df['type'] = typelist
    df['version'] = versionlist

    df.to_csv(xhdpath+"/univariate_parcels/parcel_"+str(ix)+".csv", index=False)
    print('done')



Next, I average the voxels of each parcel, creating a reduced dataframe:

In [None]:
# Folder holding the per-parcel CSV files.
# NOTE(review): the extraction cell writes to xhdpath + "/univariate_parcels/";
# confirm the files were copied to this location.
parcel_paths = '/Volumes/Scraplab/lindseytepfer/inarr/univariate_parcels/'

# Every entry in that folder whose name contains '.csv' (expected: 400).
parcels = list(filter(lambda entry: '.csv' in entry, os.listdir(parcel_paths)))

In [None]:
# Columns that describe the clip rather than hold voxel values.
ignore_columns = ['sub', 'movie', 'clip', 'type', 'version']

for parcel_csv in parcels:
    voxel_table = pd.read_csv(parcel_paths + parcel_csv)
    voxel_columns = [col for col in voxel_table.columns if col not in ignore_columns]

    # Collapse all voxel columns into a single row-wise mean: one value per clip.
    averaged = voxel_table[ignore_columns].copy()
    averaged.insert(0, 'avg_column', voxel_table[voxel_columns].mean(axis=1))

    # Written with the default settings, so the row index is saved as the first column.
    averaged.to_csv(parcel_paths + "averaged/" + parcel_csv)

### Plotting the Univariate Result
Now that we have all of our t-values, we insert them into a brain image and plot the results.

In [None]:
from nilearn import datasets, plotting, surface
import nibabel as nb
from nilearn.plotting import plot_img_on_surf

# Fetch the fsaverage surface meshes. This variable is not referenced again in
# the visible cells; the plotting call below passes the string "fsaverage".
fsaverage = datasets.fetch_surf_fsaverage()

In [None]:
# Fetch the Schaefer 2018 parcellation again for plotting: 400 ROIs, 17 Yeo
# networks, 1 mm resolution.
schaefer_atlas = datasets.fetch_atlas_schaefer_2018(n_rois=400, yeo_networks=17, resolution_mm=1,
    data_dir=None, base_url=None, resume=True, verbose=1)

# Prepend "Background" so that label position i lines up with atlas value i.
# NOTE(review): confirm the installed nilearn version does not already include
# a 'Background' entry, otherwise this shifts the labels by one.
schaefer_atlas.labels = np.insert(schaefer_atlas.labels, 0, "Background")

In [None]:
# Folder with the averaged per-parcel files and (per the disabled code below)
# the per-parcel t-value CSVs.
tval_path = "/Volumes/rc/lab/S/Scraplab/lindseytepfer/inarr/univariate_parcels/averaged/"
t_vals = []

# Disabled: collect the per-parcel t-values into one dataframe and save it.
# NOTE(review): as written this refers to `t_maps` and `tmap_path`, neither of
# which is defined in any visible cell; `t_vals` / `tval_path` look intended.
# for i in range(400):
#     df = pd.read_csv(tval_path+"t_value_parcel_"+str(i)+".csv")
#     t_vals.append(df['t_value'][0])

# tval_df = pd.DataFrame(t_maps, columns=["t_values"])
# tval_df.to_csv(tmap_path+"tvalues_dataframe.csv")


In [None]:
# Load the collected values, one per parcel, from the "t_values" column.
# `tmap_path` was never assigned in any executable cell (NameError on a fresh
# kernel), so read from `tval_path`, the only t-value folder defined above.
# NOTE(review): confirm tvalues_dataframe.csv actually lives in that folder.
df = pd.read_csv(tval_path+"tvalues_dataframe.csv")
t_val_list = list(df["t_values"])

In [None]:
# Paint one value per parcel back into atlas space.
atlas = nb.load(schaefer_atlas.maps) # .maps provides the path to the map
atlas_data = atlas.get_fdata()
affine = atlas.affine

mapped_data = np.zeros_like(atlas_data)
mapped_data[atlas_data == 0] = np.nan # background voxels carry no value

# Every non-zero atlas value is a parcel. Filter on the value itself instead of
# dropping the first unique entry, which assumed 0 is present and smallest.
unique_regions = np.unique(atlas_data)
unique_regions = unique_regions[unique_regions != 0]

# The i-th value is assigned to the i-th region, so a length mismatch would
# silently misalign (or raise an opaque IndexError part-way through).
if len(t_val_list) != len(unique_regions):
    raise ValueError(
        f"expected one value per parcel: got {len(t_val_list)} values "
        f"for {len(unique_regions)} atlas regions"
    )

# NOTE(review): -log10 is only defined for positive inputs (it is the usual
# transform for p-values); zero or negative entries become inf/NaN. Confirm
# what the "t_values" column actually contains.
for i, region in enumerate(unique_regions):
    mapped_data[atlas_data == region] = -np.log10(t_val_list[i])

In [None]:
# Wrap the per-voxel values in a NIfTI image that shares the atlas affine, and
# save it to the current working directory.
tval_img = nb.Nifti1Image(mapped_data, affine)
nb.save(tval_img, 'tvalue_map.nii.gz')

In [None]:
# Project the value map onto the fsaverage surface and show both hemispheres.
plotting.plot_img_on_surf(
    tval_img,"fsaverage", inflate=True,
    views=['lateral', 'medial'],  # Hemispheric views to display
    hemispheres=['left', 'right'],  # Both hemispheres
    threshold=0.05,  # NOTE(review): the map holds -log10-transformed values, so 0.05 is a cutoff on that scale, not p < 0.05 — confirm
    cmap='plasma',  # Colormap (e.g., 'viridis', 'coolwarm')
    colorbar=True,  # Show colorbar
    vmin=0, vmax=.4,  # colour scale limits
)

plotting.show()

## <font color='turquoise'> Multivariate analysis (ISC)</font>

We again want to mask our clips, but this time we want only the NIM set of clips - the _non-internal monologue segments_ - and, for each parcel, we want the average across its voxels at every timepoint (a time-by-parcel series). 

In [None]:
import nilearn
from nilearn import datasets
from nilearn.maskers import NiftiLabelsMasker

# Fetch the Schaefer 2018 parcellation: 400 ROIs, 17 Yeo networks, 1 mm resolution.
schaefer_atlas = datasets.fetch_atlas_schaefer_2018(n_rois=400, yeo_networks=17, resolution_mm=1,
                                                    data_dir=None, base_url=None, resume=True, verbose=1)
'''
From the documentation:
The list of labels does not contain ‘Background’ by default. 
To have proper indexing, you should either manually add ‘Background’ to the list of labels:
'''

# Prepend "Background" so that label position i lines up with atlas value i.
# The labels are not passed to the masker below; only the atlas image is.
schaefer_atlas.labels = np.insert(schaefer_atlas.labels, 0, "Background")

# Labels masker: reduces each parcel of the atlas to one signal per timepoint.
masker = NiftiLabelsMasker(
    labels_img=schaefer_atlas.maps,
    strategy='mean',  # Averages voxels in parcel at each TR
    standardize=False  # don't want z-scoring
)

In [None]:
# For every subject and every non-internal-monologue ('_NIM_') clip except
# clip-0, extract the time-by-parcel signal and write it to one CSV per clip.
# The leftover debugging slices ([0:1]) that limited this to the first subject
# and first clip are gone; later cells expect a file for every subject and clip.
for sub in sublist:
    clip_dir = xhdpath+sub+'/segmented_files/'
    sub_clips = [x for x in os.listdir(clip_dir) if '_NIM_' in x]
    sub_clips = [x for x in sub_clips if 'clip-0' not in x] #discard the first clip
    sub_clips.sort()

    for clip in sub_clips:
        # File names look like <sub>_<movie>_clip-N_<type>_version-V.nii.gz.
        # Empty pieces are dropped so a name with a doubled underscore still
        # yields "clip-N" here rather than an empty string.
        parts = [piece for piece in clip.split('_') if piece]
        movie = parts[1]
        clip_num = parts[2]

        clip_segment = nb.load(clip_dir+clip)
        # Rows are timepoints, columns are parcels (no averaging over time here).
        time_by_parcel = masker.fit_transform(clip_segment)

        df = pd.DataFrame(time_by_parcel)

        # Same identifiers on every row of this clip.
        df['sub'] = sub
        df['movie'] = movie
        df['clip'] = clip_num

        df.to_csv("/Volumes/Scraplab/lindseytepfer/inarr/isc_parcels/"+sub+"_"+movie+"_"+clip_num+".csv", index=False)
        

In [None]:
# Folder holding the per-subject, per-clip ISC CSV files.
scrap_volume = "/Volumes/rc/lab/S/Scraplab/lindseytepfer/inarr/isc_parcels/"

# Subjects grouped by which version they were assigned.
v1_subs = [
    'sub-1', 'sub-10', 'sub-13', 'sub-14', 'sub-17', 'sub-18', 'sub-19',
    'sub-20', 'sub-21', 'sub-24', 'sub-25', 'sub-26', 'sub-27',
]
v2_subs = [
    'sub-2', 'sub-3', 'sub-4', 'sub-5', 'sub-6', 'sub-7', 'sub-8',
    'sub-9', 'sub-11', 'sub-12', 'sub-15', 'sub-16', 'sub-22', 'sub-23',
]
sublist = v1_subs + v2_subs

# Even-numbered clips 2..24; clip-0 is ignored.
clip_list = ['clip-' + str(number) for number in range(2, 25, 2)]

movie_list = ["stutterer", "physical"]

In [None]:
# Reorganise the per-subject clip CSVs into one CSV per (parcel, movie, clip),
# with one row per subject holding that parcel's timecourse.
version = 'v1'
file_list = [x for x in os.listdir(scrap_volume+version) if '.csv' in x]

for movie in movie_list:
    movie_files = [x for x in file_list if movie in x]

    for clip in clip_list:

        clip_files = [x for x in movie_files if clip+".csv" in x] #eg, clip-2
        if not clip_files:
            raise FileNotFoundError(
                f"no '{movie}' files for {clip} in {scrap_volume+version}"
            )

        # Read each subject's file once and reuse it for every parcel, instead
        # of re-reading the same CSV for each parcel column.
        per_subject = []
        for f in clip_files:
            fname_split = f.split("_")
            table = pd.read_csv(scrap_volume+version+os.sep+f)
            per_subject.append((fname_split[0], fname_split[1], table))

        # Parcel columns in these CSVs are named "0".."399". Column "0" is the
        # first parcel (the ISC CSVs are written from the labels masker's
        # output with default integer column names), so start at 0 rather than
        # skipping it.
        for parcel in range(400):
            column = str(parcel)
            all_subs_list = []

            for sub_id, movie_name, table in per_subject:
                x = table[[column]].T  # one row: this subject's timecourse
                x[['sub', 'movie']] = sub_id, movie_name
                all_subs_list.append(x)

            df = pd.concat(all_subs_list).sort_values(by='sub')

            df.to_csv(scrap_volume+version+os.sep+"parcels/parcel_"+str(parcel)+"_"+movie+"_"+clip+".csv")



Now that our .csv files have been re-organized, we can correlate each parcelxclip dataframe and then take the averages

In [None]:
# Folder with the reorganised v1 files, one CSV per parcel x movie x clip.
parcel_path = '/Volumes/Scraplab/lindseytepfer/inarr/isc_parcels/v1/parcels/'

# Every entry in that folder whose name contains '.csv'.
parcel_files = list(filter(lambda entry: '.csv' in entry, os.listdir(parcel_path)))

In [None]:
#run this on andes tomorrow

# For each movie and parcel: correlate the subjects' timecourses within every
# clip, then average those subject-by-subject correlation matrices over clips.
for movie in movie_list:

    movie_files = [x for x in parcel_files if movie in x and 'clip-0' not in x]

    # Group the clip files by their exact parcel prefix (e.g. "parcel_1").
    # A substring test ("parcel_1" in name) also matches parcel_10, parcel_100,
    # ... and mixed other parcels' clips into the average. Grouping also means
    # each parcel is processed once rather than once per clip file.
    files_by_parcel = {}
    for fname in movie_files:
        pnum = fname.split("_"+movie)[0]
        files_by_parcel.setdefault(pnum, []).append(fname)

    for pnum, clip_files in files_by_parcel.items():

        parcel_clip_corr_list = []

        for clip in clip_files: # expected: 12 clips per parcel
            df = pd.read_csv(parcel_path+clip)
            df = df.drop(df.columns[0], axis=1)  # drop the saved row index
            timecourses = df.drop(['sub', 'movie'], axis=1)
            # rows are subjects, so this is a subject-by-subject matrix
            parcel_clip_corr_list.append(np.corrcoef(timecourses))

        parcel_clip_arr = np.array(parcel_clip_corr_list)
        parcel_clip_avg = np.mean(parcel_clip_arr, axis=0)
        out_path = f"/Volumes/Scraplab/lindseytepfer/inarr/isc_parcels/v1/averages/{pnum}_{movie}_avg"
        np.save(out_path,parcel_clip_avg)




Then, we get the difference between the two versions across the parcels.  

Lastly, we permute our version assignments so that we can perform significance testing. 

# <font color='purple'>ISC to Behavioral Analysis</font>