In [1]:
## GENERAL ROI INFORMATION:
# This script uses functional ROI NIfTIs (converted from .img files and scaled in FSL to the correct
# size in each dimension). These parcels are used as binary masks on output from first-level 
# analysis. 
# This yields a hypothesis space for each roi (same location for every 
# subject). These ROI's are face parcels from the Kanwisher website http://web.mit.edu/bcs/nklab/GSS.shtml
# with the exception of DMFPC and AMY (taken from saxelab).

## MAGNITUDE
# The top N (default = 50) voxels are then selected from each hypothesis space based on the highest t-statistic
# (no threshold, and no constraint on contiguity). This defines the ROIs per subject.
# The corresponding con values are averaged as the mean magnitude summary statistic.
# Also computes average position

## LATERALIZATION
# Different MASKS used. Masks created by summing hypothesis space and flipped opposite space, 
# e.g. rTPJ computed as voxels within rTPJ parcel and flipped lTPJ. See commands:
# TO FLIP: fslswapdim data -x y z flip_data
# TO ADD: fslmaths mask1 -add mask2 -bin output ... (e.g. mask1 = rTPJ original, mask 2 = lTPJ flipped)
# Count the NUMBER of voxels whose p-value is below a threshold (p < 0.01 or p < 0.001), then compute: (L - R) / (L + R)

## INTERREGIONAL CORRELATIONS
# Takes the mean temporal signal in each roi and creates a Pearson's r correlation matrix. Converts 
# this to Fisher's Z, and then averages for a summary statistic. Does this per run, and saves
# both run data and averaged run data as separate csv files.

## TEMPORAL NOISE
# Details in section. Looks at specified number of points relative to hrf, and uses these
# indices to extract N BOLD values for each trial; finds std for each trial, then averages 
# across face conditions. Does this for each run, saves runs separately and the mean summary in csv.

## MULTIVARIATE VECTORS
# This is computed over entire hypothesis space, as a vector of all contrast values within the parcel.

# SAVES: 
# roi_individual_masks & roi_contrast_values (top N voxels per subject), 
# and text files per metric computations that can be opened as a dataframe.

# MASKS DIRECTORY
# /om/user/rezzo/OpenAutism/analysis_data/MASKS/STANDARD_MASKS
# /om/user/rezzo/OpenAutism/analysis_data/MASKS/LATERAL_MASKS

# Note : to convert Analyze format to Nifti format:
# load in header, image (and .mat) files into the folder
# fslchfiletype NIFTI_GZ RTPJ_xyz.nii.gz RTPJ_xyz

# SETTINGS
# Both values are read by later cells: `pilot` selects which data root is used,
# and `roi_size` is the N in "top N voxels" when each subject-specific ROI is defined.
pilot = 1           # pilot == 1 runs on tomloc; 0 runs on analysis_data
roi_size = 50       # default number of voxels to define individual roi == 50

In [2]:
#import modules
from os.path import join as pjoin, split as psplit
import os
import numpy as np
import matplotlib
from statistics import mean
import csv
from glob import glob
import re
import pandas as pd
from scipy import stats
import warnings as warn
import operator
import itertools
from nipype.interfaces import fsl
import nibabel as nib
from nibabel.testing import data_path
import nilearn
from nilearn.masking import apply_mask
from nilearn import plotting

In [3]:
# subject look-up table conversion
def Convert_Subname(Newname):
    """Translate a new-style subject name into the matching old-style name.

    The look-up table is the comma-separated file
    ``/om/user/rezzo/Subject_Conversion_Table.csv``; column 0 is compared
    against ``Newname`` (exact string match, no underscore normalisation is
    done here) and column 1 is returned.  If several rows match, the last
    one wins.

    Parameters
    ----------
    Newname : str
        Subject name to look up in column 0 of the table.

    Returns
    -------
    str
        The value in column 1 of the matching row.

    Raises
    ------
    KeyError
        If no row of the table has ``Newname`` in its first column
        (previously this surfaced as an ``UnboundLocalError``).
    """
    tmp_root = '/om/user/rezzo/Subject_Conversion_Table.csv'
    Oldname = None
    with open(tmp_root, "r", newline='') as tsv:
        for line in csv.reader(tsv, delimiter=","):
            # Blank or single-column rows cannot hold a mapping; skip them
            # instead of failing with an IndexError.
            if len(line) >= 2 and Newname == line[0]:
                Oldname = line[1]
    if Oldname is None:
        raise KeyError('Subject ' + repr(Newname) + ' not found in ' + tmp_root)
    return Oldname

In [4]:
# to obtain values and indices of individual rois
def nan_largestval(ary, n):
    """Return the n largest entries of ``ary`` and where they are located.

    The array is flattened and ranked in descending order (NaNs rank last).

    Returns a two-element list ``[values, indices]``: ``values`` holds the
    first ``n`` ranked entries, and ``indices`` is the tuple of per-axis
    index arrays (as produced by ``np.unravel_index``) locating those
    entries in the original, un-flattened array.
    """
    flat = ary.flatten()
    # Ascending order of the negated data == descending order of the data.
    ranking = np.argsort(-flat)
    top = ranking[:n]
    return [flat[top], np.unravel_index(top, ary.shape)]

In [5]:
# to obtain the count for the lateralization calculations
def nan_abovethresh(ary, thresh):
    """Count the entries of ``ary`` that are strictly below ``thresh``.

    NaN entries never satisfy the comparison, so they are not counted.
    The count is returned wrapped in a one-element list.
    """
    below = ary.flatten() < thresh
    return [np.sum(below)]

In [6]:
# to return a list of floats from a text file
def txt2list(file, details=None):
    """Read every whitespace-separated number in a text file into a flat list.

    Parameters
    ----------
    file : str
        Path of the text file to read.
    details : optional
        Ignored.  Kept (now with a default) only so existing two-argument
        calls keep working; the list passed in is never modified.

    Returns
    -------
    list of float
        All values in file order, reading line by line, left to right.

    Raises
    ------
    ValueError
        If a token cannot be converted with ``float``.
    """
    values = []
    # The context manager closes the handle; it used to be left open.
    with open(file, 'r') as txtfile:
        for line in txtfile:
            values.extend(float(token) for token in line.split())
    return values

In [7]:
# to transform pearson's R to fisher's Z
def pearson2fisher(pearsonR):
    """Apply the Fisher transform z = 0.5 * (ln(1 + r) - ln(1 - r)).

    ``np.log`` is the natural logarithm.  Works on scalars and element-wise
    on arrays.
    """
    log_plus = np.log(1+pearsonR)
    log_minus = np.log(1- pearsonR)
    return 0.5*(log_plus - log_minus)

In [8]:
# to cut a substring out of a larger string
def find_between( s, first, last ):
    """Return the part of ``s`` between ``first`` and the next ``last``.

    The search for ``last`` starts right after the first occurrence of
    ``first``.  An empty string is returned when either marker is missing.
    """
    try:
        begin = s.index( first ) + len( first )
    except ValueError:
        # opening marker absent
        return ""
    try:
        stop = s.index( last, begin )
    except ValueError:
        # closing marker absent after the opening marker
        return ""
    return s[begin:stop]

In [9]:
# find between but with list input
def find_list(lis, str1, str2):
    """Collect the distinct non-empty ``find_between`` results over a list.

    Parameters
    ----------
    lis : iterable of str
        Strings to search (e.g. file names).
    str1, str2 : str
        Opening and closing markers handed to ``find_between``.

    Returns
    -------
    list of str
        Unique, non-empty substrings in sorted order.  An empty input
        gives ``[]`` (it used to raise ``UnboundLocalError``).  Sorting
        replaces the previous arbitrary set order so that callers iterate
        tasks/runs in the same order on every execution.
    """
    found = {find_between(el, str1, str2) for el in lis}
    found.discard("")   # "" means "markers not present in this element"
    return sorted(found)


In [10]:
# find out first element of design file
def Design_file(tmp_root):
    """Classify a tab-separated design file by its first listed condition.

    The fourth column of every row is collected; the entry on the second
    row (the one after the first row) decides the result: ``"1"`` for
    'belief', ``"0"`` for 'photo', and ``None`` for anything else.
    """
    with open(tmp_root, "r") as tsv:
        fourth_column = [row[3] for row in csv.reader(tsv,  delimiter = "\t")]
    first_condition = fourth_column[1]
    codes = {'belief': "1", 'photo': "0"}
    return codes.get(first_condition)

In [11]:
# list files in directory and obtain a list of the file names
def roi_list(directory_name):
    """Return, for each entry of a directory, the text before its first '_'.

    Entries come back in ``os.listdir`` order; a name without an underscore
    is returned unchanged.
    """
    return [entry.split('_')[0] for entry in os.listdir(directory_name)]

In [12]:
def intervals(start_num, N):
    """Return the N consecutive values start_num, start_num + 1, ..., start_num + N - 1."""
    return [start_num + offset for offset in range(0, N)]

In [13]:
# to create the appropriate list of lateral rois
def lat_roi_condense(roi_list):
    """Strip the leading character from each name and de-duplicate.

    Used to turn hemisphere-prefixed names (e.g. 'lTPJ', 'rTPJ') into one
    entry per region ('TPJ').

    Parameters
    ----------
    roi_list : iterable of str
        Names whose first character is to be dropped.

    Returns
    -------
    list of str
        Unique stripped names in sorted order.  The input is no longer
        modified in place, and sorting replaces the previous arbitrary set
        order so results are the same on every execution.
    """
    return sorted({name[1:] for name in roi_list})

In [55]:
# Create roi_indexes, and subject indexes

# Select the data root from the `pilot` setting.  Value equality is used
# here; `is` compares object identity and is not a reliable test for ints.
if pilot == 1:
    main_root = '/om/user/rezzo/OpenAutism/pilot_data/'
else:
    main_root = '/om/user/rezzo/OpenAutism/analysis_data/'

roiroot = main_root+'MASKS/STANDARD_MASKS/'
biroiroot = main_root+'MASKS/LATERAL_MASKS/'

# ROI names are the part of each mask file name before the first underscore.
roi_index = roi_list(roiroot)
# Lateral masks come in l*/r* pairs; keep one entry per region.  Sorted so the
# column order derived from this list is the same on every execution.
lat_index = sorted(lat_roi_condense(roi_list(biroiroot)))

all_subjects = glob(main_root+"/SUBJECTS/*/")

# create list of subjects in folder: keep directories matching SAX_OA_<id>/
subject_list = []
for subject_dir in all_subjects:
    m = re.search('SAX_OA_(.+?)/', subject_dir)
    if m:
        subject_list.append('SAX_OA_'+m.group(1))

# Here is the sorted list of OA subject names
# (the method must be called; a bare `subject_list.sort` does nothing)
subject_list.sort()

# load list of subjects to include after motion outlier exclusion
good_subjects = main_root+'MOTION_INFO/Runs_LenientMotionFiltered.tsv'

In [16]:
######## MAGNITUDE AND LATERALIZATION ##########
# For every subject / task / run listed in the motion-inclusion file this cell:
#   * masks the run's t-map with each standard parcel, picks the top `roi_size`
#     voxels by t-value, saves that subject-specific ROI mask, its mean
#     position, and the mean contrast (cope) value inside it;
#   * saves the contrast values of the whole parcel as a flat array;
#   * counts voxels with two-sided p < 0.01 / p < 0.001 inside each left and
#     right lateral mask and derives a laterality index (L - R) / (L + R);
#   * appends one row per run to per-task CSV files, then averages the runs.
# NOTE: this section identifies and excludes motion outliers and performs run by run
# this is redundant if these measures have been priorly performed but will not affect results

# NOTE(review): debugging override -- this replaces the subject_list built in the
# previous cell, so only the subjects named here are processed.
subject_list = ['SAX_OA_001'] #,'SAX_OA_006','SAX_OA_076', 'SAX_OA_096']

# Read the motion-inclusion list once; it used to be re-opened (and never
# closed) for every run.
with open(good_subjects) as good_runs_file:
    good_runs_text = good_runs_file.read()

# "No column-width truncation": newer pandas spells this None and rejects -1,
# older pandas only accepts the integer form.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

# p-value thresholds for the lenient / strict laterality counts
thresh1 = 0.01
thresh2 = 0.001

for subject in subject_list:
    warn.filterwarnings("ignore",category =RuntimeWarning)
    imgroot = main_root+'SUBJECTS/'+subject+'/standard/'
    first_level = imgroot+'first_level_analyses/'
    second_level = imgroot+'second_level_analyses/'
    temp_list = os.listdir(imgroot+'first_level_analyses')

    # identify number of tasks for this subject
    task_list = find_list(temp_list, "tstat1_", '_run-')

    for task in task_list:
        # identify number of runs for this task
        runs = find_list(temp_list, "tstat1_"+task+'_run-', '.nii.gz')
        iteration = 0   # number of included runs processed so far for this task

        # per-task summary files (one row per included run)
        mag_csv = second_level+'magnitude/'+subject+'_'+task+'_MAG_SUM_STATS.csv'
        lat_csv = second_level+'lateralization/'+subject+'_'+task+'_LAT_SUM_STATS.csv'
        lat2_csv = second_level+'lateralization/'+subject+'_'+task+'_LAT2_SUM_STATS.csv'
        count_csv = second_level+'lateralization/'+subject+'_'+task+'_LAT_COUNT_STATS.csv'
        count2_csv = second_level+'lateralization/'+subject+'_'+task+'_LAT2_COUNT_STATS.csv'
        pos_csv = second_level+'MISC/'+subject+'_'+task+'_POSITION_ROI_STATS.csv'

        # for each run in current task
        for run in runs:
            # check if it is a motion outlier (substring match on the inclusion file)
            if subject+task+run not in good_runs_text:
                print(subject+'_'+task+run+' excluded due to motion')
                continue

            iteration = iteration + 1
            run_suffix = task+'_run-'+run+'.nii.gz'

            # load t-image, z-image, con-image as numpy arrays
            t_data = np.array(nib.load(first_level+'tstat1_'+run_suffix).dataobj)
            z_data = np.array(nib.load(first_level+'zstat1_'+run_suffix).dataobj)
            CON_data = np.array(nib.load(first_level+'cope1_'+run_suffix).dataobj)

            ############################# MAGNITUDE ###############################
            mag_per_roi = []   # ave magnitude, one entry per roi
            pos_per_roi = []   # ave position [X, Y, Z], one entry per roi

            for roi in roi_index:
                PARCEL = nib.load(roiroot + roi+ '_FSL_space.nii.gz')
                # float copy so that voxels outside the parcel can hold NaN
                binary_data = np.array(PARCEL.dataobj).astype(float)
                binary_data[binary_data == 0] = np.nan

                # contrast values over the whole parcel, saved as a flattened array.
                # The file name now carries subject/task/run/roi: the previous fixed
                # name 'MVPA_array' was overwritten on every iteration, leaving only
                # the last ROI of the last run on disk.
                roi_con = CON_data*binary_data
                np.save(second_level+'multivariate/'+subject+task+run+'_MVPA_array_'+roi, roi_con.flatten())

                # multiply t-image by roi mask -> hypothesis space
                roi_hs = (t_data*binary_data).astype('float')

                # find top N t-values within this space to define individual's ROI
                [values, indices] = nan_largestval(roi_hs, roi_size)

                # create subject-specific roi mask (N voxels, e.g. 50)
                roi_mask = np.zeros(shape=roi_hs.shape)
                roi_mask[indices] = 1

                # save it as nifti image
                roi_img = nib.Nifti1Image(roi_mask, PARCEL.affine, PARCEL.header)
                fname = pjoin(second_level+'MISC/individual_roi_masks/'+roi+'_indiv_roi_mask_run'+run)
                nib.save(roi_img, fname)

                # calculate average position (truncated to int) for subject's run
                X = int(np.mean(indices[0]))
                Y = int(np.mean(indices[1]))
                Z = int(np.mean(indices[2]))
                pos_per_roi.append([X, Y, Z])

                # keep the contrast at the selected voxels, NaN everywhere else,
                # then average ignoring NaN -> magnitude summary statistic
                selection = np.where(roi_mask == 1, 1.0, np.nan)
                roi_contrast = CON_data*selection
                mag_per_roi.append(np.nanmean(roi_contrast))

                # Save con values selected by the mask as a flat array
                Topvoxels = roi_contrast.flatten()
                Topvoxels = Topvoxels[~np.isnan(Topvoxels)]
                np.save(second_level+'magnitude/'+subject+task+run+'_Top'+str(roi_size)+'voxels_contrast_'+roi, Topvoxels)

            ############################# LATERALIZATION ###############################
            # two-sided p-values from the z-map; identical for every roi, so computed once
            p_values_2 = stats.norm.sf(abs(z_data))*2

            lat_lenient = []     # lat for low thresh
            count_lenient = []   # count of total voxels below low thresh
            lat_strict = []      # lat for high thresh
            count_strict = []    # count of total voxels below high thresh

            for roi in lat_index:
                # load left / right combined parcels as float arrays, 0 -> NaN
                left_binary_data = np.array(nib.load(biroiroot + 'l'+roi+'_lateral_FSL_space.nii.gz').dataobj).astype(float)
                right_binary_data = np.array(nib.load(biroiroot + 'r'+roi+'_lateral_FSL_space.nii.gz').dataobj).astype(float)
                left_binary_data[left_binary_data == 0] = np.nan
                right_binary_data[right_binary_data == 0] = np.nan

                # multiply p-values by roi masks
                left_roi_hs = (p_values_2*left_binary_data).astype('float')
                right_roi_hs = (p_values_2*right_binary_data).astype('float')

                # -1 means right dominant and 1 means left dominant, 0 means bilateral
                # (0.0 is also used when no voxel passes the threshold)
                for p_thresh, lat_store, count_store in ((thresh1, lat_lenient, count_lenient),
                                                         (thresh2, lat_strict, count_strict)):
                    [total_left] = nan_abovethresh(left_roi_hs, p_thresh)
                    [total_right] = nan_abovethresh(right_roi_hs, p_thresh)
                    total = total_left + total_right
                    count_store.append(total)
                    if float(total) == 0:
                        lat_store.append(0.0)
                    else:
                        lat_store.append(float(total_left - total_right) / float(total))

            ################## Create the dataframes for all metrics ###############
            Mdataframe_init = ['MAG_' + s for s in roi_index] + ['Description']
            Ldataframe_init = ['LAT_' + s for s in lat_index] + ['Description']

            # dataframe for magnitude (description follows the configured roi_size)
            Msub_measures = pd.DataFrame(
                [mag_per_roi + ['Mean contrast magnitude of the top '+str(roi_size)+' voxels by t-value']],
                columns = Mdataframe_init)
            Msub_measures = Msub_measures.rename(index={0: 'Magnitude'})

            # dataframe for position (shares the magnitude column names)
            POS_measures = pd.DataFrame(
                [pos_per_roi + ['XYZ average position of the top voxels of individual roi based on top t-values']],
                columns = Mdataframe_init)
            POS_measures = POS_measures.rename(index={0: 'Position'})

            # dataframes for both lateralization indices
            Lsub_measures = pd.DataFrame(
                [lat_lenient + ['Lateralization index based on voxel selection with p < 0.01 (left count - right count / left and right count)']],
                columns = Ldataframe_init)
            Lsub_measures2 = pd.DataFrame(
                [lat_strict + ['Lateralization index based on voxel selection with p < 0.001 (left count - right count / left and right count)']],
                columns = Ldataframe_init)
            Lsub_measures = Lsub_measures.rename(index={0: 'Laterality Index (p < 0.01)'})
            Lsub_measures2 = Lsub_measures2.rename(index={0: 'Laterality Index (p < 0.001)'})

            # dataframes for both lateralization counts (total voxels passing thresh)
            count_measures1 = pd.DataFrame(
                [count_lenient + ['Total count based on voxel selection with p < 0.01 (left + right)']],
                columns = Ldataframe_init)
            count_measures2 = pd.DataFrame(
                [count_strict + ['Total count based on voxel selection with p < 0.001 (left + right)']],
                columns = Ldataframe_init)

            # save (first included run: create with header) or append all dataframes
            if iteration == 1:
                csv_kwargs = dict(mode='w', index=False)
            else:
                csv_kwargs = dict(mode='a', index=False, header=False)
            for frame, path in ((Msub_measures, mag_csv),
                                (Lsub_measures, lat_csv),
                                (Lsub_measures2, lat2_csv),
                                (count_measures1, count_csv),
                                (count_measures2, count2_csv),
                                (POS_measures, pos_csv)):
                frame.to_csv(path, **csv_kwargs)

            # log each completed run on console
            print(subject+'_'+task+run)
        #####################################################################

        # now average all metrics runs together (position file is not averaged)
        for file in [mag_csv, lat_csv, lat2_csv, count_csv, count2_csv]:
            try:
                df = pd.read_csv(file)
            except FileNotFoundError:
                # task had no included runs, so nothing was written
                print('No run-level file to average: '+file)
                continue
            # numeric_only skips the text 'Description' column
            ex = pd.DataFrame(df.mean(numeric_only=True)).transpose()
            ex.to_csv(file[:-4]+'_AVE_RUNS.csv', mode='w', index=False)




SAX_OA_001_tomloc002
SAX_OA_001_tomloc001


In [40]:
############# INTERREGIONAL CORRELATION AND TEMPORAL VARIANCE ###############
# For every subject / task / run listed in the motion-inclusion file this cell:
#   * extracts the mean time course of each subject-specific ROI with FSL;
#   * TEMPORAL VARIANCE: for each trial of the face condition(s) takes
#     DISCRETE_POINTS consecutive samples starting at the shifted onset,
#     computes their std, and averages the stds over trials (one value per roi);
#   * INTERREGIONAL CORRELATION: correlates the ROI time courses, Fisher
#     z-transforms the matrix and averages the off-diagonal entries;
#   * appends one row per run to per-task CSV files, then averages the runs.

#subject_list = ['SAX_OA_006']

# define the experimental conditions for all experiments
if pilot == 1:
    faces = ['belief'] # dummy face for tomloc
    TR = 2
else:
    faces = [] # need a list of all analysis_data face conditions
    TR = None  # TODO: TR for analysis_data still has to be determined

# define N discrete points for temporal variance (same place relative to hrf across trials)
DISCRETE_POINTS = 4

# Read the motion-inclusion list once; it used to be re-opened (and never
# closed) for every run.
with open(good_subjects) as good_runs_file:
    good_runs_text = good_runs_file.read()

# "No column-width truncation": newer pandas spells this None and rejects -1,
# older pandas only accepts the integer form.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

for subject in subject_list:
    warn.filterwarnings("ignore",category =RuntimeWarning)
    imgroot = main_root+'SUBJECTS/'+subject+'/standard/'
    bold_dir = imgroot+'first_level_analyses/BOLD_data/'
    second_level = imgroot+'second_level_analyses/'
    temp_list = os.listdir(imgroot+'first_level_analyses/BOLD_data')

    # identify number of tasks for this subject
    task_list = find_list(temp_list, "temporaldata_", '_run-')

    for task in task_list:
        # identify number of runs for this task
        runs = find_list(temp_list, "temporaldata_"+task+'_run-', '.nii.gz')
        iteration = 0   # number of included runs processed so far for this task

        # per-task summary files (one row per included run)
        interreg_csv = second_level+'interregional_cor/'+subject+'_'+task+'_INTERREG_SUM_STATS.csv'
        tempnoise_csv = second_level+'temporal_variance/'+subject+'_'+task+'_TEMPNOISE_SUM_STATS.csv'

        # need to determine if block or event-related for analysis_data

        # for each run in current task
        for run in runs:
            # check if it is a motion outlier (substring match on the inclusion file)
            if subject+task+run not in good_runs_text:
                # if run is never found in inclusion list, will not run analysis
                print(subject+task+run+' is a motion outlier')
                continue

            if not faces or TR is None:
                # previously this surfaced later as a NameError / IndexError
                raise NotImplementedError('face conditions and TR are only defined for pilot (tomloc) data')

            iteration = iteration + 1

            ######################### TEMPORAL VARIANCE ######################
            # read in design file
            fpath = bold_dir+'design_'+task+'_run-'+run+'.tsv'
            designfile = pd.read_csv(fpath, sep='\t')

            # Shifted trial onsets as TR indices, pooled over all face conditions
            # (all face conditions are treated as one condition).
            shifted_onsets = []
            for element in faces:
                onsets = designfile.loc[designfile['trial_type'] == element, 'onset']
                shifted_onsets.extend(int(np.round((onset + TR) / TR)) for onset in onsets)

            # one list of DISCRETE_POINTS consecutive temporal indices per trial
            final_exp_points = [intervals(onset_index, DISCRETE_POINTS) for onset_index in shifted_onsets]

            # 4D functional data of current run (read by FSL below, not loaded here)
            bold_file = bold_dir+'temporaldata_'+task+'_run-'+run+'.nii.gz'

            temporal_arrays = []   # mean temporal array of each roi
            TEMPORAL_VAR = []      # final temporal variance of each roi for this run

            for roi in roi_index:
                mask_file = second_level+'MISC/individual_roi_masks/'+roi+'_indiv_roi_mask_run'+run+'.nii'
                signal_file = second_level+'MISC/mean_roi_Temporal_Signal/Mean_temporal_signal_'+roi+'_'+task+'_run-'+run+'.txt'

                # use fsl to extract mean signal in roi:
                meants = fsl.ImageMeants(in_file=bold_file, mask=mask_file, out_file=signal_file)
                meants.run()

                # save the mean signal as a list for each roi
                roi_signal = txt2list(signal_file, [])
                temporal_arrays.append(roi_signal)

                # std of the BOLD values in each trial window ("- 1" accounts for 0 index)
                trial_stds = [np.std([roi_signal[point - 1] for point in window])
                              for window in final_exp_points]

                # Mean of all per-trial stds.  Previously only the first trial's
                # std reached the output, contrary to the description of the metric.
                TEMPORAL_VAR.append(np.mean(trial_stds))

            ########################## INTERREGIONAL CORRELATION #########################################################
            # make a correlation matrix for all rois' mean signal arrays
            COR_MATRIX = np.corrcoef(temporal_arrays)

            # save this per subject per run
            np.save(second_level+'interregional_cor/InterregionCor_R_'+task+'_run-'+run,COR_MATRIX)

            # need to z-transform the R correlations
            ZCorrMatrix = pearson2fisher(COR_MATRIX)
            np.save(second_level+'interregional_cor/InterregionCor_Z_'+task+'_run-'+run,ZCorrMatrix)

            # exclude the diagonal (self-correlations) and average what is left
            np.fill_diagonal(ZCorrMatrix, np.nan)
            z_mean = ZCorrMatrix[~np.isnan(ZCorrMatrix)].mean()

            ############ CREATING AND SAVING DATAFRAMES #############

            # Create dataframe for interregional correlation
            SUB_measures = pd.DataFrame(
                [[z_mean, 'Z-transformed score of the mean pearsons R of correlation matrix relating each ROIs activation (excludes diagonal)']],
                columns=['INTERREGION_COR', 'Description'])
            SUB_measures = SUB_measures.rename(index={0: 'Correlation_Z_transf'})

            # Create dataframe for temporal variance
            dataframe_col2 = ['TEMPVAR_FACES_' + s for s in roi_index] + ['Description']
            sub_measures2 = pd.DataFrame(
                [TEMPORAL_VAR + ['temporal variance within subject, within roi, for '+faces[0]+' condition']],
                columns=dataframe_col2)

            # save dataframes as csv files (first included run: create with header)
            if iteration == 1:
                csv_kwargs = dict(mode='w', index=False)
            else:
                csv_kwargs = dict(mode='a', index=False, header=False)
            SUB_measures.to_csv(interreg_csv, **csv_kwargs)
            sub_measures2.to_csv(tempnoise_csv, **csv_kwargs)

        for file in [interreg_csv, tempnoise_csv]:
            # now average all runs together
            try:
                df = pd.read_csv(file)
            except FileNotFoundError:
                # task had no included runs, so nothing was written
                print('No run-level file to average: '+file)
                continue
            # numeric_only skips the text 'Description' column
            ex = pd.DataFrame(df.mean(numeric_only=True)).transpose()
            ex.to_csv(file[:-4]+'_final.csv', mode='w', index=False)
        

SAX_OA_006tomloc001 is a motion outlier


FileNotFoundError: File b'/om/user/rezzo/OpenAutism/pilot_data/SUBJECTS/SAX_OA_098/standard/first_level_analyses/BOLD_data/design_tomloc_run-002.tsv' does not exist

In [78]:
# now put all data in one big matrix (subject--task)
# One output row per subject/task, built by placing the single-row
# run-averaged files of every metric side by side.

tasks = ['tomloc'] # just for tomloc

# (metric folder, suffix of the run-averaged file inside it)
metric_files = [
    ('magnitude', 'MAG_SUM_STATS_AVE_RUNS.csv'),
    ('lateralization', 'LAT_SUM_STATS_AVE_RUNS.csv'),
    ('interregional_cor', 'INTERREG_SUM_STATS_final.csv'),
    ('temporal_variance', 'TEMPNOISE_SUM_STATS_final.csv'),
]

summary_rows = []

for subject in subject_list:
    imgroot = main_root+'SUBJECTS/'+subject+'/standard/second_level_analyses/'

    # add task
    for task in tasks:
        dflist = [pd.DataFrame([[subject, task]], columns=['SAX_OA_ID', 'TASK'])]

        for metric, suffix in metric_files:
            newimgroot = imgroot + metric + '/' + subject+'_'+task+'_'+suffix
            try:
                df = pd.read_csv(newimgroot)
            except FileNotFoundError:
                print("No Valid Runs for "+subject+'_'+task)
                continue
            dflist.append(df.iloc[:1])  # add the first row to the list

        # concatenate list of rows (all dataframes) for one subject
        summary_rows.append(pd.concat(dflist, axis=1))

if summary_rows:
    # Stack by column NAME so that a subject with a missing metric gets empty
    # cells in those columns; appending raw rows used to shift such a
    # subject's values under the wrong headers.
    summary_matrix = pd.concat(summary_rows, axis=0, ignore_index=True, sort=False)
    summary_matrix.to_csv(main_root+'/SUMMARY_STATS/ALLSUBJECTS_MATRIX.csv', index=False, mode='w')



No Valid Runs for SAX_OA_098_tomloc
No Valid Runs for SAX_OA_098_tomloc
No Valid Runs for SAX_OA_009_tomloc
No Valid Runs for SAX_OA_009_tomloc
No Valid Runs for SAX_OA_005_tomloc
No Valid Runs for SAX_OA_005_tomloc
No Valid Runs for SAX_OA_032_tomloc
No Valid Runs for SAX_OA_032_tomloc
No Valid Runs for SAX_OA_012_tomloc
No Valid Runs for SAX_OA_012_tomloc
No Valid Runs for SAX_OA_004_tomloc
No Valid Runs for SAX_OA_004_tomloc
No Valid Runs for SAX_OA_081_tomloc
No Valid Runs for SAX_OA_081_tomloc
No Valid Runs for SAX_OA_049_tomloc
No Valid Runs for SAX_OA_049_tomloc
No Valid Runs for SAX_OA_016_tomloc
No Valid Runs for SAX_OA_016_tomloc
No Valid Runs for SAX_OA_101_tomloc
No Valid Runs for SAX_OA_101_tomloc
No Valid Runs for SAX_OA_096_tomloc
No Valid Runs for SAX_OA_096_tomloc
No Valid Runs for SAX_OA_096_tomloc
No Valid Runs for SAX_OA_096_tomloc
No Valid Runs for SAX_OA_108_tomloc
No Valid Runs for SAX_OA_108_tomloc
No Valid Runs for SAX_OA_083_tomloc
No Valid Runs for SAX_OA_083