### This notebook takes the parcellated outputs from the GLM and, for each individual, computes the pattern similarity (Pearson correlation) of each episode (reevaluated and control) between Listen 1 and Listen 2


In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
import tqdm


In [2]:
def calculate_correlation(A, B):
    """Return the Pearson correlation between A and B, ignoring NaN entries.

    Positions where either input is NaN are dropped from both inputs before
    the correlation is computed.

    Parameters
    ----------
    A, B : array-like of numbers
        Inputs to correlate (lists or NumPy arrays); expected to have the
        same shape.

    Returns
    -------
    float
        Pearson r over the positions where both inputs are non-NaN, or NaN
        when fewer than two such positions remain (``pearsonr`` would raise
        ``ValueError`` in that case).
    """
    # Accept plain lists as well as arrays; boolean-mask indexing needs arrays.
    A = np.asarray(A, dtype=float)
    B = np.asarray(B, dtype=float)

    # Keep only the positions where both A and B hold a real value.
    mask = ~(np.isnan(A) | np.isnan(B))

    # A correlation is undefined for fewer than two paired observations.
    if np.count_nonzero(mask) < 2:
        return np.nan

    correlation, _ = pearsonr(A[mask], B[mask])

    return correlation

In [5]:
# Subject numbers 1 through 37.
sub_nums = list(range(1, 38))
# There is no data for subject 18, so locate it and drop it from the list.
idx = sub_nums.index(18)
del sub_nums[idx]
# Turn the remaining numbers into zero-padded IDs such as 'sub-01'.
sub_nums = list(map('sub-{:02}'.format, sub_nums))

# Directory names used by this notebook.
data_dir = 'glm_out'
data_dir_local = '../../../darkend/data/'

In [7]:
### Event table: onset, offset and length of every event

# The events are listed in the order in which they were entered into the
# AFNI timing GLM script.
onsets = [22, 70, 144, 171, 235, 261, 303, 366, 420, 470]
offsets = [30, 78, 153, 180, 246, 272, 321, 384, 447, 497]

# Condition of each event, in the same order as the onsets above.
label = [
    'reappraisal',
    'control',
    'control',
    'reappraisal',
    'control',
    'reappraisal',
    'reappraisal',
    'control',
    'control',
    'reappraisal',
]
# Short descriptive name of each event.
name = [
    'conversation_bf',
    'welcome_store_control',
    'store_knowledge_control',
    'calls_robot',
    'dress_against_policy_control',
    'steve_skinny',
    'emergency_song',
    'excuse_me_control',
    'lucy_nice_control',
    'resolution',
]
# Events are numbered 1 through 10.
event_num = range(1, 11)

# Assemble the table; 'len' is the offset minus the onset of each event.
df = pd.DataFrame({'onsets': onsets, 'offsets': offsets})
df['len'] = df['offsets'] - df['onsets']
df = df.assign(label=label, name=name, event_num=event_num)

df

Unnamed: 0,onsets,offsets,len,label,name,event_num
0,22,30,8,reappraisal,conversation_bf,1
1,70,78,8,control,welcome_store_control,2
2,144,153,9,control,store_knowledge_control,3
3,171,180,9,reappraisal,calls_robot,4
4,235,246,11,control,dress_against_policy_control,5
5,261,272,11,reappraisal,steve_skinny,6
6,303,321,18,reappraisal,emergency_song,7
7,366,384,18,control,excuse_me_control,8
8,420,447,27,control,lucy_nice_control,9
9,470,497,27,reappraisal,resolution,10


#### Compute the r-value in each node for every episode (labelled as reevaluated or control); these values will then be entered into an LME model

In [10]:
# Accumulators for a long-format table: one entry per subject x node x episode.
sub_list = []
node_list = []
episode_list = []
r_list = []
episode_label = []

# Pair each episode number with its condition label once, up front, instead
# of filtering `df` on every innermost iteration (event numbers are unique).
episode_info = list(zip(df['event_num'], df['label']))

# Directory that holds the per-subject .npy files for both listens.
roi_dir = data_dir_local + '_schaeffer_rois_reevaluated_null/'

for sub in tqdm.tqdm(sub_nums):

    # NOTE(review): allow_pickle=True unpickles arbitrary Python objects, so
    # these files must come from a trusted source.
    # `.item()` unwraps the object stored in the 0-d array; it is iterated
    # below as a mapping keyed by node.
    listen = 'L1'
    data_L1 = np.load(roi_dir + f'{sub}_7N_reevaluated_periods_{listen}_subcortical_included.npy', allow_pickle=True).item()

    listen = 'L2'
    data_L2 = np.load(roi_dir + f'{sub}_7N_reevaluated_periods_{listen}_subcortical_included.npy', allow_pickle=True).item()

    # The node keys of the first listen drive the loop; the second listen is
    # indexed with the same keys.
    for node in data_L1:

        for episode, episode_type in episode_info:

            sub_list.append(sub)
            node_list.append(node)
            episode_list.append(episode)
            episode_label.append(episode_type)

            # Correlate this node's pattern for the episode across the two
            # listens (each node entry is indexed by the episode number).
            r_list.append(calculate_correlation(data_L1[node][episode], data_L2[node][episode]))


# Collect the accumulators into the table that feeds the later model.
r_vals_for_GLM = pd.DataFrame(
    {
        'sub': sub_list,
        'node': node_list,
        'episode': episode_list,
        'label': episode_label,
        'r': r_list,
    }
)

r_vals_for_GLM
    
    
    
    
    

100%|██████████| 36/36 [03:09<00:00,  5.27s/it]


Unnamed: 0,sub,node,event,label,r
0,sub-01,1,1,reappraisal,-0.107656
1,sub-01,1,2,control,-0.020468
2,sub-01,1,3,control,0.122438
3,sub-01,1,4,reappraisal,0.123952
4,sub-01,1,5,control,-0.099088
...,...,...,...,...,...
36715,sub-37,hippocampus_data_R,6,reappraisal,-0.109089
36716,sub-37,hippocampus_data_R,7,reappraisal,-0.126241
36717,sub-37,hippocampus_data_R,8,control,0.314625
36718,sub-37,hippocampus_data_R,9,control,0.097409


In [11]:
# Write the table of r-values to a CSV file (relative path, so it lands in the
# current working directory); the DataFrame index is written as the first,
# unnamed column.
output_csv = '2.2._rvals_per_reevaluated_episode_include_subcortical.csv'
r_vals_for_GLM.to_csv(output_csv)
