## Script for the RSA analysis of Morbi EEG project

In [None]:
%matplotlib inline
import os
import mne
import multiprocessing
from tqdm import tqdm
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from scipy.stats import rankdata,pearsonr
from neurora.stuff import permutation_corr
from mne.viz import plot_topomap
from mne.stats import fdr_correction
from scipy.stats import ttest_1samp, ttest_ind, ttest_rel, f_oneway
from mne.stats import fdr_correction, f_mway_rm, permutation_cluster_test
from scipy.spatial.distance import pdist, squareform
import matplotlib.pyplot as plt
import seaborn as sns
from jupyterthemes import jtplot
jtplot.style(theme='grade3') 
from Function import get_tril_vec, spearmanr, permutation_cor


# define data repository
# Absolute locations of the project's inputs and outputs. All of them are
# hard-coded to the E: drive, so they must be edited to run elsewhere.
# behav_path is defined here but not referenced by the cells in this notebook.
behav_path = 'E:/Bilingual_Morphology_Project/Data/Behav/'
# Folder that holds the per-subject, per-marker EEGLAB '.set' epoch files.
eeg_path = 'E:/Bilingual_Morphology_Project/Data/EEG_prep'
# Root of the 'CH/' and 'EN/' word-vector pickles.
vec_path = 'E:/Bilingual_Morphology_Project/Data/word_vec/'
# Destination of the saved '.npy' RSA results.
results_path = 'E:/Bilingual_Morphology_Project/Results/'
# Destination of the saved RSA figure.
plot_path = 'E:/Bilingual_Morphology_Project/Results/Plot'

# Subject numbers 5..34 (30 subjects). Later cells turn a subject number into
# a zero-based array position with `sub-5`, which relies on this range.
sub_list = list(range(5,35))
# Condition markers 2..9 (8 conditions), used in the '.set' file names.
mark_list = list(range(2,10))

### Section 1: Representational similarity matrix of Word vector

Load the word vectors as numpy arrays

In [None]:
def _load_word_vectors(rel_path):
    """Read one pickled word-vector table and return it as a float64 array.

    ``rel_path`` is resolved against ``vec_path``. The first column of the
    table is dropped (presumably the word label -- confirm against the pickle
    files); every remaining column is kept.

    Pickle files run code when loaded, so only trusted project files belong
    under ``vec_path``.
    """
    table = pd.read_pickle(os.path.join(vec_path, rel_path))
    return table.iloc[:, 1:].to_numpy().astype(np.float64)


# Chinese word vectors (200 dimensions). Files ending in 1 and 2 are paired
# row by row when the prime-target similarity is computed.
vec_cpp1 = _load_word_vectors('CH/vec_cpp1.pkl')
vec_cpp2 = _load_word_vectors('CH/vec_cpp2.pkl')
vec_cpn1 = _load_word_vectors('CH/vec_cpn1.pkl')
vec_cpn2 = _load_word_vectors('CH/vec_cpn2.pkl')
vec_ccp1 = _load_word_vectors('CH/vec_ccp1.pkl')
vec_ccp2 = _load_word_vectors('CH/vec_ccp2.pkl')
vec_ccn1 = _load_word_vectors('CH/vec_ccn1.pkl')
vec_ccn2 = _load_word_vectors('CH/vec_ccn2.pkl')

# English word vectors (300 dimensions), same file layout as above.
vec_epp1 = _load_word_vectors('EN/vec_epp1.pkl')
vec_epp2 = _load_word_vectors('EN/vec_epp2.pkl')
vec_epn1 = _load_word_vectors('EN/vec_epn1.pkl')
vec_epn2 = _load_word_vectors('EN/vec_epn2.pkl')
vec_ecp1 = _load_word_vectors('EN/vec_ecp1.pkl')
vec_ecp2 = _load_word_vectors('EN/vec_ecp2.pkl')
vec_ecn1 = _load_word_vectors('EN/vec_ecn1.pkl')
vec_ecn2 = _load_word_vectors('EN/vec_ecn2.pkl')

Calculate the similarity (1 − Euclidean distance) between the word vectors of each prime and its target

In [None]:
## Prime-target similarity per word pair, defined as 1 - Euclidean distance.
## NOTE: this is not a Pearson correlation; the "vec_r_*" names are kept only
## because the following cells refer to them.
def _pair_similarity(first, second):
    """Return ``1 - ||second[k] - first[k]||`` for every row ``k``.

    ``first`` and ``second`` are 2-D arrays whose rows are paired by position.
    The result is a 1-D array with one value per row of ``first``.
    """
    return np.array([
        1 - np.linalg.norm(second[k, :] - first[k, :])
        for k in range(first.shape[0])
    ])


def _pad_with_mean(values, n_extra):
    """Append ``n_extra`` copies of the mean of ``values`` to ``values``.

    Presumably used so that a condition with fewer word pairs reaches the same
    length as the others -- confirm against the stimulus lists.
    """
    return np.append(values, [values.mean()] * n_extra)


# Chinese conditions
vec_r_cpp = _pair_similarity(vec_cpp1, vec_cpp2)
vec_r_cpn = _pad_with_mean(_pair_similarity(vec_cpn1, vec_cpn2), 2)
vec_r_ccp = _pair_similarity(vec_ccp1, vec_ccp2)
vec_r_ccn = _pair_similarity(vec_ccn1, vec_ccn2)

# English conditions
vec_r_epp = _pair_similarity(vec_epp1, vec_epp2)
vec_r_epn = _pad_with_mean(_pair_similarity(vec_epn1, vec_epn2), 1)
vec_r_ecp = _pair_similarity(vec_ecp1, vec_ecp2)
vec_r_ecn = _pair_similarity(vec_ecn1, vec_ecn2)

In [None]:
# n_con = 8
# vec = np.array([vec_r_cpp.mean(),vec_r_cpn.mean(),vec_r_ccp.mean(),vec_r_ccn.mean(),\
#         vec_r_epp.mean(),vec_r_epn.mean(),vec_r_ecp.mean(),vec_r_ecn.mean()])
# vec_rank = rankdata(vec) # explicity convert the raw scores to ranks
# nn_vec = np.zeros((n_con, n_con))

# for i in range(n_con):
#     for j in range(n_con):
#         if i < j:
#             dist_ij = 1-(abs(vec_rank[i]-vec_rank[j])/n_con) 
#             nn_vec[i,j] = dist_ij
#             nn_vec[j,i] = dist_ij
             
# np.fill_diagonal(nn_vec, 1)

# # mask =np.zeros_like(nn_vec)
# # mask[np.triu_indices_from(mask)] = True
# plt.figure(figsize=(20,10))
# sns.heatmap(nn_vec, square=True, cmap='RdBu_r', linewidths=0.1,  xticklabels=False, yticklabels=False, cbar_kws={'label': 'similarity', "shrink": 0.8})
# plt.title('', fontsize=25, fontweight='bold')

# wordvec_rsm_nn = get_tril_vec(nn_vec)

In [None]:
n_con = 8
# Mean prime-target similarity of each condition, in the fixed order
# cpp, cpn, ccp, ccn, epp, epn, ecp, ecn.
vec = np.array([
    vec_r_cpp.mean(), vec_r_cpn.mean(), vec_r_ccp.mean(), vec_r_ccn.mean(),
    vec_r_epp.mean(), vec_r_epn.mean(), vec_r_ecp.mean(), vec_r_ecn.mean(),
])
vec_rank = rankdata(vec)  # explicitly convert the raw scores to ranks

# Model similarity of two conditions = mean of their two ranks, divided by the
# number of conditions. Broadcasting builds the full symmetric matrix at once.
annk_vec = (vec_rank[:, np.newaxis] + vec_rank[np.newaxis, :]) / 2 / n_con
np.fill_diagonal(annk_vec, 1)  # a condition is maximally similar to itself

# Vectorised model RSM that is later correlated with the neural RSM.
# NOTE(review): confirm that get_tril_vec returns the condition pairs in the
# same order as the condensed vector produced by pdist for the neural RSM.
wordvec_rsm_annk = get_tril_vec(annk_vec)

condition_labels = ['Chinese priming people-rlated','Chinese priming people unrelated','Chinese control people related','Chinese control people unrelated','English priming people-rlated','English priming people unrlated','English control people-rlated','English control people unrlated']
annk_vec = pd.DataFrame(annk_vec)
annk_vec.columns = condition_labels
annk_vec.index = condition_labels

# Upper-triangle mask; it is built here but not passed to the heatmap call.
mask = np.zeros_like(annk_vec)
mask[np.triu_indices_from(mask)] = True

plt.figure(figsize=(20,10))
sns.heatmap(
    annk_vec,
    square=True,
    cmap='RdBu_r',
    linewidths=0.1,
    xticklabels=False,
    yticklabels=True,
    cbar_kws={'label': 'similarity', "shrink": 0.8},
)

In [None]:
# wordvec = np.stack([vec_r_cpp, vec_r_cpn, vec_r_ccp, vec_r_ccn, vec_r_epp, vec_r_epn, vec_r_ecp, vec_r_ecn], axis=1)
# wordvec_rsm = 1-pdist(wordvec.T, metric='correlation')
# wordvec_rsm_square = squareform(wordvec_rsm)

# # mask =np.zeros_like(wordvec_rsm_square)
# # mask[np.triu_indices_from(mask)] = True
# plt.figure(figsize=(20,10))
# sns.heatmap(wordvec_rsm_square, square=True, cmap='RdBu_r', linewidths=0.1,  xticklabels=False, yticklabels=False, cbar_kws={'label': 'similarity', "shrink": 0.8})
# plt.title('RSM of word vectors distance (all pairs)', fontsize=25, fontweight='bold')

### Section 2: Representational similarity matrix of ERP scalp distribution

In [None]:
# One data set is read on its own and given the custom montage; `demo` is not
# referenced again by the cells in this notebook.
demo = mne.read_epochs_eeglab('E:/Bilingual_Morphology_Project/Data/EEG_prep/S31_09.set')
montage_file = 'E:/Bilingual_Morphology_Project/Scripts/morbi.loc'
montage = mne.channels.read_custom_montage(montage_file)
demo.set_montage(montage)

# eeg_meta[subject][marker] -> epochs read from 'S<subject>_0<marker>.set'
eeg_meta = {}
for sub in sub_list:
    per_marker = {}
    for mark in mark_list:
        set_file = os.path.join(eeg_path, 'S{}_0{}.set'.format(sub, mark))
        epochs = mne.read_epochs_eeglab(set_file)
        epochs.set_montage(montage)  # same montage for every data set
        per_marker[mark] = epochs
    eeg_meta[sub] = per_marker

In [None]:
# Channel indexing: map every channel name to its position.
# The names are taken from subject 5, marker 2.
ch_names = eeg_meta[5][2].ch_names
print(ch_names)

# Positions 0..30 (31 channels); the ERP arrays are sized to match.
ch_idx = list(range(31))

# name -> position lookup, e.g. ch_num['PO9']
ch_num = {name: idx for name, idx in zip(ch_names, ch_idx)}
print(ch_num['PO9'])

In [None]:
# Single-trial data per condition: <cond>_meta[sub] is the array returned by
# get_data() multiplied by 10**6 (MNE keeps EEG in volts, so presumably the
# values are microvolts afterwards -- confirm).
cpp_meta, ccp_meta, epp_meta, ecp_meta, cpn_meta, ccn_meta, epn_meta, ecn_meta = {}, {}, {}, {}, {}, {}, {}, {}
for sub in sub_list:
    epoch = eeg_meta[sub]
    # Each array is stored directly in the dictionary of the same condition.
    # One line per condition keeps name and marker together, so a condition
    # cannot end up under another condition's name.
    cpp_meta[sub] = epoch[2].get_data()*10**6  # Chinese priming condition
    cpn_meta[sub] = epoch[3].get_data()*10**6
    ccp_meta[sub] = epoch[4].get_data()*10**6  # Chinese control condition
    ccn_meta[sub] = epoch[5].get_data()*10**6
    epp_meta[sub] = epoch[6].get_data()*10**6  # English priming condition
    epn_meta[sub] = epoch[7].get_data()*10**6
    # NOTE(review): markers 9 and 8 are used in descending order here, unlike
    # the other condition pairs -- confirm this matches the marker coding.
    ecp_meta[sub] = epoch[9].get_data()*10**6  # English control condition
    ecn_meta[sub] = epoch[8].get_data()*10**6


# ERP data structure: [n_channels, n_sub, n_times]
erp_shape = [31, 30, 500]
cpp_erp, cpn_erp, ccp_erp, ccn_erp, epp_erp, epn_erp, ecp_erp, ecn_erp \
            = (np.zeros(erp_shape) for _ in range(8))

# Average over trials (axis 0 of the single-trial array) for every condition,
# subject and channel. Subject numbers start at 5, hence the `sub-5` position.
for meta, erp in (
    (cpp_meta, cpp_erp), (cpn_meta, cpn_erp),
    (ccp_meta, ccp_erp), (ccn_meta, ccn_erp),
    (epp_meta, epp_erp), (epn_meta, epn_erp),
    (ecp_meta, ecp_erp), (ecn_meta, ecn_erp),
):
    for ch in ch_idx:
        for sub in sub_list:
            erp[ch,sub-5,:] = np.average(meta[sub][:,ch,:], axis=0)

In [None]:
# neural_rsm[pair, subject, time]: similarity of the scalp topographies of two
# conditions, for each of the 28 condition pairs (8 conditions -> 8*7/2).
neural_rsm = np.zeros((28,30,500))
erp_by_condition = (cpp_erp, cpn_erp, ccp_erp, ccn_erp, epp_erp, epn_erp, ecp_erp, ecn_erp)
for sub in sub_list:
    sub_pos = sub - 5  # subject numbers start at 5
    for t in range(500):
        # channels x conditions matrix of ERP values at this time point
        topo = np.stack([erp[:, sub_pos, t] for erp in erp_by_condition], axis=1)
        # 1 - correlation distance, i.e. the correlation across channels.
        # pdist returns the pairs in row-major upper-triangle order:
        # (0,1), (0,2), ..., (6,7).
        # NOTE(review): confirm the model vector from get_tril_vec uses the
        # same pair order.
        neural_rsm[:, sub_pos, t] = 1 - pdist(topo.T, metric='correlation')

In [None]:
# Grand-average neural RSM: mean over subjects (axis 1), then over time points.
grand_mean_pairs = neural_rsm.mean(axis=1).mean(axis=1)
eeg_rsm = squareform(grand_mean_pairs)
np.fill_diagonal(eeg_rsm, 1)  # squareform leaves zeros on the diagonal

condition_labels = ['Chinese priming people-rlated','Chinese priming people unrelated','Chinese control people related','Chinese control people unrelated','English priming people-rlated','English priming people unrlated','English control people-rlated','English control people unrlated']
eeg_rsm = pd.DataFrame(eeg_rsm, index=condition_labels, columns=condition_labels)

plt.figure(figsize=(20,10))
sns.heatmap(
    eeg_rsm,
    square=True,
    cmap='RdBu_r',
    vmax=0.65,
    vmin=0.5,
    linewidths=0.1,
    xticklabels=False,
    yticklabels=True,
    cbar_kws={'label': 'similarity', "shrink": 0.8},
)

### Section 3: Representational similarity analysis of wordvec & ERP

Annk method

Define RSA functions

In [None]:
def neuralvec_rsa(i,sub,neural_rsm,wordvec_rsm):
    """Relate one subject's neural RSM at one time point to the model RSM.

    Parameters
    ----------
    i : int
        Index along the last axis of ``neural_rsm``.
    sub : int
        Index along the second axis of ``neural_rsm``.
    neural_rsm : array
        Neural similarity values, indexed as ``[pair, sub, i]``.
    wordvec_rsm : array
        Model similarity vector compared with ``neural_rsm[:, sub, i]``.

    Returns
    -------
    list
        ``[r, p]``: the value returned by the project's ``spearmanr`` and the
        value returned by ``permutation_corr`` (Spearman, ``iter=10000``).
    """
    neural_vec = neural_rsm[:, sub, i]
    rho = spearmanr(neural_vec, wordvec_rsm)
    p_perm = permutation_corr(neural_vec, wordvec_rsm, method='spearman', iter=10000)
    return [rho, p_perm]

In [None]:
def neuralvec_rsa_mean(i,neural_rsm,wordvec_rsm):
    """Relate a 2-D neural RSM at one index to the model RSM.

    Parameters
    ----------
    i : int
        Index along the last axis of ``neural_rsm``.
    neural_rsm : array
        Neural similarity values, indexed as ``[pair, i]``.
    wordvec_rsm : array
        Model similarity vector compared with ``neural_rsm[:, i]``.

    Returns
    -------
    list
        ``[r, p]``: the value returned by the project's ``spearmanr`` and the
        value returned by ``permutation_corr`` (Spearman, ``iter=10000``).
    """
    neural_vec = neural_rsm[:, i]
    rho = spearmanr(neural_vec, wordvec_rsm)
    p_perm = permutation_corr(neural_vec, wordvec_rsm, method='spearman', iter=10000)
    return [rho, p_perm]

Grand average across all subjects

In [None]:
# Subject-averaged neural RSM, indexed as [pair, time]. It is computed once
# here so the generator below does not recompute it for every time point.
neural_rsm_grand = neural_rsm.mean(axis=1)

# One [r, p] pair per time point, computed in 8 parallel jobs.
mean_sub = Parallel(n_jobs=8)(
    delayed(neuralvec_rsa_mean)(i, neural_rsm_grand, wordvec_rsm_annk)
    for i in range(500)
)

# Split the [r, p] pairs into two lists with one entry per time point.
mean_r = [pair[0] for pair in mean_sub]
mean_p = [pair[1] for pair in mean_sub]

In [None]:
## Time course of the grand-average RSA correlation
times = np.arange(-200, 800, 2)  # 500 values from -200 to 798 in steps of 2
plt.figure(figsize=(20,12))
ax = plt.axes()

# Open frame: hide the top/right spines, draw bottom/left in thick black.
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
for side in ('bottom', 'left'):
    ax.spines[side].set_color('black')
for side in ('bottom', 'left'):
    ax.spines[side].set_linewidth(3)

ax.tick_params(direction='in', length=10, width=3, labelsize=20)
ax.set_xlim(-200, 800)
ax.set_ylim(-1, 1)
ax.grid()

# Correlation curve with reference lines at time 0 and at r = 0.
label_font = {'family':'Arial', 'weight':'bold','size':25}
ax.plot(times, np.array(mean_r), alpha=0.9, lw=3)
ax.axvline(x=0, color="black", linestyle="--", lw=2)
ax.axhline(y=0, color="black", lw=2)
ax.set_xlabel('Time (ms)', fontdict=label_font)
ax.set_ylabel('Spearman rho', fontdict=label_font)
#plt.title(str(title),fontdict={'family':'Arial', 'weight':'bold','size':20})
plt.savefig(os.path.join(plot_path,'rsa.png'), bbox_inches='tight', dpi=600, pad_inches=0.1)
plt.show()

In [None]:
## Plot the permutation p values of the grand-average RSA
times = np.arange(-200, 800, 2)
plt.figure(figsize=(20,12))
ax = plt.axes()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_color('black')
ax.spines['left'].set_color('black')
ax.spines['bottom'].set_linewidth(3)
ax.spines['left'].set_linewidth(3)
plt.tick_params(direction='in',length=10,width=3,labelsize=20)
plt.xlim(-200,800)
# Only the range 0-0.05 is shown, so time points with p > 0.05 fall outside
# the visible area.
plt.ylim(0,0.05)
plt.grid()

# Plot the p value
plt.plot(times, np.array(mean_p), alpha=0.9,lw=3)
plt.axvline(x=0, color="black", linestyle="--",lw=2)
plt.axhline(y=0, color="black",lw=2)
plt.xlabel('Time (ms)',fontdict={'family':'Arial', 'weight':'bold','size':25})
# The curve is mean_p, so the axis is labelled as a p value, not as rho.
plt.ylabel('p value', fontdict={'family':'Arial', 'weight':'bold','size':25})
plt.show()

Conduct RSA on each subject

In [None]:
# Per-subject RSA results: rows = 500 time points, columns = 30 subjects.
rsa_r_meta = np.zeros((500,30))
rsa_p_meta = np.zeros((500,30))

for sub in tqdm(sub_list):
    sub_col = sub - 5  # subject numbers start at 5 -> zero-based column
    # One [r, p] pair per time point, computed in 8 parallel jobs.
    sing_sub = Parallel(n_jobs=8)(
        delayed(neuralvec_rsa)(t, sub_col, neural_rsm, wordvec_rsm_annk)
        for t in range(500)
    )

    rsa_r_meta[:, sub_col] = np.array([pair[0] for pair in sing_sub])
    rsa_p_meta[:, sub_col] = np.array([pair[1] for pair in sing_sub])

In [None]:
# Write the per-subject r and p arrays to the results folder.
for out_name, out_array in (
    ('rsa_r_allsub.npy', rsa_r_meta),
    ('rsa_p_allsub.npy', rsa_p_meta),
):
    np.save(os.path.join(results_path, out_name), out_array)