## Script for the IS-RSA analysis for Morbi EEG project

In [None]:
%matplotlib inline
import os
import mne
from tqdm import tqdm
import numpy as np
import pandas as pd
from scipy.stats import rankdata
from joblib import Parallel, delayed
from neurora.stuff import permutation_corr
from mne.viz import plot_topomap
from mne.stats import fdr_correction
from scipy.stats import ttest_1samp, ttest_ind, ttest_rel, f_oneway
from mne.stats import fdr_correction, f_mway_rm, permutation_cluster_test
from scipy.spatial.distance import pdist,squareform
import matplotlib.pyplot as plt
import seaborn as sns
from jupyterthemes import jtplot
jtplot.style(theme='grade3') 
from Function import get_tril_vec, spearmanr, permutation_cor


# Data and output locations (absolute paths on the analysis machine)
behav_path = 'E:/Bilingual_Morphology_Project/Data/Behav/'
eeg_path = 'E:/Bilingual_Morphology_Project/Data/EEG_prep'
results_path = 'E:/Bilingual_Morphology_Project/Results/'
plot_path = 'E:/Bilingual_Morphology_Project/Results/Plot'

# Subject numbers 5..34 (30 subjects) and marker codes 2..9,
# used further down to build the per-subject EEG file names
sub_list = [*range(5, 35)]
mark_list = [*range(2, 10)]

### Section 1: Behavioral (LexTALE score) intersubject similarity

In [None]:
n_subs = 30
score = pd.read_csv(os.path.join(behav_path,'participant_profile.csv'))
lex = score['LexTALE']
# Explicitly convert the raw LexTALE scores to ranks.
# NOTE(review): ranks are computed over every row of the CSV, but only the
# first n_subs entries are used below - confirm the file has exactly n_subs rows.
lex_rank = rankdata(lex)

# AnnK similarity: mean rank of each subject pair, scaled by the number of
# subjects. Only the i < j pairs are computed and then mirrored.
annk_lex = np.zeros((n_subs, n_subs))
row_idx, col_idx = np.triu_indices(n_subs, k=1)
pair_sim = (lex_rank[row_idx] + lex_rank[col_idx]) / 2 / n_subs
annk_lex[row_idx, col_idx] = pair_sim
annk_lex[col_idx, row_idx] = pair_sim
# Self-similarity is set to 1
np.fill_diagonal(annk_lex, 1)

# Plot the intersubject similarity matrix; the mask hides the upper triangle
# (diagonal included) so each pair is drawn once
mask = np.triu(np.ones_like(annk_lex))
plt.figure(figsize=(20,10))
sns.heatmap(
    annk_lex,
    square=True,
    cmap='RdBu_r',
    mask=mask,
    linewidths=0.1,
    xticklabels=False,
    yticklabels=False,
    cbar_kws={'label': 'similarity', "shrink": 0.8},
)
plt.title('LexTALE Score Intersubject Similarity (AnnK method)', fontsize=25, fontweight='bold')


In [None]:
n_subs = 30
score = pd.read_csv(os.path.join(behav_path,'participant_profile.csv'))
lex = score['LexTALE']
# Explicitly convert the raw LexTALE scores to ranks
lex_rank = rankdata(lex)

# Nearest-neighbour (NN) similarity: 1 minus the absolute rank difference of
# each subject pair, scaled by the number of subjects. Only the i < j pairs
# are computed and then mirrored.
nn_lex = np.zeros((n_subs, n_subs))
row_idx, col_idx = np.triu_indices(n_subs, k=1)
pair_sim = 1 - (np.abs(lex_rank[row_idx] - lex_rank[col_idx]) / n_subs)
nn_lex[row_idx, col_idx] = pair_sim
nn_lex[col_idx, row_idx] = pair_sim
# Self-similarity is set to 1
np.fill_diagonal(nn_lex, 1)

# Heatmap of the NN matrix; the mask hides the upper triangle (diagonal included)
mask = np.triu(np.ones_like(nn_lex))
plt.figure(figsize=(20,10))
sns.heatmap(
    nn_lex,
    square=True,
    cmap='RdBu_r',
    mask=mask,
    linewidths=0.1,
    xticklabels=False,
    yticklabels=False,
    cbar_kws={'label': 'similarity', "shrink": 0.8},
)
#plt.title('LexTALE Score Intersubject Similarity (NN method)', fontsize=25, fontweight='bold')
#plt.savefig(os.path.join(plot_path,'lex_isc.png'),bbox_inches='tight',dpi=600,pad_inches=0.1)

### Section 2: EEG intersubject similarity

In [None]:
# One example recording is loaded and given the custom montage first
demo = mne.read_epochs_eeglab('E:/Bilingual_Morphology_Project/Data/EEG_prep/S31_09.set')
montage_file = 'E:/Bilingual_Morphology_Project/Scripts/morbi.loc'
montage = mne.channels.read_custom_montage(montage_file)
demo.set_montage(montage)

# eeg_meta[subject][marker] -> epochs read from 'S<subject>_0<marker>.set',
# with the custom montage attached
eeg_meta = {}
for sub in sub_list:
    per_marker = {}
    for mark in mark_list:
        set_file = os.path.join(eeg_path, f'S{sub}_0{mark}.set')
        epochs = mne.read_epochs_eeglab(set_file)
        epochs.set_montage(montage)
        per_marker[mark] = epochs
    eeg_meta[sub] = per_marker

In [None]:
# Channel indexing: map each channel name to its position.
# Channel names are taken from the first subject (5) and first marker (2).
ch_names = eeg_meta[5][2].ch_names
print(ch_names)
# Index list for the 31 channels (also reused later when averaging per channel)
ch_idx = list(range(31))
# Pair every channel name with its index
ch_num = {name: idx for name, idx in zip(ch_names, ch_idx)}
print(ch_num['PO9'])

In [None]:
# Per-subject single-trial data for the four English conditions.
# Each dict maps subject number -> array scaled by 10**6
# (presumably volts -> microvolts; confirm against the recording units).
epp_meta, ecp_meta, epn_meta, ecn_meta = {}, {}, {}, {}
for sub in sub_list:
    epoch = eeg_meta[sub]
    # Convert data to numpy arrays
    # NOTE(review): marker 9 feeds `ecp` and marker 8 feeds `ecn`, as in the
    # original script - confirm this mapping against the trigger table.
    epp = epoch[6].get_data()*10**6  # English priming condition
    epn = epoch[7].get_data()*10**6
    ecp = epoch[9].get_data()*10**6  # English control condition
    ecn = epoch[8].get_data()*10**6

    # Store each array under its own condition. One assignment per line,
    # because the earlier tuple unpacking listed the right-hand side in a
    # different order than the left and silently swapped `epn` with `ecp`.
    epp_meta[sub] = epp
    epn_meta[sub] = epn
    ecp_meta[sub] = ecp
    ecn_meta[sub] = ecn


# ERP data structure: [n_channels, n_sub, n_times]
n_times = 500
erp_shape = [len(ch_idx), len(sub_list), n_times]
epp_erp, epn_erp, ecp_erp, ecn_erp = (np.zeros(erp_shape) for _ in range(4))

# Loop across all channels and subjects, averaging over axis 0 of each
# subject's array. The subject's position in sub_list is its index on the
# second axis, so the code no longer depends on the list starting at 5.
for ch in ch_idx:
    for sub_pos, sub in enumerate(sub_list):
        epp_erp[ch,sub_pos,:] = np.average(epp_meta[sub][:,ch,:], axis=0)
        epn_erp[ch,sub_pos,:] = np.average(epn_meta[sub][:,ch,:], axis=0)
        ecp_erp[ch,sub_pos,:] = np.average(ecp_meta[sub][:,ch,:], axis=0)
        ecn_erp[ch,sub_pos,:] = np.average(ecn_meta[sub][:,ch,:], axis=0)

In [None]:
# Build the similarity matrices, shape n(timepoints) x n(subject pairs).
# First average the two ERPs of each condition family (priming / control).
ep_erp = np.mean(np.stack((epp_erp, epn_erp), axis=3), axis=3)
ec_erp = np.mean(np.stack((ecp_erp, ecn_erp), axis=3), axis=3)

ep_simi = np.zeros((500,435))
ec_simi = np.zeros((500,435))

# At every timepoint, correlate the across-channel pattern of every pair of
# subjects; pdist returns correlation distance, so 1 - distance is similarity.
for t in range(500):
    for simi, erp in ((ep_simi, ep_erp), (ec_simi, ec_erp)):
        simi[t,:] = 1 - pdist(erp[:,:,t].T, metric='correlation')


In [None]:
# Expand the priming-condition pair similarities at time index 200 into a
# square subject-by-subject matrix, with self-similarity set to 1
erp_isc = squareform(ep_simi[200,:])
np.fill_diagonal(erp_isc, 1)

# The mask hides the lower triangle (diagonal included)
mask = np.tril(np.ones_like(erp_isc))
plt.figure(figsize=(20,10))
sns.heatmap(
    erp_isc,
    square=True,
    cmap='RdBu_r',
    mask=mask,
    linewidths=0.1,
    xticklabels=False,
    yticklabels=False,
    cbar_kws={'label': 'similarity', "shrink": 0.8},
)
#plt.title('LexTALE Score Intersubject Similarity (NN method)', fontsize=25, fontweight='bold')
#plt.savefig(os.path.join(plot_path,'erp_isc.png'),bbox_inches='tight',dpi=600,pad_inches=0.1)

In [None]:
# Plot an intersubject-similarity time course for a single condition
def plot_eegisc(isc, times,title):
    """Draw `isc` against `times` on a new 12x8 figure and show it.

    Parameters
    ----------
    isc : values drawn on the y-axis ('Intersubject similarity').
    times : values drawn on the x-axis ('Time (ms)'); the axis is limited
        to [-200, 800].
    title : currently unused - the title call below is disabled.
    """
    label_font = {'family':'Arial', 'weight':'bold','size':25}

    # Figure and axes styling: hide top/right spines, thicken bottom/left
    plt.figure(figsize=(12,8))
    ax = plt.axes()
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    for side in ('bottom', 'left'):
        ax.spines[side].set_color('black')
    for side in ('bottom', 'left'):
        ax.spines[side].set_linewidth(3)
    plt.tick_params(direction='in',length=10,width=3,labelsize=20)
    plt.xlim(-200,800)
    plt.ylim(-0.1,0.8)
    plt.grid()

    # Time course plus reference lines at x = 0 and y = 0
    plt.plot(times, isc, alpha=0.9,lw=3)
    plt.axvline(x=0, color="black", linestyle="--",lw=2)
    plt.axhline(y=0, color="black",lw=2)
    plt.xlabel('Time (ms)',fontdict=label_font)
    plt.ylabel('Intersubject similarity', fontdict=label_font)
    #plt.title(str(title),fontdict={'family':'Arial', 'weight':'bold','size':20})
    plt.show()

In [None]:
# Construct the time axis: -200 to 798 in steps of 2 (500 points)
times = np.arange(-200, 800, 2)
# Mean pairwise similarity over time, one figure per condition family.
# The second call plots the control condition (ec_simi); it previously
# repeated the priming condition, so the control curve was never shown.
plot_eegisc(ep_simi.mean(axis=1), times, title='')
plot_eegisc(ec_simi.mean(axis=1), times, title='')

### Section 3: Intersubject similarity analysis

In this section, we examine how individual variation in LexTALE scores modulates individual differences in the neural representation of English words.
Four conditions:
* English priming people-related
* English priming people-unrelated
* English control people-related
* English control people-unrelated

In [None]:
# Vectorise the behavioural intersubject similarity matrices
# Two methods: AnnK and NN
# The neural similarity rows are produced by scipy's pdist, so the behavioural
# matrices are condensed with squareform, which uses the same subject-pair
# ordering. checks=False is needed because these matrices carry ones (not
# zeros) on the diagonal.
# NOTE(review): this replaces get_tril_vec from the local Function module,
# whose pair ordering is not visible here. If that helper returned pairs in
# np.tril_indices order, the IS-RSA correlations compared mismatched subject
# pairs; if it already matched pdist, the result is unchanged.
lex_simi_annk = squareform(annk_lex, checks=False)
lex_simi_nn = squareform(nn_lex, checks=False)

In [None]:
def morbi_isrsa(i,neural_isc,lex_isc):
    """Run the IS-RSA for one row of the neural similarity matrix.

    Parameters
    ----------
    i : row index into `neural_isc`.
    neural_isc : 2-D array; row `i` is compared with `lex_isc`.
    lex_isc : vector of behavioural pair similarities.

    Returns
    -------
    dict with keys 'isc_r' (result of the `spearmanr` helper imported from
    the local Function module) and 'isc_p' (result of neurora's
    `permutation_corr` with 10000 iterations - presumably the permutation
    p-value; confirm against the NeuroRA documentation).
    """
    neural_row = neural_isc[i,:]
    rho = spearmanr(neural_row, lex_isc)
    p_perm = permutation_corr(neural_row, lex_isc, method='spearman', iter=10000)
    return {'isc_r': rho, 'isc_p': p_perm}

In [None]:
# Run the IS-RSA at each of the 500 timepoints on 8 workers, once per
# condition family, against the AnnK behavioural similarity vector
ep_result = Parallel(n_jobs=8)(
    delayed(morbi_isrsa)(t, ep_simi, lex_simi_annk) for t in range(500)
)
ec_result = Parallel(n_jobs=8)(
    delayed(morbi_isrsa)(t, ec_simi, lex_simi_annk) for t in range(500)
)

In [None]:
# Unpack the per-timepoint result dicts into one list per statistic
ep_r = [ep_result[t]['isc_r'] for t in range(500)]
ep_p = [ep_result[t]['isc_p'] for t in range(500)]

ec_r = [ec_result[t]['isc_r'] for t in range(500)]
ec_p = [ec_result[t]['isc_p'] for t in range(500)]

# One row per timepoint; written to disk so the plots can be redone
# without rerunning the permutation tests
isrsa_df = pd.DataFrame({'ep_r':ep_r, 'ep_p':ep_p, 'ec_r':ec_r,'ec_p':ec_p})
isrsa_df.to_csv(os.path.join(results_path,'isrsa.csv'))

Plot the IS-RSA results

In [None]:
# Reload the saved IS-RSA table (columns ep_r, ep_p, ec_r, ec_p)
isrsa_csv = os.path.join(results_path,'isrsa.csv')
isrsa_df = pd.read_csv(isrsa_csv)

In [None]:
## IS-RSA correlation time course, English priming condition
label_font = {'family':'Arial', 'weight':'bold','size':25}

# Figure and axes styling: hide top/right spines, thicken bottom/left
plt.figure(figsize=(20,12))
ax = plt.axes()
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
for side in ('bottom', 'left'):
    ax.spines[side].set_color('black')
for side in ('bottom', 'left'):
    ax.spines[side].set_linewidth(3)
plt.tick_params(direction='in',length=10,width=3,labelsize=20)
plt.xlim(-200,800)
plt.ylim(-0.15,0.2)
plt.grid()

# r value over time, with reference lines at x = 0 and y = 0
plt.plot(times, isrsa_df['ep_r'], label='English priming condition',alpha=0.9,lw=4)
plt.axvline(x=0, color="black", linestyle="--",lw=2)
plt.axhline(y=0, color="black",lw=2)
plt.xlabel('Time (ms)',fontdict=label_font)
plt.ylabel('Spearman \u03C1', fontdict=label_font)
plt.legend(loc='best', prop={'family':'Arial', 'size':20})
#plt.title('Intersubejct representational similarity of LexTALE & ERP',fontdict={'family':'Arial', 'weight':'bold','size':20})
plt.savefig(os.path.join(plot_path,'isrsa_ep.png'),bbox_inches='tight',dpi=600,pad_inches=0.1)
plt.show()

In [None]:
## Plot the IS-RSA p value (English priming condition)
plt.figure(figsize=(20,12))
ax = plt.axes()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_color('black')
ax.spines['left'].set_color('black')
ax.spines['bottom'].set_linewidth(3)
ax.spines['left'].set_linewidth(3)
plt.tick_params(direction='in',length=10,width=3,labelsize=20)
plt.xlim(-200,800)
# Only the 0-0.05 range is shown, so the curve is visible only where p < 0.05
plt.ylim(0,0.05)
plt.grid()

# Plot the p value ('ep_p' holds the permutation_corr output saved earlier)
plt.plot(times, isrsa_df['ep_p'], label='English priming condition',alpha=0.9,lw=4)
plt.axvline(x=0, color="black", linestyle="--",lw=2)
plt.axhline(y=0, color="black",lw=2)
plt.xlabel('Time (ms)',fontdict={'family':'Arial', 'weight':'bold','size':25})
# The axis shows p values, not correlation coefficients; the label used to read 'Spearman rho'
plt.ylabel('p value', fontdict={'family':'Arial', 'weight':'bold','size':25})
plt.legend(loc='best', prop={'family':'Arial', 'size':20})
plt.show()

In [None]:
## IS-RSA correlation time course, English control condition
label_font = {'family':'Arial', 'weight':'bold','size':25}

# Figure and axes styling: hide top/right spines, thicken bottom/left
plt.figure(figsize=(20,12))
ax = plt.axes()
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
for side in ('bottom', 'left'):
    ax.spines[side].set_color('black')
for side in ('bottom', 'left'):
    ax.spines[side].set_linewidth(3)
plt.tick_params(direction='in',length=10,width=3,labelsize=20)
plt.xlim(-200,800)
plt.ylim(-0.2,0.2)
plt.grid()

# r value over time, with reference lines at x = 0 and y = 0
plt.plot(times, isrsa_df['ec_r'],label='English control condition',alpha=0.9,lw=4, c='#F25757')
plt.axvline(x=0, color="black", linestyle="--",lw=2)
plt.axhline(y=0, color="black",lw=2)
plt.xlabel('Time (ms)',fontdict=label_font)
plt.ylabel('Spearman \u03C1', fontdict=label_font)
plt.legend(loc='best', prop={'family':'Arial', 'size':20})
#plt.title('Intersubejct representational similarity of LexTALE & ERP',fontdict={'family':'Arial', 'weight':'bold','size':20})
plt.savefig(os.path.join(plot_path,'isrsa_ec.png'),bbox_inches='tight',dpi=600,pad_inches=0.1)
plt.show()

In [None]:
## Plot the IS-RSA p value (English control condition)
plt.figure(figsize=(20,12))
ax = plt.axes()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_color('black')
ax.spines['left'].set_color('black')
ax.spines['bottom'].set_linewidth(3)
ax.spines['left'].set_linewidth(3)
plt.tick_params(direction='in',length=10,width=3,labelsize=20)
plt.xlim(-200,800)
# Only the 0-0.05 range is shown, so the curve is visible only where p < 0.05
plt.ylim(0,0.05)
plt.grid()

# Plot the p value ('ec_p' holds the permutation_corr output saved earlier)
plt.plot(times, isrsa_df['ec_p'],label='English control condition',alpha=0.9,lw=4, c='#F25757')
plt.axvline(x=0, color="black", linestyle="--",lw=2)
plt.axhline(y=0, color="black",lw=2)
plt.xlabel('Time (ms)',fontdict={'family':'Arial', 'weight':'bold','size':25})
# The axis shows p values, not correlation coefficients; the label used to read 'Spearman rho'
plt.ylabel('p value', fontdict={'family':'Arial', 'weight':'bold','size':25})
plt.legend(loc='best', prop={'family':'Arial', 'size':20})
plt.show()