In [None]:
# Fall 2024 - MH
# Environment: fmri_stats
# Organize and plot group-level RSA results pooled across studies 1 & 2

In [3]:
# SET-UP
import numpy as np
import pandas as pd
import scipy.stats as st
import copy
import pickle
from nltools.stats import one_sample_permutation

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# Number of parcels in the parcellation (the input files are named "Schaefer500")
n_parcs = 500

# Project root and the RSA data folder beneath it
base_dir = '/dartfs-hpc/rc/lab/K/KraemerD/ASL1-2_combined/'
rsa_dir = f'{base_dir}data/rsa/'

# Execute the shared surface-plotting script inside this notebook's namespace.
# The plotting helpers called further down (parc_list_to_surf, four_panel_surfplot,
# dors_vent_surfplot) are not defined in this notebook, so they are presumably
# provided by this script -- verify against the script itself.
%run '/dartfs-hpc/rc/lab/K/KraemerD/ASL1-2_combined/scripts/asl_combo_functions_surface_plotting.py'

# Group-Level one-sample permutation test at each parcel from normed RSA correlations

### English

In [7]:
# Pool the normed RSA correlations for the English words across both studies,
# run a one-sample permutation test at every parcel, and save the results.
rsa_dict = {}
words='eng'
model='word2vec'

# One independent list per parcel. ([[]]*n_parcs would put the SAME list object
# in every slot, which silently breaks as soon as anything mutates it in place.)
rsa_dict[model] = [[] for _ in range(n_parcs)]

# One pickle per study; each is indexed by parcel and yields that parcel's correlations.
files=[rsa_dir+'corrs/'+model+'_grpASL_eng_Schaefer500_corrs_normed.pkl',
       rsa_dir+'corrs/'+model+'_allsubs_asl2_eng_all_by_group_Schaefer500_corrs_normed.pkl']

for fname in files:
    # NOTE: unpickling can execute arbitrary code -- only load trusted project files.
    # The context manager closes the file handle (the bare open() call leaked it).
    with open(fname, 'rb') as f:
        corrs = pickle.load(f)

    # Append this study's values to the running per-parcel pool
    for parc in range(n_parcs):
        rsa_dict[model][parc].extend(corrs[parc])

# Column "<model>_Z" holds the 'mean' returned by the permutation test and
# "<model>_p" holds its 'p'; downstream cells read the CSV by these names.
results_dict = {model+'_Z': [], model+'_p': []}

# NOTE(review): permutation p-values are stochastic; consider passing a fixed
# random_state to one_sample_permutation so the saved CSV is reproducible.
for parc_corrs in rsa_dict[model]:
    test = one_sample_permutation(parc_corrs)
    results_dict[model+'_Z'].append(test['mean'])
    results_dict[model+'_p'].append(test['p'])

# Build the frame once and write it out (one row per parcel, in parcel order)
df = pd.DataFrame(results_dict)
df.to_csv(rsa_dir+'combo_'+model+'_allsub_eng_scores.csv',index=False)

### ASL-Known

In [None]:
# Pool the normed RSA correlations for the ASL-known condition across studies,
# run a one-sample permutation test at every parcel, and save the results.
rsa_dict = {}
words='asl-known'
model='word2vec_weighted' # toggle between word2vec_weighted, nonsigner_iconicity_weighted, and asl-lex_weighted
# NOTE(review): the Figure 5 cell reads CSVs named with 'nonsignericonicity_weighted'
# (no underscore after "nonsigner"); make sure the spelling used here matches.

# One independent list per parcel ([[]]*n_parcs would alias a single shared list)
rsa_dict[model] = [[] for _ in range(n_parcs)]

files=[rsa_dir+'corrs/'+model+'_grpASL_asl_Schaefer500_corrs_normed.pkl',
       rsa_dir+'corrs/'+model+'_grp1_asl2_words1_Schaefer500_corrs_normed.pkl',
       rsa_dir+'corrs/'+model+'_grp2_asl2_words2_Schaefer500_corrs_normed.pkl']

# FIX: the original cell never read `files`, so every parcel's list stayed empty
# and the permutation test below ran on no data. Load and pool them here, the
# same way the English cell does.
for fname in files:
    # NOTE: unpickling can execute arbitrary code -- only load trusted project files.
    with open(fname, 'rb') as f:
        corrs = pickle.load(f)

    for parc in range(n_parcs):
        rsa_dict[model][parc].extend(corrs[parc])

# Column "<model>_Z" holds the 'mean' returned by the permutation test and
# "<model>_p" holds its 'p'; the figure cells read the CSV by these names.
results_dict = {model+'_Z': [], model+'_p': []}

for parc_corrs in rsa_dict[model]:
    test = one_sample_permutation(parc_corrs)
    results_dict[model+'_Z'].append(test['mean'])
    results_dict[model+'_p'].append(test['p'])

pd.DataFrame(results_dict).to_csv(rsa_dir+'combo_'+model+'_allsub_asl-known_scores.csv',index=False)

### ASL-Unknown

In [None]:
# Pool the normed RSA correlations for the ASL-unknown condition across studies,
# run a one-sample permutation test at every parcel, and save the results.
rsa_dict = {}
words='asl-unknown'
model='word2vec_weighted' # toggle between word2vec_weighted, nonsigner_iconicity_weighted, and asl-lex_weighted
# NOTE(review): the Figure 5 cell reads CSVs named with 'nonsignericonicity_weighted'
# (no underscore after "nonsigner"); make sure the spelling used here matches.

# One independent list per parcel ([[]]*n_parcs would alias a single shared list)
rsa_dict[model] = [[] for _ in range(n_parcs)]

# NOTE(review): the first file is the same '_grpASL_asl_' pickle that the
# ASL-known cell pools -- confirm it is intended to count toward both conditions.
files=[rsa_dir+'corrs/'+model+'_grpASL_asl_Schaefer500_corrs_normed.pkl',
       rsa_dir+'corrs/'+model+'_grp1_asl2_words2_Schaefer500_corrs_normed.pkl',
       rsa_dir+'corrs/'+model+'_grp2_asl2_words1_Schaefer500_corrs_normed.pkl']

# FIX: the original cell never read `files`, so every parcel's list stayed empty
# and the permutation test below ran on no data. Load and pool them here, the
# same way the English cell does.
for fname in files:
    # NOTE: unpickling can execute arbitrary code -- only load trusted project files.
    with open(fname, 'rb') as f:
        corrs = pickle.load(f)

    for parc in range(n_parcs):
        rsa_dict[model][parc].extend(corrs[parc])

# Column "<model>_Z" holds the 'mean' returned by the permutation test and
# "<model>_p" holds its 'p'; the figure cells read the CSV by these names.
results_dict = {model+'_Z': [], model+'_p': []}

for parc_corrs in rsa_dict[model]:
    test = one_sample_permutation(parc_corrs)
    results_dict[model+'_Z'].append(test['mean'])
    results_dict[model+'_p'].append(test['p'])

pd.DataFrame(results_dict).to_csv(rsa_dir+'combo_'+model+'_allsub_asl-unknown_scores.csv',index=False)

# Figure 4: Overlap map of Pooled RSA results for English and ASL

### Read in RSA Z scores

In [8]:
# Per-parcel pooled results written by the group-level cells above:
# English (word2vec) and ASL-known (word2vec_weighted).
eng_stats = pd.read_csv(f'{rsa_dir}combo_word2vec_allsub_eng_scores.csv')
asl_stats = pd.read_csv(f'{rsa_dir}combo_word2vec_weighted_allsub_asl-known_scores.csv')

In [5]:
# Code every parcel by which map(s) it is significant in:
#   0 = neither, 1 = ASL only, 2 = both English and ASL, 3 = English only.
parc_vals = [0]*n_parcs
pval = 0.05

# FIX: the score CSVs are written with "<model>_Z" / "<model>_p" columns, not
# 'mean' / 'p', so the original lookups raised KeyError.
eng_z, eng_p = eng_stats['word2vec_Z'], eng_stats['word2vec_p']
asl_z, asl_p = asl_stats['word2vec_weighted_Z'], asl_stats['word2vec_weighted_p']

# A parcel is "in" a map when its mean is non-negative and its p-value is below pval
in_eng = ((eng_z >= 0) & (eng_p < pval)).to_numpy()
in_asl = ((asl_z >= 0) & (asl_p < pval)).to_numpy()

for p in range(len(parc_vals)):
    if in_eng[p] and in_asl[p]:   # significant in both maps
        parc_vals[p] = 2.
    elif in_eng[p]:               # English map only
        parc_vals[p] = 3.
    elif in_asl[p]:               # ASL map only
        parc_vals[p] = 1.


In [None]:
# Discrete yellow / green / blue colormap for the overlap codes
colorlist = ['#faeb2c', '#2cd600', '#1685f8']
customcmap = ListedColormap(colorlist)

# Project the per-parcel codes onto the two hemispheres' surfaces
rh_masked, lh_masked = parc_list_to_surf(parc_vals, n_parcs)

# Both figures share the same title and display options
overlap_plot_opts = dict(
    title=f"ASL1+2_word2vec_ENG-ASL_overlap_p<{pval}",
    bg_on_data=True,
    colormap=customcmap,
    cmap_method='range',
)

# Four-panel view
fn = f'{base_dir}figures/combined_studies_word2vec_ENG-ASL_overlap_p{pval}'
four_panel_surfplot(rh_masked, lh_masked, fn, **overlap_plot_opts)

# Dorsal / ventral view
fn = f'{base_dir}figures/combined_studies_word2vec_ENG-ASL_overlap_p{pval}_dorsal_ventral'
dors_vent_surfplot(rh_masked, lh_masked, fn, **overlap_plot_opts)

# Figure 5: Overlap of Word2Vec, ASL-LEX, and Nonsigner-Rated Iconicity results

In [9]:
# Colormap and threshold for the three-model overlap figure
colorlist = ['#2072b1','#db141e','#fbb0ba']
customcmap = ListedColormap(colorlist)
pval = 0.05

words = ['asl-known','asl-unknown']
# NOTE(review): 'nonsignericonicity_weighted' differs from the
# 'nonsigner_iconicity_weighted' spelling mentioned in the ASL-known/unknown cells;
# the CSV name and column names are built from this string, so confirm which one
# matches the files on disk.
models = ['word2vec_weighted','asl-lex_weighted','nonsignericonicity_weighted']

# Maps "<model>_<word set>" -> list of row indices (parcels) whose p-value is
# below pval and whose Z column is non-negative.
sigparc_dict={}

for model_name in models:
    z_col = model_name+'_Z'
    p_col = model_name+'_p'
    for word_set in words:
        stats = pd.read_csv(f'{rsa_dir}combo_{model_name}_allsub_{word_set}_scores.csv')
        is_sig = (stats[p_col]<pval) & (stats[z_col]>=0)
        sigparc_dict[model_name+'_'+word_set] = list(stats[is_sig].index)

# print results/check for parcels that appear in more than one map - orthogonalization of DMs should make this unlikely
# for key in sigparc_dict.keys():
#     print("There are "+str(len(sigparc_dict[key]))+" sig parcs in the RSA for "+key)
#     print(list(sigparc_dict[key]))
#     for otherkey in [x for x in sigparc_dict.keys() if x != key]:
#         overlappers = [y for y in sigparc_dict[otherkey] if y in sigparc_dict[key]]
#         if len(overlappers) > 0:
#             print(str(len(overlappers)),"overlap with ",otherkey,": ",list(overlappers))
#     print("")
#     print("")

In [11]:
words = 'asl-known' # toggle between asl-known for Figure 5, asl-unknown for Supplementary Figure 1B

# Models in priority order with the code assigned to their significant parcels;
# a parcel significant for several models takes the first match. 0 = none.
model_codes = (
    ('word2vec_weighted', 1.),
    ('asl-lex_weighted', 2.),
    ('nonsignericonicity_weighted', 3.),
)

parc_vals = []
for p in range(n_parcs):
    code = 0
    for model_key, model_code in model_codes:
        if p in sigparc_dict[model_key+'_'+words]:
            code = model_code
            break
    parc_vals.append(code)

In [None]:
# Project the per-parcel model codes onto the two hemispheres' surfaces and save
# both views of the model-overlap figure.
rh_masked, lh_masked = parc_list_to_surf(parc_vals, n_parcs)

# Title shared by both views
overlap_title = "ASL1+2_"+words+"_WeightedRSA_overlap_p<"+str(pval)

# FIX: the four-panel filename used the prefix 'combinedstudies_' while every other
# figure in this notebook (including the dorsal/ventral one below) uses
# 'combined_studies_'; use the same prefix so the two views sort together.
fn = base_dir+'figures/combined_studies_all_model_overlap_'+words+'_p'+str(pval)
four_panel_surfplot(rh_masked, lh_masked,fn,title=overlap_title,bg_on_data=True,colormap=customcmap,cmap_method='range')

fn = base_dir+'figures/combined_studies_all_model_overlap_'+words+'_p'+str(pval)+'_dorsal_ventral'
dors_vent_surfplot(rh_masked, lh_masked,fn,title=overlap_title,bg_on_data=True,colormap=customcmap,cmap_method='range')