In [328]:
%matplotlib notebook

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from subs2vec.vecs import Vectors
from scipy.stats import ttest_rel, pearsonr, spearmanr
from os import listdir

# global plot styling: white-grid background with the Set2 colour palette
sns.set(palette='Set2', style='whitegrid')

# Materials

In [356]:
# load materials
# rows with a missing value in any column are dropped; column names are lower-cased
df_materials = pd.read_csv('materials.tsv', sep='\t').dropna()
df_materials.columns = df_materials.columns.str.lower()
# Remove periods and lower-case the sentences. regex=False makes '.' a literal
# character on every pandas version (read as a regular expression it would match
# any character) and avoids the FutureWarning about the changing default.
df_materials['stimulus'] = df_materials['stimulus'].str.replace('.', '', regex=False).str.lower()
display(df_materials)

  after removing the cwd from sys.path.


Unnamed: 0,itemnum,pic_sent,plaus_implaus,stimulus,list,agent,verb,patient,agent_start,agent_end,action_start,action_end,patient_start,patient_end
0,1,sent,plaus,the cop is arresting the criminal,2,2.0,4.0,6.0,1.0,2.0,3.0,4.0,5.0,6.0
1,1,sent,implaus,the criminal is arresting the cop,1,2.0,4.0,6.0,1.0,2.0,3.0,4.0,5.0,6.0
2,2,sent,plaus,the babysitter is scolding the child,1,2.0,4.0,6.0,1.0,2.0,3.0,4.0,5.0,6.0
3,2,sent,implaus,the child is scolding the babysitter,2,2.0,4.0,6.0,1.0,2.0,3.0,4.0,5.0,6.0
4,3,sent,plaus,the doctor is using a stethoscope on the patient,2,2.0,6.0,9.0,1.0,2.0,3.0,7.0,8.0,9.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,38,sent,implaus,the princess is measuring the tailor,2,2.0,4.0,6.0,1.0,2.0,3.0,4.0,5.0,6.0
76,39,sent,plaus,the lifeguard is saving the grandmother,2,2.0,4.0,6.0,1.0,2.0,3.0,4.0,5.0,6.0
77,39,sent,implaus,the grandmother is saving the lifeguard,1,2.0,4.0,6.0,1.0,2.0,3.0,4.0,5.0,6.0
78,40,sent,plaus,the chauffeur is opening the door for the lady,1,2.0,6.0,9.0,1.0,2.0,3.0,7.0,8.0,9.0


In [330]:
# load embeddings
# (alternative embedding sets are kept below, commented out; swap the active line to use one)
#vecs = Vectors('../embeddings/cc.en.300.vec')
#vecs = Vectors('../embeddings/fic.en.vec')
vecs = Vectors('../embeddings/wiki-subs.en.1e6.vec')

[INFO] loading vectors ../embeddings/wiki-subs.en.1e6.vec
[INFO] <function Vectors.__init__ at 0x7f9d7f814560> ran in 76.558 seconds


## Contrast agent-verb similarity in plausible versus implausible sentences
Because implausible sentences were created by swapping the agent and patient roles of the plausible sentences, the agent-verb similarity of each plausible sentence is identical to the patient-verb similarity of its implausible counterpart (and vice versa).

In [357]:
def phrase_similarity(df, sentence_col, a_cols, b_cols, method='word2vec', model=None, tokenizer=None):
    """Compute the cosine similarity between two phrases in every sentence of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per sentence.
    sentence_col : str
        Name of the column holding the sentences; words are separated by
        single spaces.
    a_cols, b_cols : list or tuple of str
        Column name(s) locating each phrase by 1-based word position: one
        name for a single-word phrase, or two names (first word, last word,
        both inclusive) for a multi-word phrase.
    method : str
        ``'word2vec'`` (default) or ``'bert'``, case-insensitive.
    model : object
        For ``'word2vec'``: an object whose ``as_dict()`` returns a
        word -> vector mapping. For ``'bert'``: a callable model that is fed
        the tokenizer output; presumably a Hugging Face transformers model
        configured to return all hidden states, so that ``outputs[2]`` holds
        one tensor per layer -- TODO confirm against the caller.
    tokenizer : object, optional
        Only used for ``'bert'``; must be callable on a list of sentences and
        provide ``tokenize(text)``.

    Returns
    -------
    list of float
        One cosine similarity per row of ``df``. The value is NaN when a
        phrase has no direction (e.g. all of its words are out of vocabulary,
        or the word positions fall outside the sentence).

    Raises
    ------
    ValueError
        If ``method`` is not recognized, or ``a_cols`` / ``b_cols`` does not
        contain one or two column names.
    """
    method = method.lower()

    def cosine(a, b):
        """Cosine of the angle between two vectors; NaN when it is undefined."""
        if a is None or b is None:
            return float('nan')
        a = np.asarray(a, dtype=float)
        b = np.asarray(b, dtype=float)
        a_norm, b_norm = np.linalg.norm(a), np.linalg.norm(b)
        # zero-length (e.g. out-of-vocabulary) or NaN vectors have no direction;
        # return NaN explicitly instead of dividing by zero
        if not (a_norm > 0 and b_norm > 0):
            return float('nan')
        return float(np.dot(a / a_norm, b / b_norm))

    def word_slices(cols):
        """Turn 1-based word-position columns into one 0-based slice per row."""
        if len(cols) == 1:
            # single word: position p -> slice(p - 1, p)
            return [slice(int(pos) - 1, int(pos)) for pos in df[cols[0]]]
        if len(cols) == 2:
            # inclusive word range first..last -> slice(first - 1, last)
            return [slice(int(first) - 1, int(last))
                    for first, last in zip(df[cols[0]], df[cols[1]])]
        raise ValueError('invalid number of columns passed, pass a list or tuple containing one or two column names')

    if method == 'word2vec':
        vecs_dict = model.as_dict()
        # out-of-vocabulary words map to a zero vector whose length matches the
        # loaded embeddings (300 only as a fallback for an empty vocabulary)
        dim = len(next(iter(vecs_dict.values()), np.zeros(300)))
        oov_vec = np.zeros(dim)

        def mean_vec(words):
            """Average the word vectors of a phrase; None for an empty phrase."""
            if not words:
                return None
            return np.mean([vecs_dict.get(word, oov_vec) for word in words], axis=0)

        # split sentences into words
        sentences = [sentence.split(' ') for sentence in df[sentence_col].values]
        a_slices = word_slices(a_cols)
        b_slices = word_slices(b_cols)

        # cosine similarity between the mean vectors of the two phrases
        return [cosine(mean_vec(words[a_slice]), mean_vec(words[b_slice]))
                for words, a_slice, b_slice in zip(sentences, a_slices, b_slices)]

    elif method == 'bert':
        # imported here so the word2vec path works without torch installed
        import torch

        sentences = list(df[sentence_col])
        token_dict = tokenizer(sentences, padding=True, return_tensors='pt')
        # number of tokens each word is split into, plus one trailing entry per sentence
        token_lengths = [[len(tokenizer.tokenize(word)) for word in sentence.split(' ')] + [1]
                         for sentence in sentences]

        def token_slice(word_slice, lengths):
            """Translate a slice over words into a slice over tokens."""
            # NOTE(review): this offset arithmetic is kept exactly as it was. It
            # does not visibly account for special tokens the tokenizer may
            # prepend, and the end index depends on the token count of the word
            # following the phrase -- confirm it selects the intended tokens.
            start = np.sum(lengths[:word_slice.start])
            end = np.sum(lengths[:word_slice.stop + 1]) - 1
            return slice(int(start), int(end))

        def phrase_vec(tensor, tokens):
            """Sum the last four entries of the layer axis over the phrase's tokens."""
            summed = torch.sum(tensor[tokens, -4:], dim=(0, 1))
            return summed.detach().cpu().numpy()

        model.eval()  # evaluation mode (the call was previously missing its parentheses)
        with torch.no_grad():  # no gradients are needed
            # feed tokens into model
            outputs = model(**token_dict)
            # stack the per-layer outputs and reorder the axes so that iterating
            # yields one (token, layer, feature) tensor per sentence
            # (assumes outputs[2] is a sequence of per-layer tensors -- TODO confirm)
            activations = torch.stack(outputs[2], dim=0).permute(1, 2, 0, 3)

        a_slices = [token_slice(s, token_lengths[i]) for i, s in enumerate(word_slices(a_cols))]
        b_slices = [token_slice(s, token_lengths[i]) for i, s in enumerate(word_slices(b_cols))]

        return [cosine(phrase_vec(tensor, a_slices[i]), phrase_vec(tensor, b_slices[i]))
                for i, tensor in enumerate(activations)]
    else:
        raise ValueError('method not recognized, use either "word2vec" or "bert"')

In [358]:
# cosine similarity of the verb with the agent, then with the patient
for role in ('agent', 'patient'):
    df_materials[f'{role}_verb_cosine'] = phrase_similarity(
        df_materials, 'stimulus', [role], ['verb'], model=vecs)

display(df_materials)

[INFO] <function Vectors.as_dict at 0x7f9d7f814830> ran in 0.927 seconds
[INFO] <function Vectors.as_dict at 0x7f9d7f814830> ran in 0.795 seconds


Unnamed: 0,itemnum,pic_sent,plaus_implaus,stimulus,list,agent,verb,patient,agent_start,agent_end,action_start,action_end,patient_start,patient_end,agent_verb_cosine,patient_verb_cosine
0,1,sent,plaus,the cop is arresting the criminal,2,2.0,4.0,6.0,1.0,2.0,3.0,4.0,5.0,6.0,0.353695,0.448583
1,1,sent,implaus,the criminal is arresting the cop,1,2.0,4.0,6.0,1.0,2.0,3.0,4.0,5.0,6.0,0.448583,0.353695
2,2,sent,plaus,the babysitter is scolding the child,1,2.0,4.0,6.0,1.0,2.0,3.0,4.0,5.0,6.0,0.380197,0.293505
3,2,sent,implaus,the child is scolding the babysitter,2,2.0,4.0,6.0,1.0,2.0,3.0,4.0,5.0,6.0,0.293505,0.380197
4,3,sent,plaus,the doctor is using a stethoscope on the patient,2,2.0,6.0,9.0,1.0,2.0,3.0,7.0,8.0,9.0,0.483958,0.504265
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,38,sent,implaus,the princess is measuring the tailor,2,2.0,4.0,6.0,1.0,2.0,3.0,4.0,5.0,6.0,0.076098,0.175920
76,39,sent,plaus,the lifeguard is saving the grandmother,2,2.0,4.0,6.0,1.0,2.0,3.0,4.0,5.0,6.0,0.230932,0.186822
77,39,sent,implaus,the grandmother is saving the lifeguard,1,2.0,4.0,6.0,1.0,2.0,3.0,4.0,5.0,6.0,0.186822,0.230932
78,40,sent,plaus,the chauffeur is opening the door for the lady,1,2.0,6.0,9.0,1.0,2.0,3.0,7.0,8.0,9.0,0.311750,0.281344


In [359]:
# sentences whose agent-verb similarity is NaN (no word embedding available);
# these are excluded from the plots and tests below
display(df_materials.loc[lambda items: items['agent_verb_cosine'].isna()])

Unnamed: 0,itemnum,pic_sent,plaus_implaus,stimulus,list,agent,verb,patient,agent_start,agent_end,action_start,action_end,patient_start,patient_end,agent_verb_cosine,patient_verb_cosine
29,15,sent,implaus,the snorkeler is biting the shark,1,2.0,4.0,6.0,1.0,2.0,3.0,4.0,5.0,6.0,,0.407985
43,22,sent,implaus,the sportsstar is interviewing the journalist,2,2.0,4.0,6.0,1.0,2.0,3.0,4.0,5.0,6.0,,0.384173


### Visual comparison

In [360]:
# quick plot: agent-verb similarity per plausibility condition (rows with NaN removed)
g = sns.boxplot(x='plaus_implaus', y='agent_verb_cosine', data=df_materials.dropna())

<IPython.core.display.Javascript object>

### Statistical comparison

In [361]:
# paired samples t-test
df_test = df_materials.dropna()
df_plaus = df_test.loc[df_test['plaus_implaus'] == 'plaus']
df_implaus = df_test.loc[df_test['plaus_implaus'] == 'implaus']
# ttest_rel pairs observations by position, so make sure row i of both subsets
# belongs to the same item before trusting the result
assert np.array_equal(df_plaus['itemnum'].values, df_implaus['itemnum'].values), \
    'plausible and implausible rows are not aligned item by item'
t = ttest_rel(df_plaus['agent_verb_cosine'],
              df_implaus['agent_verb_cosine'],
              nan_policy='omit')
print(f'Paired samples t-statistic: {t[0]:.2f}, p-value: {t[1]:.2f}')

Paired samples t-statistic: 3.55, p-value: 0.00


## Contrast difference between agent-verb similarity and patient-verb similarity in plausible versus implausible sentences

In [362]:
# per sentence: agent-verb similarity minus patient-verb similarity
df_materials['plausibility_diff'] = df_materials['agent_verb_cosine'].sub(df_materials['patient_verb_cosine'])

### Visual comparison

In [364]:
# quick plot: similarity difference per plausibility condition (rows with NaN removed)
g = sns.boxplot(x='plaus_implaus', y='plausibility_diff', data=df_materials.dropna())

<IPython.core.display.Javascript object>

### Statistical comparison

In [365]:
# paired samples t-test
# Each implausible sentence swaps the agent and patient of its plausible
# counterpart, so its plausibility_diff is the negated value of the counterpart's.
# The paired differences are therefore twice those of agent_verb_cosine, and this
# test is expected to give the same t-statistic as a paired test on agent_verb_cosine.
df_test = df_materials.dropna()
df_plaus = df_test.loc[df_test['plaus_implaus'] == 'plaus']
df_implaus = df_test.loc[df_test['plaus_implaus'] == 'implaus']
# ttest_rel pairs observations by position, so make sure row i of both subsets
# belongs to the same item before trusting the result
assert np.array_equal(df_plaus['itemnum'].values, df_implaus['itemnum'].values), \
    'plausible and implausible rows are not aligned item by item'
t = ttest_rel(df_plaus['plausibility_diff'],
              df_implaus['plausibility_diff'],
              nan_policy='omit')
print(f'Paired samples t-statistic: {t[0]:.2f}, p-value: {t[1]:.2f}')

Paired samples t-statistic: 3.55, p-value: 0.00


# Correlate similarities with behavioral data

In [366]:
# load behavioral data
# listdir returns names in arbitrary order; sorting keeps the row order reproducible
dfs = [pd.read_csv('data/' + fname) for fname in sorted(listdir('data/'))]
df_controls = pd.concat(dfs)
df_controls.columns = df_controls.columns.str.lower()
# keep only the Sent_Sem condition; .copy() gives an independent frame so the
# column assignment below does not trigger a SettingWithCopyWarning
df_controls = df_controls.loc[df_controls['condition'] == 'Sent_Sem'].copy()
# remove periods (regex=False: '.' is a literal character, not a wildcard) and
# lower-case, mirroring the clean-up of the materials' stimulus column
df_controls['item'] = df_controls['item'].str.replace('.', '', regex=False).str.lower()

display(df_controls)

  


Unnamed: 0,subjid,list,order,run,trialnumber,trialonset,condition,itemnum,item,motion,correctanswer,response,accuracy,rt
11,FED_20190320a_3T2,2,2,2,12,46,Sent_Sem,4,the grandmother is rescuing the fireman,right,2,0,0,0.000000
12,FED_20190320a_3T2,2,2,2,13,48,Sent_Sem,11,the lion is pouncing on the safari guide,right,1,2,0,0.497588
13,FED_20190320a_3T2,2,2,2,14,50,Sent_Sem,33,the boss is yelling at the worker,right,1,2,0,0.063568
14,FED_20190320a_3T2,2,2,2,15,52,Sent_Sem,6,the girl is frightening the ghost,right,2,1,0,1.614897
15,FED_20190320a_3T2,2,2,2,16,54,Sent_Sem,7,the mother is feeding the baby,left,1,1,1,1.818027
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,FED_20160519a_3T1,1,1,1,58,166,Sent_Sem,17,the woman is biting the vampire,left,2,0,0,0.000000
58,FED_20160519a_3T1,1,1,1,59,168,Sent_Sem,8,the artist is painting the businessman,right,1,1,1,1.036093
59,FED_20160519a_3T1,1,1,1,60,170,Sent_Sem,39,the grandmother is saving the lifeguard,right,2,2,1,1.417967
60,FED_20160519a_3T1,1,1,1,61,172,Sent_Sem,4,the fireman is rescuing the grandmother,left,1,1,1,1.230064


In [367]:
# attach the material properties and similarities to every trial;
# trials whose item matches no stimulus keep NaN in the material columns
df = pd.merge(df_controls, df_materials, how='left', left_on='item', right_on='stimulus')
display(df)

Unnamed: 0,subjid,list_x,order,run,trialnumber,trialonset,condition,itemnum_x,item,motion,...,patient,agent_start,agent_end,action_start,action_end,patient_start,patient_end,agent_verb_cosine,patient_verb_cosine,plausibility_diff
0,FED_20190320a_3T2,2,2,2,12,46,Sent_Sem,4,the grandmother is rescuing the fireman,right,...,6.0,1.0,2.0,3.0,4.0,5.0,6.0,0.174271,0.297179,-0.122908
1,FED_20190320a_3T2,2,2,2,13,48,Sent_Sem,11,the lion is pouncing on the safari guide,right,...,8.0,1.0,2.0,3.0,5.0,6.0,8.0,0.321946,0.197307,0.124640
2,FED_20190320a_3T2,2,2,2,14,50,Sent_Sem,33,the boss is yelling at the worker,right,...,7.0,1.0,2.0,3.0,5.0,6.0,7.0,0.313729,0.145268,0.168461
3,FED_20190320a_3T2,2,2,2,15,52,Sent_Sem,6,the girl is frightening the ghost,right,...,6.0,1.0,2.0,3.0,4.0,5.0,6.0,0.295777,0.483953,-0.188176
4,FED_20190320a_3T2,2,2,2,16,54,Sent_Sem,7,the mother is feeding the baby,left,...,6.0,1.0,2.0,3.0,4.0,5.0,6.0,0.260654,0.318519,-0.057865
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
955,FED_20160519a_3T1,1,1,1,58,166,Sent_Sem,17,the woman is biting the vampire,left,...,6.0,1.0,2.0,3.0,4.0,5.0,6.0,0.194295,0.377822,-0.183527
956,FED_20160519a_3T1,1,1,1,59,168,Sent_Sem,8,the artist is painting the businessman,right,...,6.0,1.0,2.0,3.0,4.0,5.0,6.0,0.562744,0.180253,0.382491
957,FED_20160519a_3T1,1,1,1,60,170,Sent_Sem,39,the grandmother is saving the lifeguard,right,...,6.0,1.0,2.0,3.0,4.0,5.0,6.0,0.186822,0.230932,-0.044109
958,FED_20160519a_3T1,1,1,1,61,172,Sent_Sem,4,the fireman is rescuing the grandmother,left,...,6.0,1.0,2.0,3.0,4.0,5.0,6.0,0.297179,0.174271,0.122908


## Trial-level response accuracy

### Accuracy and agent-verb similarity

In [368]:
# trial-level association between agent-verb similarity and response accuracy
# (rows with any missing value are left out)
df_test = df.dropna()
r, rank_r = (corr(df_test['agent_verb_cosine'], df_test['accuracy'])
             for corr in (pearsonr, spearmanr))
print(f'Pearson r: {r[0]:.2f}, p-value: {r[1]:.2f}')
print(f'Spearman r: {rank_r[0]:.2f}, p-value: {rank_r[1]:.2f}')

Pearson r: 0.08, p-value: 0.01
Spearman r: 0.07, p-value: 0.03


### Accuracy and patient-verb similarity

In [369]:
# trial-level association between patient-verb similarity and response accuracy
r, rank_r = (corr(df_test['patient_verb_cosine'], df_test['accuracy'])
             for corr in (pearsonr, spearmanr))
print(f'Pearson r: {r[0]:.2f}, p-value: {r[1]:.2f}')
print(f'Spearman r: {rank_r[0]:.2f}, p-value: {rank_r[1]:.2f}')

Pearson r: 0.06, p-value: 0.08
Spearman r: 0.06, p-value: 0.07


### Accuracy and difference between agent-verb and patient-verb similarity

In [370]:
# trial-level association between the similarity difference and response accuracy
r, rank_r = (corr(df_test['plausibility_diff'], df_test['accuracy'])
             for corr in (pearsonr, spearmanr))
print(f'Pearson r: {r[0]:.2f}, p-value: {r[1]:.2f}')
print(f'Spearman r: {rank_r[0]:.2f}, p-value: {rank_r[1]:.2f}')

Pearson r: 0.02, p-value: 0.60
Spearman r: 0.01, p-value: 0.68


## Mean response accuracy

### Accuracy and agent-verb similarity

In [381]:
# compare aggregated accuracy to embedding similarities
df_test = df.dropna()
# Average over trials per item. numeric_only=True restricts the mean to numeric
# columns; without it, recent pandas versions raise a TypeError on the string
# columns instead of silently dropping them.
df_test = df_test.groupby(['item', 'plaus_implaus']).mean(numeric_only=True).reset_index()
r = pearsonr(df_test['agent_verb_cosine'], df_test['accuracy'])
rank_r = spearmanr(df_test['agent_verb_cosine'], df_test['accuracy'])
print(f'Pearson r: {r[0]:.2f}, p-value: {r[1]:.2f}')
print(f'Spearman r: {rank_r[0]:.2f}, p-value: {rank_r[1]:.2f}')

Pearson r: 0.28, p-value: 0.02
Spearman r: 0.26, p-value: 0.03


### Accuracy and patient-verb similarity

In [382]:
# item-level association between patient-verb similarity and mean accuracy
r, rank_r = (corr(df_test['patient_verb_cosine'], df_test['accuracy'])
             for corr in (pearsonr, spearmanr))
print(f'Pearson r: {r[0]:.2f}, p-value: {r[1]:.2f}')
print(f'Spearman r: {rank_r[0]:.2f}, p-value: {rank_r[1]:.2f}')

Pearson r: 0.23, p-value: 0.05
Spearman r: 0.22, p-value: 0.06


### Accuracy and difference between agent-verb and patient-verb similarity

In [383]:
# item-level association between the similarity difference and mean accuracy
r, rank_r = (corr(df_test['plausibility_diff'], df_test['accuracy'])
             for corr in (pearsonr, spearmanr))
print(f'Pearson r: {r[0]:.2f}, p-value: {r[1]:.2f}')
print(f'Spearman r: {rank_r[0]:.2f}, p-value: {rank_r[1]:.2f}')

Pearson r: 0.04, p-value: 0.71
Spearman r: 0.03, p-value: 0.77


### Mean participant accuracy plotted over agent-verb similarity

In [384]:
# string item codes give a categorical x-axis; items are ordered by agent-verb similarity
df_test = (
    df_test
    .assign(itemcode=df_test['itemnum_x'].astype(str))
    .sort_values(by='agent_verb_cosine')
)
# one panel per plausibility condition: bars show the similarity, points the accuracy
g = sns.FacetGrid(df_test, col='plaus_implaus')
g.map_dataframe(sns.barplot, y='agent_verb_cosine', x='itemcode')
g.map_dataframe(sns.stripplot, y='accuracy', x='itemcode')
g.set(xlabel='items sorted by cosine similarity', xticklabels=[])

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x7f9b2b0d5810>

## Trial-level response time

### Response time and agent-verb similarity

In [375]:
# trial-level association between agent-verb similarity and response time,
# using complete rows with accuracy equal to 1 only
df_test = df.dropna().loc[lambda trials: trials['accuracy'] == 1]
r, rank_r = (corr(df_test['agent_verb_cosine'], df_test['rt'])
             for corr in (pearsonr, spearmanr))
print(f'Pearson r: {r[0]:.2f}, p-value: {r[1]:.2f}')
print(f'Spearman r: {rank_r[0]:.2f}, p-value: {rank_r[1]:.2f}')

Pearson r: -0.04, p-value: 0.35
Spearman r: -0.07, p-value: 0.11


### Response time and patient-verb similarity

In [376]:
# trial-level association between patient-verb similarity and response time
r, rank_r = (corr(df_test['patient_verb_cosine'], df_test['rt'])
             for corr in (pearsonr, spearmanr))
print(f'Pearson r: {r[0]:.2f}, p-value: {r[1]:.2f}')
print(f'Spearman r: {rank_r[0]:.2f}, p-value: {rank_r[1]:.2f}')

Pearson r: -0.00, p-value: 0.98
Spearman r: -0.01, p-value: 0.87


### Response time and difference between agent-verb similarity and patient-verb similarity

In [377]:
# trial-level association between the similarity difference and response time
r, rank_r = (corr(df_test['plausibility_diff'], df_test['rt'])
             for corr in (pearsonr, spearmanr))
print(f'Pearson r: {r[0]:.2f}, p-value: {r[1]:.2f}')
print(f'Spearman r: {rank_r[0]:.2f}, p-value: {rank_r[1]:.2f}')

Pearson r: -0.03, p-value: 0.45
Spearman r: -0.03, p-value: 0.51


## Mean response time

### Response time and agent-verb similarity

In [378]:
# compare mean response time to embedding similarities
df_test = df.dropna()
# keep only trials with accuracy equal to 1
df_test = df_test[df_test['accuracy'] == 1]
# Average over trials per item. numeric_only=True restricts the mean to numeric
# columns; without it, recent pandas versions raise a TypeError on the string
# columns instead of silently dropping them.
df_test = df_test.groupby(['item', 'plaus_implaus']).mean(numeric_only=True).reset_index()
r = pearsonr(df_test['agent_verb_cosine'], df_test['rt'])
rank_r = spearmanr(df_test['agent_verb_cosine'], df_test['rt'])
print(f'Pearson r: {r[0]:.2f}, p-value: {r[1]:.2f}')
print(f'Spearman r: {rank_r[0]:.2f}, p-value: {rank_r[1]:.2f}')

Pearson r: -0.13, p-value: 0.29
Spearman r: -0.18, p-value: 0.12


### Response time and patient-verb similarity

In [379]:
# item-level association between patient-verb similarity and mean response time
r, rank_r = (corr(df_test['patient_verb_cosine'], df_test['rt'])
             for corr in (pearsonr, spearmanr))
print(f'Pearson r: {r[0]:.2f}, p-value: {r[1]:.2f}')
print(f'Spearman r: {rank_r[0]:.2f}, p-value: {rank_r[1]:.2f}')

Pearson r: -0.11, p-value: 0.37
Spearman r: -0.07, p-value: 0.56


### Response time and difference between agent-verb similarity and patient-verb similarity

In [380]:
# item-level association between the similarity difference and mean response time
r, rank_r = (corr(df_test['plausibility_diff'], df_test['rt'])
             for corr in (pearsonr, spearmanr))
print(f'Pearson r: {r[0]:.2f}, p-value: {r[1]:.2f}')
print(f'Spearman r: {rank_r[0]:.2f}, p-value: {rank_r[1]:.2f}')

Pearson r: -0.02, p-value: 0.88
Spearman r: -0.06, p-value: 0.59
