In [111]:
import pandas as pd
import sys
import torch
from transformers import GPT2Config, GPT2Tokenizer, GPT2LMHeadModel, GPT2Model
from scipy.stats import entropy
from bertviz import head_view, model_view
from collections import Counter
from ast import literal_eval as make_tuple
import numpy as np

In [8]:
# Load the pretrained GPT-2 language model. `output_attentions = True` is requested
# because later cells read the attention weights from the end of the model output.
model = GPT2LMHeadModel.from_pretrained('gpt2', output_attentions = True)
# Matching GPT-2 tokenizer, used for encoding sentences and decoding token ids.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
# Prefer the GPU when one is available. The model itself is only moved to this
# device later, inside the functions that call `model.to(device)`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [77]:
def find_token_position(l,t):
    """Return, in ascending order, every index of ``l`` whose element equals ``t``.

    An empty list is returned when ``t`` does not occur in ``l``.
    """
    positions = []
    for index, item in enumerate(l):
        if item == t:
            positions.append(index)
    return positions

In [44]:
#This code is from Vig (2021)
def format_attention(attention):
    """Stack per-layer attention tensors into a single tensor.

    Every element of ``attention`` has its leading dimension squeezed away
    (``squeeze(0)`` only removes it when its size is 1), and the results are
    stacked along a new first axis, one slice per element of ``attention``.
    """
    return torch.stack([per_layer.squeeze(0) for per_layer in attention])

In [None]:
def get_surprisal(sent, roi_loc):
    """Return the surprisal, in bits, of the token at index ``roi_loc`` of ``sent``.

    The sentence is tokenized with the global ``tokenizer``; the tokens before
    ``roi_loc`` are fed to the global ``model`` and the probability assigned to
    the token at ``roi_loc`` is converted to ``-log2(p)``.

    NOTE(review): a later cell redefines ``get_surprisal`` with a
    ``(sent, token)`` signature, which shadows this index-based version when
    the notebook is run top to bottom.

    Parameters
    ----------
    sent : str
        Sentence to score.
    roi_loc : int
        Index (in tokenizer tokens) of the token whose surprisal is wanted.

    Returns
    -------
    float
        Surprisal of the target token given the preceding tokens.

    Raises
    ------
    IndexError
        If ``roi_loc`` is outside the tokenized sentence.
    ValueError
        If no token precedes ``roi_loc``, so there is nothing to condition on.
    """
    indexed_tokens = tokenizer.encode(sent)

    # Resolve the target first so a bad index fails before the forward pass.
    target_id = indexed_tokens[roi_loc]
    prefix_index = indexed_tokens[:roi_loc]
    if not prefix_index:
        raise ValueError("roi_loc must have at least one preceding token to condition on")

    prefix_tensor = torch.tensor([prefix_index]).to(device)
    model.to(device)
    with torch.no_grad():
        output = model(prefix_tensor)
        # output[0] holds the logits; the last position predicts the next token.
        predictions = torch.nn.functional.softmax(output[0], -1)
        score_current_w = predictions[0, -1, target_id]
        surprisal_current_w = -1 * torch.log2(score_current_w)

    # .item() copies the scalar to the host, so this also works when the
    # computation ran on CUDA (Tensor.numpy() would raise there).
    return float(surprisal_current_w.item())

In [195]:
def get_surprisal(sent, token):
    """Return the surprisal, in bits, of ``token`` within ``sent``.

    The sentence is tokenized with the global ``tokenizer`` and each token id
    is decoded and stripped of surrounding whitespace before being compared
    with ``token``. The tokens preceding the match are fed to the global
    ``model`` and the probability of the matched token is turned into
    ``-log2(p)``.

    Parameters
    ----------
    sent : str
        Sentence to score.
    token : str
        Target token, compared against the stripped decoded tokens.

    Returns
    -------
    float or None
        Surprisal rounded to 4 decimals. ``None`` is returned when ``token``
        does not appear exactly once, or when it is the very first token and
        therefore has no preceding context to condition on.
    """
    indexed_tokens = tokenizer.encode(sent)
    tokens = [tokenizer.decode(i).strip() for i in indexed_tokens]
    token_positions = find_token_position(tokens, token)

    # A missing or ambiguous (repeated) target cannot be scored: return None.
    if len(token_positions) != 1:
        return None
    token_position = token_positions[0]

    # An empty prefix cannot be fed to the model.
    if token_position == 0:
        return None

    prefix_index = indexed_tokens[:token_position]
    prefix_tensor = torch.tensor([prefix_index]).to(device)

    model.to(device)
    with torch.no_grad():
        output = model(prefix_tensor)
        # output[0] holds the logits; the last position predicts the next token.
        predictions = torch.nn.functional.softmax(output[0], -1)
        score_current_w = predictions[0, -1, indexed_tokens[token_position]]
        surprisal_current_w = -1 * torch.log2(score_current_w)

    # .item() copies the scalar to the host, so this also works when the
    # computation ran on CUDA (Tensor.numpy() would raise there).
    return round(float(surprisal_current_w.item()), 4)

In [236]:
def get_entropy(sent, token, head = None):
    """Return the attention entropy (base 2) at the position of ``token`` in ``sent``.

    Parameters
    ----------
    sent : str
        Sentence to analyse.
    token : str
        Target token, compared against the stripped decoded tokens of ``sent``.
    head : sequence of two ints, optional
        ``[l, h]`` indices used to select one slice of the stacked attention
        tensor (presumably layer and head - confirm against the model output
        layout). When omitted or empty, the entropy is averaged over the first
        two axes of the attention tensor.

    Returns
    -------
    float or None
        Entropy rounded to 4 decimals, or ``None`` when ``token`` does not
        appear exactly once in the tokenized sentence.
    """
    indexed_tokens = tokenizer.encode_plus(text = sent, return_tensors = 'pt', add_special_tokens = False)['input_ids'][0]

    # Locate the target before running the model so an unusable sentence
    # costs no forward pass.
    tokens = [tokenizer.decode(i).strip() for i in indexed_tokens]
    token_positions = find_token_position(tokens, token)
    if len(token_positions) != 1:
        return None
    token_position = token_positions[0]

    # Model and inputs must share a device; other functions in this notebook
    # move the model to `device`, so do the same here.
    model.to(device)
    with torch.no_grad():
        attention = model(indexed_tokens.unsqueeze(0).to(device))[-1]
        # Bring the stacked attention back to the CPU: Tensor.numpy() raises
        # on CUDA tensors.
        attention = format_attention(attention).cpu()

    if not head:  # no specific head given: global entropy
        per_head = entropy(attention[:, :, token_position, :].numpy(), base = 2, axis = 2)
        return round(float(np.mean(per_head)), 4)

    # specific head given: local entropy from that head only
    l, h = head[0], head[1]
    return round(float(entropy(attention[l, h, token_position, :].numpy(), base = 2)), 4)


In [237]:
def get_paid_attention(sent, source_token, target_token, head):
    """Compute the amount of attention from ``source_token`` to ``target_token`` in ``head``.

    Parameters
    ----------
    sent : str
        Sentence to analyse.
    source_token : str
        Token whose attention row is read (the attending position).
    target_token : str
        Token whose column is read (the attended-to position).
    head : sequence of two ints
        ``[l, h]`` indices into the stacked attention tensor (presumably layer
        and head - confirm against the model output layout).

    Returns
    -------
    float or None
        Attention weight rounded to 5 decimals, or ``None`` when either token
        does not appear exactly once in the tokenized sentence.

    NOTE(review): GPT-2 attention is presumably causal, in which case a target
    that comes after the source in the sentence always yields 0 - confirm.
    """
    indexed_tokens = tokenizer.encode_plus(text = sent, return_tensors = 'pt', add_special_tokens = False)['input_ids'][0]

    # Locate both tokens before running the model so an unusable sentence
    # costs no forward pass.
    tokens = [tokenizer.decode(i).strip() for i in indexed_tokens]
    source_token_positions = find_token_position(tokens, source_token)
    target_token_positions = find_token_position(tokens, target_token)
    if len(source_token_positions) != 1 or len(target_token_positions) != 1:
        return None
    source_token_position, target_token_position = source_token_positions[0], target_token_positions[0]

    # Model and inputs must share a device; other functions in this notebook
    # move the model to `device`, so do the same here.
    model.to(device)
    with torch.no_grad():
        attention = model(indexed_tokens.unsqueeze(0).to(device))[-1]
        attention = format_attention(attention)

    l, h = head[0], head[1]
    return round(float(attention[l, h, source_token_position, target_token_position]), 5)


In [238]:
# Load the stimulus table. Later cells read its `id`, `type`, `level`, `sentence`,
# `subj1`-`subj3` and `verb1`-`verb3` columns. The path is relative to the
# notebook's working directory.
Stolz = pd.read_csv('data/Stolz1967.csv')

In [241]:
# Build one result row per (sentence id, level, sentence type) combination.
# Rows are collected in a list and turned into a DataFrame once at the end;
# growing a DataFrame with pd.concat inside the loop copies all previous rows
# on every iteration.
records = []

for sent_id in range(1,16):
    id_set = Stolz.loc[Stolz['id']==sent_id]
    # Nouns and verbs are read from the first row of the item.
    first_row = id_set.iloc[0]
    lv1noun, lv2noun, lv3noun = first_row.subj1, first_row.subj2, first_row.subj3
    lv1verb, lv2verb, lv3verb = first_row.verb1, first_row.verb2, first_row.verb3
    noun_by_level = {1: lv1noun, 2: lv2noun, 3: lv3noun}
    verb_by_level = {1: lv1verb, 2: lv2verb, 3: lv3verb}

    for lv in [1,2,3]:
        this_level = id_set[id_set['level']==lv]
        for sent_type in ["CE","RB"]:
            this_sent_type = this_level[this_level['type']==sent_type]
            sentence = this_sent_type.iloc[0].sentence

            # The verb of this level is the attending (source) token and the
            # noun of the same level is its target.
            source, target = verb_by_level[lv], noun_by_level[lv]

            # Surprisal is measured at the verb (source); an earlier version of
            # this line was incorrect, so change it with care.
            surprisal = get_surprisal(sentence, source)
            entropy_43 = get_entropy(sentence, source , head = [4,3])
            entropy_all = get_entropy(sentence, source , head = None)

            # Attention from the verb to each level's noun in head [4,3].
            attn_to_noun = {
                level: get_paid_attention(sentence, source, noun, head = [4,3])
                for level, noun in noun_by_level.items()
            }
            # The target is this level's noun, so its value is already
            # computed above; reuse it instead of running the model again.
            attn_to_target = attn_to_noun[lv]

            records.append({'id': sent_id,
                            "type": sent_type,
                            "level": lv,
                            "verb": source,
                            "sentence": sentence,
                            "target": target,
                            "surprisal": surprisal,
                            "entropy_43": entropy_43,
                            "entropy_all": entropy_all,
                            "attn_to_target": attn_to_target,
                            "attn_to_lv1_noun": attn_to_noun[1],
                            "attn_to_lv2_noun": attn_to_noun[2],
                            "attn_to_lv3_noun": attn_to_noun[3],
                            })

# Column order follows the key order of the record dicts; the index is 0..n-1.
df = pd.DataFrame(records)

In [242]:
# Write the results table to the working directory. The DataFrame index is written
# too, as an unnamed first column, because `index=False` is not passed.
df.to_csv('stolz-results.csv')

In [8]:
# Earlier output location, kept for reference: df.to_csv('results/Stolz1967.csv')
# Write the results into the dated folder. NOTE(review): nothing in this notebook
# creates `results_231108/` - confirm it exists before running this cell.
df.to_csv('results_231108/Stolz1967.csv')


In [9]:
# Select every row of item 4 (all levels and both sentence types) for the
# attention visualisations below, and display it.
this_set = Stolz.loc[Stolz['id']==4]
this_set

Unnamed: 0,sentence,id,type,level,subj1,verb1,subj2,verb2,subj3,verb3
18,The game that the inmates that the judge sente...,4,CE,1,game,involved,inmates,played,judge,sentenced
19,The game that the inmates that the judge sente...,4,CE,2,game,involved,inmates,played,judge,sentenced
20,The game that the inmates that the judge sente...,4,CE,3,game,involved,inmates,played,judge,sentenced
21,The judge sentenced the inmates who played the...,4,RB,1,game,involved,inmates,played,judge,sentenced
22,"Yesterday evening, the judge sentenced the inm...",4,RB,2,game,involved,inmates,played,judge,sentenced
23,"After the sun went down, the judge sentenced t...",4,RB,3,game,involved,inmates,played,judge,sentenced


In [10]:
# Sentence text of the first "CE" row of the selected item.
ce = this_set.loc[this_set['type']=="CE"].iloc[0].sentence
# All "RB" rows of the selected item; one sentence is taken per level below.
rb = this_set.loc[this_set['type']=="RB"]
rb_lv1, rb_lv2, rb_lv3 = (
    rb.loc[rb['level']==level].iloc[0].sentence for level in (1, 2, 3)
)

In [11]:
# Visualise layer 4, head 3 attention for the "CE" sentence.
inputs = tokenizer.encode_plus(ce, return_tensors='pt', add_special_tokens=True)
# Put the ids on the model's current device: earlier cells may have moved the
# model to the GPU, and CPU inputs would then make the forward pass fail.
input_ids_a = inputs['input_ids'].to(model.device)
with torch.no_grad():  # inference only, no autograd graph needed
    attention_a = model(input_ids_a)[-1]
input_id_list_a = input_ids_a[0].tolist() # Batch index 0
tokens_a = tokenizer.convert_ids_to_tokens(input_id_list_a)

head_view(attention_a, tokens_a, layer =4, heads=[3])

<IPython.core.display.Javascript object>

In [12]:
# Visualise layer 4, head 3 attention for the level-1 "RB" sentence.
inputs = tokenizer.encode_plus(rb_lv1, return_tensors='pt', add_special_tokens=True)
# Put the ids on the model's current device: earlier cells may have moved the
# model to the GPU, and CPU inputs would then make the forward pass fail.
input_ids_a = inputs['input_ids'].to(model.device)
with torch.no_grad():  # inference only, no autograd graph needed
    attention_a = model(input_ids_a)[-1]
input_id_list_a = input_ids_a[0].tolist() # Batch index 0
tokens_a = tokenizer.convert_ids_to_tokens(input_id_list_a)

head_view(attention_a, tokens_a, layer =4, heads=[3])

<IPython.core.display.Javascript object>

In [13]:
# Visualise layer 4, head 3 attention for the level-2 "RB" sentence.
inputs = tokenizer.encode_plus(rb_lv2, return_tensors='pt', add_special_tokens=True)
# Put the ids on the model's current device: earlier cells may have moved the
# model to the GPU, and CPU inputs would then make the forward pass fail.
input_ids_a = inputs['input_ids'].to(model.device)
with torch.no_grad():  # inference only, no autograd graph needed
    attention_a = model(input_ids_a)[-1]
input_id_list_a = input_ids_a[0].tolist() # Batch index 0
tokens_a = tokenizer.convert_ids_to_tokens(input_id_list_a)

head_view(attention_a, tokens_a, layer =4, heads=[3])

<IPython.core.display.Javascript object>

In [None]:
# Visualise layer 4, head 3 attention for the level-3 "RB" sentence.
inputs = tokenizer.encode_plus(rb_lv3, return_tensors='pt', add_special_tokens=True)
# Put the ids on the model's current device: earlier cells may have moved the
# model to the GPU, and CPU inputs would then make the forward pass fail.
input_ids_a = inputs['input_ids'].to(model.device)
with torch.no_grad():  # inference only, no autograd graph needed
    attention_a = model(input_ids_a)[-1]
input_id_list_a = input_ids_a[0].tolist() # Batch index 0
tokens_a = tokenizer.convert_ids_to_tokens(input_id_list_a)

head_view(attention_a, tokens_a, layer =4, heads=[3])

In [None]:
# Display the level-2 "RB" sentence text for inspection.
rb_lv2

In [1]:
# Debugging aid: show the current working directory, which the relative data and
# results paths in this notebook are resolved against.
# NOTE(review): the saved output exposes an absolute local path; consider clearing
# it before sharing the notebook.
import os 
os.getcwd()

'/Users/soohyunryu/Library/Mobile Documents/com~apple~CloudDocs/Umich/Psych619/journal_code/psych experiment'