In [1]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from attention_graph_util import *
import seaborn as sns
import itertools 
import matplotlib as mpl
import networkx as nx
import os
from util import constants

from absl import app
from absl import flags
import pandas as pd

from util.models import MODELS
from util.tasks import TASKS
#from dnotebook_utils import *
from attention_graph_util import *
%matplotlib inline
from util.config_util import get_task_params
from notebooks.notebook_utils import *
from util import inflect

from tqdm import tqdm
from scipy.stats import spearmanr
import math


# Notebook-wide plot styling: sizes for body text, axis labels, legend,
# titles and tick labels, followed by a thin axes border.
rc = {
    'font.size': 10,
    'axes.labelsize': 10,
    'legend.fontsize': 10.0,
    'axes.titlesize': 32,
    'xtick.labelsize': 20,
    'ytick.labelsize': 16,
}
plt.rcParams.update(rc)
mpl.rcParams['axes.linewidth'] = 0.5  # set the value globally

import torch
from transformers import *
from transformers import BertConfig, BertForMaskedLM, BertTokenizer

[nltk_data] Downloading package punkt to /home/dehghani/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
!pip install --upgrade transformers
!pip install networkx
!pip install --upgrade matplotlib
!pip install --upgrade seaborn


!pip install torch torchvision

Requirement already up-to-date: transformers in /home/dehghani/anaconda3/envs/indist/lib/python3.7/site-packages (2.8.0)
Requirement already up-to-date: matplotlib in /home/dehghani/anaconda3/envs/indist/lib/python3.7/site-packages (3.2.1)
Requirement already up-to-date: seaborn in /home/dehghani/anaconda3/envs/indist/lib/python3.7/site-packages (0.10.0)


In [3]:
# Instantiate the evaluation task from the project's task registry, reading its
# data from the local InDist checkout.
# NOTE(review): 'word_sv_agreement_lm' presumably denotes word-level
# subject-verb agreement as a language-modelling task - confirm in util.tasks.
task_name = 'word_sv_agreement_lm'
task_params = get_task_params(batch_size=1)
task = TASKS[task_name](task_params, data_dir='../InDist/data')
# Encoding of constants.bos under the task's own sentence encoder
# (presumably the beginning-of-sentence marker - TODO confirm).
cl_token = task.sentence_encoder().encode(constants.bos)
# Tokenizer wrapped by the task's sentence encoder (private attribute access).
task_tokenizer = task.sentence_encoder()._tokenizer

INFO:absl:Overwrite dataset info from restored data version.


Vocab len:  10032


INFO:absl:Constructing tf.data.Dataset for split validation, from ../InDist/data/word_sv_agreement/0.1.0
INFO:absl:Constructing tf.data.Dataset for split test, from ../InDist/data/word_sv_agreement/0.1.0
INFO:absl:Constructing tf.data.Dataset for split train, from ../InDist/data/word_sv_agreement/0.1.0


In [4]:
# DistilBertForMaskedLM is imported explicitly: the cell instantiates it below, and
# previously it was only reachable through the wildcard `from transformers import *`.
from transformers import DistilBertTokenizer, DistilBertModel, DistilBertForMaskedLM
import torch

# Pretrained uncased DistilBERT with a masked-LM head. Hidden states and attention
# maps are requested so every forward pass also returns them after the logits.
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model = DistilBertForMaskedLM.from_pretrained('distilbert-base-uncased',
                                        output_hidden_states=True,
                                        output_attentions=True)

In [5]:
def offset_convertor(encoded_input_task, task_offset, task_encoder, tokenizer):
    """Translate an offset in the task's encoding into a count of `tokenizer` tokens.

    The first `task_offset` ids of `encoded_input_task` are decoded back to text
    with `task_encoder.decode`, that text is re-tokenized with
    `tokenizer.tokenize`, and the number of resulting tokens is returned.

    Args:
        encoded_input_task: sliceable sequence of ids in the task's encoding.
        task_offset: number of leading ids to take from `encoded_input_task`.
        task_encoder: object exposing `decode(ids) -> str`.
        tokenizer: object exposing `tokenize(text) -> sequence of tokens`.

    Returns:
        int: how many tokens `tokenizer` produces for the decoded prefix.
    """
    prefix_ids = encoded_input_task[:task_offset]
    prefix_text = task_encoder.decode(prefix_ids)
    return len(tokenizer.tokenize(prefix_text))


In [6]:
# Sanity check on one test example: run the masked LM on token ids, then on the
# matching input embeddings, and verify that a gradient reaches the embeddings.

# Take the first batch only; x[0][1:] skips the first id of the first row
# (presumably the task's BOS marker - TODO confirm against the task encoder).
x, y = next(iter(task.test_dataset))
sentence = task.sentence_encoder().decode(x[0][1:])
print(sentence)

# 'cls'/'sep' are display-only placeholders so the printed list lines up with the
# ids from tokenizer.encode (the recorded run shows 21 positions for both).
tokens = ['cls']+tokenizer.tokenize(sentence)+['sep']
print(len(tokens), tokens)
tf_input_ids = tokenizer.encode(sentence)
input_ids = torch.tensor([tf_input_ids])
logits, all_hidden_states, all_attentions = model(input_ids)
print(logits.shape)
# Stack the per-layer attention tensors and drop the batch axis
# -> (layers, heads, seq, seq) in the recorded run.
_attentions = [att.detach().numpy() for att in all_attentions]
attentions_mat = np.asarray(_attentions)[:,0]
print(attentions_mat.shape)

# Detach the embedding output into a fresh leaf tensor that tracks gradients.
# This replaces the deprecated torch.autograd.Variable(..., requires_grad=True).
embeded_inputs = model.distilbert.embeddings(input_ids).detach().requires_grad_(True)
logits, all_hidden_states, all_attentions = model(inputs_embeds=embeded_inputs)
print(embeded_inputs.shape)


lsum = logits.sum()
print(lsum)

# The former `embeded_inputs.require_grad = True` after backward() was dropped:
# the attribute name was misspelled (so it only created an unused attribute) and
# gradient tracking is already enabled on the leaf above.
lsum.backward()
print(embeded_inputs.grad.shape)

many NNS of woodland remain and support a JJ sector in the southern portion of the state .
21 ['cls', 'many', 'n', '##ns', 'of', 'woodland', 'remain', 'and', 'support', 'a', 'jj', 'sector', 'in', 'the', 'southern', 'portion', 'of', 'the', 'state', '.', 'sep']
torch.Size([1, 21, 30522])
(6, 12, 21, 21)
torch.Size([1, 21, 768])
tensor(-3811819.5000, grad_fn=<SumBackward0>)
torch.Size([1, 21, 768])


In [7]:
# Collect, for a handful of validation examples, everything the later cells
# compare: attention maps, gradient-based saliency and blank-out relevance for
# the masked verb position. Index i of every list refers to the same example.
all_examples_x = []                   # input ids, tensor of shape (1, seq)
all_examples_y = []                   # kept for parity with other notebooks; never filled here
all_examples_attentions = []          # attention maps with the batch axis dropped
all_examples_blankout_relevance = []  # |score change| when each position is masked
all_examples_grads = []               # |sum over embedding dim of d(score)/d(embedding)|
all_examples_inputgrads = []          # same, for gradient * input
n_batches = 10                        # number of examples to process (batch size is 1)

all_examples_accuracies = []          # whether the correct verb form out-scores the inflected one

infl_eng = inflect.engine()
verb_infl, noun_infl = gen_inflect_from_vocab(infl_eng, '../InDist/notebooks/wiki.vocab')

# Vocabulary id of the mask token, used for the blank-out perturbations.
mask_token_id = tokenizer.convert_tokens_to_ids(tokenizer.mask_token)

test_data = task.databuilder.as_dataset(split='validation', batch_size=1)
for n_done, examples in enumerate(tqdm(test_data), start=1):
    sentence = task.sentence_encoder().decode(examples['sentence'][0])

    verb_position = examples['verb_position'][0].numpy()+1  #+1 because of adding cls.
    verb_position = offset_convertor(examples['sentence'][0], verb_position, task.sentence_encoder(), tokenizer)

    # Build the wordpiece sequence with the tokenizer's real special tokens and map
    # it to ids directly, so list index == position in input_ids.
    # Previously literal 'cls'/'sep' strings were passed to tokenizer.encode, which
    # wraps a token list in its own special tokens (the `[1]` indexing below relies
    # on that); the mask therefore sat one position to the right of verb_position,
    # where the scores are read.
    # NOTE(review): whether verb_position itself lands on the verb depends on the
    # dataset's 'sentence' layout - confirm against the task data.
    tokens = [tokenizer.cls_token] + tokenizer.tokenize(sentence) + [tokenizer.sep_token]
    tokens[verb_position] = tokenizer.mask_token
    tf_input_ids = tokenizer.convert_tokens_to_ids(tokens)
    input_ids = torch.tensor([tf_input_ids])

    actual_verb = examples['verb'][0].numpy().decode("utf-8")
    inflected_verb = verb_infl[actual_verb]

    # encode() adds a leading special token to a token list, so [1] is the id of
    # the verb's first wordpiece.
    actual_verb_index = tokenizer.encode(tokenizer.tokenize(actual_verb))[1]
    inflected_verb_index = tokenizer.encode(tokenizer.tokenize(inflected_verb))[1]

    all_examples_x.append(input_ids)
    # Fresh leaf tensor holding the input embeddings so their gradient can be read
    # (replaces the deprecated torch.autograd.Variable wrapper).
    embeded_inputs = model.distilbert.embeddings(input_ids).detach().requires_grad_(True)
    predictions = model(inputs_embeds=embeded_inputs)
    logits = predictions[0][0]

    probs = torch.nn.Softmax(dim=-1)(logits)
    actual_verb_score = probs[verb_position][actual_verb_index]
    inflected_verb_score = probs[verb_position][inflected_verb_index]

    # Target quantity for every attribution method: P(correct form) - P(inflected form).
    main_diff_score = actual_verb_score - inflected_verb_score

    all_examples_accuracies.append(main_diff_score > 0)

    # Gradient and gradient*input saliency, summed over the embedding dimension.
    main_diff_score.backward()
    grads = embeded_inputs.grad
    grad_scores = abs(np.sum(grads.detach().numpy(), axis=-1))
    input_grad_scores = abs(np.sum((grads * embeded_inputs).detach().numpy(), axis=-1))
    all_examples_grads.append(grad_scores)
    all_examples_inputgrads.append(input_grad_scores)

    # The last two outputs are hidden states and attentions; stack the attentions
    # over layers and drop the batch axis.
    hidden_states, attentions = predictions[-2:]
    _attentions = [att.detach().numpy() for att in attentions]
    attentions_mat = np.asarray(_attentions)[:,0]

    all_examples_attentions.append(attentions_mat)

    # Blank-out relevance: repeat the example once per position and replace the
    # diagonal (row k, position k) with the mask id, i.e. one token at a time.
    seq_len = input_ids.shape[1]
    extended_x = input_ids.repeat(seq_len, 1)
    diagonal = torch.arange(seq_len)
    extended_x[diagonal, diagonal] = mask_token_id

    # No gradients are needed for the perturbed passes, so skip graph construction.
    with torch.no_grad():
        extended_logits = model(extended_x)[0]
        extended_probs = torch.nn.Softmax(dim=-1)(extended_logits)

        extended_correct_probs = extended_probs[:,verb_position,actual_verb_index]
        extended_wrong_probs = extended_probs[:,verb_position,inflected_verb_index]
        extended_diff_scores = extended_correct_probs - extended_wrong_probs

        # Relevance of position k = how much masking it moves the target score.
        diffs = abs(main_diff_score.detach() - extended_diff_scores)

    all_examples_blankout_relevance.append(diffs)

    # Stop after n_batches examples (at least one example is always processed).
    if n_done >= n_batches:
        break




INFO:absl:Constructing tf.data.Dataset for split validation, from ../InDist/data/word_sv_agreement/0.1.0
999it [05:59,  3.04it/s]

In [None]:
def get_raw_att_relevance(full_att_mat, input_tokens, layer=-1):
    """Raw-attention relevance of every position for position 0 at one layer.

    The attention maps of `layer` are summed over their leading (head) axis, the
    row belonging to index 0 is selected, and that row is rescaled to sum to one.

    Args:
        full_att_mat: array indexed as [layer, head, query, key].
        input_tokens: unused; kept for a uniform signature with the other
            relevance functions.
        layer: index of the layer to read (default: last).

    Returns:
        1-D array with one normalized score per key position.
    """
    cls_index = 0
    head_summed = full_att_mat[layer].sum(axis=0)
    cls_row = head_summed[cls_index]
    return cls_row/cls_row.sum()


def get_joint_relevance(full_att_mat, input_tokens, layer=-1, output_index=0):
    """Joint-attention relevance of the inputs for one output position.

    Attention is averaged over heads, passed to `compute_joint_attention` with
    `add_residual=True`, and the row for `output_index` at `layer` is returned.

    The head average divides by the actual number of heads in `full_att_mat`
    instead of a hard-coded 8: the model used in this notebook yields 12 heads
    per layer, so dividing by 8 inflated the attention relative to whatever
    residual term `compute_joint_attention` adds.

    Args:
        full_att_mat: array indexed as [layer, head, query, key].
        input_tokens: unused; kept for a uniform signature with the other
            relevance functions.
        layer: index into the joint attentions (default: last).
        output_index: query position whose relevance row is returned.

    Returns:
        The selected row of the joint attention, one score per input position.
    """
    n_heads = full_att_mat.shape[1]
    att_mean_heads = full_att_mat.sum(axis=1)/n_heads
    joint_attentions = compute_joint_attention(att_mean_heads, add_residual=True)
    relevance_attentions = joint_attentions[layer][output_index]
    return relevance_attentions


def get_flow_relevance(full_att_mat, input_tokens, layer):
    """Attention-flow relevance of the inputs for position 0 at `layer`.

    Steps:
      1. average attention over heads, add the identity and re-normalize each row;
      2. turn the result into a layered adjacency matrix with `get_adjmat`;
      3. build a directed graph whose edge capacities are the adjacency entries;
      4. let `compute_node_flow` compute flow from node 'L<layer+1>_0' to the
         input nodes, and return that node's row restricted to the columns of
         the preceding layer.

    Args:
        full_att_mat: array indexed as [layer, head, query, key].
        input_tokens: token labels forwarded to `get_adjmat`.
        layer: zero-based layer index; the output node is 'L<layer+1>_0'.

    Returns:
        1-D array of flow values, one per input position.
    """
    n_heads = full_att_mat.shape[1]
    length = full_att_mat.shape[-1]

    # Head-averaged attention plus an identity term, with rows re-normalized.
    res_att_mat = full_att_mat.sum(axis=1)/n_heads
    res_att_mat = res_att_mat + np.eye(res_att_mat.shape[1])[None,...]
    res_att_mat = res_att_mat / res_att_mat.sum(axis=-1)[...,None]

    res_adj_mat, res_labels_to_index = get_adjmat(mat=res_att_mat, input_tokens=input_tokens)

    # from_numpy_array replaces from_numpy_matrix, which networkx 3.0 removed.
    res_G = nx.from_numpy_array(res_adj_mat, create_using=nx.DiGraph())
    # One bulk update instead of one set_edge_attributes call per matrix cell;
    # only existing edges ever received a capacity, so the result is the same.
    capacities = {(u, v): res_adj_mat[u, v] for u, v in res_G.edges()}
    nx.set_edge_attributes(res_G, capacities, 'capacity')

    output_nodes = ['L'+str(layer+1)+'_0']
    # Input nodes are the labels whose index falls within the first `length` nodes.
    input_nodes = [key for key in res_labels_to_index
                   if res_labels_to_index[key] < length]

    flow_values = compute_node_flow(res_G, res_labels_to_index, input_nodes, output_nodes, length=length)

    # Rows from layer+1 onwards, columns of layer `layer`; row 0 of that slice is
    # the output node 'L<layer+1>_0'.
    final_layer_attention = flow_values[(layer+1)*length:,layer*length:(layer+1)*length]
    cls_index = 0
    relevance_attention_raw = final_layer_attention[cls_index]

    return relevance_attention_raw

In [11]:
def compute_relevance_per_layer(relevance_fn, name, n_layers=6):
    """Apply one attention-relevance function to every example at every layer.

    Reads the notebook globals `all_examples_x`, `all_examples_attentions` and
    `tokenizer`.

    Args:
        relevance_fn: callable `(att_mat, tokens, layer=...)` returning the
            relevance scores for one example.
        name: method name used in the progress message.
        n_layers: number of layers to evaluate (0 .. n_layers-1).

    Returns:
        dict mapping layer index -> list with one score array per example.
    """
    print("compute " + name + " relevance scores ...")
    relevance_by_layer = {}
    for l in range(n_layers):
        relevance_by_layer[l] = []
        for i in tqdm(np.arange(len(all_examples_x))):
            # Wordpiece tokens, one per input id. tokenizer.decode() returned a
            # single string, so `length` used to count characters, not tokens.
            tokens = tokenizer.convert_ids_to_tokens(all_examples_x[i][0].tolist())
            length = len(tokens)
            attention_relevance = relevance_fn(all_examples_attentions[i][...,:length, :length], tokens, layer=l)
            relevance_by_layer[l].append(np.asarray(attention_relevance))
    return relevance_by_layer


all_examples_raw_relevance = compute_relevance_per_layer(get_raw_att_relevance, "raw")

all_examples_joint_relevance = compute_relevance_per_layer(get_joint_relevance, "joint")

# NOTE: the recorded run of the flow computation took seconds per example and was
# interrupted; reduce the number of examples if it is too slow.
all_examples_flow_relevance = compute_relevance_per_layer(get_flow_relevance, "flow")


100%|██████████| 1000/1000 [00:00<00:00, 14470.55it/s]

100%|██████████| 1000/1000 [00:00<00:00, 15242.09it/s]

  0%|          | 0/1000 [00:00<?, ?it/s][A

compute raw relevance scores ...


100%|██████████| 1000/1000 [00:00<00:00, 15334.88it/s]

100%|██████████| 1000/1000 [00:00<00:00, 15259.45it/s]

100%|██████████| 1000/1000 [00:00<00:00, 15390.02it/s]

100%|██████████| 1000/1000 [00:00<00:00, 15335.44it/s]

  0%|          | 0/1000 [00:00<?, ?it/s][A
 42%|████▎     | 425/1000 [00:00<00:00, 4247.23it/s][A

compute joint relevance scores ...



100%|██████████| 1000/1000 [00:00<00:00, 4130.59it/s][A

  0%|          | 0/1000 [00:00<?, ?it/s][A
 46%|████▌     | 457/1000 [00:00<00:00, 4569.08it/s][A
100%|██████████| 1000/1000 [00:00<00:00, 4539.99it/s][A

  0%|          | 0/1000 [00:00<?, ?it/s][A
 46%|████▋     | 464/1000 [00:00<00:00, 4637.15it/s][A
100%|██████████| 1000/1000 [00:00<00:00, 4584.75it/s][A

  0%|          | 0/1000 [00:00<?, ?it/s][A
 47%|████▋     | 466/1000 [00:00<00:00, 4641.51it/s][A
100%|██████████| 1000/1000 [00:00<00:00, 4590.57it/s][A

  0%|          | 0/1000 [00:00<?, ?it/s][A
 46%|████▋     | 464/1000 [00:00<00:00, 4631.09it/s][A
100%|██████████| 1000/1000 [00:00<00:00, 4589.91it/s][A

  0%|          | 0/1000 [00:00<?, ?it/s][A
 46%|████▋     | 464/1000 [00:00<00:00, 4604.48it/s][A
100%|██████████| 1000/1000 [00:00<00:00, 4534.28it/s][A

  0%|          | 0/1000 [00:00<?, ?it/s][A

compute flow relevance scores ...



  0%|          | 1/1000 [00:00<12:57,  1.28it/s][A
  0%|          | 2/1000 [00:05<34:17,  2.06s/it][A
  0%|          | 4/1000 [00:06<24:54,  1.50s/it][A
  0%|          | 5/1000 [00:06<20:05,  1.21s/it][A
  1%|          | 6/1000 [00:15<56:23,  3.40s/it][A
  1%|          | 7/1000 [00:16<47:20,  2.86s/it][A
  1%|          | 8/1000 [00:17<36:13,  2.19s/it][A
  1%|          | 10/1000 [00:18<27:22,  1.66s/it][A
  1%|          | 11/1000 [00:21<35:10,  2.13s/it][A
  1%|          | 12/1000 [00:21<25:23,  1.54s/it][A
  1%|▏         | 13/1000 [00:21<18:49,  1.14s/it][A
  1%|▏         | 14/1000 [00:22<18:14,  1.11s/it][A
  2%|▏         | 15/1000 [00:26<28:49,  1.76s/it][A
  2%|▏         | 16/1000 [00:28<33:01,  2.01s/it][A
  2%|▏         | 17/1000 [00:28<23:40,  1.45s/it][A
  2%|▏         | 18/1000 [00:32<34:57,  2.14s/it][A
  2%|▏         | 19/1000 [00:32<25:30,  1.56s/it][A
  2%|▏         | 21/1000 [00:33<19:22,  1.19s/it][A
  2%|▏         | 22/1000 [00:34<18:12,  1.12s/it][A

KeyboardInterrupt: 

In [10]:

# print(np.mean([spearmanr(all_examples_flow_relevance[i], all_examples_joint_relevance[i]) for i in np.arange(len(all_examples_x))]))
# print(np.mean([spearmanr(all_examples_flow_relevance[i], all_examples_blankout_relevance[i]) for i in np.arange(len(all_examples_x))]))




def mean_spearman(relevance_scores, reference_scores):
    """Mean Spearman rank correlation between two per-example score lists.

    Only the correlation coefficient (element 0 of the spearmanr result) is
    averaged; previously the whole (correlation, p-value) result was collected,
    so the printed mean mixed correlations with p-values. A NaN correlation
    (e.g. constant scores) counts as 0.

    Args:
        relevance_scores: list with one 1-D score array per example.
        reference_scores: list with one 1-D score array per example, same order.

    Returns:
        float: average correlation over all examples in `all_examples_x`.
    """
    correlations = []
    for i in np.arange(len(all_examples_x)):
        rho = spearmanr(relevance_scores[i], reference_scores[i])[0]
        correlations.append(0 if math.isnan(rho) else rho)
    return np.mean(correlations)


# Reference attributions, one 1-D array per example (the gradient scores carry a
# leading batch axis of size 1, hence the [0]).
blankout_scores = all_examples_blankout_relevance
inputgrad_scores = [scores[0] for scores in all_examples_inputgrads]
grad_scores_per_example = [scores[0] for scores in all_examples_grads]

for l in np.arange(0,6):
    print("###############Layer ",l, "#############")
    raw = all_examples_raw_relevance[l]
    joint = all_examples_joint_relevance[l]
    flow = all_examples_flow_relevance[l]

    # (label, attention-based scores, reference scores, print shapes first?)
    # The plain 'flow' entry is the flow-vs-blankout comparison.
    comparisons = [
        ('raw blankout', raw, blankout_scores, False),
        ('raw inputgrad', raw, inputgrad_scores, True),
        ('raw grad', raw, grad_scores_per_example, True),
        ('joint blankout', joint, blankout_scores, False),
        ('joint grad', joint, grad_scores_per_example, True),
        ('joint inputgrad', joint, inputgrad_scores, True),
        ('flow', flow, blankout_scores, False),
        ('flow grad', flow, grad_scores_per_example, True),
        ('flow inputgrad', flow, inputgrad_scores, True),
    ]
    for label, relevance, reference, show_shapes in comparisons:
        print(label)
        if show_shapes:
            # Shape check of the first example's two score vectors.
            print(relevance[0].shape, reference[0].shape)
        print(mean_spearman(relevance, reference))
        

# for l in np.arange(6):
#     print("layer ",l)
#     print(all_examples_blankout_relevance[0].numpy().shape, all_examples_raw_relevance[l][0].shape, all_examples_joint_relevance[l][0].shape)
#     print('raw:',np.mean([spearmanr(all_examples_blankout_relevance[i], all_examples_blankout_relevance[i].numpy()) for i in np.arange(len(all_examples_x))]))
#     print('joint',np.mean([spearmanr(all_examples_joint_relevance[l][i], all_examples_blankout_relevance[i].numpy()) for i in np.arange(len(all_examples_x))]))
#     #print('flow',np.mean([spearmanr(all_examples_flow_relevance[l][i], all_examples_blankout_relevance[l][i]) for i in np.arange(len(all_examples_x))]))

###############Layer  0 #############
raw blankout


NameError: name 'all_examples_raw_relevance' is not defined