In [1]:
import pandas as pd
import transformers 
import numpy as np
import torch
import sys

sys.path.insert(0, '..')
from decompose_bert import BertForMaskedLMDecomposed


In [2]:
# The tokenizer and the decomposed masked-LM are both loaded from the same
# "bert-large-uncased" checkpoint name.
tokenizer = transformers.AutoTokenizer.from_pretrained("bert-large-uncased")

# BertForMaskedLMDecomposed is imported from the local decompose_bert module.
# NOTE(review): the meaning of debug=False is defined in that module — confirm there.
decomposed_model = BertForMaskedLMDecomposed.from_pretrained(
    "bert-large-uncased",
    debug=False
    )


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

BertForMaskedLM has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.
  - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).
  - If you are not the owner of the model architecture class, please contact the model code owner to update it.
Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another archit

In [3]:
# Collect the vocabulary id of every verb in all_VERBs.csv that the tokenizer
# encodes as exactly one token; verbs that split into several tokens are skipped.
verbs = pd.read_csv("all_VERBs.csv")["WORD"]
verb_ids = []

# Iterate the Series itself: looping over `.iloc` only works through Python's
# legacy __getitem__/IndexError iteration fallback.
for verb in verbs:
    token_ids = tokenizer.encode(" " + verb, add_special_tokens=False)
    if len(token_ids) == 1:
        verb_ids.append(token_ids[0])

# Build the int64 index tensor directly instead of going through a float32
# tensor and casting back to int.
verb_ids = torch.tensor(verb_ids, dtype=torch.long)


In [4]:
# Load the tab-separated nounpp dataset.
number_df = pd.read_csv("nounpp.tsv", delimiter="\t")
# Length that tokenised sentences are padded to further down in this cell.
max_length = 16

# Strip the first two characters of each id and keep the remainder as an int.
number_df["id"] = number_df["id"].apply(lambda x: int(x[2:]))

# only keep sentences that are plural/singular or singular/plural (distractor has different number)
number_df["subject_distractor_number"] = number_df["subject_distractor_number"].apply(
    lambda x: x if x == "singular_plural" or x == "plural_singular" else np.nan
)
# NOTE(review): dropna() without `subset` removes rows with a NaN in ANY column,
# not only the ones blanked out just above — confirm that is intended.
number_df.dropna(inplace=True)

# Split "<subject>_<distractor>" into its two halves.
number_df["subject_number"] = number_df["subject_distractor_number"].apply(lambda x: x.split("_")[0])
number_df["distractor_number"] = number_df["subject_distractor_number"].apply(lambda x: x.split("_")[1])

# The last space-separated word becomes the verb (stored with a leading space);
# the rest of the sentence gets "[MASK]" appended directly, with no separating space.
number_df["verb"] = number_df["sentence"].apply(lambda x: " " + x.split(" ")[-1])
number_df["sentence"] = number_df["sentence"].apply(lambda x: " ".join(x.split(" ")[:-1]) + "[MASK]")

# Pivot so that each (id, subject_number, distractor_number, sentence) becomes one
# row, with the verb spread over one column per value of "correctness". The result
# has two-level column labels such as ("verb", "correct") and ("verb", "wrong").
number_df = number_df.drop(
    columns=["subject_distractor_number"]
    ).pivot(index=["id", "subject_number", "distractor_number", "sentence"], columns=["correctness"], values=["verb"]).reset_index()


def get_token(correct_token, wrong_token):
    """Return ``[correct_id, wrong_id]`` for a pair of tokenised verbs.

    Both arguments are token-id sequences; the id at position 1 of each is
    taken as the verb's token. When either sequence holds more than three
    ids the pair is rejected and ``np.nan`` is returned instead, so the row
    can be removed later with ``dropna``.
    """
    both_short_enough = len(correct_token) <= 3 and len(wrong_token) <= 3
    if both_short_enough:
        return [correct_token[1], wrong_token[1]]

    # at least one verb was split into several tokens
    return np.nan

# Tokenise the correct and wrong verb of every row; the id lists are stored in
# temporary ("token", ...) columns that are dropped again at the end of the cell.
number_df[("token", "correct")] = tokenizer(number_df[("verb", "correct")].to_list())["input_ids"]
number_df[("token", "wrong")] = tokenizer(number_df[("verb", "wrong")].to_list())["input_ids"]

# [correct_id, wrong_id] per row, or NaN when get_token rejects the pair.
number_df[("verb_tokens")] = number_df.apply(
    lambda x: get_token(x[("token", "correct")], x[("token", "wrong")]),
    axis=1)

# Tokenise the masked sentences, padded to max_length.
# NOTE(review): no truncation argument is passed — confirm that no sentence
# tokenises to more than max_length ids.
number_df[("sentence_tokens")] = tokenizer(number_df[("sentence", "")].to_list(),
                                           max_length=max_length,
                                           padding="max_length")["input_ids"]

# drop rows containing NaN (e.g. where get_token rejected the verb pair),
# renumber the rows from 0 and remove the temporary "token" columns
number_df = number_df.dropna().reset_index(drop=True).drop(columns="token")
number_df


  number_df = number_df.dropna().reset_index(drop=True).drop(columns="token")


Unnamed: 0_level_0,id,subject_number,distractor_number,sentence,verb,verb,verb_tokens,sentence_tokens
correctness,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,correct,wrong,Unnamed: 7_level_1,Unnamed: 8_level_1
0,603,singular,plural,The athlete behind the cats[MASK],engages,engage,"[24255, 8526]","[101, 1996, 8258, 2369, 1996, 8870, 103, 102, ..."
1,604,singular,plural,The athlete behind the cats[MASK],remembers,remember,"[17749, 3342]","[101, 1996, 8258, 2369, 1996, 8870, 103, 102, ..."
2,605,singular,plural,The athlete behind the chairs[MASK],observes,observe,"[24451, 11949]","[101, 1996, 8258, 2369, 1996, 8397, 103, 102, ..."
3,606,singular,plural,The athlete behind the dogs[MASK],encourages,encourage,"[16171, 8627]","[101, 1996, 8258, 2369, 1996, 6077, 103, 102, ..."
4,608,singular,plural,The athlete behind the trucks[MASK],avoids,avoid,"[26777, 4468]","[101, 1996, 8258, 2369, 1996, 9322, 103, 102, ..."
...,...,...,...,...,...,...,...,...
556,1793,plural,singular,The women near the cat[MASK],avoid,avoids,"[4468, 26777]","[101, 1996, 2308, 2379, 1996, 4937, 103, 102, ..."
557,1795,plural,singular,The women near the dog[MASK],engage,engages,"[8526, 24255]","[101, 1996, 2308, 2379, 1996, 3899, 103, 102, ..."
558,1797,plural,singular,The women near the window[MASK],avoid,avoids,"[4468, 26777]","[101, 1996, 2308, 2379, 1996, 3332, 103, 102, ..."
559,1799,plural,singular,The women near the window[MASK],engage,engages,"[8526, 24255]","[101, 1996, 2308, 2379, 1996, 3332, 103, 102, ..."


In [5]:
def make_component_masks(sentence_tokens):
    """Build one-hot position masks for each component of a tokenised sentence.

    The token sequence is read as: position 0, position 1, one or more subject
    tokens, a preposition ("behind", "beside" or "near"), one token after the
    preposition, one or more distractor tokens, then the mask token.

    Args:
        sentence_tokens: sequence of token ids for a single sentence.

    Returns:
        A tuple ``(component_masks, mask_i)``. ``component_masks`` is a float
        array of shape ``(6, len(sentence_tokens))`` whose rows mark, in order:
        position 0, position 1, the subject tokens, the preposition, the token
        following the preposition, and the distractor tokens. ``mask_i`` is the
        index of the mask token.

    Raises:
        ValueError: if no preposition, or no mask token after it, is found.
    """
    prep_ids = set(tokenizer.encode(' behind beside near')[1:-1])
    mask_token_id = tokenizer.mask_token_id
    n_tokens = len(sentence_tokens)

    # the first two positions are fixed; the subject starts at position 2
    init_i = 0
    The_i = 1

    # scan forward to the preposition, never running past the end of the input
    prep_i = 2
    while prep_i < n_tokens and sentence_tokens[prep_i] not in prep_ids:
        prep_i += 1
    if prep_i >= n_tokens:
        raise ValueError(
            "no preposition (behind/beside/near) found in sentence tokens: "
            + repr(sentence_tokens)
        )
    subj_i = list(range(2, prep_i))

    the_i = prep_i + 1
    distractor_start = prep_i + 2

    # scan forward to the mask token; everything in between is the distractor
    mask_i = distractor_start
    while mask_i < n_tokens and sentence_tokens[mask_i] != mask_token_id:
        mask_i += 1
    if mask_i >= n_tokens:
        raise ValueError(
            "no mask token found after the preposition in sentence tokens: "
            + repr(sentence_tokens)
        )
    distractor_id = list(range(distractor_start, mask_i))

    # place each one in a separate array; the width follows the actual input
    # length so the masks always line up with the tokens they describe
    component_masks = np.zeros((6, n_tokens))
    for n, component in enumerate([
        init_i, The_i, subj_i, prep_i, the_i, distractor_id
    ]):
        component_masks[n, component] = 1

    # diagnostic: 6 is where the mask lands when subject and distractor are a
    # single token each, so anything else is printed for inspection
    if mask_i != 6:
        print(sentence_tokens)
        print(component_masks)
        print(mask_i)
        print()
    return component_masks, mask_i


# Store, for every sentence, the (component_masks, mask_i) tuple returned by
# make_component_masks.
number_df["beta_mask"] = number_df["sentence_tokens"].apply(make_component_masks)


[101, 1996, 8258, 3875, 1996, 4624, 2015, 103, 102, 0, 0, 0, 0, 0, 0, 0]
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
7

[101, 1996, 8258, 2379, 1996, 4624, 2015, 103, 102, 0, 0, 0, 0, 0, 0, 0]
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
7

[101, 1996, 5916, 2379, 1996, 4624, 2015, 103, 102, 0, 0, 0, 0, 0, 0, 0]
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 

In [6]:
def get_proportion_contribution(n):
    """Score each sentence component's contribution to the two candidate verbs of row ``n``.

    For every one of the six component masks stored in ``number_df["beta_mask"]``
    the decomposed model is run once with a two-way split: the component itself
    and its complement. The slice ``[0, 0, mask_i, :]`` of the model output is
    standardised against the entries selected by ``verb_ids`` (mean subtracted,
    then divided by the standard deviation), and the standardised values at the
    row's two verb token ids are recorded.
    NOTE(review): the slice presumably means batch 0, contribution 0, masked
    position — confirm against decompose_bert.

    Returns:
        A ``(6, 2)`` tensor: one row per component mask, one column per entry
        of the row's ``verb_tokens`` (correct verb first, wrong verb second).
    """
    mask_entry = number_df["beta_mask"][n]
    component_masks = torch.tensor(mask_entry[0])
    mask_i = mask_entry[1]
    candidate_ids = torch.tensor(number_df["verb_tokens"][n])
    input_ids = torch.tensor(number_df["sentence_tokens"][n]).unsqueeze(0)

    contribution_logits = torch.zeros((6, 2))

    for row, component_mask in enumerate(component_masks):
        # first entry: the component itself; second entry: every other position
        split = torch.stack([component_mask, 1 - component_mask]).unsqueeze(0)

        with torch.no_grad():
            output = decomposed_model(
                input_ids=input_ids,
                beta_mask=split,
                num_contributions=2,
            )
        scores = output[0, 0, mask_i, :]

        # normalize across all verbs: centre first, then scale by the std of
        # the centred verb scores
        centred = scores - scores[verb_ids].mean()
        standardised = centred / centred[verb_ids].std()

        contribution_logits[row, :] = standardised[candidate_ids]

    return contribution_logits


In [7]:
# Run the decomposition for every row and bucket the per-sentence results by
# the grammatical number of the subject.
contribution_logits_sp = []  # rows whose subject is singular
contribution_logits_ps = []  # all remaining rows

for i in range(len(number_df)):
    is_singular = number_df["subject_number"][i] == "singular"
    bucket = contribution_logits_sp if is_singular else contribution_logits_ps
    bucket.append(get_proportion_contribution(i))

    # coarse progress indicator: every 20th row
    if not i % 20:
        print(i)

# turn each list of per-sentence tensors into a single stacked tensor
contribution_logits_sp = torch.stack(contribution_logits_sp)
contribution_logits_ps = torch.stack(contribution_logits_ps)


0
20
40
60
80
100
120
140
160
180
200
220
240
260
280
300
320
340
360
380
400
420
440
460
480
500
520
540
560


In [10]:
# Mean over the sentence axis (dim 0) for the rows whose subject is singular.
# The result is (6, 2): rows follow the component order used in
# make_component_masks, columns are [correct verb, wrong verb].
contribution_logits_sp.mean(0)


tensor([[ 0.4641,  0.2864],
        [ 0.4492, -0.2628],
        [ 0.2477, -0.2646],
        [ 0.2281,  0.0806],
        [ 0.2286, -0.1768],
        [-0.1822, -0.0898]])

In [11]:
# Mean over the sentence axis (dim 0) for the rows whose subject is not singular.
# The result is (6, 2): rows follow the component order used in
# make_component_masks, columns are [correct verb, wrong verb].
contribution_logits_ps.mean(0)


tensor([[ 0.0179,  0.2272],
        [-0.2740,  0.0579],
        [-0.1277, -0.3682],
        [ 0.0993,  0.0550],
        [-0.0427,  0.1321],
        [-0.1598,  0.0112]])