In [2]:
import pandas as pd
import torch
import numpy as np
from mechir import Cat, Dot
from mechir.data import CatDataCollator, DotDataCollator, MechDataset
from transformers import ElectraTokenizer, ElectraForSequenceClassification, ElectraConfig, AutoModelForSequenceClassification, AutoTokenizer
from mechir.modelling.hooked.HookedElectra import HookedElectraForSequenceClassification
from mechir.modelling.hooked.HookedEncoder import HookedEncoder
from tqdm.notebook import tqdm


def load_cross(model_name_or_path : str):
    return Cat(model_name_or_path), CatDataCollator


def process_frame(frame):

    output = {
        'qid': [],
        'query': [],
        'docno': [],
        'text': [],
        'perturbed': [],
    }

    for row in frame.itertuples():
        output['qid'].append(row.qid)
        output['query'].append(row.query)
        output['docno'].append(row.docno)
        output['text'].append(row.text)
        output['perturbed'].append(row.perturbed_text)
    
    return pd.DataFrame(output)


model_path = 'crystina-z/monoELECTRA_LCE_nneg31'

torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7f46b02deca0>

In [2]:
top_k_data = pd.read_csv('/mnt/qb/work/eickhoff/esx208/MechIR/data/topk_my/crystina-z-monoELECTRA_LCE_nneg31_cross_TFC1_topk_4000.tsv', sep='\t')
top_k_data_original = pd.read_csv('/mnt/qb/work/eickhoff/esx208/MechIR/data/topk/crystina-z-monoELECTRA_LCE_nneg31_cross_TFC1_topk_4000.tsv', sep='\t')

In [3]:
top_k_data

Unnamed: 0,qid,query,docno,text,relevance,perturbed,perturbed_score,rank,original_score,score_delta,perturbed_text
0,855410,what is theraderm used for,7128710,It is used to trap sunlight for the manufactur...,0,True,5.290899,0,-7.387448,12.678347,It is used to trap sunlight for the manufactur...
1,855410,what is theraderm used for,3276027,"A mental grouping of similar things, events, a...",0,True,2.807054,30,-9.529633,12.336688,"A mental grouping of similar things, events, a..."
2,855410,what is theraderm used for,8751749,": to add moisture to (something, such as a per...",0,True,2.844434,26,-9.467996,12.312429,": to add moisture to (something, such as a per..."
3,855410,what is theraderm used for,2945339,1 used to form adjectives Resembling or imitat...,0,True,2.434965,38,-9.695806,12.130772,1 used to form adjectives Resembling or imitat...
4,583468,what carvedilol used for,129686,Bottles of eau de toilette. Eau de toilette (F...,0,True,1.873736,226,-10.083253,11.956989,Bottles of eau de toilette. Eau de toilette (F...
...,...,...,...,...,...,...,...,...,...,...,...
3995,405163,is caffeine an narcotic,7339772,Combination or cocktail drugs that contain a c...,3,True,-4.797085,258,-4.803654,0.006569,Combination or cocktail drugs that contain a c...
3996,324585,how much money do motivational speakers make,405281,Fee structures for more experienced and profes...,3,True,2.972391,8,2.965918,0.006473,Fee structures for more experienced and profes...
3997,1132532,average annual income data analyst,6117745,What Data Analysts Make. Since the field of da...,3,True,2.125537,69,2.119307,0.006230,What Data Analysts Make. Since the field of da...
3998,1113256,what is reba mcentire's net worth,7561111,Reba McEntire is a popular American singer wit...,3,True,5.698447,7,5.692532,0.005915,Reba McEntire is a popular American singer wit...


In [4]:
model, collator = load_cross(model_path)
processed_frame = process_frame(top_k_data)

dataset = MechDataset(processed_frame, pre_perturbed=True)
collator = collator(model.tokenizer, pre_perturbed=True)

dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, collate_fn=collator)

If using BERT for interpretability research, keep in mind that BERT has some significant architectural differences to GPT. For example, LayerNorms are applied *after* the attention and MLP components, meaning that the last LayerNorm in a block cannot be folded.


Moving model to device:  cuda
Loaded pretrained model crystina-z/monoELECTRA_LCE_nneg31 into HookedTransformer


In [5]:
for batch in tqdm(dataloader):
    sequences = batch["sequences"]
    perturbed_sequences = batch["perturbed_sequences"]

    # patch_head_out = model(sequences, perturbed_sequences, patch_type="head_all")
    logits_original = model._forward(sequences['input_ids'], sequences['attention_mask'])
    logits_perturbed = model._forward(perturbed_sequences['input_ids'], perturbed_sequences['attention_mask'])
    print(logits_original, logits_perturbed)

  0%|          | 0/4000 [00:00<?, ?it/s]

tensor([[ 0.6007, -8.0836]], device='cuda:0') tensor([[-0.0714,  4.9900]], device='cuda:0')
tensor([[ 0.6595, -9.7391]], device='cuda:0') tensor([[-0.0164,  2.2244]], device='cuda:0')
tensor([[ 0.6405, -9.1719]], device='cuda:0') tensor([[-0.0137,  2.2152]], device='cuda:0')
tensor([[ 0.6579, -9.3539]], device='cuda:0') tensor([[-0.0051,  1.9446]], device='cuda:0')
tensor([[ 0.6588, -9.7837]], device='cuda:0') tensor([[0.0097, 1.8157]], device='cuda:0')
tensor([[ 0.6227, -8.9066]], device='cuda:0') tensor([[-5.3013e-04,  2.0975e+00]], device='cuda:0')
tensor([[  0.6547, -10.2642]], device='cuda:0') tensor([[0.0301, 0.9710]], device='cuda:0')
tensor([[  0.6671, -10.9238]], device='cuda:0') tensor([[0.0676, 0.3069]], device='cuda:0')
tensor([[ 0.6405, -9.6066]], device='cuda:0') tensor([[0.0128, 1.6630]], device='cuda:0')
tensor([[  0.6747, -10.2596]], device='cuda:0') tensor([[0.0399, 0.9523]], device='cuda:0')
tensor([[ 0.6504, -9.9952]], device='cuda:0') tensor([[0.0500, 0.6680]], dev

KeyboardInterrupt: 

In [6]:
model_hf = ElectraForSequenceClassification.from_pretrained(model_path)
model_hf.eval();

In [7]:
for batch in tqdm(dataloader):
    sequences = batch["sequences"]
    perturbed_sequences = batch["perturbed_sequences"]

    # patch_head_out = model(sequences, perturbed_sequences, patch_type="head_all")
    logits_original = model_hf(sequences['input_ids'], sequences['attention_mask'])
    logits_perturbed = model_hf(perturbed_sequences['input_ids'], perturbed_sequences['attention_mask'])
    print(logits_original, logits_perturbed)

  0%|          | 0/4000 [00:00<?, ?it/s]

SequenceClassifierOutput(loss=None, logits=tensor([[ 0.6007, -8.0835]]), hidden_states=None, attentions=None) SequenceClassifierOutput(loss=None, logits=tensor([[-0.0714,  4.9900]]), hidden_states=None, attentions=None)
SequenceClassifierOutput(loss=None, logits=tensor([[ 0.6595, -9.7391]]), hidden_states=None, attentions=None) SequenceClassifierOutput(loss=None, logits=tensor([[-0.0164,  2.2244]]), hidden_states=None, attentions=None)
SequenceClassifierOutput(loss=None, logits=tensor([[ 0.6405, -9.1719]]), hidden_states=None, attentions=None) SequenceClassifierOutput(loss=None, logits=tensor([[-0.0137,  2.2152]]), hidden_states=None, attentions=None)
SequenceClassifierOutput(loss=None, logits=tensor([[ 0.6579, -9.3539]]), hidden_states=None, attentions=None) SequenceClassifierOutput(loss=None, logits=tensor([[-0.0051,  1.9446]]), hidden_states=None, attentions=None)
SequenceClassifierOutput(loss=None, logits=tensor([[ 0.6588, -9.7837]]), hidden_states=None, attentions=None) SequenceCl

KeyboardInterrupt: 

In [46]:
model_hf_auto = AutoModelForSequenceClassification.from_pretrained(model_path).to('cuda')
model_hf_auto.eval();

In [None]:
# for batch in tqdm(dataloader):
#     sequences = batch["sequences"]
#     perturbed_sequences = batch["perturbed_sequences"]

#     # patch_head_out = model(sequences, perturbed_sequences, patch_type="head_all")
#     logits_original = model_hf_auto(sequences['input_ids'], sequences['attention_mask'])
#     logits_perturbed = model_hf_auto(perturbed_sequences['input_ids'], perturbed_sequences['attention_mask'])
#     print(logits_original, logits_perturbed)

In [12]:
hooked_electra = HookedElectraForSequenceClassification.from_pretrained(model_path, hf_model=model_hf_auto)
hooked_electra.eval();

If using BERT for interpretability research, keep in mind that BERT has some significant architectural differences to GPT. For example, LayerNorms are applied *after* the attention and MLP components, meaning that the last LayerNorm in a block cannot be folded.


Moving model to device:  cuda
Loaded pretrained model crystina-z/monoELECTRA_LCE_nneg31 into HookedTransformer


In [13]:
# for batch in tqdm(dataloader):
#     sequences = batch["sequences"]
#     perturbed_sequences = batch["perturbed_sequences"]

#     # patch_head_out = model(sequences, perturbed_sequences, patch_type="head_all")
#     logits_original = hooked_electra(sequences['input_ids'], sequences['attention_mask'])
#     logits_perturbed = hooked_electra(perturbed_sequences['input_ids'], perturbed_sequences['attention_mask'])
#     print(logits_original, logits_perturbed)

In [14]:
hooked_electra_encoder = HookedEncoder.from_pretrained(model_path, hf_model=model_hf_auto)
hooked_electra_encoder.eval();

If using BERT for interpretability research, keep in mind that BERT has some significant architectural differences to GPT. For example, LayerNorms are applied *after* the attention and MLP components, meaning that the last LayerNorm in a block cannot be folded.


Moving model to device:  cuda
Loaded pretrained model crystina-z/monoELECTRA_LCE_nneg31 into HookedEncoder


In [29]:
# for batch in tqdm(dataloader):
#     sequences = batch["sequences"]
#     perturbed_sequences = batch["perturbed_sequences"]

#     # patch_head_out = model(sequences, perturbed_sequences, patch_type="head_all")
#     logits_original = hooked_electra_encoder.forward(input = sequences['input_ids'], one_zero_attention_mask = sequences['attention_mask'], return_type='embeddings').squeeze()[0]
#     logits_perturbed = hooked_electra_encoder.forward(input = perturbed_sequences['input_ids'], one_zero_attention_mask = perturbed_sequences['attention_mask'], return_type='embeddings').squeeze()[0]
    
#     print(logits_original, logits_perturbed)

In [16]:
for batch in tqdm(dataloader):
    sequences = batch["sequences"]
    perturbed_sequences = batch["perturbed_sequences"]

    # patch_head_out = model(sequences, perturbed_sequences, patch_type="head_all")
    hidden_states_original_hf = model_hf_auto.forward(input_ids = sequences['input_ids'], attention_mask = sequences['attention_mask'], output_hidden_states=True).hidden_states[-1].cpu()
    hidden_states_hooked = hooked_electra_encoder.forward(input = sequences['input_ids'], one_zero_attention_mask = sequences['attention_mask'], return_type='embeddings').cpu()
    
    print(torch.norm(hidden_states_original_hf - hidden_states_hooked))
    # logits_perturbed = model_hf_auto.forward(input = perturbed_sequences['input_ids'], attention_mask = perturbed_sequences['attention_mask'], output_hidden_states=True)
    
    # print(logits_original, logits_perturbed)
    # print(logits_original.hidden_states[-1].shape)

  0%|          | 0/4000 [00:00<?, ?it/s]

tensor(0.0003)
tensor(0.0003)
tensor(0.0003)
tensor(0.0003)
tensor(0.0003)
tensor(0.0004)
tensor(0.0003)
tensor(0.0003)
tensor(0.0009)
tensor(0.0003)
tensor(0.0003)
tensor(0.0003)
tensor(0.0003)
tensor(0.0003)
tensor(0.0003)
tensor(0.0003)
tensor(0.0003)
tensor(0.0003)
tensor(0.0003)
tensor(0.0003)
tensor(0.0003)
tensor(0.0003)
tensor(0.0003)
tensor(0.0003)
tensor(0.0003)
tensor(0.0003)
tensor(0.0003)
tensor(0.0004)
tensor(0.0003)
tensor(0.0004)
tensor(0.0003)
tensor(0.0003)
tensor(0.0003)


KeyboardInterrupt: 

In [17]:
hidden_states_original_hf

tensor([[[-1.0663,  0.3791, -0.3446,  ...,  0.6238, -0.4696,  0.3691],
         [-1.3517,  0.2179, -0.2736,  ...,  0.2014, -0.6880,  0.0859],
         [-1.2336,  0.1570, -0.3159,  ...,  0.2386, -0.6920,  0.0036],
         ...,
         [-0.1231, -0.0299, -0.6617,  ...,  0.2959,  0.1174, -0.1434],
         [-0.1104, -0.0297, -0.6483,  ...,  0.3024,  0.1304, -0.1497],
         [-0.1025, -0.0275, -0.6361,  ...,  0.3083,  0.1314, -0.1626]]])

In [18]:
hidden_states_hooked

tensor([[[-1.0663,  0.3791, -0.3446,  ...,  0.6238, -0.4696,  0.3691],
         [-1.3517,  0.2179, -0.2736,  ...,  0.2014, -0.6880,  0.0859],
         [-1.2336,  0.1570, -0.3159,  ...,  0.2386, -0.6920,  0.0036],
         ...,
         [-0.1231, -0.0299, -0.6617,  ...,  0.2959,  0.1174, -0.1434],
         [-0.1104, -0.0297, -0.6483,  ...,  0.3024,  0.1304, -0.1497],
         [-0.1025, -0.0275, -0.6361,  ...,  0.3083,  0.1314, -0.1626]]])

In [22]:
hooked_electra.classifier(hidden_states_hooked.to('cuda')[:, 0, :])

tensor([[ 0.0528, -0.0423]], device='cuda:0')

In [26]:
model_hf_auto.classifier(hidden_states_hooked)

tensor([[ 0.6576, -9.9945]])

In [None]:
hooked_electra.classifier.dense.W

Parameter containing:
tensor([[ 0.0188, -0.0108,  0.0167,  ...,  0.0180, -0.0255, -0.0335],
        [ 0.0043,  0.0038,  0.0112,  ...,  0.0042,  0.0247, -0.0072],
        [ 0.0017, -0.0154, -0.0111,  ...,  0.0068, -0.0113,  0.0100],
        ...,
        [ 0.0045,  0.0078,  0.0109,  ..., -0.0085, -0.0171,  0.0083],
        [-0.0024,  0.0038, -0.0095,  ...,  0.0082, -0.0316, -0.0142],
        [-0.0014, -0.0061,  0.0148,  ..., -0.0226,  0.0201,  0.0098]],
       device='cuda:0', requires_grad=True)

In [36]:
hooked_electra.classifier.dense.b

Parameter containing:
tensor([ 4.3934e-03,  5.1398e-03,  6.4082e-03,  7.8073e-03,  2.2200e-03,
         2.3627e-03,  1.7209e-03, -1.3693e-03, -3.0374e-04,  3.3524e-03,
         2.8690e-03,  2.1403e-03,  1.6699e-03,  4.7163e-03,  3.8768e-03,
         4.6608e-03,  5.5910e-03,  3.7025e-03,  5.7490e-03,  3.9949e-03,
         7.5185e-03,  4.1993e-03,  4.4770e-03,  4.4284e-03,  5.2260e-04,
         5.5115e-03,  5.4868e-03,  8.5865e-04,  3.0334e-03,  2.5486e-03,
         3.0714e-03,  2.2496e-03,  2.7913e-03,  3.4346e-03,  3.5172e-03,
         4.6498e-03,  7.4464e-03,  6.1305e-03,  3.5923e-03,  9.9712e-05,
         3.1721e-03,  4.1063e-03,  6.0009e-03,  3.5450e-03,  4.3172e-03,
         3.6703e-03,  3.4783e-03,  3.7992e-03,  1.9989e-03,  2.8690e-03,
         4.3205e-03,  4.2617e-03,  3.6126e-05,  6.6863e-04,  1.8781e-03,
         3.1615e-03,  3.1901e-03,  3.5194e-03,  4.5127e-03,  4.7579e-03,
         4.1746e-03, -1.2534e-03,  1.0846e-03,  4.1976e-03,  5.2986e-03,
         4.8863e-03, -6.6672e

In [38]:
hooked_electra.classifier.out_proj.W

Parameter containing:
tensor([[-0.0011, -0.0206],
        [ 0.0122,  0.0090],
        [ 0.0002, -0.0078],
        ...,
        [ 0.0024,  0.0060],
        [ 0.0355, -0.0067],
        [ 0.0297,  0.0204]], device='cuda:0', requires_grad=True)

In [39]:
hooked_electra.classifier.out_proj.b

Parameter containing:
tensor([0.0000, 0.0018], device='cuda:0', requires_grad=True)

In [35]:
model_hf_auto.classifier.dense.weight

Parameter containing:
tensor([[ 0.0188, -0.0108,  0.0167,  ...,  0.0180, -0.0255, -0.0335],
        [ 0.0043,  0.0038,  0.0112,  ...,  0.0042,  0.0247, -0.0072],
        [ 0.0017, -0.0154, -0.0111,  ...,  0.0068, -0.0113,  0.0100],
        ...,
        [ 0.0045,  0.0078,  0.0109,  ..., -0.0085, -0.0171,  0.0083],
        [-0.0024,  0.0038, -0.0095,  ...,  0.0082, -0.0316, -0.0142],
        [-0.0014, -0.0061,  0.0148,  ..., -0.0226,  0.0201,  0.0098]])

In [37]:
model_hf_auto.classifier.dense.bias

Parameter containing:
tensor([ 4.3934e-03,  5.1398e-03,  6.4082e-03,  7.8073e-03,  2.2200e-03,
         2.3627e-03,  1.7209e-03, -1.3693e-03, -3.0374e-04,  3.3524e-03,
         2.8690e-03,  2.1403e-03,  1.6699e-03,  4.7163e-03,  3.8768e-03,
         4.6608e-03,  5.5910e-03,  3.7025e-03,  5.7490e-03,  3.9949e-03,
         7.5185e-03,  4.1993e-03,  4.4770e-03,  4.4284e-03,  5.2260e-04,
         5.5115e-03,  5.4868e-03,  8.5865e-04,  3.0334e-03,  2.5486e-03,
         3.0714e-03,  2.2496e-03,  2.7913e-03,  3.4346e-03,  3.5172e-03,
         4.6498e-03,  7.4464e-03,  6.1305e-03,  3.5923e-03,  9.9712e-05,
         3.1721e-03,  4.1063e-03,  6.0009e-03,  3.5450e-03,  4.3172e-03,
         3.6703e-03,  3.4783e-03,  3.7992e-03,  1.9989e-03,  2.8690e-03,
         4.3205e-03,  4.2617e-03,  3.6126e-05,  6.6863e-04,  1.8781e-03,
         3.1615e-03,  3.1901e-03,  3.5194e-03,  4.5127e-03,  4.7579e-03,
         4.1746e-03, -1.2534e-03,  1.0846e-03,  4.1976e-03,  5.2986e-03,
         4.8863e-03, -6.6672e

In [40]:
model_hf_auto.classifier.out_proj.weight

Parameter containing:
tensor([[-0.0011,  0.0122,  0.0002,  ...,  0.0024,  0.0355,  0.0297],
        [-0.0206,  0.0090, -0.0078,  ...,  0.0060, -0.0067,  0.0204]])

In [41]:
model_hf_auto.classifier.out_proj.bias

Parameter containing:
tensor([0.0000, 0.0018])

In [49]:
model_hf_auto.classifier.dense(hidden_states_hooked[:, 0, :].to('cuda')).shape

torch.Size([1, 768])

In [50]:
hooked_electra.classifier.dense(hidden_states_hooked[:, 0, :].to('cuda')).shape

torch.Size([1, 768])

In [51]:
torch.norm(model_hf_auto.classifier.dense(hidden_states_hooked[:, 0, :].to('cuda')) - hooked_electra.classifier.dense(hidden_states_hooked[:, 0, :].to('cuda')))

tensor(32.2623, device='cuda:0')

In [57]:
model_hf_auto.classifier.dense.weight

Parameter containing:
tensor([[ 0.0188, -0.0108,  0.0167,  ...,  0.0180, -0.0255, -0.0335],
        [ 0.0043,  0.0038,  0.0112,  ...,  0.0042,  0.0247, -0.0072],
        [ 0.0017, -0.0154, -0.0111,  ...,  0.0068, -0.0113,  0.0100],
        ...,
        [ 0.0045,  0.0078,  0.0109,  ..., -0.0085, -0.0171,  0.0083],
        [-0.0024,  0.0038, -0.0095,  ...,  0.0082, -0.0316, -0.0142],
        [-0.0014, -0.0061,  0.0148,  ..., -0.0226,  0.0201,  0.0098]],
       device='cuda:0', requires_grad=True)

In [55]:
hooked_electra.classifier.dense.W

Parameter containing:
tensor([[ 0.0188, -0.0108,  0.0167,  ...,  0.0180, -0.0255, -0.0335],
        [ 0.0043,  0.0038,  0.0112,  ...,  0.0042,  0.0247, -0.0072],
        [ 0.0017, -0.0154, -0.0111,  ...,  0.0068, -0.0113,  0.0100],
        ...,
        [ 0.0045,  0.0078,  0.0109,  ..., -0.0085, -0.0171,  0.0083],
        [-0.0024,  0.0038, -0.0095,  ...,  0.0082, -0.0316, -0.0142],
        [-0.0014, -0.0061,  0.0148,  ..., -0.0226,  0.0201,  0.0098]],
       device='cuda:0', requires_grad=True)

In [59]:
hidden_states_hooked[:, 0, :].to('cuda') @ model_hf_auto.classifier.dense.weight.T + model_hf_auto.classifier.dense.bias

tensor([[ 1.5833, -1.2568,  1.7047,  1.9894, -0.8320, -0.9838,  1.0394,  0.6531,
         -0.7329, -0.6433,  0.3173, -1.2022, -0.6212,  1.5306,  1.1218,  1.3165,
          1.8299,  1.5401, -1.2019,  1.4749,  1.7011, -0.8879, -1.3039,  1.4089,
         -0.9930,  1.4498,  1.6232, -0.6630, -0.8389, -0.4851, -0.8344,  0.9807,
          1.2160,  1.2959, -0.5652, -1.0649, -1.1156,  1.8938, -1.0564, -0.7823,
          1.3016, -0.8889,  1.6937, -0.8264, -1.0309, -1.1337, -1.1236,  1.8256,
         -0.9242, -0.7022, -1.0137,  1.4451,  1.2932,  1.0421,  0.8455, -0.8808,
          1.0303, -1.0192,  1.1962, -1.2161,  1.5343, -0.6149,  0.8130,  1.5367,
          1.4110, -0.7818, -0.4916,  2.0116,  1.3464,  1.5073, -0.6757,  1.6220,
          1.1449, -0.5913, -0.6790,  1.9272,  1.8581, -0.4916,  1.4508, -1.0788,
          0.7904,  1.5061,  1.5060, -0.6371, -0.8249, -0.9102,  1.3436, -1.0804,
         -0.7801, -0.4121, -1.0401, -0.8753, -0.3712, -1.0995, -0.7528,  1.5861,
         -0.7180,  1.3870, -

In [62]:
hidden_states_hooked[:, 0, :].to('cuda') @ hooked_electra.classifier.dense.W + hooked_electra.classifier.dense.b

tensor([[ 9.5852e-03, -8.0672e-02, -2.8371e-01,  2.7265e-01, -1.2491e-02,
         -5.6396e-01, -6.5331e-02, -4.1403e-01, -4.8150e-01,  2.2235e-01,
          2.4703e-01,  3.6307e-01,  3.4036e-01,  1.7205e-01,  3.2924e-01,
          2.3459e-01,  3.2488e-01,  2.1217e-01,  6.1755e-01, -7.4384e-02,
          5.6790e-02, -4.2148e-01,  2.5461e-01,  2.8157e-01, -1.1352e-02,
          2.1818e-01,  2.2884e-01,  6.6591e-01,  4.0139e-01, -1.5352e-01,
         -8.9636e-02,  1.9006e-01,  3.6362e-01, -1.9753e-01, -1.5544e-01,
          1.3740e-01, -3.6583e-01,  8.1561e-03, -2.5118e-01,  8.6504e-01,
          1.2900e-01,  2.7474e-02,  2.6120e-01, -1.1029e-02,  2.9492e-01,
          2.5391e-01, -8.6349e-01,  3.3272e-01, -2.4785e-01, -6.1354e-01,
          5.3755e-01, -2.6374e-01, -3.7037e-01,  4.6481e-01,  6.4079e-02,
          7.9898e-01, -3.6733e-01,  1.3056e-01, -2.3809e-01, -2.4169e-01,
          5.9942e-02, -5.2867e-02,  9.3116e-02,  4.6351e-01,  3.3255e-01,
         -4.2009e-01, -1.9807e-02,  1.

Let's now test why scores from pyterrier_dr are different from the scores I obtained myself

In [8]:
from pyterrier_dr import HgfBiEncoder, ElectraScorer, SimFn

In [43]:
model_pyterrier = ElectraScorer(model_path, batch_size=1, verbose=True, device='cuda')



In [22]:
for batch in tqdm(dataloader):
    sequences = batch["sequences"]
    perturbed_sequences = batch["perturbed_sequences"]
    
    print(model.tokenizer.batch_decode(sequences['input_ids']))

    # patch_head_out = model(sequences, perturbed_sequences, patch_type="head_all")
    logits_original = model._forward(sequences['input_ids'], sequences['attention_mask'])
    logits_perturbed = model._forward(perturbed_sequences['input_ids'], perturbed_sequences['attention_mask'])
    print(logits_original, logits_perturbed)

  0%|          | 0/4000 [00:00<?, ?it/s]

['[CLS] what is theraderm used for [SEP] it is used to trap sunlight for the manufacturing of food. a a a [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] 

KeyboardInterrupt: 

In [44]:
model_pyterrier.transform(top_k_data.head(10)).sort_index()

ELECTRA scoring: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 113.77record/s]


Unnamed: 0,qid,query,docno,text,relevance,perturbed,perturbed_score,original_score,score_delta,perturbed_text,score,rank
0,855410,what is theraderm used for,7128710,It is used to trap sunlight for the manufactur...,0,True,5.290899,-7.387448,12.678347,It is used to trap sunlight for the manufactur...,-7.38745,0
1,855410,what is theraderm used for,3276027,"A mental grouping of similar things, events, a...",0,True,2.807054,-9.529633,12.336688,"A mental grouping of similar things, events, a...",-9.529635,2
2,855410,what is theraderm used for,8751749,": to add moisture to (something, such as a per...",0,True,2.844434,-9.467996,12.312429,": to add moisture to (something, such as a per...",-9.467994,1
3,855410,what is theraderm used for,2945339,1 used to form adjectives Resembling or imitat...,0,True,2.434965,-9.695806,12.130772,1 used to form adjectives Resembling or imitat...,-9.69581,3
4,583468,what carvedilol used for,129686,Bottles of eau de toilette. Eau de toilette (F...,0,True,1.873736,-10.083253,11.956989,Bottles of eau de toilette. Eau de toilette (F...,-10.08325,2
5,583468,what carvedilol used for,129688,Eau de toilette. Eau de toilette (French: [o d...,0,True,2.034735,-9.781629,11.816364,Eau de toilette. Eau de toilette (French: [o d...,-9.781631,1
6,855410,what is theraderm used for,5708492,Plants use food to make food and oxygen. The p...,0,True,1.293939,-10.460969,11.754909,Plants use food to make food and oxygen. The p...,-10.46097,4
7,583468,what carvedilol used for,752932,"Heart failure (HF), often referred to as conge...",0,True,0.484719,-11.17853,11.663249,"Heart failure (HF), often referred to as conge...",-11.17853,3
8,583468,what carvedilol used for,7203107,Angiotensin II receptor blockers (ARBs) are a ...,0,True,1.982978,-9.657723,11.640701,Angiotensin II receptor blockers (ARBs) are a ...,-9.657728,0
9,855410,what is theraderm used for,941543,The ability of computer systems to exchange in...,0,True,1.022547,-10.617761,11.640307,The ability of computer systems to exchange in...,-10.617762,5


In [45]:
top_k_data_perturbed_text_field = top_k_data.copy()
top_k_data_perturbed_text_field['text'] = top_k_data_perturbed_text_field['perturbed_text']
model_pyterrier.transform(top_k_data_perturbed_text_field.head(10)).sort_index()

ELECTRA scoring: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 125.38record/s]


Unnamed: 0,qid,query,docno,text,relevance,perturbed,perturbed_score,original_score,score_delta,perturbed_text,score,rank
0,855410,what is theraderm used for,7128710,It is used to trap sunlight for the manufactur...,0,True,5.290899,-7.387448,12.678347,It is used to trap sunlight for the manufactur...,5.290899,0
1,855410,what is theraderm used for,3276027,"A mental grouping of similar things, events, a...",0,True,2.807054,-9.529633,12.336688,"A mental grouping of similar things, events, a...",2.807057,2
2,855410,what is theraderm used for,8751749,": to add moisture to (something, such as a per...",0,True,2.844434,-9.467996,12.312429,": to add moisture to (something, such as a per...",2.844433,1
3,855410,what is theraderm used for,2945339,1 used to form adjectives Resembling or imitat...,0,True,2.434965,-9.695806,12.130772,1 used to form adjectives Resembling or imitat...,2.434964,3
4,583468,what carvedilol used for,129686,Bottles of eau de toilette. Eau de toilette (F...,0,True,1.873736,-10.083253,11.956989,Bottles of eau de toilette. Eau de toilette (F...,1.873736,2
5,583468,what carvedilol used for,129688,Eau de toilette. Eau de toilette (French: [o d...,0,True,2.034735,-9.781629,11.816364,Eau de toilette. Eau de toilette (French: [o d...,2.034734,0
6,855410,what is theraderm used for,5708492,Plants use food to make food and oxygen. The p...,0,True,1.293939,-10.460969,11.754909,Plants use food to make food and oxygen. The p...,1.293938,4
7,583468,what carvedilol used for,752932,"Heart failure (HF), often referred to as conge...",0,True,0.484719,-11.17853,11.663249,"Heart failure (HF), often referred to as conge...",0.48472,3
8,583468,what carvedilol used for,7203107,Angiotensin II receptor blockers (ARBs) are a ...,0,True,1.982978,-9.657723,11.640701,Angiotensin II receptor blockers (ARBs) are a ...,1.982975,1
9,855410,what is theraderm used for,941543,The ability of computer systems to exchange in...,0,True,1.022547,-10.617761,11.640307,The ability of computer systems to exchange in...,1.022549,5


In [47]:
electra_tokenizer = AutoTokenizer.from_pretrained('google/electra-base-discriminator')



In [48]:
tokenized = electra_tokenizer(top_k_data_perturbed_text_field.head(10)['query'].values.tolist(), top_k_data_perturbed_text_field.head(10)['text'].values.tolist(), return_tensors='pt', padding=True, truncation=True)

In [49]:
electra_tokenizer.batch_decode(tokenized.input_ids)

['[CLS] what is theraderm used for [SEP] it is used to trap sunlight for the manufacturing of food. theraderm [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]',
 '[CLS] what is theraderm used for [SEP] a mental grouping of similar things, events, and people that is used to remember and understand what things are, what they mean, and what categories or groups they belong to. theraderm [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]',
 "[CLS] what is theraderm used for [SEP] : to add moisture to ( so

In [50]:
model._forward(tokenized.input_ids, tokenized.attention_mask)[:, 1]

tensor([4.9900, 2.2244, 2.2152, 1.9446, 1.8157, 2.0975, 0.9710, 0.3069, 1.6630,
        0.9523], device='cuda:0')

In [73]:
model_hf(tokenized.input_ids.to('cuda'), tokenized.attention_mask.to('cuda')).logits[:, 1].cpu().detach().numpy()

array([4.990023  , 2.2244287 , 2.2151794 , 1.9446223 , 1.8156813 ,
       2.097502  , 0.97096324, 0.30694097, 1.6629645 , 0.9523069 ],
      dtype=float32)

In [77]:
tokenized['token_type_ids']

tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

: 

In [74]:
tokenized_cuda = {k: v.to('cuda') for k, v in tokenized.items()}
model_hf(**tokenized_cuda).logits[:, 1].cpu().detach().numpy()

array([5.2908993 , 2.807056  , 2.844432  , 2.434963  , 1.8737373 ,
       2.0347333 , 1.29394   , 0.48471978, 1.9829744 , 1.022547  ],
      dtype=float32)

In [62]:
import more_itertools

inp = top_k_data_perturbed_text_field.head(10).copy()
batch_size = 10
tokeniser = AutoTokenizer.from_pretrained('google/electra-base-discriminator')
device = 'cuda'
model = ElectraForSequenceClassification.from_pretrained('crystina-z/monoELECTRA_LCE_nneg31').eval().to(device)

scores = []
it = inp[['query', 'text']].itertuples(index=False)
with torch.no_grad():
    for chunk in more_itertools.chunked(it, batch_size):
        queries, texts = map(list, zip(*chunk))
        
        #print(queries, texts)
        
        inps = tokeniser(queries, texts, return_tensors='pt', padding=True, truncation=True)
        
        print(inps)
        
        print(tokeniser.batch_decode(inps['input_ids']))
        
        inps = {k: v.to(device) for k, v in inps.items()}
        # scores.append(model(**inps).logits[:, 1].cpu().detach().numpy())
        
        print(model(**inps).logits[:, 1].cpu().detach().numpy())
        print(model_hf(**inps).logits[:, 1].cpu().detach().numpy())
        
res = inp.assign(score=np.concatenate(scores))
res = res.sort_values(['qid', 'rank'])

res



['what is theraderm used for', 'what is theraderm used for', 'what is theraderm used for', 'what is theraderm used for', 'what carvedilol used for', 'what carvedilol used for', 'what is theraderm used for', 'what carvedilol used for', 'what carvedilol used for', 'what is theraderm used for'] ['It is used to trap sunlight for the manufacturing of food. theraderm', 'A mental grouping of similar things, events, and people that is used to remember and understand what things are, what they mean, and what categories or groups they belong to. theraderm', ": to add moisture to (something, such as a person's skin) theraderm", '1 used to form adjectives Resembling or imitating what is indicated: blitzoid/ cheesoid/ technoid/ zomboid.  used to form nouns Something resembling or imitating what is indicated: flakoid/ fusionoid/ Grouchoid/ klutzoid •This. theraderm', 'Bottles of eau de toilette. Eau de toilette (French: [o d(ə) twalɛt]) or toilet water is a lightly scented cologne used as a skin fre

ValueError: need at least one array to concatenate

In [4]:
patched_output = np.load('/mnt/qb/work/eickhoff/esx208/MechIR/data/patching_unnormalised/crystina-z-monoELECTRA_LCE_nneg31_cross_TFC1_4000_batch_size_20_patch_head.npy')
original_scores = np.load('/mnt/qb/work/eickhoff/esx208/MechIR/data/patching_unnormalised/crystina-z-monoELECTRA_LCE_nneg31_cross_TFC1_4000_batch_size_20_original_scores.npy')
perturbed_scores = np.load('/mnt/qb/work/eickhoff/esx208/MechIR/data/patching_unnormalised/crystina-z-monoELECTRA_LCE_nneg31_cross_TFC1_4000_batch_size_20_perturbed_scores.npy')

In [5]:
original_scores

array([ -8.083547 ,  -9.739095 ,  -9.171948 ,  -9.353897 ,  -9.783665 ,
        -8.906588 , -10.26425  , -10.923784 ,  -9.606563 , -10.259574 ,
        -9.995203 ,  -7.9573426,  -9.524867 , -10.705594 ,  -8.910649 ,
       -10.261069 ,  -8.192773 , -10.314049 ,  -9.618399 ,  -9.729961 ,
       -10.041317 ,  -8.41393  , -10.568574 ,  -8.703193 ,  -8.401725 ,
        -9.89158  ,  -9.380322 ,  -9.223088 , -10.513544 ,  -7.60634  ],
      dtype=float32)

In [6]:
perturbed_scores

array([4.9900246 , 2.2244265 , 2.2151825 , 1.9446224 , 1.815684  ,
       2.0975008 , 0.9709625 , 0.30694348, 1.6629642 , 0.952305  ,
       0.6680316 , 3.1431413 , 1.8378351 , 0.5786478 , 2.0614436 ,
       0.72059363, 2.5849004 , 0.24436995, 0.72003436, 1.3643235 ,
       1.1002392 , 2.4727538 , 0.38907465, 2.2115638 , 2.3947275 ,
       0.6379521 , 1.0395538 , 0.8764454 , 0.52425456, 3.337343  ],
      dtype=float32)

In [7]:
patched_output

array([[[ -8.190526 ,  -8.078038 ,  -8.147719 , ...,  -8.317542 ,
          -7.584893 ,  -8.143536 ],
        [ -8.167595 ,  -8.049901 ,  -8.069824 , ...,  -8.078355 ,
          -8.069011 ,  -8.08879  ],
        [ -8.042445 ,  -8.1173   ,  -8.072074 , ...,  -8.074381 ,
          -8.100089 ,  -8.0864525],
        ...,
        [ -8.130076 ,  -7.899965 ,  -8.25861  , ...,  -8.07823  ,
          -8.061559 ,  -8.162735 ],
        [ -7.2911386,  -6.9961653,  -4.8896236, ...,  -7.8796406,
          -8.389031 ,  -7.8989377],
        [ -6.7378535,  -7.2101526,  -6.921031 , ...,  -6.833537 ,
          -6.69362  ,  -6.758792 ]],

       [[ -9.782081 ,  -9.692332 ,  -9.76707  , ...,  -9.7749605,
          -9.614614 ,  -9.725783 ],
        [ -9.816423 ,  -9.707149 ,  -9.732882 , ...,  -9.73869  ,
          -9.73808  ,  -9.743195 ],
        [ -9.745952 ,  -9.745925 ,  -9.751831 , ...,  -9.723369 ,
          -9.691272 ,  -9.736196 ],
        ...,
        [ -9.772766 ,  -9.515644 ,  -9.598536 , ...,  