In [2]:
from nnsight import LanguageModel
from torch.nn import functional as F

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
model = LanguageModel('EleutherAI/pythia-70m-deduped', dispatch=True)

prompt = "The man"
pad_length = 10
unpadded = model.tokenizer(prompt, return_tensors="pt", padding=False).input_ids
padded = F.pad(unpadded, (0, pad_length), value=model.tokenizer.pad_token_id)

print(f'padded tensor: {padded}')
print(f'unpadded tensor: {unpadded}')

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


padded tensor: tensor([[510, 637,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0]])
unpadded tensor: tensor([[510, 637]])


In [4]:
def load_submodule(model, submodule_str):
    if "." not in submodule_str:
        return getattr(model, submodule_str)
    
    submodules = submodule_str.split(".")
    curr_module = None
    for module in submodules:
        if module == "model":
            continue
        if not curr_module:
            curr_module = getattr(model, module)
            continue
        curr_module = getattr(curr_module, module)
    return curr_module

submod_name = 'model.gpt_neox.layers.5.mlp.dense_4h_to_h'
submodule = load_submodule(model, submod_name)

In [5]:
with model.invoke(padded, fwd_args={'inference' : True}) as invoker:
    mlp_act_pad = submodule.output.save()

with model.invoke(unpadded, fwd_args={'inference' : True}) as invoker:
    mlp_act_unpad = submodule.output.save()

You're using a GPTNeoXTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


In [6]:
# Difference in mlp activations for token position
print(f'Difference in mlp activations for token "{model.tokenizer.decode(padded[0, 1])}" at position 1.')

mlp_abs_diff = (mlp_act_pad.value[0, 1, :] - mlp_act_unpad.value[0, 1, :]).abs().sum()
rel_diff = mlp_abs_diff / mlp_act_unpad.value[0, 1, :].sum()
print(f'shapes: {mlp_act_pad.value.shape}, {mlp_act_unpad.value.shape}')
print(f'abs diff: {mlp_abs_diff}')
print(f'rel diff: {rel_diff}')

Difference in mlp activations for token " man" at position 1.
shapes: torch.Size([1, 12, 512]), torch.Size([1, 2, 512])
abs diff: 0.0027733445167541504
rel diff: 0.0002457068476360291
