Running this notebook inside an isolated environment (for example, one managed by conda) is recommended. It last ran without issues on Python 3.8.18.

In [2]:
!pip install --upgrade accelerate transformers



In [3]:
from transformers import AutoTokenizer, DistilBertModel
import torch

# The tokenizer and the encoder are loaded from the same pretrained checkpoint name,
# so the name is spelled once and reused for both.
checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = DistilBertModel.from_pretrained(checkpoint)

  from .autonotebook import tqdm as notebook_tqdm
Downloading (…)okenizer_config.json: 100%|██████████| 28.0/28.0 [00:00<00:00, 17.5kB/s]
Downloading (…)lve/main/config.json: 100%|██████████| 483/483 [00:00<00:00, 540kB/s]
Downloading (…)solve/main/vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 4.79MB/s]
Downloading (…)/main/tokenizer.json: 100%|██████████| 466k/466k [00:00<00:00, 10.5MB/s]
Downloading model.safetensors: 100%|██████████| 268M/268M [00:15<00:00, 17.5MB/s] 


In [41]:
# Three short sentences in which "can" and "fish" each occur three times, so the
# per-token vectors of the same word can be compared across contexts.
text = "We can fish in this tuna factory. On my boat, we can fish with rods. We can buy fish at the market."
inputs = tokenizer(text, return_tensors="pt")

# The hidden states are only inspected, never back-propagated through, so the
# forward pass runs under no_grad to skip building the autograd graph.
with torch.no_grad():
    outputs = model(**inputs)

# One vector per input token, taken from the model's last_hidden_state output.
last_hidden_states = outputs.last_hidden_state

In [42]:
# Print, in order: the tokenizer output, the full model output, and the shape of
# the last hidden state (last_hidden_states = outputs.last_hidden_state).
for shown in (inputs, outputs, last_hidden_states.shape):
    print(shown)

{'input_ids': tensor([[  101,  2057,  2064,  3869,  1999,  2023, 24799,  4713,  1012,  2006,
          2026,  4049,  1010,  2057,  2064,  3869,  2007, 19485,  1012,  2057,
          2064,  4965,  3869,  2012,  1996,  3006,  1012,   102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1]])}
BaseModelOutput(last_hidden_state=tensor([[[ 0.3900, -0.1034, -0.0069,  ..., -0.1726,  0.6129,  0.1126],
         [ 0.5231, -0.2701, -0.4278,  ...,  0.0057,  1.2746, -0.3986],
         [ 0.6302,  0.1093,  0.0641,  ..., -0.5545,  0.3978,  0.0500],
         ...,
         [ 0.5518, -0.3305,  0.2988,  ..., -0.0380,  0.1218, -0.7721],
         [ 0.7416, -0.2597, -0.3573,  ...,  0.1659, -0.0477, -0.6666],
         [ 0.5474,  0.1444,  0.1064,  ..., -0.1008,  0.1836, -0.4818]]],
       grad_fn=<NativeLayerNormBackward0>), hidden_states=None, attentions=None)
torch.Size([1, 28, 768])


In [43]:
# The batch holds a single sequence; take its row of token ids once.
token_ids = inputs["input_ids"][0]

print("Length of inputs string: ", inputs['input_ids'].shape)

# One line per position: index, token id, and the text that id decodes to.
for index, token_id in enumerate(token_ids):
    print(f'{index}: {token_id}: {tokenizer.decode([token_id])}')

Length of inputs string:  torch.Size([1, 28])
0: 101: [CLS]
1: 2057: we
2: 2064: can
3: 3869: fish
4: 1999: in
5: 2023: this
6: 24799: tuna
7: 4713: factory
8: 1012: .
9: 2006: on
10: 2026: my
11: 4049: boat
12: 1010: ,
13: 2057: we
14: 2064: can
15: 3869: fish
16: 2007: with
17: 19485: rods
18: 1012: .
19: 2057: we
20: 2064: can
21: 4965: buy
22: 3869: fish
23: 2012: at
24: 1996: the
25: 3006: market
26: 1012: .
27: 102: [SEP]


In [40]:
# Sanity check for the positions compared in the later cells: every listed index
# must hold the vocabulary id of the expected word.
token_ids = inputs["input_ids"][0]

# "can" sits at index 2 (verb), 14 (modal) and 20 (modal).
can_id = tokenizer.encode("can", add_special_tokens=False)[0]
for position in (2, 14, 20):
    assert token_ids[position] == can_id

# "fish" sits at index 3 (noun), 15 (verb) and 22 (noun).
fish_id = tokenizer.encode("fish", add_special_tokens=False)[0]
for position in (3, 15, 22):
    assert token_ids[position] == fish_id

In [54]:
# Compare the contextual vectors of the three "can" tokens with cosine similarity.
# The vectors are 1-D, so the similarity is taken along dim 0.
cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)

# Positions of "can" in the sequence: 2 (verb), 14 (first modal), 20 (second modal).
can_verb = last_hidden_states[0, 2]
can_modal1 = last_hidden_states[0, 14]
can_modal2 = last_hidden_states[0, 20]

# 1st vs 2nd "can"
cos_sim_can1 = cos(can_verb, can_modal1)
print("Cos similarity, verb, modal1: ", cos_sim_can1)

# 2nd vs 3rd "can"
cos_sim_can2 = cos(can_modal1, can_modal2)
print("Cos similarity, modal1, modal2: ", cos_sim_can2)

# 1st vs 3rd "can"
cos_sim_can3 = cos(can_verb, can_modal2)
print("Cos similarity, verb, modal2: ", cos_sim_can3)

# Hypothesis: the verb/modal pair (1st vs 2nd) is less similar than the
# modal/modal pair (2nd vs 3rd).
print("Hypothesis is true" if cos_sim_can1 < cos_sim_can2 else "Hypothesis is false")

Cos similarity, verb, modal:  tensor(0.9615, grad_fn=<SumBackward1>)
Cos similarity, modal, modal:  tensor(0.9690, grad_fn=<SumBackward1>)
Cos similarity, verb, modal:  tensor(0.9540, grad_fn=<SumBackward1>)
Hypothesis is true


In [55]:
# Compare the contextual vectors of the three "fish" tokens, reusing the `cos`
# cosine-similarity module created in an earlier cell.
# Positions of "fish" in the sequence: 3 (first noun), 15 (verb), 22 (second noun).
fish_noun1 = last_hidden_states[0, 3]
fish_verb = last_hidden_states[0, 15]
fish_noun2 = last_hidden_states[0, 22]

# 1st vs 2nd "fish"
cos_sim_fish1 = cos(fish_noun1, fish_verb)
print("Cos similarity, noun1, verb: ", cos_sim_fish1)

# 2nd vs 3rd "fish"
cos_sim_fish2 = cos(fish_verb, fish_noun2)
print("Cos similarity, verb, noun2: ", cos_sim_fish2)

# 1st vs 3rd "fish"
cos_sim_fish3 = cos(fish_noun1, fish_noun2)
print("Cos similarity, noun1, noun2: ", cos_sim_fish3)

# Hypothesis: the noun/verb pair (1st vs 2nd) is less similar than the
# noun/noun pair (1st vs 3rd).
print(
    "Hypothesis is true"
    if cos_sim_fish1 < cos_sim_fish3
    else "Hypothesis is false, suggesting that the model thinks that the first 2 'fish' are verbs."
)

Cos similarity, noun1, verb:  tensor(0.9604, grad_fn=<SumBackward1>)
Cos similarity, verb, noun2:  tensor(0.8949, grad_fn=<SumBackward1>)
Cos similarity, noun1, noun2:  tensor(0.8968, grad_fn=<SumBackward1>)
Hypothesis is false, suggesting that the model thinks that the first 2 'fish' are verbs.
