In [33]:
# Following the Hugging Face NLP Course (chapter 2, section 5):
# https://huggingface.co/learn/nlp-course/chapter2/5

In [34]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [36]:
# Identifier of the pretrained checkpoint; the tokenizer and the model
# loaded further down are both created from this same name.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"

In [38]:
# Load the tokenizer and the sequence-classification model from the same
# checkpoint name so the ids produced by one match what the other expects.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

In [39]:
# Example input used by the tokenization cells that follow.
sentence = "I've been waiting for so long!"

In [40]:
# Step 1 of manual tokenization: split the raw string into token strings.
# The text comes back lower-cased and the apostrophe becomes its own token.
tokens = tokenizer.tokenize(sentence)
tokens

['i', "'", 've', 'been', 'waiting', 'for', 'so', 'long', '!']

In [41]:
# Step 2 of manual tokenization: map each token string to its integer
# vocabulary id (one id per token, in the same order).
ids = tokenizer.convert_tokens_to_ids(tokens)
ids

[1045, 1005, 2310, 2042, 3403, 2005, 2061, 2146, 999]

In [42]:
# Wrap the flat list of ids in a tensor. Because `ids` is a flat list,
# the result is 1-D (no batch dimension yet).
input_ids = torch.tensor(ids)
input_ids

tensor([1045, 1005, 2310, 2042, 3403, 2005, 2061, 2146,  999])

In [46]:
# This fails because we passed a single un-batched sequence (a 1-D tensor),
# while the model expects an extra leading dimension.
# The exception is caught and printed so the notebook keeps running.
try:
    model(input_ids)
except Exception as err:
    print(err)

too many indices for tensor of dimension 1


In [49]:
# Calling the tokenizer directly does the whole pipeline in one go and,
# with return_tensors="pt", hands back PyTorch tensors that already carry
# a leading batch dimension. Compared with the manual route above, the ids
# are also wrapped in two extra ids (101 at the start, 102 at the end) —
# presumably the model's special start/end tokens.
tokenized_inputs = tokenizer(sentence, return_tensors="pt")
tokenized_inputs['input_ids']

tensor([[ 101, 1045, 1005, 2310, 2042, 3403, 2005, 2061, 2146,  999,  102]])

In [51]:
# Second attempt: give the id sequence a leading batch axis so it becomes
# a batch containing exactly one sentence.
input_ids = torch.tensor(ids).unsqueeze(0)
input_ids

tensor([[1045, 1005, 2310, 2042, 3403, 2005, 2061, 2146,  999]])

In [53]:
# With the batch dimension in place the forward pass succeeds; display the
# full output object the model returns.
output = model(input_ids)
output

SequenceClassifierOutput(loss=None, logits=tensor([[ 1.4705, -1.0068]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [54]:
# Only the raw class scores: one row per input sequence, one column per class.
output.logits

tensor([[ 1.4705, -1.0068]], grad_fn=<AddmmBackward0>)

In [57]:
# PADDING
# Every row of a batch must have the same length, so the shorter
# sequence is filled up at the end with a padding id.
# NOTE(review): 100 is a hand-picked value; the tokenizer exposes its own
# padding id as `tokenizer.pad_token_id` — confirm which one is intended.
padding_id = 100
full_row = [200, 200, 200]
padded_row = [200, 200, padding_id]
batched_ids = [full_row, padded_row]
model(torch.tensor(batched_ids)).logits

tensor([[ 1.5694, -1.3895],
        [ 0.9907, -0.9139]], grad_fn=<AddmmBackward0>)

In [59]:
# ...but the padded row does not score like the same sequence sent on its
# own without padding. Run each sequence individually for comparison:
for sequence_ids in ([[200, 200, 200]], [[200, 200]]):
    print(model(torch.tensor(sequence_ids)).logits)

tensor([[ 1.5694, -1.3895]], grad_fn=<AddmmBackward0>)
tensor([[ 0.5803, -0.4125]], grad_fn=<AddmmBackward0>)


In [61]:
# To make the padded row score like the unpadded sequence, pass an
# attention mask with the ids: 1 marks a real position, 0 marks the
# padded position that should be ignored.
attention_mask = [[1, 1, 1], [1, 1, 0]]
model(
    input_ids=torch.tensor(batched_ids),
    attention_mask=torch.tensor(attention_mask),
).logits

tensor([[ 1.5694, -1.3895],
        [ 0.5803, -0.4125]], grad_fn=<AddmmBackward0>)