In [20]:
from transformers import AutoTokenizer
from transformers import AutoModel

In [21]:
# Hub identifier of the pretrained weights; the same string is passed to both
# loaders below so the tokenizer and the model come from one checkpoint.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
# Tokenizer that turns raw sentences into the tensors the model is called with.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# Model loaded through AutoModel: its output is read via `last_hidden_state`
# in a later cell. NOTE: `model` is rebound to a classification model further down.
model = AutoModel.from_pretrained(checkpoint)

In [22]:
# Sample sentences that are tokenized together as a single batch.
raw_inputs = [
    "I've been waiting for this opportunity to learn how to code my whole life!",
    "I hate this kind of company that only focus on profit.",
    "I can't believe that I'm gonna study at an AI bachelor course!",
]

# padding=True pads shorter sentences to the longest one in the batch (the
# padded positions get attention_mask == 0); return_tensors="pt" requests
# PyTorch tensors instead of plain Python lists.
inputs = tokenizer(
    raw_inputs,
    return_tensors="pt",
    padding=True,
    truncation=True,
)
print(inputs)

{'input_ids': tensor([[ 101, 1045, 1005, 2310, 2042, 3403, 2005, 2023, 4495, 2000, 4553, 2129,
         2000, 3642, 2026, 2878, 2166,  999,  102],
        [ 101, 1045, 5223, 2023, 2785, 1997, 2194, 2008, 2069, 3579, 2006, 5618,
         1012,  102,    0,    0,    0,    0,    0],
        [ 101, 1045, 2064, 1005, 1056, 2903, 2008, 1045, 1005, 1049, 6069, 2817,
         2012, 2019, 9932, 5065, 2607,  999,  102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}


In [23]:
# Forward pass: the tokenizer's result is unpacked into keyword arguments.
outputs = model(**inputs)
# Size of the final hidden states; the first two dimensions match the
# 3 sentences and the 19 (padded) token ids per sentence produced above.
print(outputs.last_hidden_state.size())

torch.Size([3, 19, 768])


In [24]:
from transformers import AutoModelForSequenceClassification

# Same checkpoint string as the one used earlier in the notebook.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
# NOTE: this rebinds `model`, replacing the AutoModel instance loaded earlier;
# cells below rely on this version (they read `.logits` and `config.id2label`).
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
# Re-run the already-tokenized batch through the classification model.
outputs = model(**inputs)

In [25]:
# Expect one row per input sentence and one column per label.
print(outputs.logits.size())

torch.Size([3, 2])


In [26]:
# Raw scores, not probabilities yet: they are passed through softmax in the next cell.
print(outputs.logits)

tensor([[-2.8511,  2.8940],
        [ 4.2973, -3.5317],
        [ 2.2624, -1.9607]], grad_fn=<AddmmBackward0>)


In [27]:
import torch

# Normalize the logits over the last (label) dimension so that each row
# becomes a probability distribution summing to 1.
predictions = outputs.logits.softmax(dim=-1)
print(predictions)

tensor([[3.1883e-03, 9.9681e-01],
        [9.9960e-01, 3.9789e-04],
        [9.8556e-01, 1.4441e-02]], grad_fn=<SoftmaxBackward0>)


In [30]:
# Pick the most probable label for every sentence with a single reduction over
# the label dimension, instead of calling argmax and re-indexing row by row.
# `max(dim=-1)` returns the winning probabilities and their column indices.
confidences, class_indices = predictions.max(dim=-1)

# Walk sentences and results in lockstep; `.tolist()` converts the tensors to
# plain Python numbers, so the index can be used as an `id2label` key and the
# probability can be formatted directly.
for sentence, predicted_class_index, confidence in zip(
    raw_inputs, class_indices.tolist(), confidences.tolist()
):
    print(f"Sentence: {sentence}")
    print(f"Prediction: {model.config.id2label[predicted_class_index]}")
    print(f"Confidence: {confidence:.2f}")  # two decimal places
    print("---")

Sentence: I've been waiting for this opportunity to learn how to code my whole life!
Prediction: POSITIVE
Confidence: 1.00
---
Sentence: I hate this kind of company that only focus on profit.
Prediction: NEGATIVE
Confidence: 1.00
---
Sentence: I can't believe that I'm gonna study at an AI bachelor course!
Prediction: NEGATIVE
Confidence: 0.99
---
