<a href="https://colab.research.google.com/github/natsakh/IAD/blob/main/Pr_7/7_6_HF_without_pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

# Run on the GPU when CUDA is available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Device:", device)

Device: cpu


In [6]:
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
# This checkpoint is DistilBERT fine-tuned on SST-2 (Stanford Sentiment Treebank).
# SST-2 is a binary sentiment classification task: positive or negative.

# Load the tokenizer and the sequence-classification model from the same
# checkpoint name so the token ids match what the model expects.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

In [7]:
texts = [
    "I love transformers!",
    "This movie is terrible.",
    "Electromagnetics is an important part of modern engineering.",
]

# --- Tokenization ---
# Encode the whole list as one padded batch of PyTorch tensors.
inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")

print("=== INPUT IDS ===")
print(inputs["input_ids"])
print("\n=== ATTENTION MASK ===")
print(inputs["attention_mask"])

# Show the tokens behind each row of ids, one sentence at a time.
for number, (sentence, row_ids) in enumerate(zip(texts, inputs["input_ids"]), start=1):
    row_tokens = tokenizer.convert_ids_to_tokens(row_ids)
    print(f"\nSentence {number}: {sentence}")
    print("Tokens:", row_tokens)


=== INPUT IDS ===
tensor([[  101,  1045,  2293, 19081,   999,   102,     0,     0,     0,     0,
             0,     0],
        [  101,  2023,  3185,  2003,  6659,  1012,   102,     0,     0,     0,
             0,     0],
        [  101, 17225,  2015,  2003,  2019,  2590,  2112,  1997,  2715,  3330,
          1012,   102]])

=== ATTENTION MASK ===
tensor([[1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])

Sentence 1: I love transformers!
Tokens: ['[CLS]', 'i', 'love', 'transformers', '!', '[SEP]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]']

Sentence 2: This movie is terrible.
Tokens: ['[CLS]', 'this', 'movie', 'is', 'terrible', '.', '[SEP]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]']

Sentence 3: Electromagnetics is an important part of modern engineering.
Tokens: ['[CLS]', 'electromagnetic', '##s', 'is', 'an', 'important', 'part', 'of', 'modern', 'engineering', '.', '[SEP]']


In [8]:
# --- Model forward ---
# `device` is chosen in the first cell, but nothing is placed on it automatically.
# Move the model and a copy of the batch there so a GPU runtime is actually used.
model.to(device)
model.eval()  # inference mode (dropout off); explicit in case the mode was changed earlier
batch = {name: tensor.to(device) for name, tensor in inputs.items()}

with torch.no_grad():  # gradients are not needed for prediction
    outputs = model(**batch)

# Copy the logits back to the CPU so the printed tensors look the same on any device.
logits = outputs.logits.cpu()
print("\n=== RAW LOGITS ===")
print(logits)

# --- Softmax to probabilities ---
# Normalize over the class dimension so each row sums to 1.
probs = F.softmax(logits, dim=-1)
print("\n=== PROBABILITIES ===")
print(probs)

# --- Final predictions (label + score) ---
# id2label maps a class index to its label name; the score is the probability
# of the predicted (argmax) class.
id2label = model.config.id2label
pred_ids = probs.argmax(dim=-1)

for i, (text, cls_id, prob_vec) in enumerate(zip(texts, pred_ids, probs), start=1):
    label = id2label[cls_id.item()]
    score = prob_vec[cls_id].item()
    print(f"\nSentence {i} prediction")
    print(f"  text  : {text}")
    print(f"  label : {label}")
    print(f"  score : {score:.4f}")


=== RAW LOGITS ===
tensor([[-4.1216,  4.4303],
        [ 4.5200, -3.6825],
        [-3.4417,  3.5642]])

=== PROBABILITIES ===
tensor([[1.9314e-04, 9.9981e-01],
        [9.9973e-01, 2.7388e-04],
        [9.0567e-04, 9.9909e-01]])

Sentence 1 prediction
  text  : I love transformers!
  label : POSITIVE
  score : 0.9998

Sentence 2 prediction
  text  : This movie is terrible.
  label : NEGATIVE
  score : 0.9997

Sentence 3 prediction
  text  : Electromagnetics is an important part of modern engineering.
  label : POSITIVE
  score : 0.9991
