In [1]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification
)

# 1. Hub checkpoint id; the tokenizer and the model are both loaded from it.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"

# 2. Load the tokenizer that belongs to the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# 3. Load the model together with its sequence-classification head,
#    i.e. a model specialised for one particular task.
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

# 4. Input sentences.
sentences = [
    "I've been waiting for a HuggingFace course my whole life.",
    "I hate this so much!",
]

# 5. Preprocessing (text -> token IDs -> tensors).
tokenizer_options = {
    "padding": True,         # bring every sentence in the batch to a common length
    "truncation": True,
    "return_tensors": "pt",  # return PyTorch tensors
}
inputs = tokenizer(sentences, **tokenizer_options)

# Visual divider printed between the sections of the output.
separator = "==========================================================================="

print("tokenizer 출력 값")
print(inputs)
print(separator)

# 6. Model inference; gradients are not needed for a forward-only pass.
with torch.no_grad():
    outputs = model(**inputs)

# 7. Post-processing: logits -> probabilities (softmax over the last axis).
probs = outputs.logits.softmax(dim=-1)

print("probs 출력 값")
print(probs)
print(separator)

# 8. Mapping from class index to human-readable label.
id2label = model.config.id2label

# 9. Report every label's probability for each sentence.
for sentence, sentence_probs in zip(sentences, probs):
    print(f"\nSentence: {sentence}")
    for label_id, score in enumerate(sentence_probs.tolist()):
        print(f"  {id2label[label_id]}: {score:.4f}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer 출력 값
{'input_ids': tensor([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,
          2607,  2026,  2878,  2166,  1012,   102],
        [  101,  1045,  5223,  2023,  2061,  2172,   999,   102,     0,     0,
             0,     0,     0,     0,     0,     0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])}
probs 출력 값
tensor([[4.0195e-02, 9.5980e-01],
        [9.9946e-01, 5.4418e-04]])

Sentence: I've been waiting for a HuggingFace course my whole life.
  NEGATIVE: 0.0402
  POSITIVE: 0.9598

Sentence: I hate this so much!
  NEGATIVE: 0.9995
  POSITIVE: 0.0005
