# Behind the pipeline (PyTorch)

In [4]:
# Instalasi library yang diperlukan
!pip install datasets evaluate transformers[sentencepiece]
# Menggunakan pipeline untuk analisis sentimen
from transformers import pipeline




In [5]:
classifier = pipeline("sentiment-analysis")  # Membuat pipeline untuk analisis sentimen
# Menganalisis sentimen dari dua kalimat sekaligus
print(classifier(
    [
        "I've been waiting for a HuggingFace course my whole life.",  # Kalimat positif
        "I hate this so much!",  # Kalimat negatif
    ]
))


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cpu


[{'label': 'POSITIVE', 'score': 0.9598049521446228}, {'label': 'NEGATIVE', 'score': 0.9994558691978455}]


In [6]:
# Mengimpor tokenizer dari Transformers
from transformers import AutoTokenizer

# Model checkpoint yang digunakan
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
# Menginisialisasi tokenizer berdasarkan checkpoint model
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

In [7]:
# Input teks mentah
raw_inputs = [
    "I've been waiting for a HuggingFace course my whole life.",  # Kalimat positif
    "I hate this so much!",  # Kalimat negatif
]
# Mengubah teks mentah menjadi input yang dapat diproses oleh model
inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors="pt")  # Padding dan truncation untuk kesesuaian input
print(inputs)  # Menampilkan input yang telah di-tokenisasi

{'input_ids': tensor([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,
          2607,  2026,  2878,  2166,  1012,   102],
        [  101,  1045,  5223,  2023,  2061,  2172,   999,   102,     0,     0,
             0,     0,     0,     0,     0,     0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])}


In [8]:
# Mengimpor AutoModel
from transformers import AutoModel

# Menginisialisasi model untuk representasi umum
model = AutoModel.from_pretrained(checkpoint)

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

In [9]:
# Mendapatkan keluaran dari model menggunakan input yang telah di-tokenisasi
outputs = model(**inputs)
print(outputs.last_hidden_state.shape)  # Menampilkan dimensi hidden state terakhir


torch.Size([2, 16, 768])


In [10]:
# Mengimpor AutoModelForSequenceClassification
from transformers import AutoModelForSequenceClassification

# Menginisialisasi model untuk klasifikasi urutan
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
# Mendapatkan logits dari output model
outputs = model(**inputs)

In [11]:
print(outputs.logits.shape)  # Menampilkan dimensi logits

torch.Size([2, 2])


In [12]:
print(outputs.logits)  # Menampilkan nilai logits

tensor([[-1.5607,  1.6123],
        [ 4.1692, -3.3464]], grad_fn=<AddmmBackward0>)


In [13]:
# Mengimpor torch untuk melakukan operasi tensor
import torch

# Menghitung probabilitas dari logits menggunakan fungsi softmax
predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
print(predictions)  # Menampilkan probabilitas prediksi untuk setiap label

tensor([[4.0195e-02, 9.5980e-01],
        [9.9946e-01, 5.4418e-04]], grad_fn=<SoftmaxBackward0>)


In [14]:
# Menampilkan label yang sesuai dengan indeks kelas
print(model.config.id2label)  # Label output: {0: 'NEGATIVE', 1: 'POSITIVE'}


{0: 'NEGATIVE', 1: 'POSITIVE'}
