# Behind the pipeline (PyTorch)

Install the Transformers, Datasets, and Evaluate libraries to run this notebook.

In [None]:
!pip install datasets evaluate transformers[sentencepiece]

In [None]:
# The high-level `pipeline` API performs the whole process in one call; the cells
# below reproduce it by hand in three steps (tokenize, run the model, postprocess).
from transformers import pipeline

# Name the checkpoint explicitly instead of relying on the task's implicit default,
# so the result is reproducible and uses the same checkpoint that is loaded
# manually later in this notebook.
classifier = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
classifier(
    [
        "I've been waiting for a HuggingFace course my whole life.",
        "I hate this so much!",
    ]
)

[{'label': 'POSITIVE', 'score': 0.9598047137260437},
 {'label': 'NEGATIVE', 'score': 0.9994558095932007}]

In [7]:
from transformers import (
    AutoModel,  # not used in this cell; the following cell instantiates it
    AutoTokenizer,
)

# Step 1: turn the raw sentences into the tensor inputs the Transformer consumes
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
Raw_inputs = [
    "I've been waiting for a HuggingFace course my whole life.",
    "I hate this so much!",
]

# Load the tokenizer that belongs to this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# padding=True pads the shorter sentence to the length of the longest one in the
# batch (see the trailing zeros and the attention_mask in the printed result);
# return_tensors="pt" asks for PyTorch tensors.
inputs = tokenizer(Raw_inputs, padding=True, truncation=True, return_tensors="pt")
print(inputs)

{'input_ids': tensor([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,
          2607,  2026,  2878,  2166,  1012,   102],
        [  101,  1045,  5223,  2023,  2061,  2172,   999,   102,     0,     0,
             0,     0,     0,     0,     0,     0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])}


In [8]:
# Step 2: feed the tokenized batch through the Transformer.
# `AutoModel` and `checkpoint` come from the previous cell; `inputs` is the tokenizer output.
model = AutoModel.from_pretrained(checkpoint)

# Unpack the tokenizer's result so each entry (input_ids, attention_mask) is passed
# to the model as a keyword argument.
outputs = model(**inputs)

# One hidden-state vector per token: 2 sentences x 16 token positions x 768 features.
print(outputs.last_hidden_state.size())

torch.Size([2, 16, 768])


In [9]:
# A task-specific example: the same checkpoint, loaded through the
# sequence-classification class, whose output exposes `logits`.
from transformers import AutoModelForSequenceClassification

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"

# NOTE: `model` and `outputs` are rebound here, replacing the objects created in the previous cell.
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
outputs = model(**inputs)

# One row per input sentence, one column per label: 2 x 2.
print(outputs.logits.size())

torch.Size([2, 2])


In [11]:
# All Transformers models output logits: raw, unnormalized scores rather than
# probabilities (the values printed below fall outside the [0, 1] range).
print(outputs.logits)

tensor([[-1.5607,  1.6123],
        [ 4.1692, -3.3464]], grad_fn=<AddmmBackward0>)


In [12]:
# Step 3: postprocessing -- turn the logits into probabilities
import torch

# The model returns logits rather than probabilities because the loss function used
# for training will generally fuse the last activation function (such as SoftMax)
# with the actual loss function (such as cross entropy).
# Softmax over the last dimension yields recognizable probability scores per sentence.
predictions = torch.softmax(outputs.logits, dim=-1)
print(predictions)

tensor([[4.0195e-02, 9.5980e-01],
        [9.9946e-01, 5.4418e-04]], grad_fn=<SoftmaxBackward0>)


In [14]:
# To get the labels corresponding to each position, we can inspect the id2label attribute of the model config (more on this in the next section).
# Read together with the probabilities printed above: column 0 is NEGATIVE and column 1 is POSITIVE,
# so the first sentence is POSITIVE (~0.9598) and the second is NEGATIVE (~0.9995),
# matching the result of the pipeline call at the top of the notebook.
model.config.id2label

{0: 'NEGATIVE', 1: 'POSITIVE'}