In [1]:
import torch
import transformers

  from .autonotebook import tqdm as notebook_tqdm


### Running Pipelines
Pipelines are the simplest way to use models out of the box in Transformers. In particular, the pipeline() function offers you a high-level abstraction over models in the Hugging Face Model Hub.

In [2]:
from transformers import pipeline

model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"
sentiment_classifier = pipeline(model=model_name)

text_input = "I'm really excited about using Hugging Face to run AI models!"
sentiment_classifier(text_input)


text_input = "I'm having a horrible day today."
sentiment_classifier(text_input)


text_input = "Most of the Earth is covered in water."
sentiment_classifier(text_input)


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


[{'label': 'neutral', 'score': 0.767055869102478}]

In [3]:
text_inputs = [
    "What a great time to be alive!",
    "How are you doing today?",
    "I'm in a horrible mood.",
]

sentiment_classifier(text_inputs)

[{'label': 'positive', 'score': 0.9838393926620483},
 {'label': 'neutral', 'score': 0.7096880078315735},
 {'label': 'negative', 'score': 0.9238165616989136}]

In [4]:
model_name = "MoritzLaurer/deberta-v3-large-zeroshot-v2.0"
zs_text_classifier = pipeline(model=model_name)

candidate_labels = [
     "Billing Issues",
     "Technical Support",
     "Account Information",
     "General Inquiry",
]

hypothesis_template = "This text is about {}"

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [5]:
customer_text = "My account was charged twice for a single order."
zs_text_classifier(
    customer_text,
    candidate_labels,
    hypothesis_template=hypothesis_template,
    multi_label=True
)

{'sequence': 'My account was charged twice for a single order.',
 'labels': ['Billing Issues',
  'General Inquiry',
  'Account Information',
  'Technical Support'],
 'scores': [0.9884459376335144,
  0.012550137005746365,
  0.008041927590966225,
  0.00021988489606883377]}

### Image Classifier from hugging face

In [None]:
!pip3 install Pillow

: 

In [None]:
image_classifier = pipeline(task="image-classification")

In [6]:
predictions = image_classifier(["llamas.png"])
len(predictions[0])

NameError: name 'image_classifier' is not defined

image_classifier = pipeline(task="image-classification")

In [None]:
predictions = image_classifier(["llamas.png"])
len(predictions[0])




In [None]:

predictions[0][0]


In [None]:
predictions[0][1]


### Let's use the Class Behind the hoods

In [7]:
from transformers import AutoTokenizer

model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"
tokenizer = AutoTokenizer.from_pretrained(model_name)

input_text = "I really want to go to an island. Do you want to go?"
encoded_input = tokenizer(input_text)
encoded_input["input_ids"]


[0, 100, 269, 236, 7, 213, 7, 41, 2946, 4, 1832, 47, 236, 7, 213, 116, 2]

In [None]:
tokenizer.convert_ids_to_tokens(7)  

: 

In [None]:
tokenizer.convert_ids_to_tokens(2946)


: 

In [8]:
tokenizer.convert_ids_to_tokens(encoded_input["input_ids"])

['<s>',
 'I',
 'Ġreally',
 'Ġwant',
 'Ġto',
 'Ġgo',
 'Ġto',
 'Ġan',
 'Ġisland',
 '.',
 'ĠDo',
 'Ġyou',
 'Ġwant',
 'Ġto',
 'Ġgo',
 '?',
 '</s>']

In [9]:
tokenizer.vocab_size

50265

In [10]:
new_tokens = [
    "whaleshark",
    "unicorn",
]

tokenizer.convert_tokens_to_ids(new_tokens)
tokenizer.convert_ids_to_tokens(3)


tokenizer.add_tokens(new_tokens)


tokenizer.convert_tokens_to_ids(new_tokens)


[50265, 50266]

In [11]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification
)

model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
model

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
         

In [12]:
model.roberta.embeddings

RobertaEmbeddings(
  (word_embeddings): Embedding(50265, 768, padding_idx=1)
  (position_embeddings): Embedding(514, 768, padding_idx=1)
  (token_type_embeddings): Embedding(1, 768)
  (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (dropout): Dropout(p=0.1, inplace=False)
)

In [13]:
text = "I love using the Transformers library!"
encoded_input = tokenizer(text, return_tensors="pt")

embedding_tensor = model.roberta.embeddings(encoded_input["input_ids"])
embedding_tensor.shape

torch.Size([1, 9, 768])

In [14]:
embedding_tensor

tensor([[[ 0.0633, -0.0212,  0.0193,  ..., -0.0826, -0.0200, -0.0056],
         [ 0.1453,  0.3706, -0.0322,  ...,  0.0359, -0.0750,  0.0376],
         [ 0.2900, -0.0814,  0.0955,  ...,  0.3262, -0.0559,  0.0819],
         ...,
         [ 0.1059, -0.5638, -0.2397,  ..., -0.2077, -0.0784, -0.0951],
         [ 0.1675, -0.3334,  0.0130,  ..., -0.4127,  0.0121,  0.0215],
         [ 0.1316, -0.0281, -0.0168,  ...,  0.1175,  0.0908, -0.0614]]],
       grad_fn=<NativeLayerNormBackward0>)

In [15]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoConfig
)

model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"

config = AutoConfig.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

text = "I love using the Transformers library!"
encoded_input = tokenizer(text, return_tensors="pt")

with torch.no_grad():
    output = model(**encoded_input)

scores = output.logits[0]
probabilities = torch.softmax(scores, dim=0)

for i, probability in enumerate(probabilities):
    label = config.id2label[i]
    print(f"{i+1}) {label}: {probability}")


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


1) negative: 0.002647021319717169
2) neutral: 0.010737831704318523
3) positive: 0.9866151213645935


In [16]:
from transformers import pipeline

model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"
text = "I love using the Transformers library!"

full_pipeline = pipeline(model=model_name)
full_pipeline(text)

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


[{'label': 'positive', 'score': 0.9866151213645935}]