In [1]:
import transformers

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import torch

In [3]:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-sentiment-latest")

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use mps:0


In [4]:
text_input = "I'm really excited about using Hugging Face to run AI models!"
pipe(text_input)

[{'label': 'positive', 'score': 0.9863850474357605}]

In [5]:
text_input = "Im having the worst day of my life "
pipe(text_input)

[{'label': 'negative', 'score': 0.9412855505943298}]

In [6]:
text_input =  "Most of the Earth is covered in water."
pipe(text_input)

[{'label': 'neutral', 'score': 0.767055869102478}]

In [7]:
text_inputs = [
    "What a great time to be alive!",
    "How are you doing today?",
    "I'm in a horrible mood.",
]
pipe(text_inputs)

[{'label': 'positive', 'score': 0.9838393926620483},
 {'label': 'neutral', 'score': 0.7096900939941406},
 {'label': 'negative', 'score': 0.923816442489624}]

In [8]:
model_name = "MoritzLaurer/deberta-v3-large-zeroshot-v2.0"
zs_shot_classifier = pipeline(model=model_name)

Device set to use mps:0


In [9]:
text = "Angela Merkel is a politician in Germany and leader of the CDU"
hypothesis_template = "This text is about {}"
classes_verbalized = ["politics", "economy", "entertainment", "environment"]
output = zs_shot_classifier(text, classes_verbalized, hypothesis_template=hypothesis_template, multi_label=False)
print(output)

{'sequence': 'Angela Merkel is a politician in Germany and leader of the CDU', 'labels': ['politics', 'economy', 'environment', 'entertainment'], 'scores': [0.9994651079177856, 0.00020659725123550743, 0.0001747353671817109, 0.00015351509500760585]}


In [10]:
output

{'sequence': 'Angela Merkel is a politician in Germany and leader of the CDU',
 'labels': ['politics', 'economy', 'environment', 'entertainment'],
 'scores': [0.9994651079177856,
  0.00020659725123550743,
  0.0001747353671817109,
  0.00015351509500760585]}

In [None]:
image_classifier = pipeline(task='image-classification')


No model was supplied, defaulted to google/vit-base-patch16-224 and revision 3f49326 (https://huggingface.co/google/vit-base-patch16-224).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use mps:0


In [14]:
predictions = image_classifier(["llamas.webp"])

In [17]:
predictions[0][0]

{'label': 'llama', 'score': 0.9989420771598816}

In [18]:
predictions[0][1]

{'label': 'Arabian camel, dromedary, Camelus dromedarius',
 'score': 9.739011875353754e-05}

In [19]:
predictions[0][2]

{'label': 'standard poodle', 'score': 3.74155497411266e-05}

### Classes: Behing the hoods

In [20]:
from transformers import AutoTokenizer

model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"

In [21]:
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [22]:
input_text = "I really want to go to an island. Do you want to go?"

In [23]:
encoded_input  = tokenizer(input_text)
encoded_input

{'input_ids': [0, 100, 269, 236, 7, 213, 7, 41, 2946, 4, 1832, 47, 236, 7, 213, 116, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [24]:
encoded_input['input_ids']

[0, 100, 269, 236, 7, 213, 7, 41, 2946, 4, 1832, 47, 236, 7, 213, 116, 2]

In [25]:
tokenizer.convert_ids_to_tokens(7)

'Ġto'

In [26]:
tokenizer.convert_ids_to_tokens(2946)

'Ġisland'

In [27]:
tokenizer.vocab_size

50265

In [28]:
new_tokens = [
    "whaleshark",
    "unicorn"
]
tokenizer.convert_tokens_to_ids(new_tokens)

[3, 3]

In [30]:
tokenizer.convert_ids_to_tokens(3)

'<unk>'

In [31]:
tokenizer.add_tokens(new_tokens)

2

In [32]:
tokenizer.convert_tokens_to_ids(new_tokens)

[50265, 50266]

In [33]:
tokenizer.vocab_size

50265

In [34]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [35]:
model_name

'cardiffnlp/twitter-roberta-base-sentiment-latest'

In [36]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [37]:
model 

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
         

In [39]:
text = "I love using transformers library!"
encoded_input = tokenizer(text, return_tensors="pt")

In [40]:
with torch.no_grad():
    output = model(**encoded_input)

In [42]:
scores = output.logits[0]

In [43]:
scores

tensor([-2.3656, -1.1385,  3.5298])

In [44]:
probabilities = torch.softmax(scores, dim=0)

In [45]:
probabilities

tensor([0.0027, 0.0093, 0.9880])

In [49]:
from transformers import AutoConfig

In [51]:
config = AutoConfig.from_pretrained(model_name)

In [53]:
config.label2id

{'negative': 0, 'neutral': 1, 'positive': 2}

In [54]:
for i, probability in enumerate(probabilities):
    label =  config.id2label[i]
    print(f' {i+1} {label} : {probability}')

 1 negative : 0.0027190586552023888
 2 neutral : 0.009275926277041435
 3 positive : 0.9880049824714661
