## Transformers provides thousands of pretrained models to perform tasks on different modalities such as text, vision, and audio

In [None]:
### 📝 Text, for tasks like text classification, information extraction, question answering, summarization, translation, text generation, in over 100 languages.
### 🖼️ Images, for tasks like image classification, object detection, and segmentation.
### 🗣️ Audio, for tasks like speech recognition and audio classification.

In [19]:
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

In [22]:
# Hub checkpoint used throughout the notebook (per its name: DistilBERT,
# uncased, fine-tuned on SST-2 English sentiment data).
model_name = "distilbert-base-uncased-finetuned-sst-2-english"

# Model and tokenizer are loaded from the same checkpoint name so that the
# token ids produced by the tokenizer match what the model expects.
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Bundle the explicit model/tokenizer pair into a sentiment-analysis pipeline.
classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

sentences = [
    "We are very happy to show you the Transformers library.",
    "We hope you don't hate it",
]
results = classifier(sentences)

# Each entry is a dict holding a 'label' and a 'score' for one input sentence.
for result in results:
    print(result)

{'label': 'POSITIVE', 'score': 0.9997994303703308}
{'label': 'POSITIVE', 'score': 0.8319637179374695}


In [25]:
sentence = "We are very happy to show you the Transformers library."

# Step 1: split the raw text into tokens (lower-cased by this uncased tokenizer).
tokens = tokenizer.tokenize(sentence)

# Step 2: map every token to its integer id in the vocabulary.
token_ids = tokenizer.convert_tokens_to_ids(tokens)

# Calling the tokenizer directly performs both steps at once and returns a
# dict-like encoding with 'input_ids' and 'attention_mask'.
input_ids = tokenizer(sentence)

print(tokens)
print(token_ids)  # numerical representation of the tokens
# Same ids as token_ids, but wrapped in the special-token ids 101 and 102
# ([CLS] and [SEP] in BERT-style vocabularies), plus the attention mask.
print(input_ids)

['we', 'are', 'very', 'happy', 'to', 'show', 'you', 'the', 'transformers', 'library', '.']
[2057, 2024, 2200, 3407, 2000, 2265, 2017, 1996, 19081, 3075, 1012]
{'input_ids': [101, 2057, 2024, 2200, 3407, 2000, 2265, 2017, 1996, 19081, 3075, 1012, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}


In [27]:
X_train = [
    "We are very happy to show you the Transformers library.",
    "We hope you don't hate it",
]

# Encode both sentences in one call:
#   padding=True        -> shorter sequences are padded (id 0) to the longest one,
#                          and the padded positions get attention_mask == 0
#   truncation=True     -> sequences longer than max_length are cut off
#   return_tensors="pt" -> results come back as PyTorch tensors, not lists
batch = tokenizer(
    X_train,
    padding=True,
    truncation=True,
    max_length=512,
    return_tensors="pt",
)
print(batch)

{'input_ids': tensor([[  101,  2057,  2024,  2200,  3407,  2000,  2265,  2017,  1996, 19081,
          3075,  1012,   102],
        [  101,  2057,  3246,  2017,  2123,  1005,  1056,  5223,  2009,   102,
             0,     0,     0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0]])}


In [30]:
# Inference only: run the forward pass and post-processing with gradient
# tracking disabled.
with torch.no_grad():
    outputs = model(**batch)
    # Normalise the raw logits along dim 1 so each row sums to 1.
    predictions = F.softmax(outputs.logits, dim=1)
    # Index of the highest-probability class for every sentence in the batch.
    label_ids = torch.argmax(predictions, dim=1)

print(outputs)
print(predictions)
print(label_ids)

# Translate numeric class ids into the label names stored in the model config.
labels = [model.config.id2label[idx] for idx in label_ids.tolist()]
print(labels)

SyntaxError: invalid syntax. Maybe you meant '==' or ':=' instead of '='? (3746814370.py, line 6)