In [2]:
from transformers import pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
sentiment_analyzer = pipeline("sentiment-analysis", model = "distilbert-base-uncased-finetuned-sst-2-english")

Device set to use cpu


In [4]:
texts = [
    "I love playing and watching football!",
    "I hate when a player misses a penalty!"
]

results = sentiment_analyzer(texts)
for result in results:
    print(result)

{'label': 'POSITIVE', 'score': 0.9996734857559204}
{'label': 'NEGATIVE', 'score': 0.996977686882019}


In [5]:
## different examples of pipeline
# pipeline 2


generator = pipeline("text-generation", model = "distilgpt2")

res = generator("In this course, we will teach you how to",
                max_length = 30,
                num_return_sequences = 2
                
                )
print(res)

Device set to use cpu
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=30) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[{'generated_text': 'In this course, we will teach you how to use the language of your education.\n\n\n\n\n\n\n\n\nFor students in these courses, you may also learn how to use the language of your education.\n\nYou may also learn how to use the language of your education.\nThe following courses will be taught by a teacher:\nThe language of your education\nThe language of your education\nThe language of your education\nThe language of your education\nThe language of your education\nThe language of your education\nThe language of your education\nThe language of your education\nThe language of your education\nThe language of your education\nThe language of your education\nThe language of your education\nThe language of your education\nThe language of your education\nThe language of your education\nThe language of your education\nThe language of your education\nThe language of your education\nThe language of your education\nThe language of your education\nThe language of your education\nTh

In [6]:
## pipeline 3

classifier = pipeline("zero-shot-classification")

res2 = classifier("This is a course about transformers",
                  candidate_labels = ["education","sports","business"]
                  )
print(res2)

No model was supplied, defaulted to facebook/bart-large-mnli and revision d7645e1 (https://huggingface.co/facebook/bart-large-mnli).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


{'sequence': 'This is a course about transformers', 'labels': ['education', 'business', 'sports'], 'scores': [0.7889325022697449, 0.16456863284111023, 0.04649889096617699]}


In [7]:
## Tokenizer and Model

from transformers import AutoTokenizer, AutoModelForSequenceClassification
classifier = pipeline("sentiment-analysis")

res3 = classifier("I've been waiting to learn HuggingFace course my whole life.")

print(res3)
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

## Another way 
classifier = pipeline("sentiment-analysis", model=model, tokenizer = tokenizer)

res4 = classifier("I've been waiting to learn HuggingFace course my whole life.")
print(res4)

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


[{'label': 'POSITIVE', 'score': 0.9768790602684021}]


Device set to use cpu


[{'label': 'POSITIVE', 'score': 0.9768790602684021}]


In [8]:
## Understanding Tokenizer

sequence = "Using a Transformer network is simple and the best"

res = tokenizer(sequence)
print(res)

tokens = tokenizer.tokenize(sequence)
print(tokens)

ids = tokenizer.convert_tokens_to_ids(tokens)
print(ids)

decoded_string = tokenizer.decode(ids)
print(decoded_string)

{'input_ids': [101, 2478, 1037, 10938, 2121, 2897, 2003, 3722, 1998, 1996, 2190, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
['using', 'a', 'transform', '##er', 'network', 'is', 'simple', 'and', 'the', 'best']
[2478, 1037, 10938, 2121, 2897, 2003, 3722, 1998, 1996, 2190]
using a transformer network is simple and the best
