In [None]:
# Hugging Face NLP course - chapter 1 notes
# https://huggingface.co/learn/nlp-course/chapter1/1
# Topic: Transformers - what can they do?

In [None]:
# !pip install datasets evaluate transformers[sentencepiece]

In [None]:
# pip install tf-keras

In [None]:
# pip install sentencepiece

### Pipelines

- Feature Extraction
    - Explanation: Converts text into numerical representations that can be used as input features for other machine learning models.
    - Example Use Case: Obtaining text embeddings for clustering or as input to another model.

- Fill-Mask
    - Explanation: Predicts masked words within a sentence, typically used with masked language models like BERT.
    - Example Use Case: Predicting missing words in sentences for data augmentation or sentence completion tasks.

- Named Entity Recognition (NER)
    - Explanation: Identifies and classifies named entities (e.g., persons, organizations, locations) in text.
    - Example Use Case: Extracting names of people and organizations from news articles.

- Question Answering
    - Explanation: Provides answers to questions based on a given context.
    - Example Use Case: Creating a Q&A system for customer support where users can ask questions and get answers from a knowledge base.

- Sentiment Analysis
    - Explanation: Determines the sentiment expressed in a piece of text (e.g., positive, negative, neutral).
    - Example Use Case: Analyzing customer reviews to gauge overall satisfaction.

- Summarization
    - Explanation: Summarizes long pieces of text into shorter, concise versions.
    - Example Use Case: Summarizing lengthy articles or reports to provide quick overviews.

- Text Generation
    - Explanation: Generates text based on a given prompt using models like GPT-2.
    - Example Use Case: Creating creative content such as stories, poems, or dialogue for games.

- Translation
    - Explanation: Translates text from one language to another.
    - Example Use Case: Translating user queries in multiple languages for a multilingual chatbot.

- Zero-Shot Classification
    - Explanation: Classifies text into categories without any prior training on those specific categories.
    - Example Use Case: Categorizing emails or documents into custom categories without having labeled training data.

In [None]:
from transformers import pipeline

# Sentiment pipeline placed on GPU 0. For a CPU-only run, build it without the
# `device` argument instead: pipeline("sentiment-analysis").
classifier = pipeline(task="sentiment-analysis", device=0)

# The `score` in the output is the model's confidence level.
sample_sentence = "I've been waiting for a HuggingFace course my whole life."
classifier(sample_sentence)

In [None]:
from transformers import pipeline

# Sentiment pipeline on GPU 0; it judges the sentiment of every input it is given.
classifier = pipeline(task="sentiment-analysis", device=0)

# Two inputs in one call: the first phrase is positive, the second is negative.
sentences = [
    "I've been waiting for a HuggingFace course my whole life.",
    "I hate this so much!",
]
classifier(sentences)

In [None]:
from transformers import pipeline

# Zero-shot classification (GPU 0): the user chooses the labels to classify
# against at call time.
classifier = pipeline(task="zero-shot-classification", device=0)

topics = ["education", "politics", "business"]
classifier("This is a course about the Transformers library", candidate_labels=topics)

In [None]:
from transformers import pipeline

# Zero-shot classification on GPU 0, with user-selected labels.
classifier = pipeline(task="zero-shot-classification", device=0)

# A sentence that mixes a political subject with a retail one.
statement = "The republicans and democrats can only agree on the fact that neiman marcus is vastly overprices"
classifier(
    statement,
    candidate_labels=["education", "politics", "business"],
)

In [None]:
from transformers import pipeline

# Zero-shot classification on GPU 0; the labels are supplied by the user.
classifier = pipeline(task="zero-shot-classification", device=0)

label_choices = ["education", "politics", "business"]
statement = "The republicans and democrats can only agree on the fact that WalMart is great"
classifier(statement, candidate_labels=label_choices)

In [None]:
from transformers import pipeline

# Text generation (GPU 0, no model specified): the prompt is used to generate text.
generator = pipeline(task="text-generation", device=0)

prompt = "In this course, we will teach you how to"
generator(prompt)

In [None]:
from transformers import pipeline

# Text generation again, this time with the model specified explicitly.
generator = pipeline(task="text-generation", model="distilgpt2", device=0)

generation_options = {
    "max_length": 30,  # max length to return
    "num_return_sequences": 3,  # number of items to return
}
generator("In this course, we will teach you how to", **generation_options)

In [None]:
from transformers import pipeline

# Fill-mask predicts the missing word(s) in a sentence; runs on GPU 0.
unmasker = pipeline(task="fill-mask", device=0)

masked_sentence = "This course will teach you all about <mask> models."
# top_k is how many likely values to return.
unmasker(masked_sentence, top_k=5)

In [None]:
from transformers import pipeline

# NER tries to identify objects like persons, organizations, or locations.
# `aggregation_strategy="simple"` groups the tokens that belong to one entity
# into a single result. It replaces the deprecated `grouped_entities=True`
# flag, which selects the same strategy but emits a deprecation warning.
ner = pipeline("ner", aggregation_strategy="simple", device=0)
ner("My name is Sylvain and I work at Hugging Face in Brooklyn.")

In [None]:
from transformers import pipeline

# Question answering: answers `question` using the supplied `context`.
# The import above is active (it was commented out) so that this cell runs on
# its own, the same way every other cell in this notebook does.
question_answerer = pipeline("question-answering", device=0)
question_answerer(
    question="Where do I work?",
    context="My name is Sylvain and I work at Hugging Face in Brooklyn",
)

In [None]:
from transformers import pipeline

# Summarize text: build the summarization pipeline on GPU 0.
summarizer = pipeline(task="summarization", device=0)

# Passage to condense; passed to the pipeline exactly as written.
article = """
    America has changed dramatically during recent years. Not only has the number of 
    graduates in traditional engineering disciplines such as mechanical, civil, 
    electrical, chemical, and aeronautical engineering declined, but in most of 
    the premier American universities engineering curricula now concentrate on 
    and encourage largely the study of engineering science. As a result, there 
    are declining offerings in engineering subjects dealing with infrastructure, 
    the environment, and related issues, and greater concentration on high 
    technology subjects, largely supporting increasingly complex scientific 
    developments. While the latter is important, it should not be at the expense 
    of more traditional engineering.

    Rapidly developing economies such as China and India, as well as other 
    industrial countries in Europe and Asia, continue to encourage and advance 
    the teaching of engineering. Both China and India, respectively, graduate 
    six and eight times as many traditional engineers as does the United States. 
    Other industrial countries at minimum maintain their output, while America 
    suffers an increasingly serious decline in the number of engineering graduates 
    and a lack of well-educated engineers.
"""
summarizer(article)

In [None]:
from transformers import pipeline

# Translation pipeline on GPU 0 with an explicitly chosen checkpoint
# (fr -> en, going by the checkpoint name).
translator = pipeline(
    task="translation",
    model="Helsinki-NLP/opus-mt-fr-en",
    device=0,
)

french_sentence = "Ce cours est produit par Hugging Face."
translator(french_sentence)