# Hugging Face
This notebook demonstrates how to use transformers from [Hugging Face](https://huggingface.co/).

### Setup
[See the initial setup](../../README.md#setup) to start a virtual environment and install packages.

In [2]:
from transformers import pipeline

### Sentiment Analysis

In [4]:
# Build a sentiment classifier from an explicitly named checkpoint rather
# than relying on the pipeline's default model.
classifier = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

sentence = "Hugging Face allows me to automate tasks and unlocks new opportunities."
response = classifier(sentence)
print(response)

[{'label': 'POSITIVE', 'score': 0.9995941519737244}]


### Text Generation
<small>Note: For open-ended generation, Hugging Face sets the padding token ID to the end-of-sequence (EOS) token ID by default and logs a warning. The code below sets it explicitly to avoid that warning message.</small>

In [10]:
generator = pipeline(task="text-generation", model="distilgpt2")

prompt = "In the future, AI large language models will help to"
response = generator(
    prompt,
    max_length=20,
    num_return_sequences=2,
    # Reuse the tokenizer's end-of-sequence token for padding; passing it
    # explicitly avoids the warning emitted when it is left unset.
    pad_token_id=generator.tokenizer.eos_token_id,
)
print(response)

[{'generated_text': 'In the future, AI large language models will help to bring intelligent AI into the world. For example'}, {'generated_text': 'In the future, AI large language models will help to develop more of the capabilities of AI, including'}]


### Zero-Shot Classification

In [13]:
from transformers import AutoModelForSequenceClassification

# The checkpoint is chosen when the pipeline is constructed. `model` is a
# constructor argument, not a per-call one, so nothing model-related is
# passed to the call below and no separate model object needs to be loaded.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# The pipeline scores the text against each candidate label; the labels need
# not have been seen by the model during training.
response = classifier(
    "This is sample code for zero-shot classification using Python.",
    candidate_labels=["business", "education", "politics", "technology", "weather"],
)
print(response)

{'sequence': 'This is sample code for zero-shot classification using Python.', 'labels': ['technology', 'business', 'weather', 'education', 'politics'], 'scores': [0.9024143815040588, 0.0479886494576931, 0.020019279792904854, 0.018473103642463684, 0.01110463123768568]}


### Auto Tokenizer

In [14]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load the tokenizer and the sequence-classification model explicitly, then
# hand both objects to pipeline() instead of a checkpoint name.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

classifier = pipeline(task="sentiment-analysis", model=model, tokenizer=tokenizer)

# A list of sentences is classified in one call, giving one result per sentence.
X_train = [
    "Hugging Face allows me to automate tasks and unlocks new opportunities.",
    "Hugging Face is pretty cool!",
]

response = classifier(X_train)
print(response)

[{'label': 'POSITIVE', 'score': 0.9995941519737244}, {'label': 'POSITIVE', 'score': 0.9998742341995239}]


### With PyTorch

In [15]:
import torch
import torch.nn.functional as F

# Tokenize the sentences into a single padded batch of PyTorch tensors.
# `tokenizer`, `model` and `X_train` come from the previous cell.
batch = tokenizer(
    X_train,
    padding=True,
    truncation=True,
    max_length=512,
    return_tensors="pt",
)
print(batch)

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    outputs = model(**batch)
    print(outputs)

    # Convert the raw logits into per-class probabilities, one row per sentence.
    predictions = F.softmax(outputs.logits, dim=1)
    print(predictions)

    # Index of the highest-probability class for each sentence.
    labels = predictions.argmax(dim=1)
    print(labels)

{'input_ids': tensor([[  101, 17662,  2227,  4473,  2033,  2000,  8285,  8585,  8518,  1998,
         19829,  2015,  2047,  6695,  1012,   102],
        [  101, 17662,  2227,  2003,  3492,  4658,   999,   102,     0,     0,
             0,     0,     0,     0,     0,     0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])}
SequenceClassifierOutput(loss=None, logits=tensor([[-3.7883,  4.0208],
        [-4.2771,  4.7040]]), hidden_states=None, attentions=None)
tensor([[4.0589e-04, 9.9959e-01],
        [1.2575e-04, 9.9987e-01]])
tensor([1, 1])
