In [None]:
import pandas as pd
from IPython.display import display, Markdown
from optimum.bettertransformer import BetterTransformer
from optimum.pipelines import pipeline as optimum_pipeline
from transformers import AutoModel, pipeline

# Load Model & Convert to Optimum

In this section we will see how to load a pre-trained model from the HuggingFace Hub. 
You can shop for models [here](https://huggingface.co/models).

The model is then converted using [`BetterTransformer`](https://huggingface.co/docs/optimum/bettertransformer/overview) from the [optimum project](https://huggingface.co/docs/optimum/index).

In [None]:
# Checkpoint to pull from the HuggingFace Hub.
model_name = "roberta-base"

# device_map="auto" delegates device placement of the weights to the library.
model = AutoModel.from_pretrained(
    model_name,
    device_map="auto",
)

# Convert to the BetterTransformer format to speed up inference.
# keep_original_model=True is passed so that `model` is not overwritten by the
# conversion (per the Optimum docs); the converted model is bound to `bt_model`.
bt_model = BetterTransformer.transform(
    model,
    keep_original_model=True,
)

In [None]:
# Print the converted model's repr so its layer structure can be inspected.
print(
    "converted_model: ",
    bt_model,
)

# HuggingFace Pipeline API

In the previous section we saw how to load a model; in this section we look at the easiest way to use HuggingFace models for inference.

Specifically, we will show the following APIs of the [HuggingFace Pipeline API](https://huggingface.co/docs/transformers/v4.34.0/en/main_classes/pipelines) and its cousin from the [optimum project](https://huggingface.co/docs/optimum/index), a collaboration between HuggingFace and PyTorch which improves inference latency with no performance hit:
* [Text Classification](#text-classification)
* [Text Generation](#text-generation)
* [Text Mask Fill - Optimum](#optimum-for-faster-latency)

## Text Classification

In [None]:
# Other text-classification checkpoints can be browsed at:
# https://huggingface.co/models?pipeline_tag=text-classification&sort=trending
model_name = "SamLowe/roberta-base-go_emotions"

# Build a ready-to-call text-classification pipeline around that checkpoint.
classifier_pipe = pipeline(
    task="text-classification",
    model=model_name,
)

In [None]:
# Example inputs that are passed to `classifier_pipe` in the next cell.
# The strings are model inputs: edit them only if you intend to change the demo output.
sentences = [
    "I am feeling inspired today.",
    "This talk is informative, but a bit high-level, where I can find more details?",
    "I wonder about all the hype around Generative AI, is smoke and mirrors?",
    "Building production machine learning systems is challenging.",
]

In [None]:
# Run the whole list through the classifier in one call and show the result
# as the cell output.
sentence_predictions = classifier_pipe(sentences)
sentence_predictions

## Text Generation

In [None]:
# Model card: https://huggingface.co/bigscience/bloom-560m
model_name = "bigscience/bloom-560m"

# Text-generation pipeline; device_map="auto" delegates device placement to the library.
generator = pipeline(
    task="text-generation",
    model=model_name,
    device_map="auto",
)

In [None]:
prompt = "The Generative AI World Summit is a"

# Generation settings: sampling is switched off and at most 25 new tokens are produced.
generation_kwargs = {"do_sample": False, "max_new_tokens": 25}
response = generator(prompt, **generation_kwargs)

In [None]:
# Take the generated string from the first pipeline result, then render the
# prompt and the model output as Markdown (the cell's last expression is displayed).
generated_text = response[0]["generated_text"]
Markdown(f"""
**Prompt**: {prompt}

**{model_name}'s continuation**: {generated_text}...
""")

## Optimum for Faster Latency

In [None]:
# Fill-mask inference through the Optimum pipeline factory.
#
# The factory is imported in the first cell as `optimum_pipeline`. Importing it
# here under the name `pipeline` would rebind the `transformers.pipeline` used
# by the earlier cells, so re-running any of them after this cell would
# silently build an Optimum pipeline instead.
model_name = "distilbert-base-uncased"
prompt = "I am attending the Generative AI Summit and I am a practicing [MASK]."

# accelerator="bettertransformer" requests the BetterTransformer backend.
unmasked_optimum_pipeline = optimum_pipeline(
    task="fill-mask",
    model=model_name,
    accelerator="bettertransformer",
)

# The candidate fills for the [MASK] position; tabulated in the next cell.
response = unmasked_optimum_pipeline(prompt)

In [None]:
# Show full cell contents instead of truncating long strings.
# None is the documented "no limit" value for display.max_colwidth; 0 is not
# documented as meaning unlimited.
pd.set_option("display.max_colwidth", None)

# Human-readable headers for the keys in each fill-mask result.
col_mapping = {
    "score": "Score",
    "token_str": "Token mask fill",
    "token": "Token ID",
    "sequence": "Full generated text",
}

# One row per result in `response`; the frame is the cell's displayed output.
pd.DataFrame(response).rename(columns=col_mapping)