In [None]:
# Notebook: Hugging Face Pre-Built Task Pipelines
# Author: Thomas Purk
# Date: 2025-04-01
# Reference: https://huggingface.co/docs/transformers/v4.50.0/en/main_classes/pipelines

# Hugging Face Pre-Built Task Pipelines

The `pipeline()` factory function of the Hugging Face Transformers package instantiates a new pipeline object from a 'task' string. At this time there are 29 pre-defined task strings covering text, audio, video, and image type tasks.

This notebook investigates common pre-defined task-based pipelines and their default parameters and metadata.

- Sentiment Analysis
- Text Generation
- Mask Filling
- Named Entity Recognition
- Question Answering
- Summarization
- Translation

In [None]:
# Package Installs

#!pip install transformers

!pip list | grep transformers

sentence-transformers              3.4.1
transformers                       4.50.2


In [None]:
# Setup the Notebook

# General
import html
import os
import json
import logging
logging.getLogger("transformers").setLevel(logging.WARNING) # Suppress unnecessary logging

# Visualization
import pprint
from IPython.display import HTML, display

# Data, Science, & Math
import numpy as np
import pandas as pd

# NLP
import transformers
from transformers.pipelines import SUPPORTED_TASKS
from transformers import pipeline

In [None]:
def convert_np_types(obj):
    '''Recursively convert NumPy values to standard Python types.

    Used to make pipeline results JSON serializable before json.dumps().
    This function originated from ChatGPT.

    Parameters:
        obj (object): The object to convert. dicts, lists and tuples are
            traversed recursively; dict keys are left untouched.

    Returns:
        object: A copy of the structure in which every NumPy scalar (any
            np.generic subclass, e.g. float16/32/64, int8..int64, bool_) is
            replaced by its Python equivalent and every np.ndarray by a
            (nested) list. Any other object is returned unchanged.

    '''
    if isinstance(obj, np.generic):
        return obj.item()  # Any NumPy scalar -> matching Python scalar
    if isinstance(obj, np.ndarray):
        return convert_np_types(obj.tolist())  # Array -> (nested) list
    if isinstance(obj, dict):
        return {key: convert_np_types(value) for key, value in obj.items()}  # Recurse into dict
    if isinstance(obj, list):
        return [convert_np_types(item) for item in obj]  # Recurse into list
    if isinstance(obj, tuple):
        return tuple(convert_np_types(item) for item in obj)  # Recurse, keep tuple type
    return obj  # Return unchanged if it's not a NumPy type or container



def get_task_metadata(task_name, framework):
    '''Assemble metadata about one predefined pipeline task.

    This is a work-around to the get_default_model_and_revision() function
    which returns an error for the 'translation' task.

    Parameters:
        task_name (str): A key of transformers.pipelines.SUPPORTED_TASKS.
        framework (str): "pt" for Pytorch or "tf" for TensorFlow

    Returns:
        dict: The keys 'task_name', 'model_name', 'model_revision',
            'model_class', 'model_type' and 'model_impl'. Every key is always
            present; a value that cannot be found for the task / framework
            is reported as 'N/A'.

    Raises:
        KeyError: If task_name is not a key of SUPPORTED_TASKS.

    '''
    task = SUPPORTED_TASKS[task_name]

    # Most tasks store their default as {'model': {framework: (name, revision)}}.
    defaults = task.get('default', {})
    if 'model' not in defaults:
        # Special case: the default dict is parameterized by language pair
        # (e.g. 'translation'). Report the ('en', 'fr') entry.
        defaults = defaults.get(('en', 'fr'), {})
    model_name, model_revision = defaults.get('model', {}).get(framework, ('N/A', 'N/A'))

    # The framework entry holds a tuple of model classes; guard against a
    # missing or empty entry instead of raising on [0].
    model_classes = task.get(framework) or ()

    return {
        'task_name': task_name,
        'model_name': model_name,
        'model_revision': model_revision,
        'model_class': str(model_classes[0]) if model_classes else 'N/A',
        'model_type': task.get('type', 'N/A'),
        'model_impl': task.get('impl', 'N/A'),
    }

# Report on each pre-built task:
# print the default-model metadata of every task registered in SUPPORTED_TASKS
for task_name in SUPPORTED_TASKS:
    m = get_task_metadata(task_name, 'pt')
    print(f'Task Name: {m["task_name"]}')
    print('-----------------------------')
    print(f'Model Type: {m["model_type"]}')
    # Fall back to 'N/A' when the metadata has no default model entry, so a
    # single unusual task does not abort the whole report with a KeyError.
    print(f'Model Name: {m.get("model_name", "N/A")}')
    print(f'Model Revision: {m.get("model_revision", "N/A")}')
    print(f'Model Class: {m["model_class"]}')
    print(f'Model Implementation: {m["model_impl"]}')
    print()

Task Name: audio-classification
-----------------------------
Model Type: audio
Model Name: superb/wav2vec2-base-superb-ks
Model Revision: 372e048
Model Class: <class 'transformers.models.auto.modeling_auto.AutoModelForAudioClassification'>
Model Implementation: <class 'transformers.pipelines.audio_classification.AudioClassificationPipeline'>

Task Name: automatic-speech-recognition
-----------------------------
Model Type: multimodal
Model Name: facebook/wav2vec2-base-960h
Model Revision: 22aad52
Model Class: <class 'transformers.models.auto.modeling_auto.AutoModelForCTC'>
Model Implementation: <class 'transformers.pipelines.automatic_speech_recognition.AutomaticSpeechRecognitionPipeline'>

Task Name: text-to-audio
-----------------------------
Model Type: text
Model Name: suno/bark-small
Model Revision: 1dbd7a1
Model Class: <class 'transformers.models.auto.modeling_auto.AutoModelForTextToWaveform'>
Model Implementation: <class 'transformers.pipelines.text_to_audio.TextToAudioPipeline

In [None]:
# Example: Default Sentiment Analysis Pipeline
# Assigns a label and a score to each input text.

# Build the pipeline without naming a model so the task default is used.
# NOTE: 'sentiment-analysis' is an alias for 'text-classification'
sa_pipeline = pipeline('sentiment-analysis')

# Show the task's default-model metadata (looked up by the canonical task name)
print('\nMetadata:')
display(get_task_metadata('text-classification', 'pt'))
print()


# Report the labels and model card of the model that was actually loaded
model_config = sa_pipeline.model.config
print(f'Class Labels: {model_config.id2label}', end='\n\n')
print(f'Model Card: https://huggingface.co/{model_config._name_or_path}', end='\n\n')

inputs = [
    "I absolutely love this new phone! It's fast and the camera is amazing.",
    "The service at the restaurant was terrible, and the food was cold when it arrived.",
    "I'm feeling pretty neutral about the movie—it had some good moments but was mostly forgettable.",
    "Winning the competition was the best experience of my life!",
    "I can't believe how frustrating this software update is; nothing works properly now."
]

# Run the sentiment analysis model over all inputs at once
results = sa_pipeline(inputs=inputs)

# One row per input: the pipeline's result columns followed by the input text
df = pd.DataFrame(results).assign(inputs=inputs)
df

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cpu



Metadata:


{'task_name': 'text-classification',
 'model_name': 'distilbert/distilbert-base-uncased-finetuned-sst-2-english',
 'model_revision': '714eb0f',
 'model_class': "<class 'transformers.models.auto.modeling_auto.AutoModelForSequenceClassification'>",
 'model_type': 'text',
 'model_impl': transformers.pipelines.text_classification.TextClassificationPipeline}


Class Labels: {0: 'NEGATIVE', 1: 'POSITIVE'}

Model Card: https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english



Unnamed: 0,label,score,inputs
0,POSITIVE,0.999887,I absolutely love this new phone! It's fast an...
1,NEGATIVE,0.999531,"The service at the restaurant was terrible, an..."
2,NEGATIVE,0.999625,I'm feeling pretty neutral about the movie—it ...
3,POSITIVE,0.999847,Winning the competition was the best experienc...
4,NEGATIVE,0.999699,I can't believe how frustrating this software ...


In [None]:
# Example: Default Text Generation Pipeline
# Continues each input prompt with model-generated text.

# Create a default 'text-generation' pipeline (no model named, so the task
# default is used)
tg_pipeline = pipeline(
    task='text-generation'
)

# Show the task's default-model metadata
print('\nMetadata:')
display(get_task_metadata('text-generation', 'pt'))
print()


# Get model configuration
# NOTE(review): for a generation model id2label appears to hold only
# placeholder labels; it is printed for parity with the other examples.
model_config = tg_pipeline.model.config
print(f'Class Labels: {model_config.id2label}')
print()
print(f'Model Card: https://huggingface.co/{model_config._name_or_path}')
print()

inputs = [
    "I absolutely love this new phone! It's fast and the camera is amazing.",
    "The service at the restaurant was terrible, and the food was cold when it arrived.",
    "I'm feeling pretty neutral about the movie—it had some good moments but was mostly forgettable.",
    "Winning the competition was the best experience of my life!",
    "I can't believe how frustrating this software update is; nothing works properly now."
]

# Generation may sample tokens randomly; seed the RNGs so that re-running the
# notebook produces the same text.
transformers.set_seed(42)

# Execute the text generation model
results = tg_pipeline(text_inputs=inputs)

# Format the results: keep the first generated sequence of each input
results = [x[0]['generated_text'] for x in results]

# Create a table of inputs and results
df = pd.DataFrame(results, columns=['results'])
df['inputs'] = inputs

# Print each prompt followed by its generated continuation
for prompt, generated in zip(df['inputs'], df['results']):
    print('\nInput:')
    display(prompt)
    print('\nGenerated Text: ')
    display(generated)
    print('\n---------------------------')
    print('\n---------------------------')

No model was supplied, defaulted to openai-community/gpt2 and revision 607a30d (https://huggingface.co/openai-community/gpt2).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu



Metadata:


{'task_name': 'text-generation',
 'model_name': 'openai-community/gpt2',
 'model_revision': '607a30d',
 'model_class': "<class 'transformers.models.auto.modeling_auto.AutoModelForCausalLM'>",
 'model_type': 'text',
 'model_impl': transformers.pipelines.text_generation.TextGenerationPipeline}

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Class Labels: {0: 'LABEL_0', 1: 'LABEL_1'}

Model Card: https://huggingface.co/openai-community/gpt2



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Input:


"I absolutely love this new phone! It's fast and the camera is amazing."


Generated Text: 


"I absolutely love this new phone! It's fast and the camera is amazing. I need to buy a third one for this thing I haven't got around to using already! Thanks, and don't forget it also makes me smile when I am reading"


---------------------------

Input:


'The service at the restaurant was terrible, and the food was cold when it arrived.'


Generated Text: 


"The service at the restaurant was terrible, and the food was cold when it arrived. The service is very nice at all locations. The waiter was pretty nice and happy to talk to me, and I don't think he gave me any questions. The"


---------------------------

Input:


"I'm feeling pretty neutral about the movie—it had some good moments but was mostly forgettable."


Generated Text: 


"I'm feeling pretty neutral about the movie—it had some good moments but was mostly forgettable. For one thing, director Dan Jurgens did a really good job of putting a good cast on it, without sacrificing plot depth. There are a"


---------------------------

Input:


'Winning the competition was the best experience of my life!'


Generated Text: 


'Winning the competition was the best experience of my life!\n\n-Frosty\n\n\nBest of luck, F\n\nIn the end, my challenge was actually very simple: find a team with good, decent players that play together ('


---------------------------

Input:


"I can't believe how frustrating this software update is; nothing works properly now."


Generated Text: 


"I can't believe how frustrating this software update is; nothing works properly now.\n\n4.0.9 + improvements.\n\n- Updated for iOS 8.3+, fix for some issues with a couple of other add-ons.\n"


---------------------------


In [None]:
# Example: Default Text Generation Pipeline, But Specify the Model
# Continues each input prompt with model-generated text.

# Create a 'text-generation' pipeline with an explicitly named model
tg_pipeline = pipeline(
    task='text-generation',
    model='distilgpt2'
)

# NOTE: get_task_metadata() reports the task's *default* model from
# SUPPORTED_TASKS, not the model named above. The 'Model Card' line below
# reflects the model that was actually loaded.
print('\nMetadata:')
display(get_task_metadata('text-generation', 'pt'))
print()


# Get model configuration
model_config = tg_pipeline.model.config
print(f'Class Labels: {model_config.id2label}')
print()
print(f'Model Card: https://huggingface.co/{model_config._name_or_path}')
print()

inputs = [
    "I absolutely love this new phone! It's fast and the camera is amazing.",
    "The service at the restaurant was terrible, and the food was cold when it arrived.",
    "I'm feeling pretty neutral about the movie—it had some good moments but was mostly forgettable.",
    "Winning the competition was the best experience of my life!",
    "I can't believe how frustrating this software update is; nothing works properly now."
]

# Generation may sample tokens randomly; seed the RNGs so that re-running the
# notebook produces the same text.
transformers.set_seed(42)

# Execute the text generation model
results = tg_pipeline(text_inputs=inputs)

# Format the results: keep the first generated sequence of each input
results = [x[0]['generated_text'] for x in results]

# Create a table of inputs and results
df = pd.DataFrame(results, columns=['results'])
df['inputs'] = inputs

# Print each prompt followed by its generated continuation
for prompt, generated in zip(df['inputs'], df['results']):
    print('\nInput:')
    display(prompt)
    print('\nGenerated Text: ')
    display(generated)
    print('\n---------------------------')

config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu



Metadata:


{'task_name': 'text-generation',
 'model_name': 'openai-community/gpt2',
 'model_revision': '607a30d',
 'model_class': "<class 'transformers.models.auto.modeling_auto.AutoModelForCausalLM'>",
 'model_type': 'text',
 'model_impl': transformers.pipelines.text_generation.TextGenerationPipeline}

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Class Labels: {0: 'LABEL_0'}

Model Card: https://huggingface.co/distilgpt2



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Input:


"I absolutely love this new phone! It's fast and the camera is amazing."


Generated Text: 


"I absolutely love this new phone! It's fast and the camera is amazing. It's gorgeous. I like that it's waterproof, the size is decent, the size has a lot of features compared to that of the previous Xperia Z10 and other"


---------------------------

Input:


'The service at the restaurant was terrible, and the food was cold when it arrived.'


Generated Text: 


'The service at the restaurant was terrible, and the food was cold when it arrived. I thought the wait was very long. After a few hours, all I could think of was a second or so. However, I kept going in and my food'


---------------------------

Input:


"I'm feeling pretty neutral about the movie—it had some good moments but was mostly forgettable."


Generated Text: 


"I'm feeling pretty neutral about the movie—it had some good moments but was mostly forgettable. There was no really big moments in the movie. They would all just have been good so they wouldn't have had to fight back in the movie,"


---------------------------

Input:


'Winning the competition was the best experience of my life!'


Generated Text: 


"Winning the competition was the best experience of my life!\nI have an obsession with this book, a love of reading, it's about reading, I hate that! I love it.\nBut not for the faint of heart and those who"


---------------------------

Input:


"I can't believe how frustrating this software update is; nothing works properly now."


Generated Text: 


"I can't believe how frustrating this software update is; nothing works properly now.\n\nThe update brings the most critical bug report to our attention for this day at 7:30 p.m. Eastern.\nHere's where it all went wrong"


---------------------------


In [None]:
# Example: Default Fill Mask Pipeline
# Predicts replacement tokens for the masked positions of each input text.

# Create a default 'fill-mask' pipeline (no model named, so the task default
# is used)
fm_pipeline = pipeline(
    task='fill-mask',
)

# Show the task's default-model metadata
print('\nMetadata:')
display(get_task_metadata('fill-mask', 'pt'))
print()


# Get model configuration
model_config = fm_pipeline.model.config
print(f'Class Labels: {model_config.id2label}')
print()
print(f'Model Card: https://huggingface.co/{model_config._name_or_path}')
print()

# The mask placeholder differs between models, so read it from the loaded
# tokenizer instead of hard-coding it; the examples then keep working if the
# default model changes.
mask_token = fm_pipeline.tokenizer.mask_token

inputs = [
    f"I absolutely love this {mask_token}! It's fast and the camera is {mask_token}.",
    f"The service at the restaurant was {mask_token}, and the food was {mask_token} when it arrived.",
    f"I'm feeling pretty {mask_token} about the movie—it had some good moments but was mostly {mask_token}.",
    f"Winning the {mask_token} was the best experience of my life!",
    f"I can't believe how frustrating this {mask_token} update is; nothing works properly now."
]


# Execute the mask fill model, keeping the 2 best candidates per mask
results = fm_pipeline(
    inputs=inputs,
    top_k=2
)

# Format and print the results
print(json.dumps(convert_np_types(results), indent=4))

No model was supplied, defaulted to distilbert/distilroberta-base and revision fb53ab8 (https://huggingface.co/distilbert/distilroberta-base).
Using a pipeline without specifying a model name and revision in production is not recommended.
Some weights of the model checkpoint at distilbert/distilroberta-base were not used when initializing RobertaForMaskedLM: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu



Metadata:


{'task_name': 'fill-mask',
 'model_name': 'distilbert/distilroberta-base',
 'model_revision': 'fb53ab8',
 'model_class': "<class 'transformers.models.auto.modeling_auto.AutoModelForMaskedLM'>",
 'model_type': 'text',
 'model_impl': transformers.pipelines.fill_mask.FillMaskPipeline}


Class Labels: {0: 'LABEL_0', 1: 'LABEL_1'}

Model Card: https://huggingface.co/distilbert/distilroberta-base

[
    [
        [
            {
                "score": 0.3331633508205414,
                "token": 2280,
                "token_str": " camera",
                "sequence": "<s>I absolutely love this camera! It's fast and the camera is<mask>.</s>"
            },
            {
                "score": 0.03793039172887802,
                "token": 1028,
                "token_str": " phone",
                "sequence": "<s>I absolutely love this phone! It's fast and the camera is<mask>.</s>"
            }
        ],
        [
            {
                "score": 0.12083110958337784,
                "token": 12058,
                "token_str": " gorgeous",
                "sequence": "<s>I absolutely love this<mask>! It's fast and the camera is gorgeous.</s>"
            },
            {
                "score": 0.11521915346384048,
                "token": 4406,
           

In [None]:
# Example: Default Named Entity Recognition
# Classifies tokens
# NOTE: 'ner' is an alias for 'token-classification'

# Create a Default 'ner' pipeline
ner_pipeline = pipeline(
    task='ner',
)

# Metadata is looked up under the canonical task name 'token-classification'
print('\nMetadata:')
display(get_task_metadata('token-classification', 'pt'))
print()


# Get model configuration
model_config = ner_pipeline.model.config
print(f'Class Labels: {model_config.id2label}')
print()
print(f'Model Card: https://huggingface.co/{model_config._name_or_path}')
print()

inputs = [
    "The Titanic sank in the Atlantic Ocean in 1912.",
    "Pablo Picasso was a Spanish painter and sculptor known for co-founding Cubism.",
    "J.K. Rowling wrote the Harry Potter series, starting with 'Harry Potter and the Sorcerer’s Stone'.",
    "The Great Wall of China is one of the Seven Wonders of the World.",
    "The Oscars, also known as the Academy Awards, recognize excellence in the film industry.",
]


# Execute the token classification model
results = ner_pipeline(
    inputs=inputs,
    # Re-assembles the "Pablo" & "Picasso" tokens into one entity.
    # Replaces the deprecated grouped_entities=True, which maps to this strategy.
    aggregation_strategy='simple'
)

# Format and print the results
print(json.dumps(convert_np_types(results), indent=4))

No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision 4c53496 (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu



Metadata:


{'task_name': 'token-classification',
 'model_name': 'dbmdz/bert-large-cased-finetuned-conll03-english',
 'model_revision': '4c53496',
 'model_class': "<class 'transformers.models.auto.modeling_auto.AutoModelForTokenClassification'>",
 'model_type': 'text',
 'model_impl': transformers.pipelines.token_classification.TokenClassificationPipeline}


Class Labels: {0: 'O', 1: 'B-MISC', 2: 'I-MISC', 3: 'B-PER', 4: 'I-PER', 5: 'B-ORG', 6: 'I-ORG', 7: 'B-LOC', 8: 'I-LOC'}

Model Card: https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english

[
    [
        {
            "entity_group": "MISC",
            "score": 0.8621680736541748,
            "word": "Titanic",
            "start": 4,
            "end": 11
        },
        {
            "entity_group": "LOC",
            "score": 0.9976879358291626,
            "word": "Atlantic Ocean",
            "start": 24,
            "end": 38
        }
    ],
    [
        {
            "entity_group": "PER",
            "score": 0.9983973503112793,
            "word": "Pablo Picasso",
            "start": 0,
            "end": 13
        },
        {
            "entity_group": "MISC",
            "score": 0.9976692795753479,
            "word": "Spanish",
            "start": 20,
            "end": 27
        },
        {
            "entity_group": "MISC",
            "

In [None]:
# Example: Default Summarization Pipeline
# Produces a short summary of a longer text.

# Create a Default 'summarization' pipeline (no model named, so the task
# default is used)
sum_pipeline = pipeline(
    task='summarization',
)

print('\nMetadata:')
display(get_task_metadata('summarization', 'pt'))
print()


# Get model configuration
model_config = sum_pipeline.model.config
print(f'Class Labels: {model_config.id2label}')
print()
print(f'Model Card: https://huggingface.co/{model_config._name_or_path}')
print()

# NOTE: this text is reused as the context of the question-answering example
inputs = '''Traditional hand tool woodworking is a craft that emphasizes
precision, skill, and patience. Unlike modern power tools, hand tools such as
chisels, hand planes, and saws allow artisans to shape wood with a high level
of control. This method of woodworking has been practiced for centuries, relying
 on techniques passed down through generations to create furniture, cabinetry,
 and intricate wood carvings.

One of the key benefits of working with hand tools is the ability to produce
fine, detailed joinery without the noise and dust associated with power tools.
Techniques like dovetail joints, mortise and tenon connections, and hand-planed
surfaces showcase the craftsmanship involved. Woodworkers often select tools
based on the type of wood they are working with, ensuring smooth cuts and
precise fittings.

In addition to its practical advantages, hand tool woodworking fosters a deep
connection between the artisan and the material. The rhythmic motion of a hand
saw or the delicate shaving of a plane allows for a meditative experience that
many woodworkers find rewarding. As more people seek sustainable and traditional
 crafting methods, hand tool woodworking continues to thrive, preserving
 historical skills while promoting an appreciation for high-quality, handmade
 pieces.
'''


# Execute the summarization model and keep the summary text of the first result
results = sum_pipeline(inputs)
results = results[0]['summary_text']

# Print the summary in a wrapping <pre> block. The text is model output, so it
# is HTML-escaped before being embedded in markup. HTML and html come from the
# notebook's import cell.
print('Result:')
display(HTML(f"<pre style='white-space: pre-wrap; word-wrap: break-word;'>{html.escape(results)}</pre>"))

No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu



Metadata:


{'task_name': 'summarization',
 'model_name': 'sshleifer/distilbart-cnn-12-6',
 'model_revision': 'a4f8f3e',
 'model_class': "<class 'transformers.models.auto.modeling_auto.AutoModelForSeq2SeqLM'>",
 'model_type': 'text',
 'model_impl': transformers.pipelines.text2text_generation.SummarizationPipeline}


Class Labels: {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}

Model Card: https://huggingface.co/sshleifer/distilbart-cnn-12-6

Result:


In [None]:
# Observation: The summary is ALMOST there, but only makes sense with great effort
# by the reader.

In [None]:
# Example: Default Question Answering
# Answers a question with a span taken from a supplied context text.


# Build the 'question-answering' pipeline with the task's default model
qa_pipeline = pipeline(task='question-answering')

# Show the task's default-model metadata
print('\nMetadata:')
display(get_task_metadata('question-answering', 'pt'))
print()


# Report the labels and model card of the model that was actually loaded
model_config = qa_pipeline.model.config
print(f'Class Labels: {model_config.id2label}', end='\n\n')
print(f'Model Card: https://huggingface.co/{model_config._name_or_path}', end='\n\n')

# The context is the three-paragraph hand tool woodworking text that the
# summarization example stored in `inputs`.
context = inputs

question = "What are some tools used in hand tool woodworking?"

# Run the question answering model
results = qa_pipeline(
    context=context,
    question=question
)

# Pretty-print the result as JSON
print(json.dumps(convert_np_types(results), indent=4))

No model was supplied, defaulted to distilbert/distilbert-base-cased-distilled-squad and revision 564e9b5 (https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu



Metadata:


{'task_name': 'question-answering',
 'model_name': 'distilbert/distilbert-base-cased-distilled-squad',
 'model_revision': '564e9b5',
 'model_class': "<class 'transformers.models.auto.modeling_auto.AutoModelForQuestionAnswering'>",
 'model_type': 'text',
 'model_impl': transformers.pipelines.question_answering.QuestionAnsweringPipeline}


Class Labels: {0: 'LABEL_0', 1: 'LABEL_1'}

Model Card: https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad

{
    "score": 0.9732940793037415,
    "start": 141,
    "end": 171,
    "answer": "chisels, hand planes, and saws"
}


In [None]:
# Example: Translation pipeline with a specific model
# Translates each French input sentence into English.


# Build a 'translation' pipeline around an explicitly named French -> English model
tr_pipeline = pipeline(task='translation', model="Helsinki-NLP/opus-mt-fr-en")

# NOTE: get_task_metadata() reports the task's default model from
# SUPPORTED_TASKS, not the model named above. The 'Model Card' line below
# reflects the model that was actually loaded.
print('\nMetadata:')
display(get_task_metadata('translation', 'pt'))
print()


# Report the labels and model card of the loaded model
model_config = tr_pipeline.model.config
print(f'Class Labels: {model_config.id2label}', end='\n\n')
print(f'Model Card: https://huggingface.co/{model_config._name_or_path}', end='\n\n')

inputs = [
    "Bonjour, comment allez-vous ?",
    "Il fait beau aujourd'hui.",
    "Je voudrais un café, s'il vous plaît.",
    "Où est la gare ?",
    "Merci, au revoir."
]

# Reference translations for the reader; this bare string is never used by the code.
'''English Reference

    "Hello, how are you?",
    "The weather is nice today.",
    "I would like a coffee, please.",
    "Where is the train station?",
    "Thank you, goodbye."
'''


# Run the translation model over all sentences
results = tr_pipeline(inputs)

# Pretty-print the results as JSON
print(json.dumps(convert_np_types(results), indent=4))

Device set to use cpu



Metadata:


{'task_name': 'translation',
 'model_name': 'google-t5/t5-base',
 'model_revision': 'a9723ea',
 'model_class': "<class 'transformers.models.auto.modeling_auto.AutoModelForSeq2SeqLM'>",
 'model_type': 'text',
 'model_impl': transformers.pipelines.text2text_generation.TranslationPipeline}


Class Labels: {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}

Model Card: https://huggingface.co/Helsinki-NLP/opus-mt-fr-en

[
    {
        "translation_text": "Hello, how are you?"
    },
    {
        "translation_text": "It's beautiful today."
    },
    {
        "translation_text": "I'd like some coffee, please."
    },
    {
        "translation_text": "Where's the station?"
    },
    {
        "translation_text": "Thank you, bye."
    }
]
