In [1]:
!pip install torch
!pip install transformers

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [2]:
from transformers import pipeline
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Optional: silence the warnings emitted by the code in this notebook.
import warnings


def warn(*args, **kwargs):
    """Do-nothing stand-in for ``warnings.warn``; accepts and ignores any arguments."""
    return None


# Swap the no-op in for the real function, then add an "ignore" filter as well.
warnings.warn = warn
warnings.filterwarnings('ignore')

# Text classification with DistilBERT

## Load the model and tokenizer

In [4]:
# The tokenizer and the classifier are both loaded from the same checkpoint,
# so the checkpoint name is written once and shared.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"

tokenizer = DistilBertTokenizer.from_pretrained(checkpoint)
model = DistilBertForSequenceClassification.from_pretrained(checkpoint)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

## Preprocess the input text

In [5]:
# Sample text to classify
text = "Congratulations! You've won a free ticket to the Bahamas. Reply WIN to claim."

# Tokenize the input text; return_tensors="pt" asks for PyTorch tensors
inputs = tokenizer(text, return_tensors="pt")

# Show the encoded input (token IDs and attention mask)
print(inputs)

{'input_ids': tensor([[  101, 23156,   999,  2017,  1005,  2310,  2180,  1037,  2489,  7281,
          2000,  1996, 17094,  1012,  7514,  2663,  2000,  4366,  1012,   102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}


### Perform inference

In [6]:
# Perform inference. Gradients are only needed for training, so gradient
# tracking is switched off for the forward pass.
with torch.no_grad():
    outputs = model(**inputs)  # unpacks input_ids and attention_mask as keyword arguments

In [7]:
# For reference only (not executed): the same call with the keyword arguments spelled out.
#model(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'])

### Get the logits

In [8]:
# Pull the raw (not yet normalized) class scores out of the model output
logits = outputs.logits
logits.shape  # last expression, so the shape is displayed as the cell result

torch.Size([1, 2])

## Post-process the output

Convert the logits to probabilities and get the predicted class:

In [9]:
# Convert logits to probabilities
probs = torch.softmax(logits, dim=-1)

# Get the predicted class
predicted_class = torch.argmax(probs, dim=-1)

# .item() turns the one-element tensor into a plain Python int. This is explicit
# about expecting exactly one prediction, instead of relying on a one-element
# tensor happening to be usable as an index.
predicted_index = predicted_class.item()

# Map the predicted class to the label. The mapping is read from the model's own
# configuration so the label names cannot drift from the ones the checkpoint uses.
labels = model.config.id2label  # {class index: label name}
predicted_label = labels[predicted_index]

print(f"Predicted label: {predicted_label}")

Predicted label: POSITIVE


# Text generation with GPT-2

## Load tokenizer

Load the pretrained GPT-2 tokenizer.  The tokenizer is responsible for converting text into tokens that the model can understand.

In [10]:
# Load the GPT-2 tokenizer (the model itself is loaded in the next cell).
# Note: this rebinds `tokenizer`, replacing the DistilBERT tokenizer loaded earlier.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

In [11]:
# Load the pretrained GPT-2 language model.
# Note: this rebinds `model`, replacing the DistilBERT classifier loaded earlier.

model = GPT2LMHeadModel.from_pretrained("gpt2")

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

## Preprocess the input text

Tokenize the input text and convert it to a format suitable for the model. As before, this gives you the token indexes, i.e., inputs.

In [12]:
# Prompt that the model will continue
prompt = "Once upon a time"

# Tokenize the input text; return_tensors="pt" asks for PyTorch tensors
inputs = tokenizer(prompt, return_tensors="pt")
inputs  # last expression, so the encoding is displayed as the cell result

{'input_ids': tensor([[7454, 2402,  257,  640]]), 'attention_mask': tensor([[1, 1, 1, 1]])}

## Perform inference

Generate text using the model

inputs: Input token IDs from the tokenizer

attention_mask: Mask indicating which tokens to attend to

pad_token_id: Padding token ID set to the end-of-sequence token ID

max_length: Maximum length of the generated sequences

num_return_sequences: Number of sequences to generate

In [13]:
# Collect the decoding options first, then hand them to generate() in a single call.
generation_options = dict(
    attention_mask=inputs["attention_mask"],  # which prompt tokens to attend to
    pad_token_id=tokenizer.eos_token_id,      # pad with the end-of-sequence token ID
    max_length=50,                            # maximum length of the generated sequence
    num_return_sequences=1,                   # produce a single sequence
)
output_ids = model.generate(inputs["input_ids"], **generation_options)

output_ids

tensor([[7454, 2402,  257,  640,   11,  262,  995,  373,  257, 1295,  286, 1049,
         8737,  290, 1049, 3514,   13,  383,  995,  373,  257, 1295,  286, 1049,
         3514,   11,  290,  262,  995,  373,  257, 1295,  286, 1049, 3514,   13,
          383,  995,  373,  257, 1295,  286, 1049, 3514,   11,  290,  262,  995,
          373,  257]])

## Post-process the output

Decode the generated tokens to get the text:

In [14]:
# Turn the first (and only) generated sequence of token IDs back into a string,
# leaving out special tokens.
first_sequence = output_ids[0]
generated_text = tokenizer.decode(first_sequence, skip_special_tokens=True)

print(generated_text)

Once upon a time, the world was a place of great beauty and great danger. The world was a place of great danger, and the world was a place of great danger. The world was a place of great danger, and the world was a


# Hugging Face pipeline() function

The pipeline() function from the Hugging Face transformers library is a high-level API designed to simplify the usage of pretrained models for various natural language processing (NLP) tasks.  It abstracts the complexities of model loading, tokenization, inference, and post-processing, allowing users to perform complex NLP tasks with just a few lines of code.

## Definition

transformers.pipeline(
    task: str,
    model: Optional = None,
    config: Optional = None,
    tokenizer: Optional = None,
    feature_extractor: Optional = None,
    framework: Optional = None,
    revision: str = 'main',
    use_fast: bool = True,
    model_kwargs: Dict[str, Any] = None,
    **kwargs
)

## Parameters

task: str

The task to perform, such as "text-classification", "text-generation", "question-answering", etc.
Example: "text-classification"

model: Optional

The model to use.  This can be a string (model identifier from the Hugging Face model hub), a path to a directory containing model files, or a pre-loaded model instance.
Example: "distilbert-base-uncased-finetuned-sst-2-english"

config: Optional

The configuration to use.  This can be a string, a path to a directory, or a pre-loaded config object.
Example: {"output_attentions": True}

tokenizer: Optional

The tokenizer to use.  This can be a string, a path to a directory, or a pre-loaded tokenizer instance.
Example:  "bert-base-uncased"

feature_extractor: Optional

The feature extractor to use for tasks that require it (e.g., image processing)
Example:  "facebook/detectron2"

framework: Optional

The framework to use, either "pt" for PyTorch or "tf" for TensorFlow.  If not specified, it will be inferred.
Example:  "pt"

revision: str, default 'main'

The specific model version to use (branch, tag, or commit hash).
Example:  "v1.0"

model_kwargs:  Dict[str, Any], default None

Additional keyword arguments passed to the model during initialization.
Example:  {"output_hidden_states": True}

kwargs:  Any

Additional keyword arguments passed to the pipeline components.


## Task types

The pipeline() function supports a wide range of NLP tasks.  Here are some of the common tasks:

1. Text Classification:  text-classification

Purpose:  Classify text into predefined categories.
Use Cases:  Sentiment analysis, spam detection, topic classification.

2. Text Generation:  text-generation

Purpose:  Generate coherent text based on a given prompt.
Use cases:  Creative writing, dialogue generation, story completion.

3. Question Answering:  question-answering

Purpose:  Answer questions based on a given context.
Use cases:  Building Q&A systems, information retrieval from documents.

4. Named Entity Recognition (NER): ner (or token-classification)

Purpose:  Identify and classify named entities (like people, organizations, locations) in text.
Use cases:  Extract structured information from unstructured text.

5. Summarization:  summarization

Purpose:  Summarize long pieces of text into shorter, coherent summaries.
Use cases: Document summarization, news summarization.

6. Translation:  translation_xx_to_yy (e.g., translation_en_to_fr)

Purpose:  Translate text from one language to another.
Use cases:  Language translation, multilingual applications.

7. Fill-Mask:  fill-mask

Purpose:  Predict masked words in a sentence (useful for masked language modeling).
Use cases:  Language modeling tasks, understanding model predictions.

8. Zero-Shot Classification:  zero-shot-classification

Purpose:  Classify text into categories without needing training data for those categories.
Use Cases:  Flexible and adaptable classification tasks.

9. Feature Extraction:  feature-extraction

Purpose:  Extract hidden state features from text.
Use Cases:  Downstream tasks requiring text representations, such as clustering, similarity, or further custom model training.

## Example 1:  Text classification using pipeline()

In this example, you will use the pipeline() function to perform text classification.  You will load a pretrained text classification model and use it to classify a sample text.

### Load the text classification model:

We initialize the pipeline for the text-classification task, specifying the model "distilbert-base-uncased-finetuned-sst-2-english".  This model is fine-tuned for sentiment analysis.

### Classify the sample text:

We use the classifier to classify a sample text: "Congratulations! You've won a free ticket to the Bahamas. Reply WIN to claim."  The classifier function returns the classification result, which is then printed.

In [15]:
# Build a text-classification pipeline backed by the DistilBERT sentiment checkpoint
classifier = pipeline(
    task="text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# Run the classifier on one sample sentence and show the raw result
sample_text = "Congratulations! You've won a free ticket to the Bahamas. Reply WIN to claim."
result = classifier(sample_text)
print(result)

Device set to use cpu


[{'label': 'POSITIVE', 'score': 0.9997586607933044}]


### Output

The output will be a list of dictionaries, where each dictionary contains:

label:  The predicted label (e.g., "POSITIVE" or "NEGATIVE").
score:  The confidence score for the prediction.

## Example 2: Language detection using pipeline()

In this example, you will use the pipeline() function to perform language detection.  You will load a pretrained language detection model and use it to identify the language of a sample text.

### Load the language detection model:

We initialize the pipeline for the text-classification task, specifying the model "papluca/xlm-roberta-base-language-detection".  This model is fine-tuned for language detection.

### Classify the sample text:

We use the classifier to detect the language of a sample text: "Bonjour, comment ça va?"  The classifier function returns the classification result, which is then printed.

In [16]:
# `pipeline` is already imported in the notebook's import cell at the top, so it is
# not imported a second time here (all imports live in one place).

# Build a text-classification pipeline backed by a language-detection model.
# Note: this rebinds `classifier`, replacing the sentiment classifier created earlier.
classifier = pipeline("text-classification", model="papluca/xlm-roberta-base-language-detection")

# Detect the language of a sample sentence and show the raw result
result = classifier("Bonjour, comment ça va?")
print(result)

config.json:   0%|          | 0.00/1.42k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Device set to use cpu


[{'label': 'fr', 'score': 0.9934879541397095}]


## Example 3: Text generation using pipeline()

In this example, you will use the pipeline() function to perform text generation.  You will load a pretrained text generation model and use it to generate text based on a given prompt.

### Initialize the text generation model:

We initialize the pipeline for the text-generation task, specifying the model "gpt2".  GPT-2 is a well-known model for text generation tasks.

In [17]:
# Initialize the text generation pipeline with GPT-2.
# The pipeline loads the model and tokenizer for the given model name.
generator = pipeline("text-generation", model="gpt2")

Device set to use cpu


### Generate text based on a given prompt:

We use the generator to generate text based on a prompt: "Once upon a time".  Let's specify max_length=50, truncation=True to limit the generated text to 50 tokens and num_return_sequences=1 to generate one sequence.  The generator function returns the generated text, which is then printed.

In [18]:
# Fix the random seed so this cell produces the same text on every run.
# The continuation printed by this pipeline differs from the deterministic
# generate() result earlier in the notebook, i.e. the tokens are sampled, so
# without a seed the output changes each time the cell is executed.
torch.manual_seed(42)

# Generate text based on a given prompt
prompt = "Once upon a time"
result = generator(prompt, max_length=50, num_return_sequences=1, truncation=True)

# Print the generated text (the pipeline returns a list with one dict per sequence)
print(result[0]['generated_text'])

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Once upon a time when she was working so hard to get a job, she felt that her voice carried some weight. She wasn't ready for work, but didn't think she was quite ready. "Thank you," she says, and walks out


In [19]:
# Show the raw pipeline output: a list of dicts, each with a 'generated_text' key
print(result)

[{'generated_text': 'Once upon a time when she was working so hard to get a job, she felt that her voice carried some weight. She wasn\'t ready for work, but didn\'t think she was quite ready. "Thank you," she says, and walks out'}]


## Example 4: Text generation using T5 with pipeline()

In this example, you will use the pipeline() function to perform text-to-text generation with the T5 model.  You will load a pretrained T5 model and use it to translate a sentence from English to French based on a given prompt.

### Initialize the text generation model:

We initialize the pipeline for the "text2text-generation" task, specifying the model "t5-small".  T5 is a versatile model that can perform various text-to-text generation tasks, including translation.

In [20]:
# Initialize the text-to-text generation pipeline with T5.
# Note: this rebinds `generator`, replacing the GPT-2 pipeline created earlier.
generator = pipeline("text2text-generation", model="t5-small")

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

Device set to use cpu


### Generate text based on a given prompt:

We use the generator to translate a sentence from English to French based on the prompt: "translate English to French: How are you?".  Let's specify max_length=50 to limit the generated text to 50 tokens and num_return_sequences=1 to generate one sequence.  The generator function returns the translated text, which is then printed.

In [23]:
# Generate text based on a given prompt.
# The prompt starts with a task prefix that tells T5 what to do, so the prefix
# has to be spelled correctly: "translate English to French: ".
prompt = "translate English to French: How are you?"
result = generator(prompt, max_length=50, num_return_sequences=1)

# Print the generated text (the pipeline returns a list with one dict per sequence)
print(result[0]['generated_text'])

Comment êtes-vous?


# Exercise:  Fill-mask task using BERT with pipeline()

In this exercise, you will use the pipeline() function to perform a fill-mask task using the BERT model.  You will load a pretrained BERT model and use it to predict the masked word in a given sentence.

### Instructions

1. Initialize the fill-mask pipeline with BERT model.
2. Create a prompt with a masked token.
3. Generate text by filling in the masked token.
4. Print the generated text with the predictions.


In [26]:
# Initialize the fill-mask pipeline with BERT.
# Note: this rebinds `generator`, replacing the T5 pipeline created earlier.
generator = pipeline("fill-mask", model="bert-base-uncased")

# Create a prompt with a masked token, then let the model fill it in
prompt = "The capital of France is [MASK]."
result = generator(prompt)

# Print the predictions: a list of candidate fills, each with its score,
# token id, token string, and the completed sentence
print(result)

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Device set to use cpu


[{'score': 0.4167894423007965, 'token': 3000, 'token_str': 'paris', 'sequence': 'the capital of france is paris.'}, {'score': 0.07141634821891785, 'token': 22479, 'token_str': 'lille', 'sequence': 'the capital of france is lille.'}, {'score': 0.06339266151189804, 'token': 10241, 'token_str': 'lyon', 'sequence': 'the capital of france is lyon.'}, {'score': 0.04444744810461998, 'token': 16766, 'token_str': 'marseille', 'sequence': 'the capital of france is marseille.'}, {'score': 0.030297260731458664, 'token': 7562, 'token_str': 'tours', 'sequence': 'the capital of france is tours.'}]
