In [1]:
# Import the Hugging Face pipeline helper, PyTorch, and the standard-library garbage collector
from transformers import pipeline  # For using Hugging Face models easily
import torch  # To manage the model data types and GPU handling
import gc  # To manually free up memory


In [3]:
# Load the NLLB translation model from the Hugging Face Model Hub.
# Check CUDA availability once so that precision and placement always agree:
# bfloat16 on the first GPU when one is usable, float32 on the CPU otherwise.
use_gpu = torch.cuda.is_available()
translator = pipeline(
    task="translation",
    model="facebook/nllb-200-distilled-600M",
    torch_dtype=torch.bfloat16 if use_gpu else torch.float32,
    # Pin the device explicitly (0 = first CUDA device, -1 = CPU) instead of
    # leaving placement to the library default, which would otherwise be the
    # only thing deciding whether the bfloat16 weights actually land on the GPU.
    device=0 if use_gpu else -1,
)

In [5]:
# Sample English passage to translate. Adjacent literals are concatenated, so
# the only line breaks in the string are the explicit "\n" characters.
text = (
    "My puppy is adorable, "
    "Your kitten is cute.\n"
    "Her panda is friendly.\n"
    "His llama is thoughtful. "
    "We all have nice pets!"
)

# Translate English -> French; the source and target languages are selected
# through the src_lang / tgt_lang codes passed to the pipeline call.
text_translated = translator(
    text,
    src_lang="eng_Latn",
    tgt_lang="fra_Latn",
)

# Show the raw pipeline result
print("Translated text:", text_translated)


Translated text: [{'translation_text': 'Mon chiot est adorable, ton chaton est mignon, son panda est ami, son lama est attentionné, nous avons tous de beaux animaux de compagnie.'}]


In [7]:
# Release the translation pipeline so its memory can be reused by the next model.
# Dropping the name removes this notebook's reference to the pipeline object.
del translator

# Collect garbage BEFORE touching the CUDA cache: objects that are only freed
# by the collector must be gone first, otherwise the memory they return is
# not released by the empty_cache() call below.
unreachable_found = gc.collect()

# Now hand PyTorch's unused cached GPU memory back (skipped on CPU-only machines).
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# Keep the collector's count as the cell result, as before.
unreachable_found


35

In [9]:
# Load the BART summarization model from the Hugging Face Model Hub.
# Check CUDA availability once so that precision and placement always agree:
# bfloat16 on the first GPU when one is usable, float32 on the CPU otherwise.
use_gpu = torch.cuda.is_available()
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
    torch_dtype=torch.bfloat16 if use_gpu else torch.float32,
    # Pin the device explicitly (0 = first CUDA device, -1 = CPU) instead of
    # leaving placement to the library default, which would otherwise be the
    # only thing deciding whether the bfloat16 weights actually land on the GPU.
    device=0 if use_gpu else -1,
)


config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [11]:
# Release the summarization pipeline so its memory can be reclaimed.
# Dropping the name removes this notebook's reference to the pipeline object.
del summarizer

# Collect garbage BEFORE touching the CUDA cache: objects that are only freed
# by the collector must be gone first, otherwise the memory they return is
# not released by the empty_cache() call below.
unreachable_found = gc.collect()

# Now hand PyTorch's unused cached GPU memory back (skipped on CPU-only machines).
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# Keep the collector's count as the cell result, as before.
unreachable_found


17