In [None]:
!pip install transformers[sentencepiece]

In [None]:
from transformers import pipeline, set_seed


The `transformers` Python library is developed and maintained by Hugging Face.

It provides:

- Pre-trained models for NLP, CV, and audio tasks (BERT, GPT, T5, BART, MarianMT, etc.)
- Easy pipelines for common tasks like text generation, summarization, translation, and classification
- Tools to fine-tune models on your own datasets
- Tokenizers to convert text into model-friendly input


**Text pipeline examples**

In [None]:
# Sentiment analysis: score a sentence as POSITIVE or NEGATIVE.
# The checkpoint is named explicitly (it is the one the library currently falls back to for
# this task) so results do not silently change if that default moves, and the
# "No model was supplied" warning goes away.
sentiment_class = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)
sentiment_class("I feel like i am on top of the world")

In [None]:
# Batch call: pass a list of sentences and get one prediction per sentence.
ambiguous_sentences = [
    "I am neither too happy nor too sad",
    "i have mixed feelings about not getting the job",
    "Roses are red, violets are blue",
]
sentiment_class(ambiguous_sentences)

In [None]:
# Text generation: you provide a prompt and the model auto-completes it.
# gpt2-medium is a base model (not fine-tuned to follow instructions), so it simply
# continues the text and the responses can look off-topic.
#
# Generation arguments used below:
#   do_sample            -> sample from the predicted distribution instead of greedy decoding.
#                           Set explicitly: temperature only has an effect when sampling, and
#                           num_return_sequences > 1 needs sampling (or beam search).
#   temperature          -> lower values make sampling more conservative, higher values more random
#   num_return_sequences -> how many output sequences to return for the same input prompt
#   max_length           -> counts tokens (not characters or words): prompt + generated text
#   max_new_tokens       -> counts only the newly generated tokens, ignoring the prompt length
set_seed(42)  # sampling is random; a fixed seed makes a fresh Run All reproduce the same text
text_gen = pipeline("text-generation", model="gpt2-medium")

result = text_gen(
    "give me some inspiration",
    do_sample=True,
    temperature=0.7,
    max_new_tokens=100,
)
print(result[0]['generated_text'])

result2 = text_gen(
    "My world is beautiful because",
    do_sample=True,
    temperature=0.5,
    num_return_sequences=3,
    max_length=100,
)
print("2nd output")
# Print each returned sequence on its own instead of dumping the raw list of dicts.
for index, candidate in enumerate(result2, start=1):
    print(f"[{index}] {candidate['generated_text']}")

In [None]:
# Text classification: assign a text to one of the categories the model was trained on.
# The checkpoint is named explicitly (the library's current fallback for this task, a
# sentiment model) so the predefined label set is visible and stable across versions.
text_classify = pipeline(
    "text-classification",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)
text_classify("If unicorns existed in this world, would the world be a better place?")

In [None]:
# Summarization: create a shorter version of a text while preserving key information.
# min_length / max_length bound the size of the summary, measured in tokens.
summarize = pipeline("summarization", model="facebook/bart-large-cnn")
# Triple quotes let you write a string that spans multiple lines without using \n.
text = """Mr and Mrs. Dursley, of number four, Privet Drive, were
proud to say that they were perfectly normal, thank
you very much. They were the last people you’d expect to be involved in anything strange or mysterious, because they just didn’t
hold with such nonsense.
Mr. Dursley was the director of a firm called Grunnings, which
made drills. He was a big, beefy man with hardly any neck, although he did have a very large mustache. Mrs. Dursley was thin
and blonde and had nearly twice the usual amount of neck, which
came in very useful as she spent so much of her time craning over
garden fences, spying on the neighbors. The Dursleys had a small
son called Dudley and in their opinion there was no finer boy anywhere.
The Dursleys had everything they wanted, but they also had a
secret, and their greatest fear was that somebody would discover it.
They didn’t think they could bear it if anyone found out about the
Potters. Mrs. Potter was Mrs. Dursley’s sister, but they hadn’t met
for several years; in fact, Mrs. Dursley pretended she didn’t have a
sister, because her sister and her good-for-nothing husband were as
unDursleyish as it was possible to be. The Dursleys shuddered to
think what the neighbors would say if the Potters arrived in the
street. The Dursleys knew that the Potters had a small son, too, but
they had never even seen him. This boy was another good reason
for keeping the Potters away; they didn’t want Dudley mixing with
a child like that. """

# do_sample=True makes decoding random; seed it so re-running the cell gives the same summaries.
set_seed(42)

summary1 = summarize(text, do_sample=True, max_length=100)  # do_sample=True -> allows randomness
print(summary1)  # raw pipeline output: a list holding one dict with a 'summary_text' key

# min_length is kept below max_length so the model can stop at a natural sentence end;
# setting both to the same value forces a fixed-length summary that is cut off mid-sentence.
# Note: temperature (not set here) has little impact when the model produces a short, factual,
# single-best summary. It matters more in creative generation tasks (storytelling, text
# generation, dialog).
summary2 = summarize(text, do_sample=True, min_length=30, max_length=50)
print(summary2[0]['summary_text'])


In [None]:
# Translation: convert text from one language to another (here English -> Hindi).
translate = pipeline(
    'translation',
    model='Helsinki-NLP/opus-mt-en-hi',  # checkpoint for English-to-Hindi translation
)

lyric_en = "I tried so hard and got so far, but in the end, it doesn't even matter"
trans_ans = translate(lyric_en)
print(trans_ans[0]['translation_text'])

statement_en = "AI is transforming the world and making life easier for humans."
trans_ans2 = translate(statement_en)
print(trans_ans2[0]['translation_text'])

In [None]:
# Zero-shot classification: classify text without prior training on specific labels.
# You choose the labels at call time, so you do not have to rely on the labels of the
# pretrained model.
# The checkpoint is named explicitly (the library's current fallback for this task) for
# reproducibility.
zero_shot = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# The labels are fed to the model as text, so spelling matters; define the list once and reuse it.
candidate_labels = ["education", "politics", "philosophy"]

zclassifier = zero_shot(
    "Don't know where the world is heading and where our future stands",
    candidate_labels=candidate_labels,
)
print(zclassifier)
zclassifier2 = zero_shot(
    "I would like to know about the world's best leaders",
    candidate_labels=candidate_labels,
)
print(zclassifier2)

In [None]:
# Feature extraction: extract vector representations (embeddings) of text.
# The checkpoint is named explicitly (the library's current fallback for this task) so the
# hidden size stated below stays correct.
feature_ex = pipeline("feature-extraction", model="distilbert/distilbert-base-cased")
features = feature_ex("All is well that ends well")

# The result is a nested list indexed as [batch][token][hidden]:
#   batch_size      -> number of input sentences
#   sequence_length -> number of tokens in the sentence
#   hidden_size     -> vector size (768 for BERT-base-sized models such as this one)
print(len(features), len(features[0]), len(features[0][0]))

# Show only the first few values of the first token's vector; printing the whole structure
# would flood the output with thousands of floats.
print(features[0][0][:5])

In [None]:
# Question answering: answer a question using information from a given context.
# The pipeline is extractive: it pulls the answer out of the provided context
# rather than generating new text.

# The checkpoint below comes from the Hugging Face Model Hub.
ques_ans = pipeline("question-answering", model="deepset/roberta-base-squad2")

qa_inputs = {
    "question": "How can a person try to succeed in life?",
    "context": "Hard work is necessary if you want to achieve something and consistency is as much important",
}
ques_ans(**qa_inputs)

In [None]:
# Mask filling: fill in the blanks (mask tokens) in a given text.
#
# top_k in the fill-mask pipeline is the number of highest-scoring candidates to return for
# each mask. Nothing is sampled here.
# For comparison, in text *generation* top_k and top_p are sampling filters:
#   top_k=N -> at each step keep only the N highest-probability tokens and sample from those
#   top_p=p -> keep the smallest set of tokens whose cumulative probability is >= p (e.g. 0.9)
#              and sample from that set
mask_f = pipeline("fill-mask", model="distilbert/distilroberta-base")

# The placeholder differs between model families ("<mask>" for RoBERTa-style models,
# "[MASK]" for BERT-style ones), so take it from the tokenizer instead of hard-coding it.
mask = mask_f.tokenizer.mask_token

m1 = mask_f(f"The capital of France is {mask}", top_k=2)
print(m1)
m2 = mask_f(f"My name is {mask} and I am a {mask} ", top_k=5)
print(m2)

In [None]:
?pipeline


**Image pipeline examples**

In [None]:
# Image classification: predict labels for an image (passed here as a URL).
# The checkpoint is named explicitly (the library's current fallback for this task) for
# reproducibility.
image_class = pipeline("image-classification", model="google/vit-base-patch16-224")
image_class("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg")

In [None]:
# NSFW ("not safe for work") detector: an image classifier with two labels, normal and nsfw.
image_class2 = pipeline("image-classification", model="Falconsai/nsfw_image_detection")

# Both inputs are permanent Hub file URLs. Signed dataset-viewer links carry an Expires
# timestamp and stop working once it passes, which breaks a fresh Run All.
image_ans1 = image_class2("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png")
image_ans2 = image_class2("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg")
print(image_ans1)
print(image_ans2)

In [None]:
# Image-to-text: generate a caption for an image.
image_text = pipeline(
    "image-to-text",
    model="Salesforce/blip-image-captioning-base",
)
cat_photo_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"
image_text(cat_photo_url)

In [None]:
# Caption a second image. A stable public URL is used because signed dataset-viewer links
# expire (they embed an Expires timestamp) and then fail to download.
image_text("http://images.cocodataset.org/val2017/000000039769.jpg")

In [None]:
# Object detection: find objects in an image and return a label, score and bounding box for each.
# The checkpoint is named explicitly (the library's current fallback for this task) for
# reproducibility.
object_detect = pipeline("object-detection", model="facebook/detr-resnet-50")
object_detect("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg")

In [None]:
# Detect objects in a second image. A permanent Hub file URL is used because signed
# dataset-viewer links expire (they embed an Expires timestamp) and then fail to download.
object_detect("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png")


**Audio Pipeline examples**

In [None]:
# Automatic speech recognition: convert speech to text.
# The checkpoint is named explicitly (the library's current fallback for this task) for
# reproducibility.
speech_rec = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
# Permanent Hub file URL; signed dataset-viewer links expire and then fail to download.
speech_rec("https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac")

In [None]:
# Audio classification: classify an audio clip into categories.
# The checkpoint is named explicitly (the library's current fallback for this task, a
# keyword-spotting model) for reproducibility.
audio_class = pipeline("audio-classification", model="superb/wav2vec2-base-superb-ks")
# Permanent Hub file URL; signed dataset-viewer links expire and then fail to download.
audio_class("https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/1.flac")

In [None]:
# Text-to-speech: convert text to spoken audio.
# The checkpoint is named explicitly (the library's current fallback for this task) for
# reproducibility.
text_speech = pipeline("text-to-speech", model="suno/bark-small")
# The result holds the raw waveform together with its sampling rate.
text_speech("I am having a wonderful day here")