In [None]:
!pip install transformers

In [3]:
from transformers import pipeline

In [None]:
#-------------------------------------------------------#
#                NLP TASKS                              #
#-------------------------------------------------------#

'''
1. Text Classification: Assigning a category to a piece of text
Sentiment analysis
Spam detection
'''

classifier = pipeline("text-classification")

'''
2. Token classification: Assigning a label to each token in a sequence
Named entity recognition (NER)
Part-of-speech tagging
'''

token_classifier = pipeline("token-classification")

'''
3. Question answering: Answering a question based on a context
'''

question_answerer = pipeline("question-answering")


'''
4. Text generation: Generating new text based on a given prompt
Language modeling
Story generation
'''

generator = pipeline("text-generation")


'''
5. Summarization: Generating a concise summary of a longer text
'''

summarizer = pipeline("summarization")


'''
5. Translation: Converting text from one language to another
'''

translator = pipeline("translation",model="Helsinki-NLP/opus-mt-en-fr")


'''
6. Text2Text generation: Generating new text based on a given prompt, General purpose text summarization including
translation and summarization
'''

text2text_generator = pipeline("text2text-generation")


'''
7. Fill-mask: Filling in the blanks in a given text, predicting missing words in a sentence
'''

unmasker = pipeline("fill-mask")


'''
8. Feature extraction: Extracting hidden states or features from text.
'''

feature_extractor = pipeline("feature-extraction")


'''
9. Sentence similarity: Measuring the similarity between two sentences
'''

similarity = pipeline("sentence-similarity")



#-------------------------------------------------------#
#                COMPUTER VISION TASKS                  #
#-------------------------------------------------------#

'''
1. Image classification: Classifying the main content of an image
'''

image_classifier = pipeline("image-classification")


'''
2. Object detection: Identifying and localizing objects in an image
'''

object_detector = pipeline("object-detection")


'''
3. Image segmentation: Assigning a label to each pixel in an image
'''

image_segmenter = pipeline("image-segmentation")


'''
4. Image generation: Generating new images based on a given prompt (Using DALL-E or similar models)
'''

image_generator = pipeline("image-generation")


#-------------------------------------------------------#
#                SPEECH PROCESSING TASKS                #
#-------------------------------------------------------#

'''
1. Automatic speech recognition(ASR): Converting spoken language into text
'''
speech_recognizer = pipeline("automatic-speech-recognition")


'''
2. Text-to-speech(TTS): Converting text into spoken language
'''

text_to_speech = pipeline("text-to-speech")

'''
3. Speech translation: Converting speech from one language to another
'''

speech_translator = pipeline("speech-translation")


'''
4. Audio classification: Classifying the main content of an audio file
'''

audio_classifier = pipeline("audio-classification")


'''
5. Audio transcription: Converting spoken language into text
'''
audio_recognizer = pipeline("audio-transcription")


#-------------------------------------------------------#
#                MULTIMODAL TASKS                       #
#-------------------------------------------------------#

'''
1. Image captioning: Generating a descriptive caption for an image
'''

image_captioner = pipeline("image-to-text")


'''
2. Visual question answering (VQA) : Answering a question about an image
'''

visual_question_answerer = pipeline("visual-question-answering")


#-------------------------------------------------------#
#                OTHER TASKS                            #
#-------------------------------------------------------#

'''
1. Table question answering: Answering a question about a table
'''

table_question_answerer = pipeline("table-question-answering")


'''
2. Document question answering: Extracting answers from documents like PDF
'''

document_question_answerer = pipeline("document-question-answering")


'''
3. Time series forecasting: Predicting future values of a time series
'''

time_series_forecaster = pipeline("time-series-forecasting")


'''
4. Anomaly detection: Identifying unusual patterns in a dataset
'''

anomaly_detector = pipeline("anomaly-detection")
'''


# **NLP TASKS**

## **Sentiment analysis**

In [1]:
from transformers import pipeline

In [2]:
classifier = pipeline("sentiment-analysis")

result = classifier("I hate you")[0]
print(f"label: {result['label']}, with score: {round(result['score'], 4)}")

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

Device set to use cpu


label: NEGATIVE, with score: 0.9991


In [3]:
pipeline(task = 'sentiment-analysis')('I was very confused with the new batman movie')

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


[{'label': 'NEGATIVE', 'score': 0.9988514184951782}]

In [4]:
pipeline(task = 'sentiment-analysis', model = 'facebook/bart-large-mnli')\
                            ('I was very confused with the new batman movie')

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cpu


[{'label': 'neutral', 'score': 0.9624118804931641}]

## **Batch Sentiment Analysis**

In [6]:
classifier = pipeline('sentiment-analysis')

task_list = ['I really like autoencoders, best model for anomaly detection', \
             'I am not sure if we can actually evaluate LLMS.', \
             'PassiveAggressive is the name of linear regression model that so many people dont know.' , \
             'I hate long meetings.']

classifier(task_list)

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


[{'label': 'POSITIVE', 'score': 0.9984299540519714},
 {'label': 'NEGATIVE', 'score': 0.9995476603507996},
 {'label': 'NEGATIVE', 'score': 0.997748076915741},
 {'label': 'NEGATIVE', 'score': 0.9969879984855652}]

In [8]:
classifier = pipeline('sentiment-analysis', model='SamLowe/roberta-base-go_emotions')

task_list = ['I really like autoencoders, best model for anomaly detection', \
             'I am not sure if we can actually evaluate LLMS.', \
             'PassiveAggressive is the name of linear regression model that so many people dont know.' , \
             'I hate long meetings.']

classifier(task_list)

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/380 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

Device set to use cpu
  return forward_call(*args, **kwargs)


[{'label': 'admiration', 'score': 0.8294069170951843},
 {'label': 'confusion', 'score': 0.8987987041473389},
 {'label': 'neutral', 'score': 0.7055688500404358},
 {'label': 'anger', 'score': 0.772042453289032}]

## **Text Generation**

In [10]:
from transformers import pipeline

text_generator = pipeline('text-generation' , model = 'distilbert/distilgpt2')
generated_text = text_generator('Today is a rainy day in London',
                                truncation = True,
                                num_return_sequences=2)
print('Generated text:\n', generated_text[0]['generated_text'])

Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generated text:
 Today is a rainy day in London.


The day after the storm came to an end, the BBC has been reporting that the rain was expected to continue until late Wednesday morning.
The storm, which will be expected to be the heaviest ever recorded in London, was predicted to have hit about 40,000 homes, according to the BBC.
The BBC is reporting that the storm was expected to reach its peak on Thursday night, giving it a 10% chance of hitting the UK on Thursday.
The day after the storm came to an end, the BBC says that the BBC will now report on the storm.
The BBC's meteorologist John Goss, who has been in charge of the BBC, says that the storm had also been "a result of a very strong breeze" and that "the cold wind was mostly quite strong."
"The storm has a very strong wind around the back of the city and the storm has a very strong wind over the city, which will be very strong for the whole of London," he told BBC News.


## **Question Answering**

In [12]:
from transformers import pipeline

qa_model = pipeline('question-answering')
question = 'What is my job?'
context = 'I am developing AI models with Python'
answer = qa_model(question=question , context = context)
answer

No model was supplied, defaulted to distilbert/distilbert-base-cased-distilled-squad and revision 564e9b5 (https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


{'score': 0.8267208337783813,
 'start': 5,
 'end': 25,
 'answer': 'developing AI models'}