In [1]:
# Hugging Face's core library: models, tokenizers and the pipeline API.
import transformers

# Only let error-level messages from the transformers logger through, so
# informational and warning chatter stays out of the cell outputs.
transformers.logging.set_verbosity(transformers.logging.ERROR)

  from .autonotebook import tqdm as notebook_tqdm


## 05.02. Reviewing the Pipeline

Use the pipeline registry to list the available pipeline tasks and to inspect the details of a specific task.

In [2]:
from transformers.pipelines import PIPELINE_REGISTRY

# Ask the registry for every task name the Hugging Face pipeline API accepts,
# then show the full list.
supported_tasks = PIPELINE_REGISTRY.get_supported_tasks()
print(supported_tasks)


['audio-classification', 'automatic-speech-recognition', 'conversational', 'depth-estimation', 'document-question-answering', 'feature-extraction', 'fill-mask', 'image-classification', 'image-segmentation', 'image-to-text', 'mask-generation', 'ner', 'object-detection', 'question-answering', 'sentiment-analysis', 'summarization', 'table-question-answering', 'text-classification', 'text-generation', 'text2text-generation', 'token-classification', 'translation', 'video-classification', 'visual-question-answering', 'vqa', 'zero-shot-audio-classification', 'zero-shot-classification', 'zero-shot-image-classification', 'zero-shot-object-detection']


In [3]:
# Look up the registry entry for one task. Element [1] of the check_task()
# result is the mapping that carries the task's 'default' model entry.
sentiment_task_spec = PIPELINE_REGISTRY.check_task('sentiment-analysis')[1]

print("\nDefault Model for Sentiment Analysis: ")
print(sentiment_task_spec.get('default'))


Default Model for Sentiment Analysis: 
{'model': {'pt': ('distilbert-base-uncased-finetuned-sst-2-english', 'af0f99b'), 'tf': ('distilbert-base-uncased-finetuned-sst-2-english', 'af0f99b')}}


## 05.03. Loading a Pipeline

In [4]:
from transformers import pipeline
import os

# Load a pipeline. This downloads the model checkpoint from Hugging Face and
# caches it locally on disk. If the model is already in the cache, the cached
# copy is used instead. The first download can take a long time, depending on
# network bandwidth.
sentiment_classifier = pipeline("sentiment-analysis")

# Work out where the Hub cache lives instead of hard-coding it. The default is
# <user-home>/.cache/huggingface/hub, but the documented Hugging Face
# environment variables can relocate it:
#   HF_HUB_CACHE (or the older HUGGINGFACE_HUB_CACHE) -> the hub cache itself
#   HF_HOME                                           -> parent of the "hub" folder
#   XDG_CACHE_HOME                                    -> parent of "huggingface"
# os.path.join keeps the separators consistent on every platform (plain string
# concatenation produced mixed "\" and "/" on Windows).
hf_home = os.path.expanduser(
    os.getenv(
        "HF_HOME",
        os.path.join(
            os.getenv("XDG_CACHE_HOME", os.path.join(os.path.expanduser("~"), ".cache")),
            "huggingface",
        ),
    )
)
cache_dir = os.path.expanduser(
    os.getenv("HF_HUB_CACHE")
    or os.getenv("HUGGINGFACE_HUB_CACHE")
    or os.path.join(hf_home, "hub")
)
print("Huggingface Cache directory is : ", cache_dir)

# Contents of the cache directory (shown as the cell's output)
os.listdir(cache_dir)

Downloading (…)lve/main/config.json: 100%|█████████████████████████████████████████████| 629/629 [00:00<00:00, 315kB/s]
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Downloading model.safetensors: 100%|████████████████████████████████████████████████| 268M/268M [01:14<00:00, 3.57MB/s]
Downloading (…)okenizer_config.json: 100%|██████████████████████████████████████████| 48.0/48.0 [00:00<00:00, 7.99kB/s]
Downloading (…)solve/main/vocab.txt: 100%|███████████████████████████████████████████| 232k/232k [00:02<00:00, 103kB/s]


Huggingface Cache directory is :  C:\Users\hp/.cache/huggingface/hub


['models--distilbert-base-uncased-finetuned-sst-2-english', 'version.txt']

## 05.04. Predicting Sentiment with Default Model

In [5]:
# Run the classifier on one sentence and show the predicted label and score.
sentiment_results = sentiment_classifier("This is a great course")
print(sentiment_results)


[{'label': 'POSITIVE', 'score': 0.9998713731765747}]


In [6]:
# Second example: classify a different sentence with the same pipeline.
sentiment_results = sentiment_classifier("The download speed is really bad")
print(sentiment_results)

[{'label': 'NEGATIVE', 'score': 0.9998070597648621}]


## 05.05. Using a Custom Model

In [None]:
# Rebind sentiment_classifier to a pipeline backed by an explicitly named
# checkpoint instead of the task's default model.
sentiment_classifier = pipeline(
    task="sentiment-analysis",
    model="finiteautomata/bertweet-base-sentiment-analysis",
)

# Classify one sentence with the custom model and show the raw result.
sentiment_result = sentiment_classifier("This is a great course")
print(sentiment_result)

# Contents of the cache directory (shown as the cell's output)
os.listdir(cache_dir)

Downloading (…)lve/main/config.json: 100%|█████████████████████████████████████████████| 949/949 [00:00<00:00, 318kB/s]
Downloading pytorch_model.bin:  16%|███████▎                                       | 83.9M/540M [00:55<05:01, 1.51MB/s]
Downloading tf_model.h5:   0%|                                                              | 0.00/540M [00:00<?, ?B/s]