In [1]:
!pip install transformers datasets
!pip install librosa



In [2]:
import torch
from transformers import pipeline
from datasets import load_dataset, Audio
from transformers import AutoTokenizer, AutoModelForSequenceClassification

  from .autonotebook import tqdm as notebook_tqdm
2023-11-22 16:59:35.808212: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Text classification (sentiment analysis)

In [3]:
# pipeline() is the easiest and fastest way to use a pretrained model for inference,
# and it works out of the box for many tasks across different modalities.
# The checkpoint and revision are named explicitly (the same ones pipeline() falls back
# to for "sentiment-analysis" when no model is given), so the results stay reproducible
# even if that default changes.
classifier = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    revision="af0f99b",
)
results = classifier(["We are very happy to show you the 🤗 Transformers library.", "We hope you don't hate it."])
# Each result is a dict with a predicted 'label' and its 'score'.
for result in results:
    print(f"label: {result['label']}, with score: {round(result['score'], 4)}")

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


label: POSITIVE, with score: 0.9998
label: NEGATIVE, with score: 0.5309


# Speech recognition

In [4]:
# Build a speech-recognition pipeline and load the en-US training split of MInDS-14.
speech_recognizer = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
dataset = load_dataset("PolyAI/minds14", name="en-US", split="train")

# The sampling rate of the dataset has to match the sampling rate that
# facebook/wav2vec2-base-960h was trained on, so cast the "audio" column
# to the rate reported by the pipeline's feature extractor.
target_rate = speech_recognizer.feature_extractor.sampling_rate
dataset = dataset.cast_column("audio", Audio(sampling_rate=target_rate))

# Audio files are automatically loaded and resampled when the "audio" column is accessed.
# Take the raw waveform arrays of the first 4 samples and hand them to the pipeline as a list.
# NB: for larger datasets where the inputs are big (like in speech or vision), pass a
# generator instead of a list, so that all the inputs are not loaded in memory at once.
first_clips = dataset[:4]["audio"]
result = speech_recognizer(first_clips)
transcripts = [entry["text"] for entry in result]
print(transcripts)

Some weights of the model checkpoint at facebook/wav2vec2-base-960h were not used when initializing Wav2Vec2ForCTC: ['wav2vec2.encoder.pos_conv_embed.conv.weight_g', 'wav2vec2.encoder.pos_conv_embed.conv.weight_v']
- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'wav2vec2.masked_spec_embed', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1']
You sho

Ignored unknown kwarg option normalize
Ignored unknown kwarg option normalize
Ignored unknown kwarg option normalize
Ignored unknown kwarg option normalize
['I WOULD LIKE TO SET UP A JOINT ACCOUNT WITH MY PARTNER HOW DO I PROCEED WITH DOING THAT', "FONDERING HOW I'D SET UP A JOIN TO HELL T WITH MY WIFE AND WHERE THE AP MIGHT BE", "I I'D LIKE TOY SET UP A JOINT ACCOUNT WITH MY PARTNER I'M NOT SEEING THE OPTION TO DO IT ON THE APSO I CALLED IN TO GET SOME HELP CAN I JUST DO IT OVER THE PHONE WITH YOU AND GIVE YOU THE INFORMATION OR SHOULD I DO IT IN THE AP AN I'M MISSING SOMETHING UQUETTE HAD PREFERRED TO JUST DO IT OVER THE PHONE OF POSSIBLE THINGS", 'HOW DO I FURN A JOINA COUT']


# Use another model from the Hub

In [5]:
# pipeline() can work with any model from the Hugging Face Hub, which makes it easy to
# adapt to other use-cases; the tags on the Hub help to filter for an appropriate model.
# Example: find a model capable of handling French text for sentiment analysis.
# The top filtered result is a multilingual BERT model finetuned for sentiment analysis.
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"

# Load the model and its matching tokenizer from the same checkpoint name.
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# NB: if you can't find a model for your use-case, you'll need to finetune a pretrained
# model on your data. Please consider sharing the model with the community on the Hub
# to democratise machine learning for everyone!
classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
french_text = "Nous sommes très heureux de vous présenter la bibliothèque 🤗 Transformers."
classifier(french_text)

config.json: 100%|█████████████████████████████| 953/953 [00:00<00:00, 1.86MB/s]
pytorch_model.bin: 100%|█████████████████████| 669M/669M [00:14<00:00, 47.4MB/s]
tokenizer_config.json: 100%|██████████████████| 39.0/39.0 [00:00<00:00, 382kB/s]
vocab.txt: 100%|█████████████████████████████| 872k/872k [00:00<00:00, 12.1MB/s]
special_tokens_map.json: 100%|██████████████████| 112/112 [00:00<00:00, 159kB/s]


[{'label': '5 stars', 'score': 0.7272651791572571}]