# Audio Classification

# Libraries

Balancing the versions of torch, torchaudio, and transformers can be tricky! Here are the versions used for this notebook:

## Library and Versions

In [None]:
import torch
import transformers
import torchaudio

# Report the installed version of each library, one per line, after the banner.
print("These are the versions used for this notebook, but watch the lecture for an important note on this")
print(
    torch.__version__,
    torchaudio.__version__,
    transformers.__version__,
    sep="\n",
)


In [None]:
from transformers import AutoFeatureExtractor, ASTForAudioClassification

In [None]:
%%time
feature_extractor = AutoFeatureExtractor.from_pretrained("MIT/ast-finetuned-audioset-10-10-0.4593")

In [None]:
import librosa
audio_path = 'example.mp3'
# Load the clip: `y` is the waveform and `sr` the sampling rate reported by librosa.
# sr=None: per librosa's documentation this keeps the file's native sampling rate
# instead of resampling on load.
y, sr = librosa.load(audio_path, sr=None)

## Sampling Rate Issues

Recall that most ML models for audio are trained on audio sampled at 16 kHz. You will run into issues if you try to force your own sampling rate:

In [None]:
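# ERROR! Passing the file's native rate raises a ValueError when it differs from the rate the model expects: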
#result = feature_extractor(y,sampling_rate=sr)

In [None]:
# `y` was loaded at the file's native rate (`sr`), but the feature extractor
# assumes its input is already at the rate the model was trained on. Resample
# first, then pass the rate explicitly so any mismatch raises instead of
# silently producing features from audio at the wrong rate.
target_sr = feature_extractor.sampling_rate
y_resampled = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
result = feature_extractor(y_resampled, sampling_rate=target_sr, return_tensors="pt")

In [None]:
# Inspect what the feature extractor returned.
result

In [None]:
# Load the classification model from the same checkpoint string used for the feature extractor.
model = ASTForAudioClassification.from_pretrained(
    "MIT/ast-finetuned-audioset-10-10-0.4593"
)

In [None]:
# Inference only: disable autograd so the forward pass does not build a
# computation graph that is never used for a backward pass.
with torch.no_grad():
    prediction_logits = model(result['input_values']).logits

In [None]:
#prediction_logits

In [None]:
# Index of the highest logit along the last dimension, converted to a plain Python int.
predicted_class_ids = prediction_logits.argmax(dim=-1).item()

In [None]:
# Look up the label for the predicted class id in the model config's id2label mapping.
predicted_label = model.config.id2label[predicted_class_ids]

In [None]:
# Show the label looked up for the predicted class id.
predicted_label

In [None]:
#model.config.id2label

## Pipeline for Audio Classification

In [None]:
%%time
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("audio-classification", model="MIT/ast-finetuned-audioset-10-10-0.4593")

In [None]:
# Show the model object held by the pipeline.
pipe.model

In [None]:
# Run the pipeline directly on the audio file path.
pipe('example.mp3')

In [None]:
# Count the entries in the pipeline model's id-to-label mapping.
len(pipe.model.config.id2label)