In [1]:
%pip install -qU pinecone-client panns-inference datasets librosa python-dotenv

  DEPRECATION: audioread is being installed using the legacy 'setup.py install' method, because it does not have a 'pyproject.toml' and the 'wheel' package is not installed. pip 23.1 will enforce this behaviour change. A possible replacement is to enable the '--use-pep517' option. Discussion can be found at https://github.com/pypa/pip/issues/8559

[notice] A new release of pip available: 22.3.1 -> 23.2.1
[notice] To update, run: pip install --upgrade pip
Note: you may need to restart the kernel to use updated packages.


In [2]:
from datasets import load_dataset

# pull the "train" split of the ashraq/esc50 dataset from the Hugging Face Hub
dataset_id = "ashraq/esc50"
data = load_dataset(dataset_id, split="train")

# leave the dataset as the last expression so its summary is displayed
data

  from .autonotebook import tqdm as notebook_tqdm
Downloading readme: 100%|██████████| 345/345 [00:00<00:00, 1.05MB/s]
Repo card metadata block was not found. Setting CardData to empty.
Downloading metadata: 100%|██████████| 1.61k/1.61k [00:00<00:00, 5.42MB/s]
Downloading data: 100%|██████████| 387M/387M [01:22<00:00, 4.68MB/s]
Downloading data: 100%|██████████| 387M/387M [01:25<00:00, 4.55MB/s]
Downloading data files: 100%|██████████| 1/1 [02:47<00:00, 167.59s/it]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 235.32it/s]
Generating train split: 100%|██████████| 2000/2000 [00:05<00:00, 377.42 examples/s]


Dataset({
    features: ['filename', 'fold', 'target', 'category', 'esc10', 'src_file', 'take', 'audio'],
    num_rows: 2000
})

In [None]:
# display the first three entries of the audio feature; slicing the rows
# before selecting the column means only those three clips have to be
# decoded, instead of decoding the whole "audio" column just for a preview
data[:3]["audio"]

In [None]:
import numpy as np

# keep just the raw waveform ("array") of every clip and pack them into a
# single numpy array
# NOTE(review): the clips are played back at 44100 Hz further down; confirm
# whether the embedding model expects a different sampling rate (resampling)
audios = np.asarray([clip["array"] for clip in data["audio"]])

In [None]:
from panns_inference import AudioTagging

# build the audio-tagging model with the library's default checkpoint
# (checkpoint_path=None) and place it on the gpu
model = AudioTagging(
    checkpoint_path=None,
    device='cuda',  # change device to cpu if a gpu is not available
)

In [None]:
import pinecone
import os
from dotenv import load_dotenv

# read variables from a local .env file (if present) into the environment
load_dotenv()

# initialise the pinecone client; the credentials come from the environment
# so they never have to be written into the notebook
pinecone.init(
    environment=os.environ.get("PINECONE_ENVIRONMENT"),
    api_key=os.environ.get("PINECONE_API_KEY"),
)

In [None]:
index_name = "audio-search-demo"

# only create the index when it is not already listed for this account
index_exists = index_name in pinecone.list_indexes()
if not index_exists:
    # dimension presumably matches the width of the model's embeddings -- TODO confirm
    pinecone.create_index(index_name, dimension=2048, metric="cosine")

# open a handle to the (new or pre-existing) index
index = pinecone.Index(index_name)

In [None]:
from tqdm.auto import tqdm

# number of audio clips embedded and upserted per request
batch_size = 64

total = len(audios)
for start in tqdm(range(0, total, batch_size)):
    # the final batch may be shorter than batch_size
    stop = min(start + batch_size, total)
    # embed every clip of the current batch (first return value is unused)
    _, embeddings = model.inference(audios[start:stop])
    # pair each embedding with its dataset position, used as a string ID
    records = [
        (str(position), vector)
        for position, vector in zip(range(start, stop), embeddings.tolist())
    ]
    # upsert/insert the (id, vector) records into pinecone
    index.upsert(vectors=records)

# show the index statistics so the stored vector count can be checked
index.describe_index_stats()

In [None]:
from IPython.display import Audio, display

# we set an audio number to select from the dataset
audio_num = 400
# fetch the row once so the clip is only decoded a single time
query_row = data[audio_num]
# get the audio data of the audio number
query_audio = query_row["audio"]["array"]
# get the category of the audio number
category = query_row["category"]
# print the category and play the audio at the clip's own sampling rate
# (read from the record rather than hard-coding 44100)
print("Query Audio:", category)
Audio(query_audio, rate=query_row["audio"]["sampling_rate"])