# Humming search
# Author: Alex Salgado
## Obs: Code from blog 'Searching by Music: Leveraging Vector Search for Music Information Retrieval'

In [1]:
!git clone https://github.com/salgado/music-search.git

Cloning into 'music-search'...
remote: Enumerating objects: 28, done.[K
remote: Counting objects: 100% (28/28), done.[K
remote: Compressing objects: 100% (28/28), done.[K
remote: Total 28 (delta 2), reused 21 (delta 0), pack-reused 0[K
Receiving objects: 100% (28/28), 8.37 MiB | 3.26 MiB/s, done.
Resolving deltas: 100% (2/2), done.


In [24]:
!pip install elasticsearch



In [25]:
#index data in elasticsearch
from elasticsearch import Elasticsearch
import getpass

es_cloud_id = getpass.getpass('Enter Elastic Cloud ID:  ')
es_user = getpass.getpass('Enter cluster username:  ')
es_pass = getpass.getpass('Enter cluster password:  ')
es = Elasticsearch(cloud_id=es_cloud_id,
                   basic_auth=(es_user, es_pass)
                   )
es.info() # should return cluster info

ObjectApiResponse({'name': 'instance-0000000000', 'cluster_name': 'df2380a9e6b0425f9d4bc01639e59cf5', 'cluster_uuid': 'FvCTlZHYQqasErU8cbn4_A', 'version': {'number': '8.8.1', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': 'f8edfccba429b6477927a7c1ce1bc6729521305e', 'build_date': '2023-06-05T21:32:25.188464208Z', 'build_snapshot': False, 'lucene_version': '9.6.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

## create index

In [2]:
index_name = "my-audio-index"


In [36]:
from elasticsearch import Elasticsearch


# Specify index configuration
index_config = {
  "mappings": {
    "properties": {
      "audio-embedding": {
        "type": "dense_vector",
        "dims": 2048,
        "index": True,
        "similarity": "cosine"
      },
      "path": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "timestamp": {
        "type": "date"
      },
      "title": {
        "type": "text"
      },
      "genre": {
        "type": "text"
      }
    }
  }
}

# Create index
if not es.indices.exists(index=index_name):
    index_creation = es.indices.create(index=index_name, ignore=400, body=index_config)
    print("index created: ", index_creation)
else:
    print("Index  already exists.")


index created:  {'acknowledged': True, 'shards_acknowledged': True, 'index': 'my-audio-index'}


  index_creation = es.indices.create(index=index_name, ignore=400, body=index_config)
  index_creation = es.indices.create(index=index_name, ignore=400, body=index_config)


In [37]:
import os

def list_audio_files(directory):
    # The list to store the names of .wav files
    audio_files = []

    # Check if the path exists
    if os.path.exists(directory):
        # Walk the directory
        for root, dirs, files in os.walk(directory):
            for file in files:
                # Check if the file is a .wav file
                if file.endswith('.wav'):
                    # Extract the filename from the path
                    filename = os.path.splitext(file)[0]
                    print(filename)

                    # Add the file to the list
                    audio_files.append(file)
    else:
        print(f"The directory '{directory}' does not exist.")

    # Return the list of .mp3 files
    return audio_files

# Use the function
audio_path = "/content/music-search/dataset/"
audio_files = list_audio_files(audio_path)




bella_ciao_humming
bella_ciao_a-cappella-chorus
bella_ciao_electronic-synth-lead
bella_ciao_guitar-solo
bella_ciao_jazz-with-saxophone
bella_ciao_opera-singer
bella_ciao_piano-solo
bella_ciao_string-quartet
bella_ciao_tribal-drums-and-flute
mozart_symphony25_prompt
a-cappella-chorus
mozart_symphony25_electronic-synth-lead
mozart_symphony25_guitar-solo
mozart_symphony25_jazz-with-saxophone
mozart_symphony25_opera-singer
mozart_symphony25_piano-solo
mozart_symphony25_string-quartet
mozart_symphony25_tribal-drums-and-flute


In [38]:
!pip install -qU panns-inference librosa

In [39]:
from panns_inference import AudioTagging

# load the default model into the gpu.
model = AudioTagging(checkpoint_path=None, device='cuda') # change device to cpu if a gpu is not available

Checkpoint path: /root/panns_data/Cnn14_mAP=0.431.pth
Using CPU.


In [40]:
import numpy as np
import librosa

# Function to normalize a vector. Normalizing a vector means adjusting the values measured in different scales to a common scale.
def normalize(v):
   # np.linalg.norm computes the vector's norm (magnitude). The norm is the total length of all vectors in a space.
   norm = np.linalg.norm(v)
   if norm == 0:
        return v

   # Return the normalized vector.
   return v / norm

# Function to get an embedding of an audio file. An embedding is a reduced-dimensionality representation of the file.
def get_embedding (audio_file):

  # Load the audio file using librosa's load function, which returns an audio time series and its corresponding sample rate.
  a, _ = librosa.load(audio_file, sr=44100)

  # Reshape the audio time series to have an extra dimension, which is required by the model's inference function.
  query_audio = a[None, :]

  # Perform inference on the reshaped audio using the model. This returns an embedding of the audio.
  _, emb = model.inference(query_audio)

  # Normalize the embedding. This scales the embedding to have a length (magnitude) of 1, while maintaining its direction.
  normalized_v = normalize(emb[0])

  # Return the normalized embedding required for dot_product elastic similarity dense vector
  return normalized_v

In [41]:
from datetime import datetime

#Storing Songs in Elasticsearch with Vector Embeddings:
def store_in_elasticsearch(song, embedding, path, index_name, genre, vec_field):
  body = {
      'audio-embedding' : embedding,
      'title': song,
      'timestamp': datetime.now(),
      'path' : path,
      'genre' : genre

  }

  es.index(index=index_name, document=body)
  print ("stored...",song, embedding, path, genre, index_name)



In [42]:

# Initialize a list genre for test
genre_lst = ['jazz', 'opera', 'piano','prompt', 'humming', 'string', 'capella', 'eletronic', 'guitar']

for filename in audio_files:
  audio_file = audio_path + filename

  emb = get_embedding(audio_file)

  song = filename.lower()

  # Compare if genre list exists inside the song
  genre = next((g for g in genre_lst if g in song), "generic")

  store_in_elasticsearch(song, emb, audio_file, index_name, genre, 2 )


stored... bella_ciao_humming.wav [0.         0.01029461 0.         ... 0.07024138 0.00545542 0.        ] /content/drive/MyDrive/@Blogs/MusicSearch/audios/bella_ciao_humming.wav humming my-audio-index
stored... bella_ciao_a-cappella-chorus.wav [0.         0.03191019 0.         ... 0.03001107 0.0001489  0.        ] /content/drive/MyDrive/@Blogs/MusicSearch/audios/bella_ciao_a-cappella-chorus.wav generic my-audio-index
stored... bella_ciao_electronic-synth-lead.wav [0.         0.         0.         ... 0.02660546 0.03412616 0.        ] /content/drive/MyDrive/@Blogs/MusicSearch/audios/bella_ciao_electronic-synth-lead.wav generic my-audio-index
stored... bella_ciao_guitar-solo.wav [0.         0.02219189 0.         ... 0.00023983 0.         0.        ] /content/drive/MyDrive/@Blogs/MusicSearch/audios/bella_ciao_guitar-solo.wav guitar my-audio-index
stored... bella_ciao_jazz-with-saxophone.wav [0.         0.00063777 0.         ... 0.00766881 0.00809751 0.        ] /content/drive/MyDrive/@Blog

In [43]:
# Define a function to query audio vector in Elasticsearch
def query_audio_vector(es, emb, field_key, index_name):
    # Initialize the query structure
    # It's a bool filter query that checks if the field exists
    query = {
        "bool": {
            "filter": [{
                "exists": {
                    "field": field_key
                }
            }]
        }
    }

    # KNN search parameters
    # field is the name of the field to perform the search on
    # k is the number of nearest neighbors to find
    # num_candidates is the number of candidates to consider (more means slower but potentially more accurate results)
    # query_vector is the vector to find nearest neighbors for
    # boost is the multiplier for scores (higher means this match is considered more important)
    knn = {
        "field": field_key,
        "k": 2,
        "num_candidates": 100,
        "query_vector": emb,
        "boost": 100
    }

    # The fields to retrieve from the matching documents
    fields = ["title", "path", "genre", "body_content", "url"]

    # The name of the index to search
    index = index_name

    # Perform the search
    # index is the name of the index to search
    # query is the query to use to find matching documents
    # knn is the parameters for KNN search
    # fields is the fields to retrieve from the matching documents
    # size is the maximum number of matches to return
    # source is whether to include the source document in the results
    resp = es.search(index=index,
                     query=query,
                     knn=knn,
                     fields=fields,
                     size=5,
                     source=False)

    # Return the search results
    return resp


In [44]:
# Import necessary modules for audio display from IPython
from IPython.display import Audio, display

# Provide the URL of the audio file
my_audio = "/content/music-search/dataset/bella_ciao_humming.wav"

# Display the audio file in the notebook
Audio(my_audio)


In [45]:
audio_file = "/content/music-search/dataset/bella_ciao_humming.wav"
# Generate the embedding vector from the provided audio file
# 'get_embedding' is a function that presumably converts the audio file into a numerical vector
emb = get_embedding(audio_file)

# Query the Elasticsearch instance 'es' with the embedding vector 'emb', field key 'audio-embedding',
# and index name 'my-audio-index'
# 'query_audio_vector' is a function that performs a search in Elasticsearch using a vector embedding.
# 'tolist()' method is used to convert numpy array to python list if 'emb' is a numpy array.
resp = query_audio_vector (es, emb.tolist(), "audio-embedding","my-audio-index")


In [46]:
resp['hits']


{'total': {'value': 18, 'relation': 'eq'},
 'max_score': 100.0,
 'hits': [{'_index': 'my-audio-index',
   '_id': 'tt44nokBwzxpWbqUfVwN',
   '_score': 100.0,
   'fields': {'path': ['/content/drive/MyDrive/@Blogs/MusicSearch/audios/bella_ciao_humming.wav'],
    'genre': ['humming'],
    'title': ['bella_ciao_humming.wav']}},
  {'_index': 'my-audio-index',
   '_id': 'u944nokBwzxpWbqUj1zy',
   '_score': 86.1148,
   'fields': {'path': ['/content/drive/MyDrive/@Blogs/MusicSearch/audios/bella_ciao_opera-singer.wav'],
    'genre': ['opera'],
    'title': ['bella_ciao_opera-singer.wav']}},
  {'_index': 'my-audio-index',
   '_id': 'vt44nokBwzxpWbqUm1xK',
   '_score': 0.0,
   'fields': {'path': ['/content/drive/MyDrive/@Blogs/MusicSearch/audios/bella_ciao_tribal-drums-and-flute.wav'],
    'genre': ['generic'],
    'title': ['bella_ciao_tribal-drums-and-flute.wav']}},
  {'_index': 'my-audio-index',
   '_id': 'uN44nokBwzxpWbqUhFye',
   '_score': 0.0,
   'fields': {'path': ['/content/drive/MyDrive/@

In [47]:
NUM_MUSIC = 5  # example value

for i in range(NUM_MUSIC):
    path = resp['hits']['hits'][i]['fields']['path'][0]
    print(path)

/content/drive/MyDrive/@Blogs/MusicSearch/audios/bella_ciao_humming.wav
/content/drive/MyDrive/@Blogs/MusicSearch/audios/bella_ciao_opera-singer.wav
/content/drive/MyDrive/@Blogs/MusicSearch/audios/bella_ciao_tribal-drums-and-flute.wav
/content/drive/MyDrive/@Blogs/MusicSearch/audios/bella_ciao_electronic-synth-lead.wav
/content/drive/MyDrive/@Blogs/MusicSearch/audios/a-cappella-chorus.wav


In [49]:
Audio("/content/music-search/dataset/bella_ciao_opera-singer.wav")

In [50]:
Audio("/content/music-search/dataset/bella_ciao_tribal-drums-and-flute.wav")