<a href="https://colab.research.google.com/github/quantumhome/DataAnalysisCaseStudy/blob/master/Silver_Assignment_2_Compare_Audio_Files.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

***Assignment 2*** -
**Write a Python program that finds the audio files most similar to one you upload. Store a collection of audio files and compare the uploaded file against the stored ones,
using the torchaudio framework or any other appropriate framework.**

In [32]:
import torch
import librosa
import numpy as np
from transformers import AutoProcessor, ClapModel
from sklearn.metrics.pairwise import cosine_similarity
import os
import torchaudio
import torchaudio.transforms as T

In [33]:
# Environment setup: install torch and torchaudio from the PyTorch CPU wheel index,
# then soundfile from the default index.
# NOTE(review): this cell is numbered after the import cell, so on a fresh kernel the
# imports above run before these installs — consider moving this cell to the top.
# NOTE(review): soundfile is presumably needed as an audio-decoding backend for
# torchaudio; nothing in this notebook imports it directly — confirm.
!pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu
!pip install soundfile

Looking in indexes: https://download.pytorch.org/whl/cpu


In [34]:
class AudioSimilarityFinder:
    """Index audio files as CLAP embeddings and rank them against a query clip.

    Typical use: create an instance (loads the pretrained model), call
    ``add_to_library`` on a folder of audio files, then call ``find_matches``
    with the path of the clip to look up.

    Attributes:
        device: "cuda" when a GPU is available to torch, otherwise "cpu".
        processor: Hugging Face processor loaded for ``model_id``.
        model: ``ClapModel`` loaded for ``model_id`` and moved to ``device``.
        database: Dict mapping each indexed file name to its embedding
            (a NumPy array as returned by ``get_audio_features``).
    """

    # Sampling rate every clip is resampled to and that is reported to the processor.
    SAMPLE_RATE = 48000
    # Lower-case file extensions treated as audio when indexing a folder.
    AUDIO_EXTENSIONS = ('.wav', '.mp3', '.flac')

    def __init__(self, model_id="laion/clap-htsat-unfused"):
        """Load the processor and model for ``model_id`` and start with an empty library.

        Args:
            model_id: Hugging Face model identifier passed to ``from_pretrained``.
        """
        print(f"Loading model: {model_id}...")
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.processor = AutoProcessor.from_pretrained(model_id)
        self.model = ClapModel.from_pretrained(model_id).to(self.device)
        self.database = {}  # {filename: embedding}

    def _load_and_preprocess(self, file_path):
        """Load an audio file with torchaudio as a mono waveform at ``SAMPLE_RATE``.

        Args:
            file_path: Path of the audio file to decode.

        Returns:
            The waveform as a NumPy array with the channel axis removed.
        """
        waveform, sample_rate = torchaudio.load(file_path)

        # Down-mix multi-channel audio by averaging the channels.
        if waveform.shape[0] > 1:
            waveform = torch.mean(waveform, dim=0, keepdim=True)

        if sample_rate != self.SAMPLE_RATE:
            waveform = T.Resample(sample_rate, self.SAMPLE_RATE)(waveform)

        # squeeze(0) removes only the leading channel axis; a bare squeeze() would
        # also collapse the sample axis of a one-sample clip into a 0-d array.
        return waveform.squeeze(0).numpy()

    def _embed(self, file_path):
        """Return the model's audio embedding for one file as a NumPy array.

        Shared by ``add_to_library`` and ``find_matches`` so that library and
        query clips always go through exactly the same pipeline.
        """
        audio_np = self._load_and_preprocess(file_path)
        # NOTE(review): the saved notebook output shows a warning pointing at this
        # call; newer transformers releases may prefer `audio=` over `audios=` —
        # confirm against the installed version before renaming the keyword.
        inputs = self.processor(
            audios=audio_np, return_tensors="pt", sampling_rate=self.SAMPLE_RATE
        ).to(self.device)

        with torch.no_grad():
            return self.model.get_audio_features(**inputs).cpu().numpy()

    def add_to_library(self, folder_path):
        """Embed every audio file directly inside ``folder_path`` and store it.

        Files are matched by extension, ignoring case, and processed in sorted
        name order so repeated runs index in the same order. A file that fails
        to load or embed is reported and skipped rather than aborting the run.

        Args:
            folder_path: Directory to scan (not recursive).

        Raises:
            OSError: If ``folder_path`` cannot be listed.
        """
        print(f"Indexing library in {folder_path}...")
        for file in sorted(os.listdir(folder_path)):
            if not file.lower().endswith(self.AUDIO_EXTENSIONS):
                continue
            full_path = os.path.join(folder_path, file)
            try:
                self.database[file] = self._embed(full_path)
            except Exception as e:
                # Best effort: one unreadable file must not stop the whole index.
                print(f"Skipping {file}: {e}")
        print(f"Successfully indexed {len(self.database)} files.")

    def find_matches(self, query_file_path, top_k=3):
        """Rank the indexed files by cosine similarity to the query clip.

        Args:
            query_file_path: Path of the audio file to look up.
            top_k: Maximum number of matches to return. ``None`` returns all
                of them; zero or a negative value returns an empty list.

        Returns:
            A list of ``(filename, score)`` tuples, highest score first, where
            ``score`` is a Python float. Empty when nothing has been indexed.
        """
        query_embedding = self._embed(query_file_path)

        results = []
        for filename, stored_embedding in self.database.items():
            score = cosine_similarity(query_embedding, stored_embedding)[0][0]
            # Plain float keeps the result free of NumPy scalar types.
            results.append((filename, float(score)))

        # Highest similarity first; the sort is stable, so ties keep index order.
        results.sort(key=lambda item: item[1], reverse=True)

        if top_k is None:
            return results
        # A negative slice bound would drop items from the end instead of limiting.
        return results[:max(top_k, 0)]

In [40]:
# Demo run: build the library from a folder, then rank it against one query clip.
if __name__ == "__main__":
    # Folder holding the stored audio files, and the clip to search for.
    library_dir = "/content/sample_data/Music"
    query_path = "/content/sample_data/Search/AnimalSongSearch.mp3"

    engine = AudioSimilarityFinder()

    # Step 1: embed every stored audio file.
    engine.add_to_library(library_dir)

    # Step 2: look up the query clip and report the closest stored files.
    matches = engine.find_matches(query_path)
    for name, score in matches:
        print(f"Match: {name} (Confidence: {score:.4f})")

Loading model: laion/clap-htsat-unfused...
Indexing library in /content/sample_data/Music...


  inputs = self.processor(audios=audio_np, return_tensors="pt", sampling_rate=48000).to(self.device)


Successfully indexed 12 files.


  inputs = self.processor(audios=query_audio, return_tensors="pt", sampling_rate=48000).to(self.device)


Match: kaise-bhula-dun-ashir-acoustic-hindi-new-sad-song-250585.mp3 (Confidence: 0.7308)
Match: Papa Meri Jaan Ringtone - Sonu Nigam Ringtone [128 Kbps]-(SongsPk.com.se).mp3 (Confidence: 0.6674)
Match: christmas-christmas-music-453991.mp3 (Confidence: 0.6136)
