In [2]:
import os
import librosa
import numpy as np
import pickle
import hashlib
from scipy.ndimage import maximum_filter, binary_erosion, generate_binary_structure

In [8]:
# Parameters
FAN_VALUE = 15
MAX_TIME_DELTA = 200
DB_FILE = 'fingerprints.pkl'

def fingerprint(audio, sr):
    S = np.abs(librosa.stft(audio))
    S_db = librosa.amplitude_to_db(S, ref=np.max)

    struct = generate_binary_structure(2, 1)
    local_max = maximum_filter(S_db, footprint=struct) == S_db
    background = (S_db == 0)
    detected_peaks = local_max ^ binary_erosion(background)

    peaks = np.argwhere(detected_peaks)
    hashes = []

    for i in range(len(peaks)):
        for j in range(1, FAN_VALUE):
            if i + j < len(peaks):
                freq1, time1 = peaks[i]
                freq2, time2 = peaks[i + j]
                delta_t = time2 - time1
                if 0 < delta_t <= MAX_TIME_DELTA:
                    hash_input = f"{freq1}|{freq2}|{delta_t}"
                    hash_val = hashlib.sha1(hash_input.encode()).hexdigest()[:20]
                    hashes.append((hash_val, time1))
    return hashes

def build_database(folder_path, db_file=DB_FILE):
    database = {}
    for fname in os.listdir(folder_path):
        if fname.lower().endswith(('.mp3', '.wav', '.flac')):
            path = os.path.join(folder_path, fname)
            print(f"Processing: {fname}")
            audio, sr = librosa.load(path, sr=None, mono=True, duration=20.0)
            hashes = fingerprint(audio, sr)
            database[fname] = hashes

    with open(db_file, 'wb') as f:
        pickle.dump(database, f)
    return database

def load_database(db_file=DB_FILE):
    if os.path.exists(db_file):
        with open(db_file, 'rb') as f:
            return pickle.load(f)
    return {}

def match_sample(sample_hashes, database):
    match_counts = {}
    for song, hashes in database.items():
        hash_set = set(h[0] for h in hashes)
        match_count = sum(1 for h, _ in sample_hashes if h in hash_set)
        match_counts[song] = match_count

    if match_counts:
        best_match = max(match_counts, key=match_counts.get)
        return best_match, match_counts[best_match]
    return None, 0


In [9]:
build_database("temp", "fingerprints.pkl") 

Processing: 01. Sparkle.mp3


{'01. Sparkle.mp3': [('cde7513eae496de544fd', np.int64(3)),
  ('3694746eebc8702d6640', np.int64(3)),
  ('17d78d00cd43240abfd5', np.int64(3)),
  ('ba598f1265790ce7911e', np.int64(3)),
  ('ec0f784fb11df14882b1', np.int64(3)),
  ('59ae50c7098cf4d18097', np.int64(3)),
  ('a010ec50b9b201bd7bfe', np.int64(3)),
  ('fa72c1557c480437b62a', np.int64(3)),
  ('2c1b2c33178723b47d65', np.int64(3)),
  ('d2153cc6921cd5973029', np.int64(3)),
  ('36abedcd89af0cd1ce74', np.int64(3)),
  ('33b62df9f25cf1a7c048', np.int64(3)),
  ('ba23c04a93eba6c9b7fa', np.int64(3)),
  ('9a475df6593249e899b7', np.int64(3)),
  ('cde7513eae496de544fd', np.int64(7)),
  ('9eb3a8fd565e1683ac34', np.int64(7)),
  ('3829cb8a930e92e27e46', np.int64(7)),
  ('ba598f1265790ce7911e', np.int64(7)),
  ('ec0f784fb11df14882b1', np.int64(7)),
  ('59ae50c7098cf4d18097', np.int64(7)),
  ('72172c9e84431d0bb3f3', np.int64(7)),
  ('5b6c2c8677c88d08c6e8', np.int64(7)),
  ('1d5337670673f9e56e8f', np.int64(7)),
  ('e56a3d1a60331f1a54f6', np.int64(7)