In [24]:
# This file contains functions for:
# song fingerprinting (finding peaks)
# fingerprint comparision (comparing peaks to those looked up in the database_
# determining the best match for a song sample
# determining whether the best match is sufficient to identify the song
import numpy as np
import itertools
import collections
# the imports below could be removed if you didn't wanna visualize things!
from scipy.ndimage.filters import maximum_filter
from scipy.ndimage.morphology import generate_binary_structure, binary_erosion
from scipy.ndimage.morphology import iterate_structure

In [109]:
def find_peaks(song):
    """
    Find the peaks in the two-dimensional array that describes a song
    Parameters:
    ----------
    song: numpy.ndarray (MxN)
        the two dimensional array of Fourier-constants describing the song
        song[i,j] is the magnitude of the Fourier-constant for frequency i at time j
    Returns:
    --------
    peaks: binary array (MxN)
        the binaray "mask" that identifies the locations of peaks
        peaks[i,j] is True if there is a local peak for frequency i at time j 
    """
    #generates proper neighborhood
    struct = generate_binary_structure(2, 1)
    neighborhood = iterate_structure(struct, 25)  # this incorporates roughly 20 nearest neighbors
    #finds foreground
    ys, xs = np.histogram(song.flatten(), bins=len(freqs)//2, normed=True)
    dx = xs[-1] - xs[-2]
    cdf = np.cumsum(ys)*dx  # this gives you the cumulative distribution of amplitudes
    cutoff = xs[np.searchsorted(cdf, 0.77)]
    foreground =  (song >= cutoff)
    #generates boolean array of peaks that are both peaks and in the foreground
    peaks = np.logical_and((song == maximum_filter(song, footprint=neighborhood)), foreground)
    return peaks

In [110]:
def find_fingerprint(peaks, freqs, times):
    """
    Find the features (which are each a tuple of two peaks and the distance between them) of a song based on its peaks
    Parameters:
    ----------
    peaks: binary array (MxN)
        the binary "mask" that identifies the locations of peaks
        peaks[i,j] is True if there is a local peak for frequency i at time j
    freqs: float array (MxN)
        the array in which freqs[k] is the real value of the frequency value in bin k
    times: float array (MxN)
        the array in which time[k] is the real value of the time value in bin k
    Returns:
    --------
    song_fp: list of tuples (arbitrary length, all peaks in the song)
        the list of of tuples tuples of length three, each containing with two peaks and the distance between the two peaks
        of the form ((f1,f2,delta t), t1)
    """
    song_fp_t = []
    indices = np.argwhere(peaks == True)[::-1]
    comparisons = itertools.combinations(indices, 2)
    threshold = 15
    filtered = itertools.filterfalse(lambda x: abs(x[1][1]- x[1][0]) > threshold, comparisons)
    for (f1, t1), (f2,t2) in filtered:
        song_fp_t.append(tuple([tuple([round(freqs[f1],2), round(freqs[f2],2), round(abs(times[t2] - times[t1]),2)]), round(times[t1],2)]))
    print(len(song_fp_t))
    return song_fp_t

In [36]:
def get_matches(sample_fp_t, db):
    """
    Find the features (which are each a tuple of two peaks and the distance between them) of a song based on its peaks
    Parameters:
    ----------
    sample_fp: list of tuples (arbitrary length, all peaks in the sample)
        the list of tuples of length three, each containing with two peaks and the distance between the two peaks 
    db: dictionary
        the dictionary with features as keys and song names as values
    Returns:
    --------
    matches: list of tuples of song ids and time differences 
        the list of song ids in the database that share features with the supplied sample
        and the amount of time between the feature occuring in the sample and in the 
    """
    matches = []
    for feature, time in sample_fp_t:
        if feautre in db:
            match = db.get(feature)
            matches += tuple(match[0], round(match[1] - time)) 
    return matches

In [85]:
def best_match(matches):
    """
    Find the features (which are each a tuple of two peaks and the distance between them) of a song based on its peaks
    Parameters:
    ----------
    matches: list of song names
        the list of song names in the database that share features with the supplied sample    
    Returns:
    --------
    best_match: song name
        the song name that occurs the most frequently in the list
    """
    c = collections.Counter(x[0] for x in matches)
    best_matches = c.most_common(2)
    threshold = 20
    if round(c.get(best_matches[0]) - c.get(best_matches[1])) < threshold:
        return "Not found"
    return best_matches[0]

In [87]:
#testing
import librosa
samples_42, fs = librosa.load(r"C:\Users\Jaden\Dropbox\COG\Songs\42-_Coldplay_.mp3", sr=44100, mono=True)

import matplotlib.pyplot as plt
%matplotlib notebook


fig, ax = plt.subplots()
S, freqs, times, im = ax.specgram(samples_42, NFFT=4096, Fs=fs, window=mlab.window_hanning, noverlap=(4096 // 2))

<IPython.core.display.Javascript object>

In [111]:
v = find_fingerprint(find_peaks(S), freqs, times)
v

50931


[((22050.0, 21996.169999999998, 142.66), 237.31),
 ((22050.0, 21748.540000000001, 143.59), 237.31),
 ((22050.0, 21274.799999999999, 144.80000000000001), 237.31),
 ((22050.0, 20833.369999999999, 146.94), 237.31),
 ((22050.0, 20542.68, 149.25999999999999), 237.31),
 ((22050.0, 20122.779999999999, 150.0), 237.31),
 ((22050.0, 19918.209999999999, 151.94999999999999), 237.31),
 ((22050.0, 19186.080000000002, 154.46000000000001), 237.31),
 ((22050.0, 18615.450000000001, 156.55000000000001), 237.31),
 ((22050.0, 17463.43, 162.08000000000001), 237.31),
 ((22050.0, 17086.599999999999, 163.61000000000001), 237.31),
 ((22050.0, 13953.52, 176.75), 237.31),
 ((22050.0, 13318.290000000001, 179.91), 237.31),
 ((22050.0, 12790.719999999999, 182.50999999999999), 237.31),
 ((22050.0, 12069.360000000001, 184.78), 237.31),
 ((22050.0, 11767.9, 186.91999999999999), 237.31),
 ((22050.0, 9646.8799999999992, 195.93000000000001), 237.31),
 ((22050.0, 8667.1100000000006, 200.47999999999999), 237.31),
 ((22050.0

In [10]:
fp = generate_binary_structure(rank=2,connectivity=1)
fp = iterate_structure(fp, 10)

In [90]:
fig, ax = plt.subplots()
ax.imshow(find_peaks(S))

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x12c924a7828>

In [112]:
len(v)

50931