In [3]:
from pathlib import Path
import librosa
import numpy as np
from skimage.feature import peak_local_max
import sys 
from tqdm import tqdm
# Raise NumPy's summarisation threshold to the largest integer so that arrays
# displayed by later cells are printed in full instead of being elided with "...".
np.set_printoptions(threshold=sys.maxsize)


In [4]:
# Sampling rate in Hz; passed as `sr` to librosa.load for both recordings below.
sample_rate = 44100

In [5]:
# qp: the short snippet used as the query; dp: the full recording it is matched against.
# Both paths are relative, so they resolve against the kernel's working directory.
qp = Path("jazz.00005-snippet-10-0.wav")
dp = Path("jazz.00005.wav")

In [6]:
# Load query (q) and database (d) waveforms, both requested at `sample_rate`.
# `sr` is assigned by both calls; since the same rate is requested each time,
# the second assignment does not change its value.
q, sr = librosa.load(qp, sr=sample_rate)
d, sr = librosa.load(dp, sr=sample_rate)

In [7]:
# STFT parameters shared by query and database: 2048-point FFT, hop of 1024 samples.
n_fft = 2048
hop_length = 1024

# Magnitude spectrograms (np.abs discards the phase of the complex STFT).
Q = np.abs(librosa.stft(q, n_fft=n_fft, hop_length=hop_length))
D = np.abs(librosa.stft(d, n_fft=n_fft, hop_length=hop_length))

In [8]:
# Size of the database spectrogram: axis 0 is frequency bins, axis 1 is STFT frames.
D.shape

(1025, 1293)

In [9]:
# Pick local maxima of the log-magnitude spectrograms. Each result row is an
# (axis-0 index, axis-1 index) pair into the spectrogram, i.e. (frequency bin, frame).
# Query and database use the same min_distance / threshold_rel settings.
# NOTE(review): np.log gives -inf wherever the magnitude is exactly 0 — confirm the
# inputs never contain exact zeros, or add a small offset before taking the log.
D_coords = peak_local_max(np.log(D), min_distance=10,threshold_rel=0.05)
Q_coords = peak_local_max(np.log(Q), min_distance=10,threshold_rel=0.05)

In [10]:
# (number of database peaks, 2 coordinates per peak)
D_coords.shape

(632, 2)

In [11]:
# NOTE(review): this repeats the previous cell verbatim — possibly Q_coords.shape
# was intended here; confirm or drop the cell.
D_coords.shape

(632, 2)

In [12]:
# Show the database peaks ordered by their first coordinate (frequency bin).
# argsort() runs with its default sort kind, so rows that share a frequency bin
# are not guaranteed to keep their original relative order.
D_coords[D_coords[:, 0].argsort()]

array([[  15,  909],
       [  16,  995],
       [  16, 1187],
       [  16,  246],
       [  16,  155],
       [  18,  187],
       [  18,  265],
       [  19,  317],
       [  19,  446],
       [  19,  287],
       [  19,  333],
       [  20,  468],
       [  20,  538],
       [  20,  412],
       [  20,  561],
       [  20,  480],
       [  20,  434],
       [  20,  516],
       [  21,  573],
       [  22,  589],
       [  22,  674],
       [  22,  693],
       [  22,  660],
       [  22,  731],
       [  23,  838],
       [  23,  788],
       [  23,  815],
       [  23,   88],
       [  23,  302],
       [  23,  859],
       [  24, 1051],
       [  24,  742],
       [  24,  969],
       [  25,  763],
       [  25, 1081],
       [  25,  803],
       [  25,   56],
       [  26, 1115],
       [  26,   11],
       [  26,  940],
       [  27,  635],
       [  28, 1134],
       [  29, 1157],
       [  30,  898],
       [  30,  213],
       [  32,  882],
       [  33, 1093],
       [  33,

In [13]:
def create_inverted_list(coords):
    """Group the second component of each coordinate pair by its first.

    Parameters
    ----------
    coords : sequence of indexable pairs
        Every entry is read as ``(entry[0], entry[1])``.  In this notebook the
        entries are the rows returned by ``peak_local_max``.

    Returns
    -------
    dict
        Maps each distinct ``entry[0]`` to the list of ``entry[1]`` values seen
        with it, in the order they occur in ``coords``.  Empty input yields an
        empty dict.
    """
    inverted = {}
    for point in coords:
        key, value = point[0], point[1]
        # setdefault creates the bucket the first time a key shows up.
        inverted.setdefault(key, []).append(value)
    return inverted

In [14]:
# Inverted list of the database peaks: first coordinate -> list of second coordinates.
D_L = create_inverted_list(D_coords)

In [15]:
# Inspect the first query peak as a (first coordinate, second coordinate) pair.
Q_coords[0]

array([27, 13])

In [16]:
# Hand-check for the first query peak: look up the database times stored under
# its frequency bin and subtract the peak's own time to get candidate shifts.
# The bin and time are read from Q_coords[0] instead of being typed in, so the
# check stays correct when the input files or peak settings change.
# Raises KeyError if the database has no peak in that frequency bin.
[x - Q_coords[0][1] for x in D_L[Q_coords[0][0]]]

[622]

In [17]:
def create_indicator_functions(coords, Ld, max_time=1293, show_progress=True):
    """Build one shift-indicator row per query peak.

    For each query peak ``(f, t_q)`` in ``coords`` and each database time
    ``t_d`` listed under ``Ld[f]``, the bin for the shift ``t_d - t_q`` is set
    to 1.  Shift ``s`` is stored at column ``s + max_time``, so column
    ``max_time`` is shift zero and a row spans shifts ``-max_time..+max_time``.

    Parameters
    ----------
    coords : sequence of (frequency_bin, time_frame) pairs
        Query peaks; each entry is read as ``(entry[0], entry[1])``.
    Ld : dict
        Inverted list mapping a frequency bin to the database time frames that
        have a peak in that bin (as built by ``create_inverted_list``).
    max_time : int, optional
        Largest absolute shift that can be represented.  The default, 1293,
        is the frame count of the database spectrogram used in this notebook;
        pass the frame count of your own database for other recordings.
    show_progress : bool, optional
        When true (default) the loop over peaks is wrapped in ``tqdm``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(coords), 2 * max_time + 1)`` holding 0/1
        entries.  Empty ``coords`` gives an array with zero rows and the same
        number of columns, so summing over axis 0 still yields a full-width
        vector.

    Raises
    ------
    IndexError
        If a shift falls outside ``-max_time..+max_time``.  Without this check
        a negative column would wrap around and mark the wrong shift.
    """
    width = 2 * max_time + 1
    # Preallocating the matrix keeps the column count even with no peaks.
    indicator_functions = np.zeros((len(coords), width))

    rows = range(len(coords))
    if show_progress:
        rows = tqdm(rows)

    for i in rows:
        freq, t_query = coords[i][0], coords[i][1]
        # Frequency bins absent from the database contribute an all-zero row.
        for ts in Ld.get(freq, ()):
            column = ts - t_query + max_time
            if not 0 <= column < width:
                raise IndexError(
                    f"shift {ts - t_query} is outside the representable range "
                    f"[-{max_time}, {max_time}]; increase max_time"
                )
            indicator_functions[i, column] = 1
    return indicator_functions

In [22]:
# Display every query peak as a (frequency bin, frame) row.
# NOTE(review): this cell's execution count is out of sequence with its
# neighbours — re-run the notebook top to bottom before sharing.
Q_coords

array([[  27,   13],
       [  68,   78],
       [  37,  182],
       [  42,   79],
       [  27,  147],
       [  38,  168],
       [  28,  314],
       [  23,  248],
       [  44,  370],
       [  34,   34],
       [  42,  400],
       [  68,  305],
       [  27,  393],
       [  49,  171],
       [  36,  232],
       [  49,  150],
       [  24,  269],
       [  83,  249],
       [  68,   96],
       [  82,   11],
       [  27,   76],
       [  69,  184],
       [  27,   56],
       [  24,  302],
       [  47,  270],
       [  34,  207],
       [  70,  148],
       [  65,  268],
       [ 176,   84],
       [  30,  219],
       [  67,  220],
       [  61,  168],
       [  85,   37],
       [ 107,   79],
       [ 187,   86],
       [  49,  120],
       [  49,   20],
       [  63,   17],
       [  45,  107],
       [  48,  319],
       [  47,   56],
       [  64,   33],
       [ 194,  121],
       [  33,  123],
       [  82,  175],
       [ 106,  233],
       [  16,  200],
       [  68,

In [18]:
# One indicator row per query peak, built against the database inverted list.
indicators = create_indicator_functions(Q_coords, D_L)

100%|██████████| 195/195 [00:00<00:00, 118175.02it/s]


In [24]:
# Sum over the query peaks: each column counts how many peaks agree on that shift.
matching_function = np.sum(indicators, axis=0)

In [35]:
# Highest vote count and the shift (in STFT frames) that received it.
# The matching function has an odd length of 2 * max_time + 1 with shift zero
# in the middle, so the centre offset is size // 2. Deriving it from the array
# keeps this cell correct if the indicator width ever changes.
max_val = np.max(matching_function)
max_idx = np.argmax(matching_function) - matching_function.size // 2

In [36]:
# Best-matching shift of the query relative to the database, in STFT frames.
max_idx

35

In [39]:
# Convert the best shift from STFT frames to seconds:
# frames * samples-per-hop / samples-per-second.
# Uses the notebook's own variables rather than retyped literals so the result
# follows any change to the inputs; float() keeps the display a plain number.
float(max_idx * hop_length / sample_rate)

0.8126984126984127