## All libraries

In [1]:
from matplotlib.patches import ConnectionPatch
import matplotlib.pyplot as plt
import numpy as np
import scipy.spatial.distance as dist
import warnings
warnings.filterwarnings('ignore')

## Dynamic programming function

In [2]:
def dp(dist_mat):
    N, M = dist_mat.shape
    
    # Initialize the cost matrix
    cost_mat = np.zeros((N + 1, M + 1))
    for i in range(1, N + 1):
        cost_mat[i, 0] = np.inf
    for i in range(1, M + 1):
        cost_mat[0, i] = np.inf

    # Fill the cost matrix while keeping traceback information
    traceback_mat = np.zeros((N, M))
    for i in range(N):
        for j in range(M):
            penalty = [
                cost_mat[i, j],      # match (0)
                cost_mat[i, j + 1],  # insertion (1)
                cost_mat[i + 1, j]]  # deletion (2)
            i_penalty = np.argmin(penalty)
            cost_mat[i + 1, j + 1] = dist_mat[i, j] + penalty[i_penalty]
            traceback_mat[i, j] = i_penalty

    # Traceback from bottom right
    i = N - 1
    j = M - 1
    path = [(i, j)]
    while i > 0 or j > 0:
        tb_type = traceback_mat[i, j]
        if tb_type == 0:
            # Match
            i = i - 1
            j = j - 1
        elif tb_type == 1:
            # Insertion
            i = i - 1
        elif tb_type == 2:
            # Deletion
            j = j - 1
        path.append((i, j))

    # Strip infinity edges from cost_mat before returning
    cost_mat = cost_mat[1:, 1:]
    return (path[::-1], cost_mat)

## Univariate examples

### Example 3

In [3]:
from scipy.io import wavfile
import IPython.display as ipyd
import librosa
import librosa.display

In [46]:
# # x_fn = "audio/hello1.wav" ---- SAMPLE TO compare 
# x_fn = "audio/aula.wav"
# # x_fn = "D:/Management-PRODIJ/ProMassaPy/FINAL-PRO-vr/audios/cuts/agente2.wav"
# # x_fn = "audio/goodbye.wav"
# f_s, x = wavfile.read(x_fn)
# print(f'The file {f_s} {x}')
# ipyd.Audio(rate=f_s, data=x)

### Nearest neighbour

In [4]:
# Audio
query_fn = "audio/chunk9.wav" #Archivo a comparar
f_s, x = wavfile.read(query_fn)

#------ the hop length
n_fft = int(0.025*f_s)      # 25 ms
hop_length = int(0.01*f_s)  # 10 ms
# Mel-scale spectrogram
mel_spec_x = librosa.feature.melspectrogram(
    y=x/1.0, sr=f_s, n_mels=40,
    n_fft=n_fft, hop_length=hop_length
    )
log_mel_spec_x = np.log(mel_spec_x)
x_seq = log_mel_spec_x.T
ipyd.Audio(rate=f_s, data=x)

In [5]:
# audio_files = [
#     "audio/hello2.wav", "audio/hello3.wav",
#     "audio/bye.wav", "audio/cat.wav", "audio/goodbye.wav"
#     ]

print(f'Archivo a comparar: ---> {query_fn}')
print(f'---------------------------')
min_values = np.array([],dtype=object)
audio_files = ["audio/agente5.wav","audio/limpia1.wav","audio/aula.wav","audio/aula2.wav", "audio/limpia0.wav", "audio/agente.wav"]
for neighbour_fn in audio_files:    
    # Mel-scale spectrogram
    print("Reading:", neighbour_fn)
    f_s, y = wavfile.read(neighbour_fn)
    mel_spec_y = librosa.feature.melspectrogram(
        y=y/1.0, sr=f_s, n_mels=40,
        n_fft=n_fft, hop_length=hop_length
        )
    log_mel_spec_y = np.log(mel_spec_y)
    y_seq = log_mel_spec_y.T

    dist_mat = dist.cdist(x_seq, y_seq, "cosine")
    path, cost_mat = dp(dist_mat)
    print("Alignment cost: {:.4f}".format(cost_mat[-1, -1]))
    M = y_seq.shape[0]
    N = x_seq.shape[0]
    # print(f'Tamaños comparados: M->{M}; N->{N}')
    print(
        "Normalized alignment cost: {:.8f}".format(
        cost_mat[-1, -1]/(M + N))
        )
    min_values = np.append(min_values,{"filename":neighbour_fn,"costo":cost_mat[-1, -1]/(M + N)})
    print()

The given SAMPLE: ---> audio/chunk9.wav
---------------------------
Reading: audio/agente5.wav


ValueError: XA must be a 2-dimensional array.

In [40]:
minPricedItem = min(min_values, key=lambda x:x['costo'])
minPricedItem

{'filename': 'audio/agente5.wav', 'costo': 0.009778813796247714}

In [25]:
# # Audio sample - to compare 
# tmp_fn = "audio/cat.wav"
# f_s, tmp = wavfile.read(tmp_fn)
# ipyd.Audio(rate=f_s, data=tmp)