In [1]:
# define the Dynamic Time Warping Function
import numpy as np 

def dp(dist_matrix):
    """Find the minimum-cost dynamic time warping (DTW) alignment.

    Parameters
    ----------
    dist_matrix : array-like, shape (N, M)
        ``dist_matrix[i, j]`` is the local distance between element ``i`` of
        the first sequence and element ``j`` of the second.

    Returns
    -------
    path : list of (int, int)
        Index pairs of the optimal alignment, ordered from the start of both
        sequences to ``(N - 1, M - 1)``.
    cost_matrix : numpy.ndarray, shape (N, M)
        Accumulated cost table; ``cost_matrix[-1, -1]`` is the total cost of
        the alignment.

    Raises
    ------
    ValueError
        If ``dist_matrix`` is not two-dimensional (shape unpacking fails).
    """
    dist_matrix = np.asarray(dist_matrix)
    N, M = dist_matrix.shape

    # Accumulated-cost table with one padding row and column. The padding is
    # infinite everywhere except the origin, so no path can enter from outside
    # the grid and every path is forced to start at (0, 0).
    cost_matrix = np.zeros((N + 1, M + 1))
    cost_matrix[1:, 0] = np.inf
    cost_matrix[0, 1:] = np.inf

    # Remember which predecessor produced each cell so the path can be rebuilt.
    traceback_matrix = np.zeros((N, M), dtype=int)

    for i in range(N):
        for j in range(M):
            penalty = [
                cost_matrix[i, j],      # match (0): diagonal predecessor
                cost_matrix[i, j + 1],  # insertion (1): predecessor one row up
                cost_matrix[i + 1, j],  # deletion (2): predecessor one column left
            ]

            # argmin returns the first minimum, so ties prefer match,
            # then insertion, then deletion.
            i_penalty = np.argmin(penalty)

            cost_matrix[i + 1, j + 1] = dist_matrix[i, j] + penalty[i_penalty]
            traceback_matrix[i, j] = i_penalty

    # Walk the stored decisions back from the bottom-right corner.
    i = N - 1
    j = M - 1
    path = [(i, j)]

    while i > 0 or j > 0:
        tb_type = traceback_matrix[i, j]
        if tb_type == 0:
            # match: step back diagonally, i.e. one row AND one column
            i = i - 1
            j = j - 1
        elif tb_type == 1:
            # insertion: step back one row
            i = i - 1
        elif tb_type == 2:
            # deletion: step back one column
            j = j - 1
        path.append((i, j))

    # strip infinity edges before returning
    cost_matrix = cost_matrix[1:, 1:]
    return (path[::-1], cost_matrix)

In [5]:
# define a function that will receive audio data
import wave, os, struct, array, csv

def read_loaded_file(audio_path):
    """Read every frame of a WAV file and return the samples as an array.

    Parameters
    ----------
    audio_path : str or file-like
        Passed straight to ``wave.open(..., "rb")``.

    Returns
    -------
    array.array
        Typecode ``"h"`` (signed short) array built from the raw frame bytes.
        The bytes are reinterpreted as-is, so this assumes 16-bit samples;
        multi-channel data stays in whatever order the file stores it.
    """
    # The context manager closes the file even if reading raises.
    with wave.open(audio_path, "rb") as wav_file:
        # PCM 1. SAMPLED THE ANALOG SIGNAL: pull all frames in one read.
        raw_frames = wav_file.readframes(wav_file.getnframes())

    # convert audio samples into an array
    return array.array("h", raw_frames)


In [6]:
# load the analog signal.
import IPython.display as ipyd

# Samples decoded from the WAV file; kept under this name for the cells that follow.
audio_sample = read_loaded_file("./isigi_ac.wav")

# Final expression of the cell, so the notebook displays the resulting Audio object.
ipyd.Audio(data=audio_sample, rate=44100)

In [7]:
# quantize the audio and encode it
# NOTE(review): this constant was labelled "bits/sample", but the formula uses it
# as the number of steps the 2**15 amplitude range is divided into — confirm intent.
quantization_level = 16
quantization_step = 2**15 / quantization_level

# Copy first: a plain `quantized_audio = audio_sample` would alias the same array,
# so quantizing would silently overwrite audio_sample as well.
quantized_audio = array.array("h", audio_sample)

for audio in range(len(quantized_audio)): # PCM 2. QUANTIZATION
    # Truncate to a whole number of steps BEFORE scaling back up. Wrapping int()
    # around the full product (x / step * step) just returns x unchanged.
    whole_steps = int(quantized_audio[audio] / quantization_step)
    quantized_audio[audio] = int(whole_steps * quantization_step)

# convert data into bytes for transmission : PCM 3. ENCODE
encoded_audio = struct.pack("h"*len(quantized_audio), *quantized_audio)

`The signal (.wav) has now gone through the PCM process (sample, quantize, and encode); it is ready for transmission and for decoding before DTW.`

In [8]:
# Still needed before feature extraction: the sampling frequency (44100) and the
# array data, i.e. decoding the bytes back into an array; then extract features.
import librosa

# Rate is defined manually for now.
# NOTE(review): read_loaded_file() returns only the samples, so this value is
# assumed to match the recording's real frame rate — confirm.
sampling_frequency = 44100

In [15]:
# Frame geometry expressed in samples: each FFT window spans 0.025 s of signal
# and successive windows start 0.01 s apart. int() truncates any fraction.
nfft = int(0.025*sampling_frequency)
hop_length = int(0.01*sampling_frequency)

In [18]:
# extract features
audio_sample = np.array(audio_sample)  # numpy array into librosa

# Mel spectrogram of the recording: 40 mel bands, framed by nfft / hop_length.
mel_spectogram = librosa.feature.melspectrogram(
    y=audio_sample / 1.0,  # true division hands librosa floating-point samples
    sr=sampling_frequency,
    n_mels=40,
    hop_length=hop_length,
    n_fft=nfft,
)

# Natural log of the mel energies, then transposed for the comparison step.
mel_spectogram_log = np.log(mel_spectogram)
audio_signal = mel_spectogram_log.T

In [23]:
# Library of recordings that the loop in the next cell reads one by one.
# NOTE(review): paths are relative to the working directory, and the last entry
# is ../DTW/isigi_ac.wav while an earlier cell loads ./isigi_ac.wav — confirm
# both point at the intended file.
custom_lib = [
    "../DTW/Lecture/audio/bye.wav",
    "../DTW/Lecture/audio/goodbye.wav",
    "../DTW/isigi_ac.wav"
]

from scipy.io import wavfile

In [25]:
# extract features from the sample audio in the library and find the cost

for sample in custom_lib:
    # wavfile.read returns (rate, data). Note that this rebinds the notebook-level
    # sampling_frequency (and nfft / hop_length below) on every iteration.
    # NOTE(review): multi-channel files come back as 2-D data — presumably these
    # recordings are mono; confirm.
    sampling_frequency, audio_data = wavfile.read(sample)

    # Same 0.025 s window and 0.01 s hop as before, in this file's sample rate.
    nfft = int(0.025*sampling_frequency)
    hop_length = int(0.01*sampling_frequency)

    mel_spec = librosa.feature.melspectrogram(
        y = audio_data/1.0,
        sr = sampling_frequency,
        hop_length = hop_length,
        n_fft = nfft,
        n_mels = 40
    )

    # NOTE(review): np.log yields -inf for any exactly-zero mel energy; confirm the
    # recordings contain no fully silent frames, or floor the values first.
    log_mel_spec = np.log(mel_spec)
    # Transposed the same way as audio_signal so the two feature sets line up.
    compare_signal = log_mel_spec.T 