# Query-by-Example (QbE) Search Using DTW

## Preliminary

In [1]:
# Notebook setup magics. Comments are kept on their own lines so they are not
# passed to the magics as arguments.
# Render matplotlib figures inline in the cell output.
%matplotlib inline
# Load the autoreload extension; mode 2 picks up edits to imported modules
# (e.g. the local speech_dtw package) without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from __future__ import division
from __future__ import print_function
from os import path
from python_speech_features import delta
from python_speech_features import mfcc
import glob
import matplotlib.pyplot as plt
import numpy as np
import scipy.io.wavfile as wav
import sys
import simpleaudio as sa
from pydub import AudioSegment
from pydub.playback import play
from shennong.audio import Audio
from shennong.features.processor.mfcc import MfccProcessor
from shennong.features.postprocessor.delta import DeltaPostProcessor
from shennong.features.postprocessor.cmvn import CmvnPostProcessor

sys.path.append("..")
sys.path.append(path.join("..", "utils"))

from speech_dtw import qbe



## Extract features

In [24]:
def get_mfcc_dd(wav_fn):
    """Return normalised MFCCs with deltas and delta-deltas for an audio file.

    Parameters
    ----------
    wav_fn : str
        Path of the audio file to load. Only channel 0 is processed.

    Returns
    -------
    numpy.ndarray
        float64 feature matrix (static MFCCs plus order-2 deltas), with every
        column standardised to zero mean and unit variance over axis 0.
        Columns with zero variance are only mean-centred, so they come out as
        all zeros instead of NaN/inf.
    """
    audio = Audio.load(wav_fn)
    processor = MfccProcessor(
        sample_rate=audio.sample_rate,
        window_type="hamming",
        frame_length=0.025,
        frame_shift=0.01,
        cepstral_lifter=26.0,
        low_freq=0,
        vtln_low=60,
        vtln_high=7200,
        # Upper band edge set to half the sample rate.
        high_freq=audio.sample_rate/2,
    )
    d_processor = DeltaPostProcessor(order=2)

    # vtln_warp=1.0 is passed explicitly; presumably this means "no warping"
    # -- TODO confirm against the shennong documentation.
    mfcc_static = processor.process(audio.channel(0), vtln_warp=1.0)
    mfcc_deltas = d_processor.process(mfcc_static)

    # Use the public `data` attribute rather than the private `_to_dict()`.
    features = np.asarray(mfcc_deltas.data, dtype=np.float64)

    # Per-column mean/variance normalisation. A constant column has std == 0;
    # dividing by 1 there avoids a division by zero that would yield NaNs.
    mean = np.mean(features, axis=0)
    std = np.std(features, axis=0)
    std[std == 0] = 1.0
    return (features - mean) / std

## Load spoken query words and utterances

In [25]:
# NOTE(review): json is not used in this cell; the import is kept in case
# later cells rely on it. Consider moving it to the imports cell.
import json

# Build the spoken lexicon of common words: one dict per recording holding the
# word label, its duration, its features and its audio.
# The context manager closes the word-list file once it has been read.
with open("english_words/common_words_100.txt", "r") as txt_file:
    # split() without an argument also drops newlines and repeated spaces,
    # which split(' ') would have kept as bogus "words".
    word_dict = txt_file.read().split()

# Assumes the sorted file names follow the same order as the words in the
# text file -- TODO confirm.
common_wav_fns = sorted(glob.glob(path.join("english_words/common", "common_word_??.wav")))
if len(common_wav_fns) > len(word_dict):
    raise ValueError(
        "Found {} recordings but only {} words in the word list".format(
            len(common_wav_fns), len(word_dict)))

common_word_list = []
for word, wav_fn in zip(word_dict, common_wav_fns):
    rate, signal = wav.read(wav_fn)
    query = {
        "length": len(signal) / rate * 1000,  # duration in milliseconds
        "data": get_mfcc_dd(wav_fn),
        "word": word,
        "audio": AudioSegment.from_file(wav_fn, format="wav"),
        # presumably a per-word detection threshold -- TODO confirm
        "thres": 1,
    }
    print(word, end=' ')
    common_word_list.append(query)

one word use said time way many write would like long make thing see two look day could go come number sound people know water call first may side find new work part take get place made live back little round man year came show every good give name form sentence great think say help low line differ turn cause much mean move right boy old tell set three want air well also play small end put home read hand port large spell add even land must big high follow act ask men change went light kind need house picture 

In [27]:
# Build the list of Redfern query words: one dict per recording holding the
# word label, its duration, its features and its audio.
# The context manager closes the word-list file once it has been read.
with open("english_words/redfern_words_30.txt", "r") as txt_file:
    # split() without an argument also drops newlines and repeated spaces,
    # which split(' ') would have kept as bogus "words".
    word_dict = txt_file.read().split()

# Assumes the sorted file names follow the same order as the words in the
# text file -- TODO confirm.
redfern_wav_fns = sorted(glob.glob(path.join("english_words/redfern", "redfern_word_??.wav")))
if len(redfern_wav_fns) > len(word_dict):
    raise ValueError(
        "Found {} recordings but only {} words in the word list".format(
            len(redfern_wav_fns), len(word_dict)))

redfern_word_list = []
for word, wav_fn in zip(word_dict, redfern_wav_fns):
    rate, signal = wav.read(wav_fn)
    query = {
        "length": len(signal) / rate * 1000,  # duration in milliseconds
        "data": get_mfcc_dd(wav_fn),
        "word": word,
        "audio": AudioSegment.from_file(wav_fn, format="wav"),
        # presumably a per-word detection threshold -- TODO confirm
        "thres": 1,
    }
    print(word, end=' ')
    redfern_word_list.append(query)

aboriginal australia australians imagine us indigenous people cannot think things know justice world say recognise much non year torres strait social injustice see well history identity many australian reality done 

In [28]:
# Spot check: show the label stored at index 29 and play its recording.
spot_check = redfern_word_list[29]
print(spot_check["word"])
play(spot_check["audio"])

done


In [30]:
# Load every utterance as a dict with its pydub audio ("file") and its
# features ("data").
# Files with a two-character suffix are collected before those with a
# three-character suffix; each group is sorted on its own, so the groups are
# never interleaved by plain string ordering.
redfern_rspk = []
for wav_pattern in ("redfern_rspk_??.wav", "redfern_rspk_???.wav"):
    for wav_fn in sorted(glob.glob(path.join("utterances", wav_pattern))):
        print(wav_fn, end=" ")
        redfern_rspk.append({
            "file": AudioSegment.from_file(wav_fn, format="wav"),
            "data": get_mfcc_dd(wav_fn),
        })

utterances/redfern_rspk_00.wav utterances/redfern_rspk_01.wav utterances/redfern_rspk_02.wav utterances/redfern_rspk_03.wav utterances/redfern_rspk_04.wav utterances/redfern_rspk_05.wav utterances/redfern_rspk_06.wav utterances/redfern_rspk_07.wav utterances/redfern_rspk_08.wav utterances/redfern_rspk_09.wav utterances/redfern_rspk_10.wav utterances/redfern_rspk_11.wav utterances/redfern_rspk_12.wav utterances/redfern_rspk_13.wav utterances/redfern_rspk_14.wav utterances/redfern_rspk_15.wav utterances/redfern_rspk_16.wav utterances/redfern_rspk_17.wav utterances/redfern_rspk_18.wav utterances/redfern_rspk_19.wav utterances/redfern_rspk_20.wav utterances/redfern_rspk_21.wav utterances/redfern_rspk_22.wav utterances/redfern_rspk_23.wav utterances/redfern_rspk_24.wav utterances/redfern_rspk_25.wav utterances/redfern_rspk_26.wav utterances/redfern_rspk_27.wav utterances/redfern_rspk_28.wav utterances/redfern_rspk_29.wav utterances/redfern_rspk_30.wav utterances/redfern_rspk_31.wav utteranc

In [None]:
# Sweep one query word across one utterance, plot the resulting cost profile
# and play both recordings for comparison.
sweep_query = redfern_word_list[0]
sweep_utterance = redfern_rspk[2]

# The sweep compares feature matrices, so pass the "data" entries rather than
# the pydub audio object or the whole utterance dict.
sweep_original = qbe.dtw_sweep(sweep_query["data"], sweep_utterance["data"])

plt.plot(sweep_original, color='green')
plt.title('DTW sweep of "{}"'.format(sweep_query["word"]))
plt.xlabel("Sweep position (step index)")
plt.ylabel("DTW cost")
plt.show()

# Play the query that was actually swept, then the utterance it was swept
# across (stored under "file" when the utterances were loaded).
play(sweep_query["audio"])
play(sweep_utterance["file"])