In [29]:
import os
import sys
sys.path.append(r'C:\Study\SpeechAcquisitionModel')
print(os.getcwd())
import numpy as np
from scipy.spatial.distance import euclidean, minkowski
from sklearn.metrics import precision_recall_curve, average_precision_score

import scipy
import torch

import dtw
import dtwalign
from fastdtw import fastdtw

from sklearn.manifold import TSNE
import pandas as pd

from src.speech_classification.audio_processing import AudioPreprocessorFbank, SpeechCommandsDataCollector
from src.siamese_net_sound_similarity.slstm_train import SiameseSpeechCommandsDataCollector, StochasticSiameseLSTMNet
from src.siamese_net_sound_similarity.train import SiameseLSTMNet
from src.siamese_net_sound_similarity.soft_dtw import SoftDTW


import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
# Plot theme: seaborn defaults with fonts scaled by 1.4 and LaTeX text
# rendering switched off, then a white background with grid lines.
sns.set(font_scale=1.4, rc={'text.usetex': False})
sns.set_style("whitegrid")

%matplotlib inline

C:\Study\SpeechAcquisitionModel\src\dtw


In [30]:
# Element-wise distance passed as `dist=` to every dtwalign.dtw call below.
# Alternative that was tried (L1 distance):
#   DISTANCE = lambda x, y: minkowski(x, y, 1)
DISTANCE = 'euclidean'

# Step patterns passed as `step_pattern=` to dtwalign.dtw:
#   STEP_PATTERN            -> DTW on the preprocessed audio features
#   CLOSED_END_STEP_PATTERN -> closed-end DTW on the network's latent sequences
STEP_PATTERN = CLOSED_END_STEP_PATTERN = 'typeIc'

## Load data

In [31]:
# Vocabulary handed to the data collector (30 words).
wanted_words = [
    'bed', 'bird', 'cat', 'dog', 'down', 'eight',
    'five', 'four', 'go', 'happy', 'house', 'left',
    'marvin', 'nine', 'no', 'off', 'on', 'one',
    'right', 'seven', 'sheila', 'six', 'stop', 'three',
    'tree', 'two', 'up', 'wow', 'yes', 'zero',
]

# Same list object under a second name; the collector is built from this one.
wanted_words_combined = wanted_words

model_settings = {
    # Passed as `nfilt` to the audio preprocessor.
    'dct_coefficient_count': 26,
    # Vocabulary size plus two extra labels
    # (presumably "silence" and "unknown" -- TODO confirm against the collector).
    'label_count': len(wanted_words_combined) + 2,
    # Not read anywhere in this notebook.
    'hidden_reccurent_cells_count': 128,
    # Analysis window length and hop passed to the preprocessor
    # (presumably in seconds -- TODO confirm).
    'winlen': 0.04,
    'winstep': 0.02,
}

In [None]:
# Audio front end configured from `model_settings`.
preproc = AudioPreprocessorFbank(
    nfilt=model_settings['dct_coefficient_count'],
    winlen=model_settings['winlen'],
    winstep=model_settings['winstep'],
)

# Collector that serves anchors plus matching / non-matching partners.
# NOTE(review): absolute, machine-specific dataset path.
data_iter = SiameseSpeechCommandsDataCollector(
    preproc,
    data_dir=r'C:\Study\Speech_command_classification\speech_dataset',
    wanted_words=wanted_words_combined,
    testing_percentage=10,
    validation_percentage=10,
)

# Inverse of the collector's word -> index table, used for plot titles.
index_to_word = {index: word for word, index in data_iter.word_to_index.items()}

In [None]:
# Draw a tiny batch of anchors together with a same-label ("duplicate") and a
# different-label ("non-duplicate") partner for each anchor.
n_mini_batch_size = 1
data = data_iter.get_data(n_mini_batch_size, 0, 'training')
labels = data['y']

# Every partner must satisfy the label relation, not just one of them, so the
# checks use np.all (np.any would pass even if only a single pair were right).
duplicates = data_iter.get_duplicates(labels, 0, 'training')
assert np.all(labels == duplicates['y']), "every duplicate must share its anchor's label"

non_duplicates = data_iter.get_nonduplicates(labels, 0, 'training')
assert np.all(labels != non_duplicates['y']), "every non-duplicate must differ from its anchor's label"

In [None]:

# Visual sanity check on one random triplet from the current batch.
# Top row: feature maps of the anchor, its positive and its negative partner.
# Bottom row: open-end DTW warping path of the anchor against each of them
# (the anchor-vs-anchor distance is expected to be 0).
sample_idx = np.random.randint(data['y'].shape[0])
i = sample_idx  # both names were used by the original cell; keep them in sync

fig, axes = plt.subplots(2, 3, figsize=(15, 10))

# (role shown in the DTW title, batch the sample is taken from)
panels = [
    ('anchor', data),
    ('positive', duplicates),
    ('negative', non_duplicates),
]

x = data['x'][sample_idx]
for col, (role, batch) in enumerate(panels):
    y = batch['x'][sample_idx]

    # Each title is read from the same batch and the same index as the image
    # it labels, so a wrong pairing is visible in the figure.
    label = index_to_word[batch['y'][sample_idx]]
    axes[0][col].imshow(y.T)
    axes[0][col].title.set_text(f'{label}')

    res = dtwalign.dtw(x, y, dist=DISTANCE, step_pattern=STEP_PATTERN, open_end=True)
    res.plot_path(ax=axes[1][col])
    axes[1][col].set_title(f"{role} DTW = {res.normalized_distance:.4f}")

# Gather Data for analysis

In [None]:
# Evaluation set: `n_mini_batch_size` anchors, each with a non-duplicate
# partner, and a duplicate partner for the first `n_duplicates_size` anchors.
n_mini_batch_size = 1000
n_duplicates_size = 100

data = data_iter.get_data(n_mini_batch_size, 0, 'training')
labels = data['y']

# All partners must satisfy the label relation (np.any would accept a batch in
# which only one pair is correct).
duplicates = data_iter.get_duplicates(labels, 0, 'training')
assert np.all(labels == duplicates['y']), "every duplicate must share its anchor's label"
# Only the features are truncated; duplicates['y'] keeps its full length.
duplicates['x'] = duplicates['x'][:n_duplicates_size]
print(duplicates['x'].shape)

non_duplicates = data_iter.get_nonduplicates(labels, 0, 'training')
assert np.all(labels != non_duplicates['y']), "every non-duplicate must differ from its anchor's label"

# Ground truth for the concatenated distance vectors built later:
# first the duplicate pairs (1), then the non-duplicate pairs (0).
y_true = np.concatenate((np.ones(n_duplicates_size), np.zeros(n_mini_batch_size)))

# Summaries instead of dumping the full arrays.
print('y_true shape:', y_true.shape, '| labels shape:', labels.shape)
print('positive pairs:', y_true.sum())

## 1. MFCC Precision/Recall

In [None]:

def _closed_end_dtw(query, reference):
    """Return the normalized closed-end DTW distance between two sequences."""
    alignment = dtwalign.dtw(query, reference, dist=DISTANCE, step_pattern=STEP_PATTERN,
                             open_end=False, dist_only=True)
    return alignment.normalized_distance


# Distances of every anchor to its partners, computed on the preprocessed
# audio features directly (no network involved).
duplicates_dtw, non_duplicates_dtw = [], []

for idx in range(n_mini_batch_size):
    print(idx, end='\r')
    anchor = data['x'][idx].squeeze()

    # Duplicates exist only for the first `n_duplicates_size` anchors.
    if idx < n_duplicates_size:
        duplicates_dtw.append(_closed_end_dtw(anchor, duplicates['x'][idx].squeeze()))

    non_duplicates_dtw.append(_closed_end_dtw(anchor, non_duplicates['x'][idx].squeeze()))

In [None]:

# Precision/recall of "small DTW distance => same word" on the raw features.
# Distances are negated because precision_recall_curve expects higher scores
# for the positive class.
mfcc_dtw_dist = np.concatenate((duplicates_dtw, non_duplicates_dtw))
precision, recall, _ = precision_recall_curve(y_true, -mfcc_dtw_dist)

plt.figure(figsize=(10, 8))
# x/y must be keywords: seaborn >= 0.12 rejects positional x/y vectors.
# NOTE(review): lineplot's default estimator averages y over repeated x values;
# consider plt.plot/plt.step if the exact curve is needed.
ax = sns.lineplot(x=recall, y=precision)
ax.set(xlabel='Recall', ylabel='Precision')

# 2. Improving DTW using latent variables from LSTM classifier as a signal

In [None]:
# Load the network used in section 2; its `single_forward` output is what the
# DTW cells below compare.
# torch.load unpickles the saved object, so the class it was saved from must be
# importable (presumably one of the Siamese nets imported at the top -- TODO confirm).
# NOTE(review): absolute, machine-specific path; the name `nn` would shadow the
# conventional `torch.nn` alias if that were ever imported here.

nn_fname = r'C:/Study/SpeechAcquisitionModel/reports/seamise_net_10_03_2019_02_43_PM/net_0.484375.net'

nn = torch.load(nn_fname)

## 2.1 - DTW

In [None]:
PREFIX_LENS = [50]

for PREFIX_LEN in PREFIX_LENS:
    print(
        "############################################################################\n"
        f"# PREFIX LENGTH : {PREFIX_LEN}\n"
        "############################################################################"
    )
    # Only the anchors are cut to their first PREFIX_LEN steps along axis 1
    # (presumably time frames -- TODO confirm); partners are embedded uncut.
    nn_input = torch.from_numpy(data['x'][:, :PREFIX_LEN, :]).cuda().float()

    # Inference only: skip building the autograd graph for these large batches.
    with torch.no_grad():
        z, *_ = nn.single_forward(nn_input)
        duplicates_z, *_ = nn.single_forward(torch.from_numpy(duplicates['x']).cuda().float())
        non_duplicates_z, *_ = nn.single_forward(torch.from_numpy(non_duplicates['x']).cuda().float())

    z = z.detach().cpu().numpy()
    duplicates_z = duplicates_z.detach().cpu().numpy()
    non_duplicates_z = non_duplicates_z.detach().cpu().numpy()

    ##########################################################################
    # OPEN END = False
    ##########################################################################

    # Normalized DTW distances on the latent sequences.
    duplicates_dtw_z = []
    non_duplicates_dtw_z = []

    # Last reference index on each warping path (`path[-1, -1]`).
    duplicates_bmis = []
    non_duplicates_bmis = []

    for i in range(n_mini_batch_size):
        print(i, end='\r')
        x = z[i].squeeze()

        # Duplicates exist only for the first `n_duplicates_size` anchors.
        if i < n_duplicates_size:
            y_duplicate = duplicates_z[i].squeeze()
            d = dtwalign.dtw(x, y_duplicate, dist=DISTANCE, step_pattern=CLOSED_END_STEP_PATTERN, open_end=False)
            duplicates_dtw_z.append(d.normalized_distance)
            duplicates_bmis.append(d.path[-1, -1])

        y_non_duplicate = non_duplicates_z[i].squeeze()
        d = dtwalign.dtw(x, y_non_duplicate, dist=DISTANCE, step_pattern=CLOSED_END_STEP_PATTERN, open_end=False)
        non_duplicates_dtw_z.append(d.normalized_distance)
        non_duplicates_bmis.append(d.path[-1, -1])

    # Negated distances act as similarity scores for the PR curve.
    ce_dtw_dist_ce = np.concatenate((duplicates_dtw_z, non_duplicates_dtw_z))
    precision, recall, _ = precision_recall_curve(y_true, -ce_dtw_dist_ce)
    plt.figure(figsize=(10, 8))
    # x/y must be keywords: seaborn >= 0.12 rejects positional x/y vectors.
    ax = sns.lineplot(x=recall, y=precision)
    ax.set(xlabel='Recall', ylabel='Precision')
        
    




### 2.2 - SOFT-DTW

In [None]:
print(
    "############################################################################\n"
    "# SOFT DTW\n"
    "############################################################################"
)

PREFIX_LENS = [50]

for PREFIX_LEN in PREFIX_LENS:
    print(
        "############################################################################\n"
        f"# PREFIX LENGTH : {PREFIX_LEN}\n"
        "############################################################################"
    )
    # Only the anchors are cut to their first PREFIX_LEN steps along axis 1
    # (presumably time frames -- TODO confirm); partners are embedded uncut.
    nn_input = torch.from_numpy(data['x'][:, :PREFIX_LEN, :]).cuda().float()

    # Inference only: skip building the autograd graph. The latents stay
    # torch tensors because SoftDTW is applied to them directly.
    with torch.no_grad():
        z, *_ = nn.single_forward(nn_input)
        duplicates_z, *_ = nn.single_forward(torch.from_numpy(duplicates['x']).cuda().float())
        non_duplicates_z, *_ = nn.single_forward(torch.from_numpy(non_duplicates['x']).cuda().float())

    # NumPy copy of the anchor latents; not used further in this cell.
    z_detach = z.detach().cpu().numpy()

    ###########################################################################
    # DTW
    ###########################################################################

    # Only the closed-end variant is used below.
    soft_dtw_loss_open_end = SoftDTW(open_end=True, dist='l1')
    soft_dtw_loss_close_end = SoftDTW(open_end=False, dist='l1')

    ##########################################################################
    # OPEN END = False
    ##########################################################################

    duplicates_dtw_z = []
    non_duplicates_dtw_z = []

    # Reset for parity with the DTW cell; never filled here.
    duplicates_bmis = []
    non_duplicates_bmis = []

    for i in range(n_mini_batch_size):
        print(i, end='\r')
        x = z[i].squeeze()

        # Duplicates exist only for the first `n_duplicates_size` anchors.
        if i < n_duplicates_size:
            y_duplicate = duplicates_z[i].squeeze()
            d = soft_dtw_loss_close_end(x, y_duplicate)
            duplicates_dtw_z.append(d.detach().cpu().numpy())

        y_non_duplicate = non_duplicates_z[i].squeeze()
        d = soft_dtw_loss_close_end(x, y_non_duplicate)
        non_duplicates_dtw_z.append(d.detach().cpu().numpy())

    # Negated distances act as similarity scores for the PR curve.
    ce_dtw_dist = np.concatenate((duplicates_dtw_z, non_duplicates_dtw_z))
    precision, recall, _ = precision_recall_curve(y_true, -ce_dtw_dist)
    plt.figure(figsize=(10, 8))
    # x/y must be keywords: seaborn >= 0.12 rejects positional x/y vectors.
    ax = sns.lineplot(x=recall, y=precision)
    ax.set(xlabel='Recall', ylabel='Precision')

# 3. Improving DTW using latent variables from L2-Triplet network

In [None]:
# Load the network used in section 3; this rebinds the global `nn` that the
# following soft-DTW cell calls `single_forward` on.
# torch.load unpickles the saved object, so its class must be importable here.
# NOTE(review): absolute, machine-specific path.
nn_fname = r'C:/Study/SpeechAcquisitionModel/reports/seamise_net_10_03_2019_05_09_PM/net_0.421875.net'


nn = torch.load(nn_fname)

### 3.2 - SOFT-DTW

In [None]:
print(
    "############################################################################\n"
    "# SOFT DTW\n"
    "############################################################################"
)

PREFIX_LENS = [50]

for PREFIX_LEN in PREFIX_LENS:
    print(
        "############################################################################\n"
        f"# PREFIX LENGTH : {PREFIX_LEN}\n"
        "############################################################################"
    )
    # Only the anchors are cut to their first PREFIX_LEN steps along axis 1
    # (presumably time frames -- TODO confirm); partners are embedded uncut.
    nn_input = torch.from_numpy(data['x'][:, :PREFIX_LEN, :]).cuda().float()

    # Inference only: skip building the autograd graph. The latents stay
    # torch tensors because SoftDTW is applied to them directly.
    with torch.no_grad():
        z, *_ = nn.single_forward(nn_input)
        duplicates_z, *_ = nn.single_forward(torch.from_numpy(duplicates['x']).cuda().float())
        non_duplicates_z, *_ = nn.single_forward(torch.from_numpy(non_duplicates['x']).cuda().float())

    # NumPy copy of the anchor latents; not used further in this cell.
    z_detach = z.detach().cpu().numpy()

    ###########################################################################
    # DTW
    ###########################################################################

    # Only the closed-end variant is used below.
    soft_dtw_loss_open_end = SoftDTW(open_end=True, dist='l1')
    soft_dtw_loss_close_end = SoftDTW(open_end=False, dist='l1')

    ##########################################################################
    # OPEN END = False
    ##########################################################################

    duplicates_dtw_z = []
    non_duplicates_dtw_z = []

    # Reset for parity with the DTW cell; never filled here.
    duplicates_bmis = []
    non_duplicates_bmis = []

    for i in range(n_mini_batch_size):
        print(i, end='\r')
        x = z[i].squeeze()

        # Duplicates exist only for the first `n_duplicates_size` anchors.
        if i < n_duplicates_size:
            y_duplicate = duplicates_z[i].squeeze()
            d = soft_dtw_loss_close_end(x, y_duplicate)
            duplicates_dtw_z.append(d.detach().cpu().numpy())

        y_non_duplicate = non_duplicates_z[i].squeeze()
        d = soft_dtw_loss_close_end(x, y_non_duplicate)
        non_duplicates_dtw_z.append(d.detach().cpu().numpy())

    # Negated distances act as similarity scores for the PR curve.
    ce_l2_dtw_dist = np.concatenate((duplicates_dtw_z, non_duplicates_dtw_z))
    precision, recall, _ = precision_recall_curve(y_true, -ce_l2_dtw_dist)
    plt.figure(figsize=(10, 8))
    # x/y must be keywords: seaborn >= 0.12 rejects positional x/y vectors.
    ax = sns.lineplot(x=recall, y=precision)
    ax.set(xlabel='Recall', ylabel='Precision')


# 4. Improving DTW using latent variables from DTW-Triplet network

In [None]:
# Load the network used in section 4; this rebinds the global `nn` that the
# following soft-DTW cell calls `single_forward` on.
# torch.load unpickles the saved object, so its class must be importable here.
# NOTE(review): absolute, machine-specific path.

nn_fname = r'C:/Study/SpeechAcquisitionModel/reports/seamise_net_10_01_2019_03_37_PM/net_0.46875.net'

nn = torch.load(nn_fname)

### 4.2 - SOFT-DTW

In [None]:
print(
    "############################################################################\n"
    "# SOFT DTW\n"
    "############################################################################"
)

PREFIX_LENS = [50]

for PREFIX_LEN in PREFIX_LENS:
    print(
        "############################################################################\n"
        f"# PREFIX LENGTH : {PREFIX_LEN}\n"
        "############################################################################"
    )
    # Only the anchors are cut to their first PREFIX_LEN steps along axis 1
    # (presumably time frames -- TODO confirm); partners are embedded uncut.
    nn_input = torch.from_numpy(data['x'][:, :PREFIX_LEN, :]).cuda().float()

    # Inference only: skip building the autograd graph. The latents stay
    # torch tensors because SoftDTW is applied to them directly.
    with torch.no_grad():
        z, *_ = nn.single_forward(nn_input)
        duplicates_z, *_ = nn.single_forward(torch.from_numpy(duplicates['x']).cuda().float())
        non_duplicates_z, *_ = nn.single_forward(torch.from_numpy(non_duplicates['x']).cuda().float())

    # NumPy copy of the anchor latents; not used further in this cell.
    z_detach = z.detach().cpu().numpy()

    # Defined here so the cell no longer relies on the object left behind by
    # the section 3.2 cell; same configuration as that cell uses.
    soft_dtw_loss_close_end = SoftDTW(open_end=False, dist='l1')

    ##########################################################################
    # OPEN END = False
    ##########################################################################

    duplicates_dtw_z = []
    non_duplicates_dtw_z = []

    # Reset for parity with the DTW cell; never filled here.
    duplicates_bmis = []
    non_duplicates_bmis = []

    for i in range(n_mini_batch_size):
        print(i, end='\r')
        x = z[i].squeeze()

        # Duplicates exist only for the first `n_duplicates_size` anchors.
        if i < n_duplicates_size:
            y_duplicate = duplicates_z[i].squeeze()
            d = soft_dtw_loss_close_end(x, y_duplicate)
            duplicates_dtw_z.append(d.detach().cpu().numpy())

        y_non_duplicate = non_duplicates_z[i].squeeze()
        d = soft_dtw_loss_close_end(x, y_non_duplicate)
        non_duplicates_dtw_z.append(d.detach().cpu().numpy())

    # Negated distances act as similarity scores for the PR curve.
    ce_dtw_dtw_dist = np.concatenate((duplicates_dtw_z, non_duplicates_dtw_z))
    precision, recall, _ = precision_recall_curve(y_true, -ce_dtw_dtw_dist)
    plt.figure(figsize=(10, 8))
    # x/y must be keywords: seaborn >= 0.12 rejects positional x/y vectors.
    ax = sns.lineplot(x=recall, y=precision)
    ax.set(xlabel='Recall', ylabel='Precision')




# 5. All Together

## 5.1 Precision-Recall Curve

In [None]:
# Overlay the precision/recall curves of all four distance measures.
# (A constant-score "no skill" baseline is intentionally not drawn.)
plt.figure(figsize=(10, 8))

# 'CE' uses the soft-DTW distances from section 2.2 (`ce_dtw_dist`), not the
# classic-DTW distances from section 2.1 (`ce_dtw_dist_ce`).
models_dists = [mfcc_dtw_dist, ce_dtw_dist, ce_l2_dtw_dist, ce_dtw_dtw_dist]
model_names = ['MFCC', 'CE', 'CE+L2-triplet', 'CE+DTW-triplet']

for model_name, model_dist in zip(model_names, models_dists):
    # Negated distances act as similarity scores.
    precision, recall, thresholds = precision_recall_curve(y_true, -model_dist, pos_label=1)
    # x/y must be keywords: seaborn >= 0.12 rejects positional x/y vectors.
    sns.lineplot(x=recall, y=precision, label=model_name, linewidth=3)

plt.xlabel('Recall')
plt.ylabel('Precision')
plt.legend()
plt.show()

## 5.2 Average Precision 

In [None]:
# Average precision per distance measure; negated distances act as scores.
models_dists = [mfcc_dtw_dist, ce_dtw_dist, ce_l2_dtw_dist, ce_dtw_dtw_dist]
model_names = ['MFCC', 'CE', 'CE+L2-triplet', 'CE+DTW-triplet']

for model_name, model_dist in zip(model_names, models_dists):
    average_precision = average_precision_score(y_true, -model_dist)
    print(model_name, average_precision)