# Speech + Phonetic AAI Model

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import matplotlib.pyplot as plt
import os
from os.path import join
import tensorflow as tf
from tensorflow import keras

from tensorflow.keras import models
from tensorflow.keras import optimizers
from tensorflow.keras import regularizers
from tensorflow.keras import callbacks
from time import time
from datetime import datetime
import shutil 
import argparse
from functools import partial
import pickle

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Reshape, GRU, InputLayer, AlphaDropout, Activation, BatchNormalization, Dropout, Flatten, Dense, Bidirectional, LSTM, Conv1D

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import OneHotEncoder

## Utility Functions

In [4]:
#Function for preprocessing data
def delete_params(params):
    '''
    This function remove JX, WC, TRX, TRY, and MS1,2,3 paramter
    '''
    DEL_PARAMS_LIST = [2,8,15,16,21,22,23]
    return np.delete(params,DEL_PARAMS_LIST , axis=1)

## Load Data 

In [5]:
def prep_data():

    # load data from preprocess pipeline
    dataset = np.load(join('../data/d_dataset_p1/prep_data_13','training_subsets.npz'))
    X_train =dataset['X_train']
    y_train= dataset['y_train']
    z_train= dataset['z_train']
    X_val = dataset['X_val']
    y_val = dataset['y_val']
    z_val = dataset['z_val']
    X_test = dataset['X_test']
    y_test = dataset['y_test']
    z_test = dataset['z_test']
    
    y_train = delete_params(y_train)
    y_val = delete_params(y_val)
    y_test = delete_params(y_test)

    print('Train features and labels %s %s'%(str(X_train.shape),str(y_train.shape)))
    print('Validating features and labels %s %s'%(str(X_val.shape),str(y_val.shape)))
    print('Test features and labels %s %s'%(str(X_test.shape),str(y_test.shape)))

    return X_train, X_val, X_test, y_train, y_val, y_test, z_train, z_val, z_test

In [6]:
X_train, X_val, X_test, y_train, y_val, y_test, z_train, z_val, z_test = prep_data()

Train features and labels (98746, 24, 39) (98746, 17)
Validating features and labels (12094, 24, 39) (12094, 17)
Test features and labels (12092, 24, 39) (12092, 17)


In [10]:
print(set(z_test))

{'9', '2', 'u', 'E', 'V', 'i', 'A', '&', 'e', '7', 'E:', 'O', 'y', 'M', 'o', 'a', 'U', '@', 'Q'}


In [13]:
print('Train features and labels {} {} {}'.format(str(X_train.shape),str(y_train.shape),str(z_train.shape)))
print('Validating features and labels {} {} {}'.format(str(X_val.shape),str(y_val.shape),str(z_val.shape)))
print('Test features and labels {} {} {}'.format(str(X_test.shape),str(y_test.shape),str(z_test.shape)))

Train features and labels (98746, 24, 39) (98746, 17) (98746,)
Validating features and labels (12094, 24, 39) (12094, 17) (12094,)
Test features and labels (12092, 24, 39) (12092, 17) (12092,)


### One-hot label 

In [14]:
z_train_oe=z_train.reshape(-1,1)
z_test_oe=z_val.reshape(-1,1)
z_val_oe=z_test.reshape(-1,1)

In [15]:
enc = OneHotEncoder(handle_unknown = 'ignore')
enc.fit(z_train_oe)

z_train_oe = enc.transform(z_train_oe).toarray()
z_test_oe = enc.transform(z_test_oe).toarray()
z_val_oe = enc.transform(z_val_oe).toarray()

In [16]:
z_train_oe.shape

(98746, 19)

In [17]:
z_train_oe[1]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
       0., 0.])

## Defined Model 

In [18]:
N_OUTPUTS = 17

pLSTM = partial(LSTM,
    kernel_initializer='he_uniform',
    return_sequences=True)

pDense = partial(Dense,
    kernel_initializer='he_normal',
    activation='elu')

def init_bilstm(unit=128, bi_layer_num=3, drop_rate=0.2):

    def bilstm(input_shape_1,input_shape_2):
        
        input_x = keras.Input(shape=(input_shape_1,input_shape_2))
        # feature extraction layers
        x = input_x
        for i in range(bi_layer_num-1):
            x = Bidirectional(pLSTM(unit))(x)
            if drop_rate: 
                x = SpatialDropout1D(rate=drop_rate)(x)
        # output layers
        x = Bidirectional(pLSTM(unit, return_sequences=False))(x)
        if drop_rate: 
            x = Dropout(rate=drop_rate)(x)
        outputs_param = pDense(N_OUTPUTS, activation='linear')(x)
        
        model = keras.Model(inputs=input_x, outputs=[outputs_param, outputs_pho])
        # model.summary()
        return model
    return bilstm

In [12]:
model = init_bilstm()(24, 39)

In [13]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional (Bidirectional (None, 24, 256)           172032    
_________________________________________________________________
dropout (Dropout)            (None, 24, 256)           0         
_________________________________________________________________
bidirectional_1 (Bidirection (None, 24, 256)           394240    
_________________________________________________________________
dropout_1 (Dropout)          (None, 24, 256)           0         
_________________________________________________________________
bidirectional_2 (Bidirection (None, 256)               394240    
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense (Dense)                (None, 19)                4

In [14]:
model.compile(optimizer='adam',loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),metrics=['accuracy'])

## Train Model 

In [15]:
PATIENCE = 10
BATCH_SIZE = 128
EPOCHS = 50

In [16]:
early = callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=PATIENCE, verbose=1, mode='max', baseline=None, restore_best_weights=False)
callback_list = [early]

In [17]:
history = model.fit(X_train,y_train_oe,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val,y_val_oe),
    callbacks=callback_list)

Train on 98746 samples, validate on 6046 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 00011: early stopping


In [19]:
model.save('model_embedding.hdf5')

## Evaluation 

In [20]:
model.evaluate(X_test,y_test_oe,verbose=0)

[2.146353620036406, 0.88488257]

In [24]:
def evaluate_model(feature, label, model):
    y_pred = model.predict(feature)
#     y_pred = np.argmax(y_pred, axis=1)
#     label = np.argmax(label, axis=1)
    y_pred = enc.inverse_transform(y_pred)
    label = enc.inverse_transform(label)
    print('Confusion Matrix')
    print(confusion_matrix(label, y_pred))
    print('Classification Report')
    print(classification_report(label, y_pred))

In [25]:
evaluate_model(X_test, y_test_oe, model)

Confusion Matrix
[[296   0   0   9   0   0   1   0   0   1   9   0   4  24   0   0   0   0
    0]
 [  0 322   0   1   0   0   0   0   0   0   0   1   0   0   0   0   0   1
   11]
 [  5   0 156   5   0   2   0   0   5  45   3  13  43   3   0   0   1   1
    0]
 [  3   0   0 291   1   0   1   0   1   0   3   0   0   2   0   0   0   0
    0]
 [  0   0   0   3 303   0  21   1   0   0   0   0   0   0   0   0   0   0
    0]
 [  0   0   0   0   0 272   0   1   0   0   9   0  11   3   0   0   0   0
    0]
 [  0   0   0   1   7   0 311  11   0   0   1   0   0   2   0   0   0   0
    0]
 [  0   0   0   0   0   0  12 307   0   0   0   0   0   0   4   0   0   0
    1]
 [  0   6   1   1   0   0   0   0 308   0   0   5   0   0   0   0   0   2
    1]
 [  1   0   4   1   0   0   0   0   1 288   4   1  18   0   0   0   1   2
    0]
 [  5   0   0   2   0   9   3   0   0   2 275   0  12   4   0   0   0   0
    0]
 [  0   2   3   1   0   0   0   0  47   8   1 252   0   0   0   0   2   5
    0]
 [  4   0  