# Experiment on AAI Model Architectures
- Created: 10 Mar 2020
- Data: param_draft2
- Increase data size and change model architecture
- Identity SE, between (cnn and bilstm) embedding feature 
- expeirment 18 19

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from os.path import join
import tensorflow as tf
from tensorflow import keras

from tensorflow.keras import models
from tensorflow.keras import optimizers
from tensorflow.keras import regularizers
from tensorflow.keras import callbacks
from time import time
from datetime import datetime
import shutil 
import argparse
from functools import partial
import pickle
from IPython.display import Audio

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Reshape, GRU, InputLayer, AlphaDropout, Activation, BatchNormalization, Dropout, Flatten, Dense, Bidirectional, LSTM, Conv1D, SpatialDropout1D, Concatenate, Multiply, Add

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import OneHotEncoder

In [2]:
np_load_old = partial(np.load)
np.load = lambda *a,**k: np_load_old(*a, allow_pickle=True, **k)

In [3]:
tf.random.set_seed(42)

## Metric Function

In [4]:
def rmse(y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))

def R2(y_true, y_pred):
    SS_res =  K.sum(K.square(y_true - y_pred), axis = 0) 
    SS_tot = K.sum(K.square(y_true - K.mean(y_true, axis = 0)), axis = 0) 
    return K.mean(1 - (SS_res/SS_tot), axis=0)

## Utility Functions

In [5]:
#Function for preprocessing data
def delete_params(params):
    '''
    This function remove JX, WC, TRX, TRY, and MS1,2,3 paramter
    '''
    DEL_PARAMS_LIST = [2,8,15,16,21,22,23]
    return np.delete(params,DEL_PARAMS_LIST , axis=1)

In [6]:
def compute_rmse(actual,pred, axis=1):
    # Compute RMSE by row (axis=1) result in rmse of each data
    # Compute RMSE by column (axis=0) result in rmse of each label
    return np.sqrt((np.square(actual - pred)).mean(axis=axis))

In [7]:
def evaluate_model(y_pred, label):
    y_pred = enc.inverse_transform(y_pred)
    label = enc.inverse_transform(label)
#     print('Confusion Matrix')
#     print(confusion_matrix(label, y_pred))
    print('Classification Report')
    print(classification_report(label, y_pred))

In [8]:
def compute_se(actual,pred):
    return np.sqrt((np.square(actual - pred)))

## Load Data 

In [9]:
def prep_data():

    # load data from preprocess pipeline
    dataset = np.load(join('../../data/d_dataset_p2/prep_data_13','training_subsets.npz'))
    X_train =dataset['X_train']
    y_train= dataset['y_train']
    z_train= dataset['z_train']
    X_val = dataset['X_val']
    y_val = dataset['y_val']
    z_val = dataset['z_val']
    X_test = dataset['X_test']
    y_test = dataset['y_test']
    z_test = dataset['z_test']
    a_test = dataset['audio_test']
    a_val = dataset['audio_val']
    
    y_train = delete_params(y_train)
    y_val = delete_params(y_val)
    y_test = delete_params(y_test)

    print('Train features and labels %s %s'%(str(X_train.shape),str(y_train.shape)))
    print('Validating features and labels %s %s'%(str(X_val.shape),str(y_val.shape)))
    print('Test features and labels %s %s'%(str(X_test.shape),str(y_test.shape)))

    return X_train, X_val, X_test, y_train, y_val, y_test, z_train, z_val, z_test, a_test, a_val

In [10]:
X_train, X_val, X_test, y_train, y_val, y_test, z_train, z_val, z_test, a_test, a_val = prep_data()

Train features and labels (76260, 24, 39) (76260, 17)
Validating features and labels (9340, 24, 39) (9340, 17)
Test features and labels (9338, 24, 39) (9338, 17)


In [11]:
print(set(z_test))

{'@', 'e', 'A', '9', 'E', 'u', '2', 'U', 'E:', 'i', 'O', 'o', 'a', 'y'}


In [12]:
print('Train features and labels {} {} {}'.format(str(X_train.shape),str(y_train.shape),str(z_train.shape)))
print('Validating features and labels {} {} {}'.format(str(X_val.shape),str(y_val.shape),str(z_val.shape)))
print('Test features and labels {} {} {}'.format(str(X_test.shape),str(y_test.shape),str(z_test.shape)))

Train features and labels (76260, 24, 39) (76260, 17) (76260,)
Validating features and labels (9340, 24, 39) (9340, 17) (9340,)
Test features and labels (9338, 24, 39) (9338, 17) (9338,)


### One-hot label 

In [13]:
z_train_oe=z_train.reshape(-1,1)
z_val_oe=z_val.reshape(-1,1)
z_test_oe=z_test.reshape(-1,1)

enc = OneHotEncoder(handle_unknown = 'ignore')
enc.fit(z_train_oe)

z_train_oe = enc.transform(z_train_oe).toarray()
z_test_oe = enc.transform(z_test_oe).toarray()
z_val_oe = enc.transform(z_val_oe).toarray()

## 1.) Train Baseline Model
- Model without embedding feature

### Defined Model without Embedded Feature

In [14]:
N_OUTPUTS = 17

pLSTM = partial(LSTM, kernel_initializer='he_uniform', return_sequences=True)
pConv1D = partial(Conv1D, padding = 'same', activation = 'linear', kernel_initializer = 'he_uniform')

def init_senet(feature_layer=1, cnn_unit=128, first_kernel=13, res_unit=128, res_kernel=7,
    bilstm = 1, bilstm_unit=256, 
    dropout_rate=0.3,
    reduction_ratio = 2,
    activation_fn='relu'):


    def cnn_block(input_x, cnn_unit, kernel_size):
        x = pConv1D(cnn_unit, kernel_size=kernel_size)(input_x)
        x = BatchNormalization()(x)
        x = Activation(activation_fn)(x)
        return x
    
    def residual_block(input_x):
        x = pConv1D(res_unit, kernel_size=res_kernel)(input_x)
        x = BatchNormalization()(x)
        x = Activation(activation_fn)(x)
        x = pConv1D(res_unit, kernel_size=3)(x)
        return x

    def se_block(input_x):
        x = layers.GlobalAveragePooling1D()(input_x)
        channel_shape = getattr(x, '_shape_val')[-1]
        x = Reshape((1, channel_shape))(x)
        x = Dense(channel_shape // reduction_ratio, activation=activation_fn, kernel_initializer='he_uniform')(x)
        x = Dense(channel_shape, activation='tanh', kernel_initializer='he_uniform')(x)
        x = layers.Multiply()([x, input_x])
        return x

    def se_res_block(input_x):
        se_x = se_block(input_x)
        re_x = residual_block(input_x)
        x = keras.layers.Add()([re_x, se_x])
        x = BatchNormalization()(x)
        output = Activation(activation_fn)(x)
        return x

    def senet_nn(input_shape_1,input_shape_2):

        input_x = keras.Input(shape=(input_shape_1,input_shape_2))
        x = cnn_block(input_x, cnn_unit, first_kernel)
        for i in range(feature_layer):
            x = se_res_block(x)
        x = SpatialDropout1D(rate=dropout_rate)(x)
        for i in range(feature_layer):
            x = se_res_block(x)
            x = SpatialDropout1D(rate=dropout_rate)(x)
#         x = cnn_block(x, cnn_unit=cnn_unit, kernel_size=1)
#         x = SpatialDropout1D(rate=dropout_rate)(x)
        for i in range(bilstm-1):
            x = Bidirectional(pLSTM(bilstm_unit))(x)
            x = SpatialDropout1D(rate=dropout_rate)(x)
        x = Bidirectional(pLSTM(bilstm_unit, return_sequences=False))(x)
        x = Dropout(rate=dropout_rate)(x)
        outputs = Dense(N_OUTPUTS, activation='linear', kernel_initializer='he_uniform')(x)
        model = keras.Model(inputs=input_x, outputs=outputs)
        return model

    return senet_nn

In [15]:
model = init_senet()(24, 39)

In [16]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 24, 39)]     0                                            
__________________________________________________________________________________________________
conv1d (Conv1D)                 (None, 24, 128)      65024       input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 24, 128)      512         conv1d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 24, 128)      0           batch_normalization[0][0]        
______________________________________________________________________________________________

### Compile Model 

In [17]:
model.compile(optimizer='adam',loss='mse',metrics=[rmse, R2])

### Train Model 

In [18]:
PATIENCE = 10
BATCH_SIZE = 128
EPOCHS = 100

early = callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=PATIENCE, verbose=1, mode='min', baseline=None, restore_best_weights=False)
callback_list = [early]

In [19]:
history = model.fit(X_train,y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val,y_val),
    callbacks=callback_list)

Train on 76260 samples, validate on 9340 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 00069: early stopping


In [20]:
model.save('model_baseline.hdf5')

### Evaluation 

In [21]:
model.evaluate(X_test,y_test,verbose=0)

[0.020886541145753366, 0.12938069, 0.9083808]

In [22]:
y_pred = model.predict(X_test)

In [23]:
err = compute_rmse(y_test,y_pred, axis=0)

In [24]:
print(err)

[0.14535961 0.1230976  0.11669075 0.13202935 0.14240909 0.18208138
 0.19922448 0.09230688 0.1004147  0.14001313 0.15391854 0.13222808
 0.13385844 0.18009826 0.15112436 0.13686979 0.1526696 ]


## 2.) Model with (Between) Embedding Feature  

### Defined (Between) Embedding Model

In [14]:
PHONETIC_OUTPUT = len(set(z_train))
EMBEDDED_FEATURE = 128

pLSTM = partial(LSTM, kernel_initializer='he_uniform', return_sequences=True)

def embeded_model(input_shape_1,input_shape_2):
    inputs = keras.Input(shape=(input_shape_1,input_shape_2))
    x = pLSTM(EMBEDDED_FEATURE)(inputs)
    x = Flatten()(x)
    outputs = Dense(PHONETIC_OUTPUT, activation='softmax', kernel_initializer='he_uniform')(x)
    return keras.Model(inputs=inputs, outputs=outputs)

In [15]:
embedded = embeded_model(24, 39)
embedded.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 24, 39)]          0         
_________________________________________________________________
lstm (LSTM)                  (None, 24, 128)           86016     
_________________________________________________________________
flatten (Flatten)            (None, 3072)              0         
_________________________________________________________________
dense (Dense)                (None, 14)                43022     
Total params: 129,038
Trainable params: 129,038
Non-trainable params: 0
_________________________________________________________________


### Compile Model 

In [16]:
embedded.compile(optimizer='adam',loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),metrics=['accuracy'])

### Train Model 

In [17]:
PATIENCE = 10
BATCH_SIZE = 128
EPOCHS = 100

early = callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=PATIENCE, verbose=1, mode='min', baseline=None, restore_best_weights=False)
callback_list = [early]

In [18]:
history = embedded.fit(X_train,z_train_oe,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=2,
    validation_data=(X_val,z_val_oe),
    callbacks=callback_list)

Train on 76260 samples, validate on 9340 samples
Epoch 1/100
76260/76260 - 7s - loss: 1.9392 - accuracy: 0.8312 - val_loss: 1.8428 - val_accuracy: 0.9176
Epoch 2/100
76260/76260 - 3s - loss: 1.8554 - accuracy: 0.9041 - val_loss: 1.8232 - val_accuracy: 0.9346
Epoch 3/100
76260/76260 - 3s - loss: 1.8366 - accuracy: 0.9213 - val_loss: 1.8199 - val_accuracy: 0.9366
Epoch 4/100
76260/76260 - 3s - loss: 1.8273 - accuracy: 0.9304 - val_loss: 1.8223 - val_accuracy: 0.9344
Epoch 5/100
76260/76260 - 3s - loss: 1.8185 - accuracy: 0.9383 - val_loss: 1.8194 - val_accuracy: 0.9364
Epoch 6/100
76260/76260 - 3s - loss: 1.8135 - accuracy: 0.9432 - val_loss: 1.8164 - val_accuracy: 0.9395
Epoch 7/100
76260/76260 - 3s - loss: 1.8097 - accuracy: 0.9470 - val_loss: 1.8188 - val_accuracy: 0.9372
Epoch 8/100
76260/76260 - 3s - loss: 1.8058 - accuracy: 0.9505 - val_loss: 1.8179 - val_accuracy: 0.9380
Epoch 9/100
76260/76260 - 3s - loss: 1.8029 - accuracy: 0.9533 - val_loss: 1.8118 - val_accuracy: 0.9448
Epoch 

In [19]:
embedded.save('between_embedded.hdf5')

In [20]:
z_pred = embedded.predict(X_test)
evaluate_model(z_pred, z_test_oe)

Classification Report
              precision    recall  f1-score   support

           2       0.96      0.96      0.96       701
           9       0.99      0.98      0.98       688
           @       0.95      0.98      0.97       635
           A       0.91      0.98      0.95       645
           E       0.92      0.95      0.93       697
          E:       0.93      0.93      0.93       655
           O       0.95      0.94      0.94       729
           U       0.93      0.93      0.93       687
           a       0.98      0.93      0.95       689
           e       0.95      0.93      0.94       644
           i       0.98      0.94      0.96       684
           o       0.95      0.89      0.92       634
           u       0.91      0.95      0.93       659
           y       0.93      0.97      0.95       591

    accuracy                           0.95      9338
   macro avg       0.95      0.95      0.95      9338
weighted avg       0.95      0.95      0.95      9338



### Embedded input model

In [21]:
# embedded = tf.keras.models.load_model('pre_embedded.hdf5')

embedded_layers = tf.keras.Sequential()
for layer in embedded.layers[:-2]:
    layer.trainable = False
    embedded_layers.add(layer)
embedded_layers.summary()

N_OUTPUTS = 17

pLSTM = partial(LSTM, kernel_initializer='he_uniform', return_sequences=True)
pConv1D = partial(Conv1D, padding = 'same', activation = 'linear', kernel_initializer = 'he_uniform')

def init_senet_w_emb(feature_layer=1, cnn_unit=128, first_kernel=13, res_unit=128, res_kernel=7,
    bilstm = 1, bilstm_unit=256, 
    dropout_rate=0.3,
    reduction_ratio = 2,
    activation_fn='relu'):

    def cnn_block(input_x, cnn_unit, kernel_size):
        x = pConv1D(cnn_unit, kernel_size=kernel_size)(input_x)
        x = BatchNormalization()(x)
        x = Activation(activation_fn)(x)
        return x
    
    def residual_block(input_x):
        x = pConv1D(res_unit, kernel_size=res_kernel)(input_x)
        x = BatchNormalization()(x)
        x = Activation(activation_fn)(x)
        x = pConv1D(res_unit, kernel_size=3)(x)
        return x

    def se_block(input_x):
        x = layers.GlobalAveragePooling1D()(input_x)
        channel_shape = getattr(x, '_shape_val')[-1]
        x = Reshape((1, channel_shape))(x)
        x = Dense(channel_shape // reduction_ratio, activation=activation_fn, kernel_initializer='he_uniform')(x)
        x = Dense(channel_shape, activation='tanh', kernel_initializer='he_uniform')(x)
        x = layers.Multiply()([x, input_x])
        return x

    def se_res_block(input_x):
        se_x = se_block(input_x)
        re_x = residual_block(input_x)
        x = keras.layers.Add()([re_x, se_x])
        x = BatchNormalization()(x)
        output = Activation(activation_fn)(x)
        return x

    def senet_nn(input_shape_1,input_shape_2):

        input_x = keras.Input(shape=(input_shape_1,input_shape_2))
        x = cnn_block(input_x, cnn_unit, first_kernel)
        for i in range(feature_layer):
            x = se_res_block(x)
        x = SpatialDropout1D(rate=dropout_rate)(x)
        for i in range(feature_layer):
            x = se_res_block(x)
            x = SpatialDropout1D(rate=dropout_rate)(x)
#         x = cnn_block(x, cnn_unit=cnn_unit, kernel_size=1)
        # Embedded
        embedded = embedded_layers(input_x)
        x = layers.Concatenate()([x, embedded])
#         x = SpatialDropout1D(rate=dropout_rate)(x)
        for i in range(bilstm-1):
            x = Bidirectional(pLSTM(bilstm_unit))(x)
            x = SpatialDropout1D(rate=dropout_rate)(x)
        x = Bidirectional(pLSTM(bilstm_unit, return_sequences=False))(x)
        x = Dropout(rate=dropout_rate)(x)
        outputs = Dense(N_OUTPUTS, activation='linear', kernel_initializer='he_uniform')(x)
        model = keras.Model(inputs=input_x, outputs=outputs)
        return model

    return senet_nn

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 24, 128)           86016     
Total params: 86,016
Trainable params: 0
Non-trainable params: 86,016
_________________________________________________________________


In [22]:
emb_model = init_senet_w_emb()(24, 39)
emb_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 24, 39)]     0                                            
__________________________________________________________________________________________________
conv1d (Conv1D)                 (None, 24, 128)      65024       input_2[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 24, 128)      512         conv1d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 24, 128)      0           batch_normalization[0][0]        
____________________________________________________________________________________________

### Compile Model 

In [23]:
emb_model.compile(optimizer='adam',loss='mse',metrics=[rmse, R2])

### Train Model 

In [24]:
PATIENCE = 10
BATCH_SIZE = 128
EPOCHS = 100

early = callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=PATIENCE, verbose=1, mode='min', baseline=None, restore_best_weights=False)
callback_list = [early]

In [25]:
history = emb_model.fit(X_train,y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=2,
    validation_data=(X_val,y_val),
    callbacks=callback_list)

Train on 76260 samples, validate on 9340 samples
Epoch 1/100
76260/76260 - 26s - loss: 0.0586 - rmse: 0.2198 - R2: 0.7608 - val_loss: 0.0281 - val_rmse: 0.1553 - val_R2: 0.8862
Epoch 2/100
76260/76260 - 16s - loss: 0.0322 - rmse: 0.1682 - R2: 0.8704 - val_loss: 0.0268 - val_rmse: 0.1491 - val_R2: 0.8911
Epoch 3/100
76260/76260 - 16s - loss: 0.0292 - rmse: 0.1599 - R2: 0.8825 - val_loss: 0.0255 - val_rmse: 0.1450 - val_R2: 0.8962
Epoch 4/100
76260/76260 - 16s - loss: 0.0277 - rmse: 0.1557 - R2: 0.8883 - val_loss: 0.0252 - val_rmse: 0.1431 - val_R2: 0.8971
Epoch 5/100
76260/76260 - 16s - loss: 0.0269 - rmse: 0.1534 - R2: 0.8914 - val_loss: 0.0251 - val_rmse: 0.1427 - val_R2: 0.8978
Epoch 6/100
76260/76260 - 16s - loss: 0.0257 - rmse: 0.1501 - R2: 0.8962 - val_loss: 0.0250 - val_rmse: 0.1416 - val_R2: 0.8981
Epoch 7/100
76260/76260 - 16s - loss: 0.0251 - rmse: 0.1481 - R2: 0.8987 - val_loss: 0.0247 - val_rmse: 0.1406 - val_R2: 0.8987
Epoch 8/100
76260/76260 - 16s - loss: 0.0242 - rmse: 0.

In [26]:
emb_model.save('model_with_between_embedded.hdf5')

## Evaluated 

In [27]:
emb_model.evaluate(X_test,y_test,verbose=0)

[0.023792340254826398, 0.13660981, 0.8965779]

In [28]:
y_pred = emb_model.predict(X_test)

In [29]:
compute_rmse(y_test,y_pred, axis=0)

array([0.15502277, 0.13169323, 0.12194111, 0.13957697, 0.15122224,
       0.19693145, 0.21309529, 0.09659721, 0.10947966, 0.15003618,
       0.16279915, 0.13803637, 0.14098625, 0.19930146, 0.15732932,
       0.14854874, 0.16087504])