# Experiment on AAI Model Architectures
- Created: 10 Mar 2020

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from os.path import join
import tensorflow as tf
from tensorflow import keras

from tensorflow.keras import models
from tensorflow.keras import optimizers
from tensorflow.keras import regularizers
from tensorflow.keras import callbacks
from time import time
from datetime import datetime
import shutil 
import argparse
from functools import partial
import pickle
from IPython.display import Audio

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Reshape, GRU, InputLayer, AlphaDropout, Activation, BatchNormalization, Dropout, Flatten, Dense, Bidirectional, LSTM, Conv1D, SpatialDropout1D, Concatenate, Multiply, Add

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Route ``np.load`` through a wrapper that defaults ``allow_pickle`` to True.
# ``setdefault`` lets a caller still pass ``allow_pickle`` explicitly; the
# previous lambda raised TypeError (duplicate keyword) in that case.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
# Re-running this cell reuses the saved original instead of stacking wrappers.
np_load_old = getattr(np.load, '_np_load_old', np.load)


def _np_load_allow_pickle(*a, **k):
    """Call the original ``np.load`` with ``allow_pickle=True`` unless overridden."""
    k.setdefault('allow_pickle', True)
    return np_load_old(*a, **k)


_np_load_allow_pickle._np_load_old = np_load_old
np.load = _np_load_allow_pickle

## Metric Function

In [None]:
def rmse(y_true, y_pred):
    """Keras metric: root-mean-square error, reduced over the last axis."""
    squared_error = K.square(y_pred - y_true)
    mean_squared_error = K.mean(squared_error, axis=-1)
    return K.sqrt(mean_squared_error)

def R2(y_true, y_pred):
    """Keras metric: coefficient of determination, averaged over outputs.

    The residual and total sums of squares are reduced over axis 0, giving
    one R^2 value per remaining position; those values are then averaged.

    ``K.epsilon()`` is added to the denominator so that a column whose
    targets are constant within the batch (total sum of squares == 0) does
    not produce a division by zero.
    """
    ss_res = K.sum(K.square(y_true - y_pred), axis=0)
    ss_tot = K.sum(K.square(y_true - K.mean(y_true, axis=0)), axis=0)
    return K.mean(1 - ss_res / (ss_tot + K.epsilon()), axis=0)

## Utility Functions

In [None]:
# Pre-processing helper: drop the parameter columns that are not modelled.
def delete_params(params):
    """Return ``params`` without the excluded parameter columns.

    Removes column indices 2, 8, 15, 16, 21, 22 and 23 along axis 1, which
    the original note identifies as the JX, WC, TRX, TRY, MS1, MS2 and MS3
    parameters. The input array itself is not modified.
    """
    dropped_columns = [2, 8, 15, 16, 21, 22, 23]
    return np.delete(params, dropped_columns, axis=1)

In [None]:
def compute_rmse(actual, pred, axis=1):
    """Return the root-mean-square error between ``actual`` and ``pred``.

    ``axis=1`` averages across columns (one value per row / sample);
    ``axis=0`` averages across rows (one value per column / label).
    """
    squared_diff = np.square(actual - pred)
    return np.sqrt(squared_diff.mean(axis=axis))

In [None]:
def evaluate_model(y_pred, label):
    """Print a classification report for encoded predictions and labels.

    Both arguments are decoded with the module-level encoder ``enc``
    (via ``inverse_transform``) before being handed to
    ``classification_report``.
    """
    predicted_classes = enc.inverse_transform(y_pred)
    true_classes = enc.inverse_transform(label)
    # A confusion matrix could be printed here as well (previously disabled).
    print('Classification Report')
    print(classification_report(true_classes, predicted_classes))

In [None]:
def compute_se(actual, pred):
    """Return the element-wise square root of the squared difference.

    The result has the same shape as ``actual - pred``; each entry is the
    magnitude of the corresponding prediction error.
    """
    residual = actual - pred
    return np.sqrt(np.square(residual))

## Load Data 

In [None]:
def prep_data(data_dir='../../data/d_dataset_p2/prep_data_13_2'):
    """Load the preprocessed train/validation/test subsets.

    Args:
        data_dir: Directory that contains ``training_subsets.npz``. The
            default is the location used by this notebook.

    Returns:
        The tuple ``(X_train, X_val, X_test, y_train, y_val, y_test,
        z_train, z_val, z_test, a_test, a_val)``. The ``y_*`` arrays have
        been passed through ``delete_params``; the ``a_*`` arrays come from
        the ``audio_test`` / ``audio_val`` entries of the archive.
    """
    # Read every array inside the ``with`` block so the archive's file
    # handle is closed as soon as the data has been extracted.
    with np.load(join(data_dir, 'training_subsets.npz')) as dataset:
        X_train = dataset['X_train']
        y_train = dataset['y_train']
        z_train = dataset['z_train']
        X_val = dataset['X_val']
        y_val = dataset['y_val']
        z_val = dataset['z_val']
        X_test = dataset['X_test']
        y_test = dataset['y_test']
        z_test = dataset['z_test']
        a_test = dataset['audio_test']
        a_val = dataset['audio_val']

    # Drop the parameter columns that are not modelled.
    y_train = delete_params(y_train)
    y_val = delete_params(y_val)
    y_test = delete_params(y_test)

    print('Train features and labels %s %s'%(str(X_train.shape),str(y_train.shape)))
    print('Validating features and labels %s %s'%(str(X_val.shape),str(y_val.shape)))
    print('Test features and labels %s %s'%(str(X_test.shape),str(y_test.shape)))

    return X_train, X_val, X_test, y_train, y_val, y_test, z_train, z_val, z_test, a_test, a_val

In [None]:
# Load all splits once; features (X), parameter targets (y), labels (z)
# and the audio entries (a) are unpacked in the order prep_data returns them.
(X_train, X_val, X_test,
 y_train, y_val, y_test,
 z_train, z_val, z_test,
 a_test, a_val) = prep_data()

In [None]:
# Show the distinct label values present in z_test.
print(set(z_test))

In [None]:
# Report the feature / target / label shapes of every split.
for _split, _X, _y, _z in (
        ('Train', X_train, y_train, z_train),
        ('Validating', X_val, y_val, z_val),
        ('Test', X_test, y_test, z_test)):
    print('{} features and labels {} {} {}'.format(_split, _X.shape, _y.shape, _z.shape))

### One-hot label 

In [None]:
# Fit the encoder on the training labels only; categories unseen at fit
# time are ignored (handle_unknown='ignore') when the other splits are encoded.
enc = OneHotEncoder(handle_unknown='ignore')
enc.fit(z_train.reshape(-1, 1))

# Encode each split from a single-column view and densify the result.
z_train_oe, z_val_oe, z_test_oe = [
    enc.transform(labels.reshape(-1, 1)).toarray()
    for labels in (z_train, z_val, z_test)
]

## 1.) Train Baseline Model
- Model without embedding feature

### Define Model without Embedded Feature

In [None]:
N_OUTPUTS = 17

# Layer factories with the defaults shared by every block below.
pLSTM = partial(LSTM, kernel_initializer='he_uniform', return_sequences=True)
pConv1D = partial(Conv1D, padding='same', activation='linear', kernel_initializer='he_uniform')


def init_senet(feature_layer=1, cnn_unit=64, first_kernel=11, res_unit=128, res_kernel=7,
               bilstm=1, bilstm_unit=256,
               dropout_rate=0.3,
               reduction_ratio=2,
               activation_fn='relu'):
    """Configure an SE-residual Conv1D + BiLSTM regression network.

    Args:
        feature_layer: Number of SE-residual blocks in each of the two stacks.
        cnn_unit: Filters of the 1x1 conv block that follows the stacks.
        first_kernel: Kernel size of the first conv block (128 filters).
        res_unit: Filters of both convolutions inside a residual block.
        res_kernel: Kernel size of the first conv in a residual block
            (the second one always uses kernel size 3).
        bilstm: Number of bidirectional LSTM layers; ``bilstm - 1`` layers
            return sequences and one final layer always follows them.
        bilstm_unit: Units of each LSTM direction.
        dropout_rate: Rate used by every (spatial) dropout layer.
        reduction_ratio: Divisor of the channel count in the SE bottleneck.
        activation_fn: Activation used in conv blocks and the SE bottleneck.

    Returns:
        A function ``senet_nn(input_shape_1, input_shape_2)`` that builds and
        returns the (uncompiled) ``keras.Model`` with ``N_OUTPUTS`` linear
        outputs.
    """

    def cnn_block(input_x, cnn_unit, kernel_size):
        # Conv -> BatchNorm -> activation.
        x = pConv1D(cnn_unit, kernel_size=kernel_size)(input_x)
        x = BatchNormalization()(x)
        x = Activation(activation_fn)(x)
        return x

    def se_block(input_x):
        # Squeeze: one value per channel; excite: bottleneck then per-channel gate.
        x = layers.GlobalAveragePooling1D()(input_x)
        # Public API instead of the private ``_shape_val`` tensor attribute.
        channel_shape = K.int_shape(x)[-1]
        x = Reshape((1, channel_shape))(x)
        x = Dense(channel_shape // reduction_ratio, activation=activation_fn, kernel_initializer='he_uniform')(x)
        x = Dense(channel_shape, activation='tanh', kernel_initializer='he_uniform')(x)
        return x

    def residual_block(input_x):
        x = cnn_block(input_x, res_unit, kernel_size=res_kernel)
        x = pConv1D(res_unit, kernel_size=3)(x)
        x = BatchNormalization()(x)
        return x

    def se_res_block(input_x):
        res_x = residual_block(input_x)
        se_x = se_block(res_x)
        x = layers.Multiply()([res_x, se_x])
        x = Activation(activation_fn)(x)
        shortcut = input_x
        if K.int_shape(input_x)[-1] != res_unit:
            # Add() needs equal channel counts: project the shortcut with a
            # 1x1 convolution when the block input is not res_unit wide.
            shortcut = pConv1D(res_unit, kernel_size=1)(input_x)
        x = layers.Add()([x, shortcut])
        # Previously returned the undefined name ``outputs`` (NameError).
        return x

    def senet_nn(input_shape_1, input_shape_2):
        input_x = keras.Input(shape=(input_shape_1, input_shape_2))
        x = cnn_block(input_x, 128, first_kernel)
        for _ in range(feature_layer):
            x = se_res_block(x)
        x = SpatialDropout1D(rate=dropout_rate)(x)
        for _ in range(feature_layer):
            x = se_res_block(x)
            x = SpatialDropout1D(rate=dropout_rate)(x)
        x = cnn_block(x, cnn_unit=cnn_unit, kernel_size=1)
        x = SpatialDropout1D(rate=dropout_rate)(x)
        for _ in range(bilstm - 1):
            x = Bidirectional(pLSTM(bilstm_unit))(x)
            x = SpatialDropout1D(rate=dropout_rate)(x)
        # The last recurrent layer collapses the sequence to a single vector.
        x = Bidirectional(pLSTM(bilstm_unit, return_sequences=False))(x)
        x = Dropout(rate=dropout_rate)(x)
        outputs = Dense(N_OUTPUTS, activation='linear', kernel_initializer='he_uniform')(x)
        model = keras.Model(inputs=input_x, outputs=outputs)
        return model

    return senet_nn

In [None]:
# Build the baseline network with default hyper-parameters for inputs of shape (24, 39).
model = init_senet()(24, 39)

In [None]:
# Print the layer-by-layer summary of the baseline model.
model.summary()

### Compile Model 

In [None]:
# Adam optimizer with MSE loss; the custom rmse and R2 functions are tracked as metrics.
model.compile(optimizer='adam',loss='mse',metrics=[rmse, R2])

### Train Model 

In [None]:
# Training hyper-parameters for the baseline run.
PATIENCE = 10
BATCH_SIZE = 128
EPOCHS = 50

# Stop once val_loss has not improved for PATIENCE epochs.
# NOTE(review): restore_best_weights=False keeps the last-epoch weights
# rather than the best ones -- confirm this is intended.
early = callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0,
    patience=PATIENCE,
    baseline=None,
    restore_best_weights=False,
    verbose=1,
)
callback_list = [early]

In [None]:
# Train the baseline model on (X_train, y_train), validating on the validation
# split after every epoch; early stopping is supplied through callback_list.
history = model.fit(X_train,y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val,y_val),
    callbacks=callback_list)

In [None]:
# Persist the trained baseline model to 'model_baseline.hdf5'.
model.save('model_baseline.hdf5')

### Evaluation 

In [None]:
# Evaluate the baseline model on the test split without progress output.
model.evaluate(X_test,y_test,verbose=0)

In [None]:
# Baseline predictions for the test inputs.
y_pred = model.predict(X_test)

In [None]:
# axis=0 averages over rows, giving one RMSE value per output column.
err = compute_rmse(y_test,y_pred, axis=0)

In [None]:
# Display the per-column RMSE of the baseline model.
print(err)

## 2.) Train Embedding Layer

### Define Embedding Model

In [None]:
PHONETIC_OUTPUT = 14
EMBEDDED_FEATURE = 100

pLSTM = partial(LSTM, kernel_initializer='he_uniform', return_sequences=True)


def embeded_model(input_shape_1, input_shape_2):
    """Build the classifier whose LSTM layer is later reused as an embedding.

    Args:
        input_shape_1: First dimension of one input sample.
        input_shape_2: Second dimension of one input sample.

    Returns:
        An uncompiled ``keras.Model``: an LSTM with ``EMBEDDED_FEATURE`` units
        that returns the full sequence, flattened and followed by a softmax
        layer with ``PHONETIC_OUTPUT`` classes.
    """
    inputs = keras.Input(shape=(input_shape_1, input_shape_2))
    x = pLSTM(EMBEDDED_FEATURE)(inputs)
    x = Flatten()(x)
    # ``pDense`` was never defined (NameError); the plain Dense layer is intended.
    outputs = Dense(PHONETIC_OUTPUT, activation='softmax', kernel_initializer='he_uniform')(x)
    return keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Build the embedding classifier for inputs of shape (24, 39) and print its layers.
embedded = embeded_model(24, 39)
embedded.summary()

### Compile Model 

In [None]:
# The model's final Dense layer already applies softmax, so the loss must
# treat its output as probabilities (from_logits=False), not raw logits.
embedded.compile(optimizer='adam',
                 loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
                 metrics=['accuracy'])

### Train Model 

In [None]:
# Training hyper-parameters for the embedding classifier.
PATIENCE = 10
BATCH_SIZE = 128
EPOCHS = 50

# Stop once val_loss has not improved for PATIENCE epochs.
# NOTE(review): restore_best_weights=False keeps the last-epoch weights
# rather than the best ones -- confirm this is intended.
early = callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0,
    patience=PATIENCE,
    baseline=None,
    restore_best_weights=False,
    verbose=1,
)
callback_list = [early]

In [None]:
# Train the classifier on the one-hot encoded z labels, validating on the
# validation split; early stopping is supplied through callback_list.
history = embedded.fit(X_train,z_train_oe,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val,z_val_oe),
    callbacks=callback_list)

In [None]:
# Persist the trained classifier to 'embedded.hdf5' (reloaded in section 3).
embedded.save('embedded.hdf5')

### Evaluate Model

In [None]:
# Predict class scores for the test inputs and print the classification
# report against the one-hot test labels.
z_pred = embedded.predict(X_test)
evaluate_model(z_pred, z_test_oe)

## 3.) Model with Embedding Feature 

### Embedded input model

In [None]:
# Reload the classifier trained in section 2.
embedded = tf.keras.models.load_model('embedded.hdf5')

# Reuse every layer except the last two as a frozen feature extractor.
embedded_layers = tf.keras.Sequential()
for frozen_layer in embedded.layers[:-2]:
    frozen_layer.trainable = False
    embedded_layers.add(frozen_layer)
embedded_layers.summary()

N_OUTPUTS = 17

# Layer factories with the defaults shared by every block below.
pLSTM = partial(LSTM, kernel_initializer='he_uniform', return_sequences=True)
pConv1D = partial(Conv1D, padding='same', activation='linear', kernel_initializer='he_uniform')


def init_senet_w_emb(feature_layer=1, cnn_unit=64, first_kernel=11, res_unit=128, res_kernel=7,
                     bilstm=1, bilstm_unit=256,
                     dropout_rate=0.3,
                     reduction_ratio=2,
                     activation_fn='relu'):
    """Configure the SE-residual network that also consumes embedding features.

    The output of the module-level ``embedded_layers`` model, applied to the
    network input, is concatenated with the first conv block's output before
    the SE-residual stacks.

    Args:
        feature_layer: Number of SE-residual blocks in each of the two stacks.
        cnn_unit: Filters of the 1x1 conv block that follows the stacks.
        first_kernel: Kernel size of the first conv block (128 filters).
        res_unit: Filters of both convolutions inside a residual block.
        res_kernel: Kernel size of the first conv in a residual block
            (the second one always uses kernel size 3).
        bilstm: Number of bidirectional LSTM layers; ``bilstm - 1`` layers
            return sequences and one final layer always follows them.
        bilstm_unit: Units of each LSTM direction.
        dropout_rate: Rate used by every (spatial) dropout layer.
        reduction_ratio: Divisor of the channel count in the SE bottleneck.
        activation_fn: Activation used in conv blocks and the SE bottleneck.

    Returns:
        A function ``senet_nn(input_shape_1, input_shape_2)`` that builds and
        returns the (uncompiled) ``keras.Model`` with ``N_OUTPUTS`` linear
        outputs.
    """

    def cnn_block(input_x, cnn_unit, kernel_size):
        # Conv -> BatchNorm -> activation.
        x = pConv1D(cnn_unit, kernel_size=kernel_size)(input_x)
        x = BatchNormalization()(x)
        x = Activation(activation_fn)(x)
        return x

    def se_block(input_x):
        # Squeeze: one value per channel; excite: bottleneck then per-channel gate.
        x = layers.GlobalAveragePooling1D()(input_x)
        # Public API instead of the private ``_shape_val`` tensor attribute.
        channel_shape = K.int_shape(x)[-1]
        x = Reshape((1, channel_shape))(x)
        x = Dense(channel_shape // reduction_ratio, activation=activation_fn, kernel_initializer='he_uniform')(x)
        x = Dense(channel_shape, activation='tanh', kernel_initializer='he_uniform')(x)
        return x

    def residual_block(input_x):
        x = cnn_block(input_x, res_unit, kernel_size=res_kernel)
        x = pConv1D(res_unit, kernel_size=3)(x)
        x = BatchNormalization()(x)
        return x

    def se_res_block(input_x):
        res_x = residual_block(input_x)
        se_x = se_block(res_x)
        x = layers.Multiply()([res_x, se_x])
        x = Activation(activation_fn)(x)
        shortcut = input_x
        if K.int_shape(input_x)[-1] != res_unit:
            # After the concatenation the block input is wider than res_unit,
            # so the shortcut is projected with a 1x1 convolution; Add()
            # requires matching channel counts.
            shortcut = pConv1D(res_unit, kernel_size=1)(input_x)
        x = layers.Add()([x, shortcut])
        # Previously returned the undefined name ``outputs`` (NameError).
        return x

    def senet_nn(input_shape_1, input_shape_2):
        input_x = keras.Input(shape=(input_shape_1, input_shape_2))
        x = cnn_block(input_x, 128, first_kernel)
        # Embedding features computed from the same input.
        emb_features = embedded_layers(input_x)
        x = layers.Concatenate()([x, emb_features])
        for _ in range(feature_layer):
            x = se_res_block(x)
        x = SpatialDropout1D(rate=dropout_rate)(x)
        for _ in range(feature_layer):
            x = se_res_block(x)
            x = SpatialDropout1D(rate=dropout_rate)(x)
        x = cnn_block(x, cnn_unit=cnn_unit, kernel_size=1)
        x = SpatialDropout1D(rate=dropout_rate)(x)
        for _ in range(bilstm - 1):
            x = Bidirectional(pLSTM(bilstm_unit))(x)
            x = SpatialDropout1D(rate=dropout_rate)(x)
        # The last recurrent layer collapses the sequence to a single vector.
        x = Bidirectional(pLSTM(bilstm_unit, return_sequences=False))(x)
        x = Dropout(rate=dropout_rate)(x)
        outputs = Dense(N_OUTPUTS, activation='linear', kernel_initializer='he_uniform')(x)
        model = keras.Model(inputs=input_x, outputs=outputs)
        return model

    return senet_nn

In [None]:
# Build the embedding-augmented network for inputs of shape (24, 39) and print its layers.
emb_model = init_senet_w_emb()(24, 39)
emb_model.summary()

### Compile Model 

In [None]:
# Adam optimizer with MSE loss; the custom rmse and R2 functions are tracked as metrics.
emb_model.compile(optimizer='adam',loss='mse',metrics=[rmse, R2])

### Train Model 

In [None]:
# Training hyper-parameters for the embedding-augmented model.
PATIENCE = 10
BATCH_SIZE = 128
EPOCHS = 30

# Stop once val_loss has not improved for PATIENCE epochs.
# NOTE(review): restore_best_weights=False keeps the last-epoch weights
# rather than the best ones -- confirm this is intended.
early = callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0,
    patience=PATIENCE,
    baseline=None,
    restore_best_weights=False,
    verbose=1,
)
callback_list = [early]

In [None]:
# Train the embedding-augmented model on (X_train, y_train), validating on
# the validation split; verbose=2 is passed to fit for this run.
history = emb_model.fit(X_train,y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=2,
    validation_data=(X_val,y_val),
    callbacks=callback_list)

In [None]:
# Persist the trained model to 'model_with_embedded.hdf5'.
emb_model.save('model_with_embedded.hdf5')

## 4.) Evaluation 

In [None]:
# Reload the saved model; the custom metric functions are supplied through
# custom_objects under the names 'rmse' and 'R2'.
emb_model = tf.keras.models.load_model('model_with_embedded.hdf5', custom_objects={'rmse': rmse, 'R2':R2})

In [None]:
# Evaluate the reloaded model on the test split without progress output.
emb_model.evaluate(X_test,y_test,verbose=0)

In [None]:
# Predictions of the embedding-augmented model for the test inputs
# (this rebinds y_pred, which previously held the baseline predictions).
y_pred = emb_model.predict(X_test)

### Check error for each sample

In [None]:
# axis=1 averages over columns, giving one RMSE value per test row.
errs = compute_rmse(y_test,y_pred, axis=1)

In [None]:
# Show the per-row RMSE values computed for this model. The previous
# ``print(err)`` displayed the stale per-column errors of the baseline model.
print(errs)

### Rank samples by error

In [None]:
# One row per test sample: its z_test label ('phonetic') and its row-wise RMSE ('err').
err_df = pd.DataFrame({'phonetic':z_test, 'err':errs})

In [None]:
# Preview the first rows of the error table.
err_df.head()

In [None]:
# Sort by error in ascending order, so the largest errors appear last.
err_df.sort_values('err')

In [None]:
# Histogram of the per-sample errors.
err_df.hist('err')

In [None]:
# Keep only the samples whose error exceeds 0.15.
err_df_high = err_df[err_df['err'] > 0.15]

In [None]:
# Preview the first high-error rows.
err_df_high.head()

In [None]:
# Bar chart of how many samples per label have an error above 0.5, most frequent first.
# NOTE(review): this uses a 0.5 threshold while err_df_high uses 0.15 -- confirm that is intended.
err_df[err_df['err'] > 0.5].groupby('phonetic')['phonetic'].count().sort_values(ascending=False).plot.bar()

In [None]:
# Number of high-error samples (err > 0.15) per label.
err_df_high.groupby('phonetic')['phonetic'].count()

## Evaluate each param of each row

In [None]:
# Element-wise error magnitude for every parameter of every test row,
# plus the row-wise RMSE of each test row.
se = compute_se(y_test,y_pred)
errs = compute_rmse(y_test,y_pred, axis=1)

In [None]:
# Label each error column with a parameter name (17 names).
# NOTE(review): presumably these follow the column order left by delete_params -- confirm.
df_se = pd.DataFrame(se, columns=["HX","HY","JA","LP","LD","VS","VO","TCX","TCY","TTX","TTY","TBX","TBY","TS1","TS2","TS3","TS4"])

In [None]:
# Attach the label, the row-wise RMSE and the test audio entry to each row.
df_se['phonetic'] = z_test
df_se['mean_err'] = errs
df_se['audio'] = a_test

In [None]:
# Preview the first rows of the per-parameter error table.
df_se.head()

In [None]:
# Histogram of the TS4 error for rows labelled '7' whose mean_err exceeds 0.5.
df_se[(df_se['phonetic'] == '7') & (df_se['mean_err'] > 0.5)]['TS4'].plot.hist()

## EDA audio of testing data 

In [None]:
# Order rows from the highest to the lowest mean error and renumber them
# from 0, so that index 0 is the worst-predicted test row.
df_se_sort = df_se.sort_values('mean_err', ascending=False).reset_index(drop=True)

In [None]:
# The five rows labelled '&' with the highest mean error.
df_se_sort[df_se_sort['phonetic'] == '&'].head(n=5)

In [None]:
# Play the audio entry of row 0 (the highest mean error after sorting); 16000 is passed as the playback rate.
Audio(df_se_sort['audio'][0], rate=16000)

In [None]:
# The five rows labelled '&' with the lowest mean error.
df_se_sort[df_se_sort['phonetic'] == '&'].tail(n=5)

In [None]:
# Play the audio entry of row 12073 of the sorted table.
# NOTE(review): the index is hard-coded -- presumably one of the low-error rows listed above; confirm.
Audio(df_se_sort['audio'][12073], rate=16000)