# Speech + Phonetic AAI Model

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import matplotlib.pyplot as plt
import os
from os.path import join
import tensorflow as tf
from tensorflow import keras

from tensorflow.keras import models
from tensorflow.keras import optimizers
from tensorflow.keras import regularizers
from tensorflow.keras import callbacks
from time import time
from datetime import datetime
import shutil 
import argparse
from functools import partial
import pickle

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Reshape, GRU, InputLayer, AlphaDropout, Activation, BatchNormalization, Dropout, Flatten, Dense, Bidirectional, LSTM, Conv1D, SpatialDropout1D, Concatenate

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import OneHotEncoder

## Metric Function

In [2]:
def rmse(y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))

def R2(y_true, y_pred):
    SS_res =  K.sum(K.square(y_true - y_pred), axis = 0) 
    SS_tot = K.sum(K.square(y_true - K.mean(y_true, axis = 0)), axis = 0) 
    return K.mean(1 - (SS_res/SS_tot), axis=0)

## Utility Functions

In [3]:
#Function for preprocessing data
def delete_params(params):
    '''
    This function remove JX, WC, TRX, TRY, and MS1,2,3 paramter
    '''
    DEL_PARAMS_LIST = [2,8,15,16,21,22,23]
    return np.delete(params,DEL_PARAMS_LIST , axis=1)

In [4]:
def compute_rmse(actual,pred, axis=1):
    # Compute RMSE by row (axis=1) result in rmse of each data
    # Compute RMSE by column (axis=0) result in rmse of each label
    return np.sqrt((np.square(actual - pred)).mean(axis=axis))

In [5]:
def evaluate_model(y_pred, label):
    y_pred = enc.inverse_transform(y_pred)
    label = enc.inverse_transform(label)
    print('Confusion Matrix')
    print(confusion_matrix(label, y_pred))
    print('Classification Report')
    print(classification_report(label, y_pred))

## Load Data 

In [6]:
def prep_data():

    # load data from preprocess pipeline
    dataset = np.load(join('../data/d_dataset_p1/prep_data_13','training_subsets.npz'))
    X_train =dataset['X_train']
    y_train= dataset['y_train']
    z_train= dataset['z_train']
    X_val = dataset['X_val']
    y_val = dataset['y_val']
    z_val = dataset['z_val']
    X_test = dataset['X_test']
    y_test = dataset['y_test']
    z_test = dataset['z_test']
    
    y_train = delete_params(y_train)
    y_val = delete_params(y_val)
    y_test = delete_params(y_test)

    print('Train features and labels %s %s'%(str(X_train.shape),str(y_train.shape)))
    print('Validating features and labels %s %s'%(str(X_val.shape),str(y_val.shape)))
    print('Test features and labels %s %s'%(str(X_test.shape),str(y_test.shape)))

    return X_train, X_val, X_test, y_train, y_val, y_test, z_train, z_val, z_test

In [7]:
X_train, X_val, X_test, y_train, y_val, y_test, z_train, z_val, z_test = prep_data()

Train features and labels (98746, 24, 39) (98746, 17)
Validating features and labels (12094, 24, 39) (12094, 17)
Test features and labels (12092, 24, 39) (12092, 17)


In [8]:
print(set(z_test))

{'&', 'M', 'Q', 'u', 'A', 'o', 'U', 'E:', 'E', 'O', '2', 'V', 'e', 'i', 'y', '9', '7', '@', 'a'}


In [9]:
print('Train features and labels {} {} {}'.format(str(X_train.shape),str(y_train.shape),str(z_train.shape)))
print('Validating features and labels {} {} {}'.format(str(X_val.shape),str(y_val.shape),str(z_val.shape)))
print('Test features and labels {} {} {}'.format(str(X_test.shape),str(y_test.shape),str(z_test.shape)))

Train features and labels (98746, 24, 39) (98746, 17) (98746,)
Validating features and labels (12094, 24, 39) (12094, 17) (12094,)
Test features and labels (12092, 24, 39) (12092, 17) (12092,)


### One-hot label 

In [10]:
z_train_oe=z_train.reshape(-1,1)
z_val_oe=z_val.reshape(-1,1)
z_test_oe=z_test.reshape(-1,1)

In [11]:
enc = OneHotEncoder(handle_unknown = 'ignore')
enc.fit(z_train_oe)

z_train_oe = enc.transform(z_train_oe).toarray()
z_test_oe = enc.transform(z_test_oe).toarray()
z_val_oe = enc.transform(z_val_oe).toarray()

In [12]:
z_train_oe.shape

(98746, 19)

In [13]:
z_val_oe.shape

(12094, 19)

In [14]:
z_train_oe[1]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
       0., 0.])

## Defined Model 

In [12]:
N_OUTPUTS = 17

pLSTM = partial(LSTM,
    kernel_initializer='he_uniform',
    return_sequences=True)

pDense = partial(Dense,
    kernel_initializer='he_normal',
    activation='elu')

def init_bilstm(unit=128, bi_layer_num=3, drop_rate=0.2):

    def bilstm(input_shape_1,input_shape_2):
        
        input_x = keras.Input(shape=(input_shape_1,input_shape_2))
        # feature extraction layers
        x = input_x
        for i in range(bi_layer_num-1):
            x = Bidirectional(pLSTM(unit))(x)
            if drop_rate: 
                x = SpatialDropout1D(rate=drop_rate)(x)
        # output layers
        x = Bidirectional(pLSTM(unit*2, return_sequences=False))(x)
        if drop_rate: 
            x = Dropout(rate=drop_rate)(x)
        x = pDense(1024, activation='relu')(x)
        outputs_param = pDense(N_OUTPUTS, activation='linear')(x)
        
        model = keras.Model(inputs=input_x, outputs=outputs_param)
        # model.summary()
        return model
    return bilstm

In [13]:
model = init_bilstm()(24, 39)

In [14]:
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 24, 39)]          0         
_________________________________________________________________
bidirectional (Bidirectional (None, 24, 256)           172032    
_________________________________________________________________
spatial_dropout1d (SpatialDr (None, 24, 256)           0         
_________________________________________________________________
bidirectional_1 (Bidirection (None, 24, 256)           394240    
_________________________________________________________________
spatial_dropout1d_1 (Spatial (None, 24, 256)           0         
_________________________________________________________________
bidirectional_2 (Bidirection (None, 512)               1050624   
_________________________________________________________________
dropout (Dropout)            (None, 512)               0     

## Compile Model 

In [16]:
model.compile(optimizer='adam',loss='mse',metrics=[rmse, R2])

## Train Model 

In [17]:
PATIENCE = 10
BATCH_SIZE = 128
EPOCHS = 30

In [18]:
early = callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=PATIENCE, verbose=1, mode='min', baseline=None, restore_best_weights=False)
callback_list = [early]

In [19]:
history = model.fit(X_train,y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val,y_val),
    callbacks=callback_list)

Train on 98746 samples, validate on 12094 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 00021: early stopping


In [20]:
model.save('model_baseline.hdf5')

## Evaluation 

In [21]:
model.evaluate(X_test,y_test,verbose=0)

[0.032602244376239105, 0.14959863, 0.8500039]

In [22]:
y_pred = model.predict(X_test)

In [24]:
err = compute_rmse(y_test,y_pred, axis=0)

In [25]:
print(err)

[0.15738225 0.13672984 0.14865203 0.19487172 0.16036555 0.21226606
 0.25415587 0.10173931 0.1306819  0.24195167 0.13813107 0.1652471
 0.17270515 0.26993943 0.18364394 0.15410301 0.15102673]


## Defined Model + Phonetic

In [26]:
N_OUTPUTS = 17

pLSTM = partial(LSTM,
    kernel_initializer='he_uniform',
    return_sequences=True)

pDense = partial(Dense,
    kernel_initializer='he_normal',
    activation='elu')

def init_bilstm_pho(unit=128, bi_layer_num=3, drop_rate=0.2):

    def bilstm(input_shape_1,input_shape_2):
        
        input_x = keras.Input(shape=(input_shape_1,input_shape_2))
        # feature extraction layers
        x = input_x
        z = input_x
        for i in range(bi_layer_num-1):
            x = Bidirectional(pLSTM(unit))(x)
            if drop_rate: 
                x = SpatialDropout1D(rate=drop_rate)(x)
        # output layers
        x = Bidirectional(pLSTM(unit*2, return_sequences=False))(x)
        if drop_rate: 
            x = Dropout(rate=drop_rate)(x)
                
        for i in range(bi_layer_num-2):
            z = Bidirectional(pLSTM(unit))(z)
            if drop_rate: 
                z = SpatialDropout1D(rate=drop_rate)(z)
        z = Bidirectional(pLSTM(unit*2, return_sequences=False))(z)
        if drop_rate: 
            z = Dropout(rate=drop_rate)(z)
        z = pDense(100, activation='relu')(z)
        outputs_pho = pDense(19, activation='softmax', name='pho')(x)
        
        x = Concatenate()([x,z])
        x = pDense(512, activation='relu')(x)
        outputs_param = pDense(N_OUTPUTS, activation='linear', name='param')(x)
        
        model = keras.Model(inputs=input_x, outputs=[outputs_param, outputs_pho])
        # model.summary()
        return model
    return bilstm

In [27]:
model_pho = init_bilstm_pho()(24, 39)

In [28]:
model_pho.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 24, 39)]     0                                            
__________________________________________________________________________________________________
bidirectional_3 (Bidirectional) (None, 24, 256)      172032      input_2[0][0]                    
__________________________________________________________________________________________________
spatial_dropout1d_2 (SpatialDro (None, 24, 256)      0           bidirectional_3[0][0]            
__________________________________________________________________________________________________
bidirectional_6 (Bidirectional) (None, 24, 256)      172032      input_2[0][0]                    
____________________________________________________________________________________________

In [29]:
losses = {
    "param": "mse",
    "pho": tf.keras.losses.CategoricalCrossentropy(from_logits=True),
}
lossWeights = {"param": 1.0, "pho": 1.0}

In [30]:
metrics = {
    "param": [rmse, R2],
    "pho": "accuracy",
}

## Compile Model 

In [31]:
model_pho.compile(optimizer='adam',loss=losses, loss_weights=lossWeights, metrics=metrics)

## Train Model 

In [32]:
PATIENCE = 10
BATCH_SIZE = 128
EPOCHS = 30

In [33]:
early = callbacks.EarlyStopping(monitor='param_loss', min_delta=0, patience=PATIENCE, verbose=1, mode='min', baseline=None, restore_best_weights=False)
callback_list = [early]

In [34]:
history = model_pho.fit(X_train,{"param": y_train, "pho": z_train_oe},
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=2,
    validation_data=(X_val,{"param": y_val, "pho": z_val_oe}),
    callbacks=callback_list)

Train on 98746 samples, validate on 12094 samples
Epoch 1/30
98746/98746 - 54s - loss: 2.3965 - param_loss: 0.0713 - pho_loss: 2.3251 - param_rmse: 0.2419 - param_R2: 0.6899 - pho_accuracy: 0.7161 - val_loss: 2.2395 - val_param_loss: 0.0432 - val_pho_loss: 2.1963 - val_param_rmse: 0.1845 - val_param_R2: 0.8155 - val_pho_accuracy: 0.8358
Epoch 2/30
98746/98746 - 32s - loss: 2.2864 - param_loss: 0.0470 - pho_loss: 2.2394 - param_rmse: 0.1938 - param_R2: 0.7974 - pho_accuracy: 0.7928 - val_loss: 2.2200 - val_param_loss: 0.0391 - val_pho_loss: 2.1806 - val_param_rmse: 0.1743 - val_param_R2: 0.8325 - val_pho_accuracy: 0.8508
Epoch 3/30
98746/98746 - 32s - loss: 2.2617 - param_loss: 0.0411 - pho_loss: 2.2205 - param_rmse: 0.1814 - param_R2: 0.8224 - pho_accuracy: 0.8113 - val_loss: 2.2225 - val_param_loss: 0.0378 - val_pho_loss: 2.1845 - val_param_rmse: 0.1686 - val_param_R2: 0.8387 - val_pho_accuracy: 0.8463
Epoch 4/30
98746/98746 - 32s - loss: 2.2534 - param_loss: 0.0379 - pho_loss: 2.2155

In [35]:
model_pho.save('model_baseline_pho.hdf5')

## Evaluation

In [39]:
y_pred, y_pho_pred = model_pho.predict(X_test)

In [40]:
err = compute_rmse(y_test,y_pred, axis=0)
print(err)

[0.16499175 0.13846306 0.15626041 0.20971853 0.16883098 0.22409996
 0.26204728 0.10337965 0.13968868 0.25097486 0.14635381 0.16778201
 0.18071192 0.27521861 0.18504289 0.15457323 0.15504013]


In [45]:
evaluate_model(y_pho_pred, z_test_oe)

Confusion Matrix
[[608   0   1  15   0   0   0   0   0   6  13   1   5  37   0   0   0   0
    0]
 [  0 668   0   1   0   0   0   0   2   0   0   1   0   0   0   0   0   2
   13]
 [  6   1 365   4   0   3   0   0   5  80   8  14  67  11   0   0   2   0
    0]
 [ 14   0   0 622   0   0   2   0   6   0   5   6   4   0   0   0   0   0
    0]
 [  1   0   0  12 595   0  31   1   0   0   0   0   0   0   0   0   0   0
    0]
 [  0   0   0   0   0 550   0   0   0   0  13   0  19  20   0   0   0   0
    0]
 [  0   0   0   1  12   0 614  36   0   0   1   0   0   1   0   0   0   0
    0]
 [  0   0   0   0   0   1  17 600   0   0   0   0   0   0   7   0   0   0
    0]
 [  0   1  18   4   0   0   0   0 598   0   0  29   0   0   0   0   0   0
    0]
 [  5   0  18   2   0   1   0   0   3 562   7  11  47   1   0   0   8   0
    0]
 [ 16   0   1   3   0  44   2   0   0   1 546   0  10  10   0   0   0   0
    0]
 [  5   1  16   0   0   0   0   0  54   5   0 537   0   0   0   0   5  24
    0]
 [  1   1  

## Embedded Model 

In [15]:
N_OUTPUTS = 17

pLSTM = partial(LSTM,
    kernel_initializer='he_uniform',
    return_sequences=True)

pDense = partial(Dense,
    kernel_initializer='he_normal',
    activation='elu')

def init_embeded_pho(unit=128, drop_rate=0.2):
    
    def embeded(input_shape_1,input_shape_2):
        inputs = keras.Input(shape=(input_shape_1,input_shape_2))
        x = pLSTM(100)(inputs)
        x = Flatten()(x)
        outputs = pDense(19, activation='softmax')(x)
        return keras.Model(inputs=inputs, outputs=outputs)
    return embeded

In [16]:
embedded = init_embeded_pho()(24, 39)
embedded.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 24, 39)]          0         
_________________________________________________________________
lstm (LSTM)                  (None, 24, 100)           56000     
_________________________________________________________________
flatten (Flatten)            (None, 2400)              0         
_________________________________________________________________
dense (Dense)                (None, 19)                45619     
Total params: 101,619
Trainable params: 101,619
Non-trainable params: 0
_________________________________________________________________


## Compile Model 

In [17]:
embedded.compile(optimizer='adam',loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),metrics=['accuracy'])

## Train Model 

In [18]:
PATIENCE = 10
BATCH_SIZE = 128
EPOCHS = 40

In [19]:
early = callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=PATIENCE, verbose=1, mode='min', baseline=None, restore_best_weights=False)
callback_list = [early]

In [20]:
history = embedded.fit(X_train,z_train_oe,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val,z_val_oe),
    callbacks=callback_list)

Train on 98746 samples, validate on 12094 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 00029: early stopping


In [21]:
embedded.save('embedded.hdf5')

In [22]:
y_pred = embedded.predict(X_test)
evaluate_model(y_pred, z_test_oe)

Confusion Matrix
[[618   1   1  13   0   0   1   0   0   3  25   0   8  16   0   0   0   0
    0]
 [  0 662   0   1   0   2   0   0   1   0   0   1   0   0   0   0   0   3
   17]
 [  3   1 364   1   0   4   0   0   8  60   7  13  97   3   0   0   5   0
    0]
 [ 15   1   0 610   1   0   2   0  12   0   3   0  12   3   0   0   0   0
    0]
 [  4   0   0  12 577   0  46   1   0   0   0   0   0   0   0   0   0   0
    0]
 [  0   0   0   0   0 566   0   0   0   0  21   0   8   7   0   0   0   0
    0]
 [  1   0   0  12   6   0 624  17   0   0   2   0   0   3   0   0   0   0
    0]
 [  0   0   0   0   0   3  51 567   0   0   0   0   0   0   3   0   0   0
    1]
 [  0   2   3   1   0   0   0   0 618   1   0  17   0   0   0   0   1   5
    2]
 [  3   0   9   0   0   2   0   0   2 576   9   7  47   0   0   0  10   0
    0]
 [  9   0   0   3   0  14   2   0   0   3 594   0   6   2   0   0   0   0
    0]
 [  0   1   7   3   0   0   0   0  72   7   0 535   2   0   0   0   7  13
    0]
 [  1   0  

In [23]:
embedded = tf.keras.models.load_model('embedded.hdf5')

In [24]:
model = tf.keras.Sequential()
for layer in embedded.layers[:-1]:
    model.add(layer)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 24, 100)           56000     
_________________________________________________________________
flatten (Flatten)            (None, 2400)              0         
Total params: 56,000
Trainable params: 56,000
Non-trainable params: 0
_________________________________________________________________


In [25]:
y_pred = model.predict(X_test)

In [26]:
y_pred.shape

(12092, 2400)

## Test Embedding

In [117]:
from scipy.spatial.distance import cosine

In [118]:
print([(idx, item) for idx, item in enumerate(z_test.tolist()[:30])])

[(0, 'O'), (1, '2'), (2, 'U'), (3, 'o'), (4, '@'), (5, '9'), (6, 'u'), (7, 'E'), (8, '2'), (9, '2'), (10, 'A'), (11, '@'), (12, 'y'), (13, 'i'), (14, 'E'), (15, 'Q'), (16, 'u'), (17, 'o'), (18, '7'), (19, 'E:'), (20, '9'), (21, 'E:'), (22, '&'), (23, 'V'), (24, 'E:'), (25, 'o'), (26, '7'), (27, 'a'), (28, '2'), (29, 'i')]


In [129]:
def cosine_sim(idx, idx2):
    sim = 1 - cosine(y_pred[idx], y_pred[idx2])
    if sim > 0.40:
        print('{} {}: {}'.format(z_test[idx], z_test[idx2],sim))

In [130]:
for i in range(30):
    for j in range(30):
        cosine_sim(i, j)

O O: 1.0
2 2: 1.0
2 2: 0.5821772217750549
U U: 1.0
o o: 1.0
@ @: 1.0
9 9: 1.0
9 9: 0.6471675634384155
u u: 1.0
E E: 1.0
E E: 0.6792986392974854
2 2: 0.5821772217750549
2 2: 1.0
2 2: 1.0
A A: 1.0
@ @: 1.0
y y: 1.0
i i: 1.0
i E:: 0.43088242411613464
i i: 0.4158954620361328
E E: 0.6792986392974854
E E: 1.0
Q Q: 1.0
u u: 1.0
o o: 1.0
7 7: 1.0
7 7: 0.4666718542575836
E: E:: 1.0
E: E:: 0.4112861156463623
E: E:: 0.5355876088142395
9 9: 0.6471675634384155
9 9: 1.0
E: E:: 0.4112861156463623
E: E:: 1.0
E: E:: 0.4070873558521271
& &: 1.0
V V: 1.0
E: i: 0.43088242411613464
E: E:: 0.5355876088142395
E: E:: 0.4070873558521271
E: E:: 1.0
o o: 1.0
7 7: 0.4666718542575836
7 7: 1.0
a a: 1.0
2 2: 1.0
i i: 0.4158954620361328
i i: 1.0


# Model with Embedding 

## Baseline 

In [27]:
N_OUTPUTS = 17

pLSTM = partial(LSTM,
    kernel_initializer='he_uniform',
    return_sequences=True)

pDense = partial(Dense,
    kernel_initializer='he_normal',
    activation='elu')

def init_bilstm(unit=128, bi_layer_num=3, drop_rate=0.2):

    def bilstm(input_shape_1,input_shape_2):
        
        input_x = keras.Input(shape=(input_shape_1,input_shape_2))
        # feature extraction layers
        x = input_x
        for i in range(bi_layer_num-1):
            x = Bidirectional(pLSTM(unit))(x)
            if drop_rate: 
                x = SpatialDropout1D(rate=drop_rate)(x)
        # output layers
        x = Bidirectional(pLSTM(unit, return_sequences=False))(x)
        if drop_rate: 
            x = Dropout(rate=drop_rate)(x)
        outputs_param = pDense(N_OUTPUTS, activation='linear')(x)
        
        model = keras.Model(inputs=input_x, outputs=outputs_param)
        # model.summary()
        return model
    return bilstm

In [28]:
model = init_bilstm()(24, 39)
model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 24, 39)]          0         
_________________________________________________________________
bidirectional (Bidirectional (None, 24, 256)           172032    
_________________________________________________________________
spatial_dropout1d (SpatialDr (None, 24, 256)           0         
_________________________________________________________________
bidirectional_1 (Bidirection (None, 24, 256)           394240    
_________________________________________________________________
spatial_dropout1d_1 (Spatial (None, 24, 256)           0         
_________________________________________________________________
bidirectional_2 (Bidirection (None, 256)               394240    
_________________________________________________________________
dropout (Dropout)            (None, 256)               0   

## Compile Model 

In [29]:
model.compile(optimizer='adam',loss='mse',metrics=[rmse, R2])

## Train Model 

In [30]:
PATIENCE = 10
BATCH_SIZE = 128
EPOCHS = 30

In [31]:
early = callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=PATIENCE, verbose=1, mode='min', baseline=None, restore_best_weights=False)
callback_list = [early]

In [32]:
history = model.fit(X_train,y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val,y_val),
    callbacks=callback_list)

Train on 98746 samples, validate on 12094 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [33]:
model.save('model_baseline_2.hdf5')

## Evaluation 

In [34]:
model.evaluate(X_test,y_test,verbose=0)

[0.03239709647693383, 0.1489216, 0.8514758]

In [35]:
y_pred = model.predict(X_test)

In [36]:
err = compute_rmse(y_test,y_pred, axis=0)

In [37]:
print(err)

[0.1568435  0.1333071  0.14723637 0.20025707 0.16252713 0.21058316
 0.25390446 0.09769458 0.13031462 0.24590375 0.13846071 0.16655805
 0.16988616 0.26658766 0.17800331 0.15121139 0.15160024]


## Embedded input model

In [43]:
N_OUTPUTS = 17

pLSTM = partial(LSTM,
    kernel_initializer='he_uniform',
    return_sequences=True)

pDense = partial(Dense,
    kernel_initializer='he_normal',
    activation='elu')

embedded = tf.keras.models.load_model('embedded.hdf5')

embedded_layers = tf.keras.Sequential()
for layer in embedded.layers[:-2]:
    layer.trainable = False
    embedded_layers.add(layer)
embedded_layers.summary()

def init_embeded_bilstm(unit=128, bi_layer_num=3, drop_rate=0.2):

    def bilstm(input_shape_1,input_shape_2):
        
        input_x = keras.Input(shape=(input_shape_1,input_shape_2))
        
        emb = embedded_layers(input_x)
        
        # feature extraction layers
        x = Concatenate()([input_x,emb])
        for i in range(bi_layer_num-1):
            x = Bidirectional(pLSTM(unit))(x)
            if drop_rate: 
                x = SpatialDropout1D(rate=drop_rate)(x)
        # output layers
        x = Bidirectional(pLSTM(unit, return_sequences=False))(x)
        if drop_rate: 
            x = Dropout(rate=drop_rate)(x)
        outputs_param = pDense(N_OUTPUTS, activation='linear')(x)
        
        model = keras.Model(inputs=input_x, outputs=outputs_param)
        # model.summary()
        return model
    return bilstm

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 24, 100)           56000     
Total params: 56,000
Trainable params: 0
Non-trainable params: 56,000
_________________________________________________________________


In [44]:
emb_model = init_embeded_bilstm()(24, 39)
emb_model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            [(None, 24, 39)]     0                                            
__________________________________________________________________________________________________
sequential_3 (Sequential)       (None, 24, 100)      56000       input_5[0][0]                    
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (None, 24, 139)      0           input_5[0][0]                    
                                                                 sequential_3[1][0]               
__________________________________________________________________________________________________
bidirectional_3 (Bidirectional) (None, 24, 256)      274432      concatenate_1[0][0]        

## Compile Model 

In [45]:
emb_model.compile(optimizer='adam',loss='mse',metrics=[rmse, R2])

## Train Model 

In [46]:
PATIENCE = 10
BATCH_SIZE = 128
EPOCHS = 30

In [48]:
early = callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=PATIENCE, verbose=1, mode='min', baseline=None, restore_best_weights=False)
callback_list = [early]

In [49]:
history = emb_model.fit(X_train,y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val,y_val),
    callbacks=callback_list)

Train on 98746 samples, validate on 12094 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 00027: early stopping


In [50]:
emb_model.save('model_embedded_bilstm.hdf5')

## Evaluation 

In [55]:
emb_model.evaluate(X_test,y_test,verbose=0)

[0.03327701454338276, 0.14917862, 0.8477405]

In [52]:
y_pred = emb_model.predict(X_test)

In [53]:
err = compute_rmse(y_test,y_pred, axis=0)

In [54]:
print(err)

[0.15902291 0.13894631 0.14723567 0.19502921 0.16384531 0.22005402
 0.26012667 0.10071308 0.13245143 0.24339959 0.13835357 0.1690107
 0.17107858 0.27215488 0.17948705 0.15675618 0.15396349]
