In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pickle

from keras.initializers import glorot_uniform as keras_glorot_uniform
from keras.initializers import orthogonal as keras_orthogonal
from keras.initializers import uniform as keras_uniform
from keras.layers import Input
from keras.layers.core import Dense
from keras.layers.core import Masking
from keras.layers.embeddings import Embedding
from keras.layers.normalization import BatchNormalization
from keras.layers.recurrent import LSTM
from keras.models import Model

Using TensorFlow backend.


In [2]:

# Vocabulary object written by an earlier step.
# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.
with open('data/words.pickle', 'rb') as f:
    words = pickle.load(f)

# Encoder inputs, decoder inputs and labels, plus the two sequence lengths.
enc_input, dec_input, lbl_input = (
    np.load('data/enc_input.npy'),
    np.load('data/dec_input.npy'),
    np.load('data/lbl_input.npy'),
)
maxlen_e, maxlen_d = np.load('data/maxlen.npy')


# Split data for training and test: the first 95% of the rows go to training,
# the remainder to test. The same row index is used for all three arrays so
# that encoder input, decoder input and label stay aligned.
sep_idx = int(0.95 * enc_input.shape[0])
(enc_train, enc_test), (dec_train, dec_test), (lbl_train, lbl_test) = (
    np.vsplit(rows, [sep_idx]) for rows in (enc_input, dec_input, lbl_input)
)


In [19]:

class Dialog:
    """Builder for the Keras models of an LSTM encoder-decoder dialog system.

    The constructor only records hyper-parameters; the Keras graphs are built
    by create_model().

    Args:
        maxlen_e: length of the encoder input sequence (used as Input shape).
        maxlen_d: length of the decoder input sequence (used as Input shape).
        num_input: `input_dim` of both embedding layers.
        num_output: number of units of the final softmax Dense layer.
        dim_vec: `output_dim` of both embedding layers.
        dim_hidden_vec: number of units of the encoder and decoder LSTMs.
    """

    # Seed passed to every weight initializer.
    _SEED = 12345

    def __init__(
        self,
        maxlen_e,
        maxlen_d,
        num_input,
        num_output,
        dim_vec,
        dim_hidden_vec
    ):
        self.maxlen_e = maxlen_e
        self.maxlen_d = maxlen_d
        self.num_input = num_input
        self.num_output = num_output
        self.dim_vec = dim_vec
        self.dim_hidden_vec = dim_hidden_vec

        # Filled in by train(); None until the models have been built.
        self.model = None
        self.enc_model = None
        self.dec_model = None

    def _embed(self, token_ids):
        """Apply a fresh Embedding -> BatchNormalization -> Masking stack to `token_ids`.

        New layers are created on every call, so the encoder and the decoder
        do not share embedding weights.
        """
        tf_tensor = Embedding(
            input_dim=self.num_input,
            # Both encoder and decoder embed into dim_vec dimensions.
            output_dim=self.dim_vec,
            mask_zero=True,  # ID 0 in Input data is considered as padding.
            embeddings_initializer=keras_uniform(seed=self._SEED),
        )(token_ids)

        # axis -1 counts dim from the highest rank
        tf_tensor = BatchNormalization(axis=-1)(tf_tensor)
        # NOTE(review): Masking derives a mask from the batch-normalised values;
        # confirm the padding mask produced by mask_zero=True still reaches the
        # LSTM as intended.
        return Masking(mask_value=0.0)(tf_tensor)

    def _new_lstm(self, return_sequences):
        """Create an LSTM layer with the configuration shared by encoder and decoder."""
        return LSTM(
            units=self.dim_hidden_vec,
            kernel_initializer=keras_glorot_uniform(seed=self._SEED),
            # Coefficient to the orthogonal matrix is 1.0
            recurrent_initializer=keras_orthogonal(gain=1.0, seed=self._SEED),
            dropout=0.5,
            recurrent_dropout=0.5,
            return_state=True,
            return_sequences=return_sequences,
        )

    def create_model(self):        # Used in train() and prediction
        """Build the training model and the two inference models.

        Returns:
            A tuple `(model, enc_model, dec_model)`:
              * model: takes [encoder_input, decoder_input] and outputs the
                softmax of the decoder; compiled with categorical
                cross-entropy and Adam.
              * enc_model: takes encoder_input and outputs the encoder LSTM
                output together with its hidden and cell states.
              * dec_model: takes [decoder_input, hidden state, cell state] and
                outputs the softmax response plus the updated states. It
                reuses the decoder LSTM and Dense layers of `model`.
        """
        print('\n...Start creating models.\n')

        #
        # Encoder model creation
        #

        enc_input = Input(shape=(self.maxlen_e,), dtype='int32', name='encoder_input')
        enc_LSTM_input = self._embed(enc_input)
        enc_LSTM = self._new_lstm(return_sequences=False)
        enc_output, enc_hidden_state, enc_cell_state = enc_LSTM(enc_LSTM_input)

        enc_model = Model(inputs=enc_input, outputs=[enc_output, enc_hidden_state, enc_cell_state])

        print('Encoder model created.\n')

        #
        # Decoder training model creation
        #

        dec_input = Input(shape=(self.maxlen_d,), dtype='int32', name='decoder_input')
        dec_LSTM_input = self._embed(dec_input)

        # dec_LSTM is used later again
        dec_LSTM = self._new_lstm(return_sequences=True)
        # During training the decoder starts from the encoder's final states.
        tf_tensor, _, _ = dec_LSTM(dec_LSTM_input, initial_state=[enc_hidden_state, enc_cell_state])

        # Densely connected NN after LSTM is used later again
        dec_Dense = Dense(
            units=self.num_output,
            activation='softmax',
            kernel_initializer=keras_glorot_uniform(seed=self._SEED),
        )
        dec_output = dec_Dense(tf_tensor)

        model = Model(inputs=[enc_input, dec_input], outputs=dec_output)
        model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['categorical_accuracy'])

        print('Decoder training model created.\n')

        #
        # Decoder model creation
        #

        # At inference time the states are fed in explicitly instead of
        # coming from the encoder graph.
        dec_hidd_state_input = Input(shape=(self.dim_hidden_vec,), name='hidd_state_input')
        dec_cell_state_input = Input(shape=(self.dim_hidden_vec,), name='cell_state_input')

        tf_tensor, dec_hidden_state, dec_cell_state = dec_LSTM(
            dec_LSTM_input,
            initial_state=[dec_hidd_state_input, dec_cell_state_input],
        )
        dec_response = dec_Dense(tf_tensor)

        dec_model = Model(
            inputs=[dec_input, dec_hidd_state_input, dec_cell_state_input],
            outputs=[dec_response, dec_hidden_state, dec_cell_state],
        )

        print('Decoder model created.\n')

        return model, enc_model, dec_model

    def eval_perplexity(self):     # Used in on_batch() and test
        """Placeholder; not implemented yet."""
        pass

    def on_batch(self):            # Used in train()
        """Placeholder; not implemented yet."""
        pass

    def train(self):
        """Build the models and keep them on the instance.

        The models are stored as `self.model`, `self.enc_model` and
        `self.dec_model`. No fitting is performed yet. Returns None.
        """
        self.model, self.enc_model, self.dec_model = self.create_model()

    def sampling_detoknize(self):
        """Placeholder; not implemented yet (method name kept as-is for callers)."""
        pass
    

In [20]:

# Input and output vocabularies are the same size: one entry per item in `words`.
num_input = num_output = len(words)

# Embedding width, and an LSTM state twice that wide.
dim_vec = 400
dim_hidden_vec = int(2 * dim_vec)

dialog = Dialog(
    maxlen_e=maxlen_e,
    maxlen_d=maxlen_d,
    num_input=num_input,
    num_output=num_output,
    dim_vec=dim_vec,
    dim_hidden_vec=dim_hidden_vec,
)

dialog.train()


...Start creating models.

Encoder model created.

Decoder training model created.

Decoder model created.

