# Imports

In [1]:
# -------------- Modelling Packages --------------
# For modeling
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Bidirectional, Concatenate, Embedding, LSTM, Dropout, Reshape
from keras.optimizers import adam

# Callback Functions
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping

# For Timestamping Models
import time

# For balancing class weights
from sklearn.utils import class_weight

# -------------- General Packages --------------
# Data Manipulation
import pandas as pd
import numpy as np

# For Saving Files
import pickle
import os

Using TensorFlow backend.


# Directories

In [2]:
DIR = './'
MODEL_DIR = os.path.join(DIR+os.sep,'models'+os.sep)
SPLIT_DATA_DIR = os.path.join(DIR+os.sep,'split_data'+os.sep)
LOG_DIR = os.path.join('logs')

EN_TOKENIZER_DIR = os.path.join(DIR+os.sep,'en_tokenizer.pickle')
ZH_TOKENIZER_DIR = os.path.join(DIR+os.sep,'zh_tokenizer.pickle')

os.makedirs(os.path.dirname(MODEL_DIR), exist_ok=True)
os.makedirs(os.path.dirname(SPLIT_DATA_DIR), exist_ok=True)

# Loading The Dataset [Functions]

In [3]:
# Given the split dataset directory, return the train/test split
def load_dataset(split_data_dir):
    pickle_in = open(split_data_dir+'X_train.pickle','rb')
    X_train = pickle.load(pickle_in)
    
    pickle_in = open(split_data_dir+'X_test.pickle','rb')
    X_test = pickle.load(pickle_in)

    pickle_in = open(split_data_dir+'y_train.pickle','rb')
    y_train = pickle.load(pickle_in)

    pickle_in = open(split_data_dir+'y_test.pickle','rb')
    y_test = pickle.load(pickle_in)
    return X_train,X_test,y_train,y_test

def load_tokenizers(EN_TOKENIZER_DIR,ZH_TOKENIZER_DIR):
    pickle_in = open(EN_TOKENIZER_DIR,'rb')
    t_EN = pickle.load(pickle_in)
    pickle_in = open(ZH_TOKENIZER_DIR,'rb')
    t_ZH = pickle.load(pickle_in)
    return t_EN,t_ZH

# Loading the Dataset + Tokenizers

In [4]:
X_train,X_test,y_train,y_test = load_dataset(SPLIT_DATA_DIR)
t_EN,t_ZH = load_tokenizers(EN_TOKENIZER_DIR,ZH_TOKENIZER_DIR)

In [5]:
loss = 'sparse_categorical_crossentropy'
optimizer = 'RMSProp'
metrics = ['accuracy']

# -------------- Tokenizer Values --------------
SENTENCE_SIZE = int(X_train.shape[1]/4)
en_vocab_size = len(t_EN.word_index) + 1
zh_vocab_size = len(t_ZH.word_index) + 1
class_weights = class_weight.compute_class_weight('balanced',
                                                 np.unique(y_train),
                                                 y_train)

# -------------- TUNABLE HYPERPARAMETERS --------------
EMBED_SIZES = [ 10, 20, 50 ]
LSTM_SIZES = [ 2, 3, 4, 8, 20, 50, 100 ]
LSTM_LAYER_SIZES = [ 1, 2, 3, 4]
DROPOUT_SIZES = [ [0,0.5,0], [0.5,0.5,0], [0,0.5,0.5], [0.5,0.5,0.5] ]
CLIPNORM = 1

loss = 'sparse_categorical_crossentropy'
optimizer = 'RMSProp'
metrics = ['accuracy']

epochs = 100
batch_size = 1024

# Creating the Models

In [6]:
for EMBED_SIZE in EMBED_SIZES:
    for DROPOUT_SIZE in DROPOUT_SIZES:
        for LSTM_SIZE in LSTM_SIZES:
            for LSTM_LAYERS in LSTM_LAYER_SIZES:
                
                # -------------- MODEL NAMING --------------
                NAME = 'EN+ZH-BiLSTM-{}E-{}x{}L-{}Dropout-{}Clip-{}.hdf5'.format(EMBED_SIZE,
                                                                                   LSTM_LAYERS,LSTM_SIZE,
                                                                                   DROPOUT_SIZE,
                                                                                   CLIPNORM,
                                                                                   time.time())
                print('Creating {}'.format(NAME))
                MODEL_LOG_DIR = os.path.join(LOG_DIR,NAME)

                # -------------- Callbacks --------------
                # access tensorboard from the command line: tensorboard --logdir logs/
                tensorboard = TensorBoard(log_dir=MODEL_LOG_DIR) 
                checkpointer = ModelCheckpoint(MODEL_DIR+NAME, 
                                               monitor='val_accuracy', 
                                               verbose=1, 
                                               save_best_only=True, 
                                               mode='auto')
                earlystop = EarlyStopping(monitor='val_loss', patience=5)

                callbacks=[tensorboard,checkpointer,earlystop]

                # EN MODEL CREATION
                EN_INPUT = Input(shape=(SENTENCE_SIZE*2,))
                EN_MODEL = Reshape((-1,2,SENTENCE_SIZE))(EN_INPUT)
                EN_MODEL = Embedding(en_vocab_size,
                                EMBED_SIZE,
                                input_shape=(2,SENTENCE_SIZE),
                                trainable=True)(EN_MODEL)
                EN_MODEL = Reshape((2,SENTENCE_SIZE*EMBED_SIZE,))(EN_MODEL)
                
                if DROPOUT_SIZE[0] > 0: EN_MODEL = Dropout(DROPOUT_SIZE[0])(EN_MODEL)
                for layer in range(LSTM_LAYERS-1):
                    EN_MODEL = Bidirectional(LSTM(LSTM_SIZE,return_sequences=True,
                                   dropout=DROPOUT_SIZE[1],recurrent_dropout=DROPOUT_SIZE[1]))(EN_MODEL)
                EN_MODEL = Bidirectional(LSTM(LSTM_SIZE,dropout=DROPOUT_SIZE[1],recurrent_dropout=DROPOUT_SIZE[1]))(EN_MODEL)
                if DROPOUT_SIZE[2] > 0: EN_MODEL = Dropout(DROPOUT_SIZE[2])(EN_MODEL)
    
                # ZH MODEL CREATION
                ZH_INPUT = Input(shape=(SENTENCE_SIZE*2,))
                ZH_MODEL = Reshape((-1,2,SENTENCE_SIZE))(ZH_INPUT)
                ZH_MODEL = Embedding(zh_vocab_size,
                                EMBED_SIZE,
                                input_shape=(2,SENTENCE_SIZE),
                                trainable=True)(ZH_MODEL)
                ZH_MODEL = Reshape((2,SENTENCE_SIZE*EMBED_SIZE,))(ZH_MODEL)
                
                if DROPOUT_SIZE[0] > 0: ZH_MODEL = Dropout(DROPOUT_SIZE[0])(ZH_MODEL)
                for layer in range(LSTM_LAYERS-1):
                    ZH_MODEL = Bidirectional(LSTM(LSTM_SIZE,return_sequences=True,
                                   dropout=DROPOUT_SIZE[1],recurrent_dropout=DROPOUT_SIZE[1]))(ZH_MODEL)
                ZH_MODEL = Bidirectional(LSTM(LSTM_SIZE,dropout=DROPOUT_SIZE[1],recurrent_dropout=DROPOUT_SIZE[1]))(ZH_MODEL)
                if DROPOUT_SIZE[2] > 0: ZH_MODEL = Dropout(DROPOUT_SIZE[2])(ZH_MODEL)
                    
                # MERGE MODEL
                merged = Concatenate(axis=1)([EN_MODEL,ZH_MODEL])
                merged = Dense(3, activation='softmax')(merged)

                model = Model(inputs=[EN_INPUT,ZH_INPUT], outputs=merged)
                model.compile(optimizer=optimizer, loss=loss,metrics=metrics)

                # Training the model
                model.fit([X_train[:,:SENTENCE_SIZE*2], 
                           X_train[:,SENTENCE_SIZE*2:]], y_train,
                          epochs=epochs,
                          batch_size=batch_size,
                          validation_data=([X_test[:,:SENTENCE_SIZE*2], X_test[:,SENTENCE_SIZE*2:]], y_test),
                          callbacks=callbacks,
                          class_weight=class_weights)

Creating DualingualLSTM-10E-1x2L-[0, 0.5, 0]Dropout-1Clip-1573806781.693404.hdf5


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 192223 samples, validate on 64087 samples
Epoch 1/100
  5120/192223 [..............................] - ETA: 1:27 - loss: 1.0789 - accuracy: 0.6471




Epoch 00001: val_accuracy improved from -inf to 0.68401, saving model to ./\models\DualingualLSTM-10E-1x2L-[0, 0.5, 0]Dropout-1Clip-1573806781.693404.hdf5
Epoch 2/100

Epoch 00002: val_accuracy improved from 0.68401 to 0.74377, saving model to ./\models\DualingualLSTM-10E-1x2L-[0, 0.5, 0]Dropout-1Clip-1573806781.693404.hdf5
Epoch 3/100

Epoch 00003: val_accuracy improved from 0.74377 to 0.79573, saving model to ./\models\DualingualLSTM-10E-1x2L-[0, 0.5, 0]Dropout-1Clip-1573806781.693404.hdf5
Epoch 4/100

Epoch 00004: val_accuracy improved from 0.79573 to 0.81071, saving model to ./\models\DualingualLSTM-10E-1x2L-[0, 0.5, 0]Dropout-1Clip-1573806781.693404.hdf5
Epoch 5/100

Epoch 00005: val_accuracy improved from 0.81071 to 0.81808, saving model to ./\models\DualingualLSTM-10E-1x2L-[0, 0.5, 0]Dropout-1Clip-1573806781.693404.hdf5
Epoch 6/100

Epoch 00006: val_accuracy improved from 0.81808 to 0.82602, saving model to ./\models\DualingualLSTM-10E-1x2L-[0, 0.5, 0]Dropout-1Clip-1573806781.6

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 192223 samples, validate on 64087 samples
Epoch 1/100
  4096/192223 [..............................] - ETA: 4:57 - loss: 1.0920 - accuracy: 0.6196




Epoch 00001: val_accuracy improved from -inf to 0.68401, saving model to ./\models\DualingualLSTM-10E-2x2L-[0, 0.5, 0]Dropout-1Clip-1573807017.4185092.hdf5
Epoch 2/100

Epoch 00002: val_accuracy did not improve from 0.68401
Epoch 3/100

Epoch 00003: val_accuracy did not improve from 0.68401
Epoch 4/100

Epoch 00004: val_accuracy improved from 0.68401 to 0.77797, saving model to ./\models\DualingualLSTM-10E-2x2L-[0, 0.5, 0]Dropout-1Clip-1573807017.4185092.hdf5
Epoch 5/100

Epoch 00005: val_accuracy improved from 0.77797 to 0.80110, saving model to ./\models\DualingualLSTM-10E-2x2L-[0, 0.5, 0]Dropout-1Clip-1573807017.4185092.hdf5
Epoch 6/100

Epoch 00006: val_accuracy improved from 0.80110 to 0.80999, saving model to ./\models\DualingualLSTM-10E-2x2L-[0, 0.5, 0]Dropout-1Clip-1573807017.4185092.hdf5
Epoch 7/100

Epoch 00007: val_accuracy improved from 0.80999 to 0.81553, saving model to ./\models\DualingualLSTM-10E-2x2L-[0, 0.5, 0]Dropout-1Clip-1573807017.4185092.hdf5
Epoch 8/100

Epoch 

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 192223 samples, validate on 64087 samples
Epoch 1/100
  4096/192223 [..............................] - ETA: 3:15 - loss: 1.0937 - accuracy: 0.6421




Epoch 00001: val_accuracy improved from -inf to 0.68401, saving model to ./\models\DualingualLSTM-10E-3x2L-[0, 0.5, 0]Dropout-1Clip-1573807614.991443.hdf5
Epoch 2/100

Epoch 00002: val_accuracy did not improve from 0.68401
Epoch 3/100

Epoch 00003: val_accuracy did not improve from 0.68401
Epoch 4/100

Epoch 00004: val_accuracy did not improve from 0.68401
Epoch 5/100

Epoch 00005: val_accuracy did not improve from 0.68401
Epoch 6/100

Epoch 00006: val_accuracy improved from 0.68401 to 0.68992, saving model to ./\models\DualingualLSTM-10E-3x2L-[0, 0.5, 0]Dropout-1Clip-1573807614.991443.hdf5
Epoch 7/100

Epoch 00007: val_accuracy improved from 0.68992 to 0.74614, saving model to ./\models\DualingualLSTM-10E-3x2L-[0, 0.5, 0]Dropout-1Clip-1573807614.991443.hdf5
Epoch 8/100

Epoch 00008: val_accuracy improved from 0.74614 to 0.75538, saving model to ./\models\DualingualLSTM-10E-3x2L-[0, 0.5, 0]Dropout-1Clip-1573807614.991443.hdf5
Epoch 9/100

Epoch 00009: val_accuracy improved from 0.7553


Epoch 00059: val_accuracy improved from 0.84491 to 0.84521, saving model to ./\models\DualingualLSTM-10E-3x2L-[0, 0.5, 0]Dropout-1Clip-1573807614.991443.hdf5
Epoch 60/100

Epoch 00060: val_accuracy did not improve from 0.84521
Creating DualingualLSTM-10E-4x2L-[0, 0.5, 0]Dropout-1Clip-1573808475.7952914.hdf5


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 192223 samples, validate on 64087 samples
Epoch 1/100
  4096/192223 [..............................] - ETA: 4:40 - loss: 1.0948 - accuracy: 0.6201




Epoch 00001: val_accuracy improved from -inf to 0.68401, saving model to ./\models\DualingualLSTM-10E-4x2L-[0, 0.5, 0]Dropout-1Clip-1573808475.7952914.hdf5
Epoch 2/100

Epoch 00002: val_accuracy did not improve from 0.68401
Epoch 3/100

Epoch 00003: val_accuracy did not improve from 0.68401
Epoch 4/100

Epoch 00004: val_accuracy did not improve from 0.68401
Epoch 5/100

Epoch 00005: val_accuracy did not improve from 0.68401
Epoch 6/100

Epoch 00006: val_accuracy did not improve from 0.68401
Epoch 7/100

Epoch 00007: val_accuracy did not improve from 0.68401
Epoch 8/100

Epoch 00008: val_accuracy did not improve from 0.68401
Epoch 9/100

Epoch 00009: val_accuracy improved from 0.68401 to 0.69435, saving model to ./\models\DualingualLSTM-10E-4x2L-[0, 0.5, 0]Dropout-1Clip-1573808475.7952914.hdf5
Epoch 10/100

Epoch 00010: val_accuracy improved from 0.69435 to 0.71172, saving model to ./\models\DualingualLSTM-10E-4x2L-[0, 0.5, 0]Dropout-1Clip-1573808475.7952914.hdf5
Epoch 11/100

Epoch 00

Epoch 30/100

Epoch 00030: val_accuracy improved from 0.82836 to 0.82920, saving model to ./\models\DualingualLSTM-10E-4x2L-[0, 0.5, 0]Dropout-1Clip-1573808475.7952914.hdf5
Epoch 31/100

Epoch 00031: val_accuracy improved from 0.82920 to 0.83046, saving model to ./\models\DualingualLSTM-10E-4x2L-[0, 0.5, 0]Dropout-1Clip-1573808475.7952914.hdf5
Epoch 32/100

Epoch 00032: val_accuracy improved from 0.83046 to 0.83106, saving model to ./\models\DualingualLSTM-10E-4x2L-[0, 0.5, 0]Dropout-1Clip-1573808475.7952914.hdf5
Epoch 33/100

Epoch 00033: val_accuracy improved from 0.83106 to 0.83224, saving model to ./\models\DualingualLSTM-10E-4x2L-[0, 0.5, 0]Dropout-1Clip-1573808475.7952914.hdf5
Epoch 34/100

Epoch 00034: val_accuracy did not improve from 0.83224
Epoch 35/100

Epoch 00035: val_accuracy did not improve from 0.83224
Epoch 36/100

Epoch 00036: val_accuracy improved from 0.83224 to 0.83268, saving model to ./\models\DualingualLSTM-10E-4x2L-[0, 0.5, 0]Dropout-1Clip-1573808475.7952914.hd

KeyboardInterrupt: 

# Predict

In [None]:
np.argmax(model.predict([[X_train[1]]])[0])

In [None]:
y_train[1]