### Imports

In [1]:
# For Saving/Loading Files
from utils.storage import load_data,load_tokenizers

# -------------- Modelling Packages --------------
# For modeling
from keras.models import Model, Sequential
from keras.layers import Embedding, GRU, Bidirectional
from keras.layers import Input, Reshape, SpatialDropout1D, Dense, Flatten
from keras.layers import Concatenate
from keras import optimizers

# Callback Functions
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping

# For Timestamping Models
import time

# For balancing class weights
from sklearn.utils import class_weight

# -------------- General Packages --------------
# Data Manipulation
import numpy as np

# Saving/Loading
import os

Using TensorFlow backend.


### Directories

In [2]:
MODEL_DIR = './models/'
SPLIT_DATA_DIR = './split_data/'
LOG_DIR = 'logs'
TOKENIZER_DIR = './tokenizers/'

### Loading the Dataset + Tokenizers

In [3]:
EN_X_train,EN_X_test,ZH_X_train,ZH_X_test,y_train,y_test = load_data(SPLIT_DATA_DIR)
t_EN,t_ZH = load_tokenizers(TOKENIZER_DIR)

### Creating the Models

In [4]:
os.makedirs(os.path.dirname(MODEL_DIR), exist_ok=True)

# -------------- Tokenizer Values --------------
EN_SENTENCE_SIZE = int(EN_X_train.shape[1]/2)
ZH_SENTENCE_SIZE = int(ZH_X_train.shape[1]/2)
en_vocab_size = len(t_EN.word_index) + 1
zh_vocab_size = len(t_ZH.word_index) + 1
class_weights = class_weight.compute_class_weight('balanced',
                                                 np.unique(y_train),
                                                 y_train)

# -------------- TUNABLE HYPERPARAMETERS --------------
EMBED_SIZES = [ 50 ]
GRU_SIZES = [ 100 ]
GRU_LAYER_SIZES = [ 1 ]

# DROPOUT_SIZES = [(0.5,0)]
# DROPOUT_SIZES = [(0,0),(0,0.5),(0.5,0),(0.5,0.5),
#                  (0.25,0.25),(0.25,0.5),(0.5,0.25),(0.25,0),
#                  (0,0.25),(0.75,0.75),(0.75,0.5),(0.5,0.75),
#                  (0.75,0),(0,0.75),(0.75,0.25),(0.25,0.75)]

DROPOUT_SIZES = [(0,0.75),(0.75,0.25),(0.25,0.75)]

loss = 'sparse_categorical_crossentropy'
optimizer = optimizers.adam()
#optimizer = optimizers.sgd(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
metrics = ['accuracy']

epochs = 200
batch_size = 1024
PATIENCE = 3

In [None]:
for EMBED_SIZE in EMBED_SIZES:
    for DROPOUT_SIZE in DROPOUT_SIZES:
        for GRU_SIZE in GRU_SIZES:
            for GRU_LAYERS in GRU_LAYER_SIZES:
                # -------------- MODEL NAMING --------------
                NAME = 'BiGRU-{}E-{}x{}G-{}Dropout-{}.hdf5'.format(EMBED_SIZE,
                                                                         GRU_LAYERS,GRU_SIZE,
                                                                         DROPOUT_SIZE,
                                                                         time.time())
                print('Creating {}'.format(NAME))
                MODEL_LOG_DIR = os.path.join(LOG_DIR,NAME)

                # -------------- Callbacks --------------
                # access tensorboard from the command line: tensorboard --logdir=logs/
                tensorboard = TensorBoard(log_dir=MODEL_LOG_DIR) 
                checkpointer = ModelCheckpoint(MODEL_DIR+NAME, 
                                               monitor='val_accuracy', 
                                               verbose=1, 
                                               save_best_only=True, 
                                               mode='auto')
                earlystop = EarlyStopping(monitor='val_loss', patience=PATIENCE)

                callbacks=[tensorboard,checkpointer,earlystop]

                # -------------- EN MODEL CREATION --------------
                EN_INPUT = Input(shape=(EN_SENTENCE_SIZE*2,))
                EN_MODEL = Reshape((-1,2,EN_SENTENCE_SIZE))(EN_INPUT)
                EN_MODEL = Embedding(en_vocab_size,
                                EMBED_SIZE,
                                input_shape=(2,EN_SENTENCE_SIZE),
                                trainable=True)(EN_MODEL)
                EN_MODEL = Reshape((2,EN_SENTENCE_SIZE*EMBED_SIZE,))(EN_MODEL)
                
                if DROPOUT_SIZE[0] > 0: EN_MODEL = SpatialDropout1D(DROPOUT_SIZE[0])(EN_MODEL)
                for layer in range(GRU_LAYERS-1):
                    EN_MODEL = Bidirectional(GRU(GRU_SIZE,
                                                  return_sequences=True, 
                                                  recurrent_dropout=DROPOUT_SIZE[1]))(EN_MODEL)
                EN_MODEL = Bidirectional(GRU(GRU_SIZE,
                                              return_sequences=True,
                                              recurrent_dropout=DROPOUT_SIZE[1]))(EN_MODEL)
    
                # -------------- ZH MODEL CREATION --------------
                ZH_INPUT = Input(shape=(ZH_SENTENCE_SIZE*2,))
                ZH_MODEL = Reshape((-1,2,ZH_SENTENCE_SIZE))(ZH_INPUT)
                ZH_MODEL = Embedding(zh_vocab_size,
                                EMBED_SIZE,
                                input_shape=(2,ZH_SENTENCE_SIZE),
                                trainable=True)(ZH_MODEL)
                ZH_MODEL = Reshape((2,ZH_SENTENCE_SIZE*EMBED_SIZE,))(ZH_MODEL)
                
                if DROPOUT_SIZE[0] > 0: ZH_MODEL = SpatialDropout1D(DROPOUT_SIZE[0])(ZH_MODEL)
                for layer in range(GRU_LAYERS-1):
                    ZH_MODEL = Bidirectional(GRU(GRU_SIZE,
                                                  return_sequences=True, 
                                                  recurrent_dropout=DROPOUT_SIZE[1]))(ZH_MODEL)
                ZH_MODEL = Bidirectional(GRU(GRU_SIZE,
                                              return_sequences=True,
                                              recurrent_dropout=DROPOUT_SIZE[1]))(ZH_MODEL)
                    
                # -------------- MERGE MODEL --------------
                merged = Concatenate(1)([EN_MODEL,ZH_MODEL])
                merged = GRU(GRU_SIZE,
                              return_sequences=True,
                              recurrent_dropout=DROPOUT_SIZE[1])(merged)
                merged = Flatten()(merged)
                merged = Dense(3, activation='softmax')(merged)

                model = Model(inputs=[EN_INPUT,ZH_INPUT], outputs=merged)
                model.compile(optimizer=optimizer, loss=loss,metrics=metrics)

                # -------------- Training the model --------------
                model.fit([EN_X_train,ZH_X_train], y_train,
                          epochs=epochs,
                          batch_size=batch_size,
                          validation_data=([EN_X_test,ZH_X_test], y_test),
                          callbacks=callbacks,
                          class_weight=class_weights)

Creating BiGRU-50E-1x100G-(0, 0)Dropout-1574828593.9030213.hdf5


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 192223 samples, validate on 64087 samples
Epoch 1/200
  2048/192223 [..............................] - ETA: 15:30 - loss: 1.0758 - accuracy: 0.5879



  3072/192223 [..............................] - ETA: 11:16 - loss: 1.0540 - accuracy: 0.6230




Epoch 00001: val_accuracy improved from -inf to 0.84259, saving model to ./models/BiGRU-50E-1x100G-(0, 0)Dropout-1574828593.9030213.hdf5
Epoch 2/200

Epoch 00002: val_accuracy improved from 0.84259 to 0.85258, saving model to ./models/BiGRU-50E-1x100G-(0, 0)Dropout-1574828593.9030213.hdf5
Epoch 3/200

Epoch 00003: val_accuracy improved from 0.85258 to 0.85682, saving model to ./models/BiGRU-50E-1x100G-(0, 0)Dropout-1574828593.9030213.hdf5
Epoch 4/200

Epoch 00004: val_accuracy did not improve from 0.85682
Epoch 5/200

Epoch 00005: val_accuracy did not improve from 0.85682
Creating BiGRU-50E-1x100G-(0, 0.5)Dropout-1574828846.1844137.hdf5


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 192223 samples, validate on 64087 samples
Epoch 1/200

Epoch 00001: val_accuracy improved from -inf to 0.85348, saving model to ./models/BiGRU-50E-1x100G-(0, 0.5)Dropout-1574828846.1844137.hdf5
Epoch 2/200

Epoch 00002: val_accuracy improved from 0.85348 to 0.85978, saving model to ./models/BiGRU-50E-1x100G-(0, 0.5)Dropout-1574828846.1844137.hdf5
Epoch 3/200

Epoch 00003: val_accuracy did not improve from 0.85978
Epoch 4/200

Epoch 00004: val_accuracy did not improve from 0.85978
Creating BiGRU-50E-1x100G-(0.5, 0)Dropout-1574829039.4294608.hdf5


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 192223 samples, validate on 64087 samples
Epoch 1/200

Epoch 00001: val_accuracy improved from -inf to 0.84833, saving model to ./models/BiGRU-50E-1x100G-(0.5, 0)Dropout-1574829039.4294608.hdf5
Epoch 2/200

Epoch 00002: val_accuracy improved from 0.84833 to 0.85861, saving model to ./models/BiGRU-50E-1x100G-(0.5, 0)Dropout-1574829039.4294608.hdf5
Epoch 3/200

Epoch 00003: val_accuracy did not improve from 0.85861
Epoch 4/200

Epoch 00004: val_accuracy did not improve from 0.85861
Epoch 5/200

Epoch 00005: val_accuracy improved from 0.85861 to 0.86058, saving model to ./models/BiGRU-50E-1x100G-(0.5, 0)Dropout-1574829039.4294608.hdf5
Creating BiGRU-50E-1x100G-(0.5, 0.5)Dropout-1574829292.4205437.hdf5


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 192223 samples, validate on 64087 samples
Epoch 1/200

Epoch 00001: val_accuracy improved from -inf to 0.84722, saving model to ./models/BiGRU-50E-1x100G-(0.5, 0.5)Dropout-1574829292.4205437.hdf5
Epoch 2/200

Epoch 00002: val_accuracy improved from 0.84722 to 0.85858, saving model to ./models/BiGRU-50E-1x100G-(0.5, 0.5)Dropout-1574829292.4205437.hdf5
Epoch 3/200

Epoch 00003: val_accuracy improved from 0.85858 to 0.85919, saving model to ./models/BiGRU-50E-1x100G-(0.5, 0.5)Dropout-1574829292.4205437.hdf5
Epoch 4/200

Epoch 00004: val_accuracy improved from 0.85919 to 0.85924, saving model to ./models/BiGRU-50E-1x100G-(0.5, 0.5)Dropout-1574829292.4205437.hdf5
Epoch 5/200

Epoch 00005: val_accuracy did not improve from 0.85924
Creating BiGRU-50E-1x100G-(0.25, 0.25)Dropout-1574829549.4543834.hdf5


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 192223 samples, validate on 64087 samples
Epoch 1/200

Epoch 00001: val_accuracy improved from -inf to 0.85339, saving model to ./models/BiGRU-50E-1x100G-(0.25, 0.25)Dropout-1574829549.4543834.hdf5
Epoch 2/200

Epoch 00002: val_accuracy improved from 0.85339 to 0.86046, saving model to ./models/BiGRU-50E-1x100G-(0.25, 0.25)Dropout-1574829549.4543834.hdf5
Epoch 3/200

Epoch 00003: val_accuracy did not improve from 0.86046
Epoch 4/200

Epoch 00004: val_accuracy did not improve from 0.86046
Creating BiGRU-50E-1x100G-(0.25, 0.5)Dropout-1574829763.9738278.hdf5


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 192223 samples, validate on 64087 samples
Epoch 1/200

Epoch 00001: val_accuracy improved from -inf to 0.85432, saving model to ./models/BiGRU-50E-1x100G-(0.25, 0.5)Dropout-1574829763.9738278.hdf5
Epoch 2/200

Epoch 00002: val_accuracy improved from 0.85432 to 0.86166, saving model to ./models/BiGRU-50E-1x100G-(0.25, 0.5)Dropout-1574829763.9738278.hdf5
Epoch 3/200

Epoch 00003: val_accuracy did not improve from 0.86166
Epoch 4/200

Epoch 00004: val_accuracy did not improve from 0.86166
Creating BiGRU-50E-1x100G-(0.5, 0.25)Dropout-1574829980.343144.hdf5


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 192223 samples, validate on 64087 samples
Epoch 1/200

Epoch 00001: val_accuracy improved from -inf to 0.84941, saving model to ./models/BiGRU-50E-1x100G-(0.5, 0.25)Dropout-1574829980.343144.hdf5
Epoch 2/200

Epoch 00002: val_accuracy improved from 0.84941 to 0.85735, saving model to ./models/BiGRU-50E-1x100G-(0.5, 0.25)Dropout-1574829980.343144.hdf5
Epoch 3/200

Epoch 00003: val_accuracy improved from 0.85735 to 0.86173, saving model to ./models/BiGRU-50E-1x100G-(0.5, 0.25)Dropout-1574829980.343144.hdf5
Epoch 4/200

Epoch 00004: val_accuracy did not improve from 0.86173
Epoch 5/200

Epoch 00005: val_accuracy did not improve from 0.86173
Creating BiGRU-50E-1x100G-(0.25, 0)Dropout-1574830251.4143076.hdf5


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 192223 samples, validate on 64087 samples
Epoch 1/200

Epoch 00001: val_accuracy improved from -inf to 0.85326, saving model to ./models/BiGRU-50E-1x100G-(0.25, 0)Dropout-1574830251.4143076.hdf5
Epoch 2/200

Epoch 00002: val_accuracy improved from 0.85326 to 0.85982, saving model to ./models/BiGRU-50E-1x100G-(0.25, 0)Dropout-1574830251.4143076.hdf5
Epoch 3/200

Epoch 00003: val_accuracy did not improve from 0.85982
Epoch 4/200

Epoch 00004: val_accuracy did not improve from 0.85982
Creating BiGRU-50E-1x100G-(0, 0.25)Dropout-1574830473.0903265.hdf5


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 192223 samples, validate on 64087 samples
Epoch 1/200

Epoch 00001: val_accuracy improved from -inf to 0.85657, saving model to ./models/BiGRU-50E-1x100G-(0, 0.25)Dropout-1574830473.0903265.hdf5
Epoch 2/200

Epoch 00002: val_accuracy improved from 0.85657 to 0.86295, saving model to ./models/BiGRU-50E-1x100G-(0, 0.25)Dropout-1574830473.0903265.hdf5
Epoch 3/200

Epoch 00003: val_accuracy did not improve from 0.86295
Epoch 4/200

Epoch 00004: val_accuracy did not improve from 0.86295
Creating BiGRU-50E-1x100G-(0.75, 0.75)Dropout-1574830691.7271037.hdf5


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 192223 samples, validate on 64087 samples
Epoch 1/200
  2048/192223 [..............................] - ETA: 9:06 - loss: 1.0299 - accuracy: 0.4849 




Epoch 00001: val_accuracy improved from -inf to 0.83284, saving model to ./models/BiGRU-50E-1x100G-(0.75, 0.75)Dropout-1574830691.7271037.hdf5
Epoch 2/200

Epoch 00002: val_accuracy improved from 0.83284 to 0.84950, saving model to ./models/BiGRU-50E-1x100G-(0.75, 0.75)Dropout-1574830691.7271037.hdf5
Epoch 3/200

Epoch 00003: val_accuracy improved from 0.84950 to 0.85498, saving model to ./models/BiGRU-50E-1x100G-(0.75, 0.75)Dropout-1574830691.7271037.hdf5
Epoch 4/200

Epoch 00004: val_accuracy improved from 0.85498 to 0.85776, saving model to ./models/BiGRU-50E-1x100G-(0.75, 0.75)Dropout-1574830691.7271037.hdf5
Epoch 5/200

Epoch 00005: val_accuracy improved from 0.85776 to 0.86021, saving model to ./models/BiGRU-50E-1x100G-(0.75, 0.75)Dropout-1574830691.7271037.hdf5
Epoch 6/200

Epoch 00006: val_accuracy improved from 0.86021 to 0.86188, saving model to ./models/BiGRU-50E-1x100G-(0.75, 0.75)Dropout-1574830691.7271037.hdf5
Epoch 7/200

Epoch 00007: val_accuracy did not improve from 0

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 192223 samples, validate on 64087 samples
Epoch 1/200

Epoch 00001: val_accuracy improved from -inf to 0.83703, saving model to ./models/BiGRU-50E-1x100G-(0.75, 0.5)Dropout-1574831070.01593.hdf5
Epoch 2/200

Epoch 00002: val_accuracy improved from 0.83703 to 0.84908, saving model to ./models/BiGRU-50E-1x100G-(0.75, 0.5)Dropout-1574831070.01593.hdf5
Epoch 3/200

Epoch 00003: val_accuracy improved from 0.84908 to 0.85638, saving model to ./models/BiGRU-50E-1x100G-(0.75, 0.5)Dropout-1574831070.01593.hdf5
Epoch 4/200

Epoch 00004: val_accuracy did not improve from 0.85638
Epoch 5/200

Epoch 00005: val_accuracy improved from 0.85638 to 0.86194, saving model to ./models/BiGRU-50E-1x100G-(0.75, 0.5)Dropout-1574831070.01593.hdf5
Epoch 6/200

Epoch 00006: val_accuracy did not improve from 0.86194
Epoch 7/200

Epoch 00007: val_accuracy did not improve from 0.86194
Epoch 8/200

Epoch 00008: val_accuracy improved from 0.86194 to 0.86233, saving model to ./models/BiGRU-50E-1x100G-(0.75, 0.

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 192223 samples, validate on 64087 samples
Epoch 1/200
  2048/192223 [..............................] - ETA: 38:26 - loss: 1.0094 - accuracy: 0.5171  



  3072/192223 [..............................] - ETA: 32:37 - loss: 0.9436 - accuracy: 0.5684



  4096/192223 [..............................] - ETA: 24:31 - loss: 0.9044 - accuracy: 0.5930




Epoch 00001: val_accuracy improved from -inf to 0.84637, saving model to ./models/BiGRU-50E-1x100G-(0.5, 0.75)Dropout-1574831514.7778163.hdf5
Epoch 2/200

Epoch 00002: val_accuracy improved from 0.84637 to 0.85766, saving model to ./models/BiGRU-50E-1x100G-(0.5, 0.75)Dropout-1574831514.7778163.hdf5
Epoch 3/200

Epoch 00003: val_accuracy improved from 0.85766 to 0.86138, saving model to ./models/BiGRU-50E-1x100G-(0.5, 0.75)Dropout-1574831514.7778163.hdf5
Epoch 4/200

Epoch 00004: val_accuracy did not improve from 0.86138
Epoch 5/200

Epoch 00005: val_accuracy did not improve from 0.86138
Creating BiGRU-50E-1x100G-(0.75, 0)Dropout-1574831895.9012818.hdf5


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 192223 samples, validate on 64087 samples
Epoch 1/200
  3072/192223 [..............................] - ETA: 6:41 - loss: 0.9676 - accuracy: 0.5830



  4096/192223 [..............................] - ETA: 5:10 - loss: 0.9242 - accuracy: 0.6133




Epoch 00001: val_accuracy improved from -inf to 0.83508, saving model to ./models/BiGRU-50E-1x100G-(0.75, 0)Dropout-1574831895.9012818.hdf5
Epoch 2/200

Epoch 00002: val_accuracy improved from 0.83508 to 0.85103, saving model to ./models/BiGRU-50E-1x100G-(0.75, 0)Dropout-1574831895.9012818.hdf5
Epoch 3/200

Epoch 00003: val_accuracy improved from 0.85103 to 0.85665, saving model to ./models/BiGRU-50E-1x100G-(0.75, 0)Dropout-1574831895.9012818.hdf5
Epoch 4/200

Epoch 00004: val_accuracy improved from 0.85665 to 0.85729, saving model to ./models/BiGRU-50E-1x100G-(0.75, 0)Dropout-1574831895.9012818.hdf5
Epoch 5/200

Epoch 00005: val_accuracy improved from 0.85729 to 0.86239, saving model to ./models/BiGRU-50E-1x100G-(0.75, 0)Dropout-1574831895.9012818.hdf5
Epoch 6/200

Epoch 00006: val_accuracy did not improve from 0.86239
Epoch 7/200

Epoch 00007: val_accuracy did not improve from 0.86239
Creating BiGRU-50E-1x100G-(0, 0.75)Dropout-1574832340.728988.hdf5


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 192223 samples, validate on 64087 samples
