Add config options: GRU_RESET_AFTER and MAX_PLOT_Y
lvapeab committed May 3, 2020
1 parent 57978ab commit 2854b7e
Showing 2 changed files with 9 additions and 9 deletions.
16 changes: 7 additions & 9 deletions config.py
@@ -28,7 +28,7 @@ def load_parameters():
OUTPUTS_TYPES_DATASET = ['text-features'] # They are equivalent, only differ on how the data is loaded.

# Evaluation params
-METRICS = ['sacrebleu', 'perplexity']  # Metric used for evaluating the model.
+METRICS = ['sacrebleu', 'perplexity']           # Metric used for evaluating the model.
KERAS_METRICS = ['perplexity'] # Metrics to be logged by Keras during training (in addition to the loss).
EVAL_ON_SETS = ['val'] # Possible values: 'train', 'val' and 'test' (external evaluator).
START_EVAL_ON_EPOCH = 1 # First epoch to start the model evaluation.
@@ -177,7 +177,6 @@ def load_parameters():
# Supported architectures: 'AttentionRNNEncoderDecoder' and 'Transformer'.

# Common hyperparameters for all models
-# # # # # # # # # # # # # # # # # # # # # # # #
TRAINABLE_ENCODER = True # Whether the encoder's weights should be modified during training.
TRAINABLE_DECODER = True # Whether the decoder's weights should be modified during training.

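An aside on the two flags above: in Keras, freezing one side of the model comes down to the layer-level trainable flag. A minimal sketch (the layer type and size are illustrative only, not the repository's code):

from keras.layers import Dense

# trainable=False excludes the layer's weights from gradient updates;
# TRAINABLE_ENCODER / TRAINABLE_DECODER toggle this per model side.
frozen_layer = Dense(16, trainable=False)
trainable_layer = Dense(16)  # trainable=True is the default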
@@ -210,12 +209,13 @@ def load_parameters():
# Here we should specify the activation function and the output dimension.
# (e.g DEEP_OUTPUT_LAYERS = [('tanh', 600), ('relu', 400), ('relu', 200)])
DEEP_OUTPUT_LAYERS = [('linear', TARGET_TEXT_EMBEDDING_SIZE)]
-# # # # # # # # # # # # # # # # # # # # # # # #

# AttentionRNNEncoderDecoder model hyperparameters
-# # # # # # # # # # # # # # # # # # # # # # # #
ENCODER_RNN_TYPE = 'LSTM'  # Encoder's RNN unit type ('LSTM' and 'GRU' supported).
-USE_CUDNN = False  # Use CuDNN's implementation of GRU and LSTM (only for Tensorflow backend).
+USE_CUDNN = False            # Use CuDNN's implementation of GRU and LSTM (only for Tensorflow backend).
+GRU_RESET_AFTER = True       # GRU convention (whether to apply reset gate after or before matrix multiplication).
+# False = "before", True = "after" (CuDNN compatible).


DECODER_RNN_TYPE = 'ConditionalLSTM' # Decoder's RNN unit type.
# ('LSTM', 'GRU', 'ConditionalLSTM' and 'ConditionalGRU' supported).
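The new GRU_RESET_AFTER option maps onto the reset_after argument of Keras' GRU layer. A minimal sketch of the two conventions (the unit count is illustrative only):

from keras.layers import GRU

# reset_after=False: the reset gate multiplies the hidden state before the
# recurrent matrix multiplication (the original GRU formulation).
# reset_after=True: the reset gate is applied after the matrix multiplication,
# matching the CuDNN kernels, so trained weights stay CuDNN-compatible.
gru_before = GRU(64, reset_after=False, return_sequences=True)
gru_after = GRU(64, reset_after=True, return_sequences=True)  # CuDNN-compatible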
@@ -240,15 +240,12 @@ def load_parameters():
SKIP_VECTORS_HIDDEN_SIZE = TARGET_TEXT_EMBEDDING_SIZE # Hidden size.
ADDITIONAL_OUTPUT_MERGE_MODE = 'Add' # Merge mode for the skip-connections (see keras.layers.merge.py).
SKIP_VECTORS_SHARED_ACTIVATION = 'tanh' # Activation for the skip vectors.
-# # # # # # # # # # # # # # # # # # # # # # # #

# Transformer model hyperparameters
-# # # # # # # # # # # # # # # # # # # # # # # #
MODEL_SIZE = 32  # Transformer model size (d_{model} in the paper).
MULTIHEAD_ATTENTION_ACTIVATION = 'linear'  # Activation for the input projections in the Multi-Head Attention blocks.
FF_SIZE = MODEL_SIZE * 4 # Size of the feed-forward layers of the Transformer model.
N_HEADS = 8 # Number of parallel attention layers of the Transformer model.
-# # # # # # # # # # # # # # # # # # # # # # # #

# Regularizers
REGULARIZATION_FN = 'L2' # Regularization function. 'L1', 'L2' and 'L1_L2' supported.
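For the Transformer hyperparameters in the hunk above, multi-head attention splits the model dimension across the heads, so MODEL_SIZE must be divisible by N_HEADS. A quick sanity check (an illustrative snippet, not part of the repository):

MODEL_SIZE = 32
N_HEADS = 8
FF_SIZE = MODEL_SIZE * 4  # = 128, the position-wise feed-forward size

# Each head attends over MODEL_SIZE // N_HEADS dimensions: 32 // 8 = 4 here.
assert MODEL_SIZE % N_HEADS == 0, 'MODEL_SIZE must be divisible by N_HEADS'
head_dim = MODEL_SIZE // N_HEADS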
@@ -308,7 +305,8 @@ def load_parameters():

SAMPLING_SAVE_MODE = 'list' # 'list': Store in a text file, one sentence per line.
PLOT_EVALUATION = False # If True, the evaluation will be plotted into the model folder.

+MAX_PLOT_Y = 1. if 'coco' in METRICS else 100.  # Max value of axis Y in the plot.

VERBOSE = 1 # Verbosity level.
RELOAD = 0  # If 0, start training from scratch; otherwise, the model
# saved on epoch 'RELOAD' will be used.
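MAX_PLOT_Y picks the Y-axis cap for the evaluation plot: COCO-style metrics are reported in [0, 1], while sacreBLEU scores run from 0 to 100. A rough sketch of how such a cap could be applied with matplotlib (hypothetical data, not the repository's actual plotting code):

import matplotlib.pyplot as plt

epochs = [1, 2, 3, 4]            # hypothetical evaluation checkpoints
bleu = [12.3, 18.7, 22.1, 24.5]  # hypothetical sacreBLEU scores (0-100 scale)

max_plot_y = 100.                # would be 1. for 'coco'-style metrics
plt.plot(epochs, bleu, marker='o')
plt.ylim(0., max_plot_y)         # the MAX_PLOT_Y-style cap on axis Y
plt.xlabel('Epoch')
plt.ylabel('sacreBLEU')
plt.savefig('evaluation_plot.png')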
2 changes: 2 additions & 0 deletions nmt_keras/model_zoo.py
@@ -487,6 +487,7 @@ def AttentionRNNEncoderDecoder(self, params):
kernel_initializer=params['INIT_FUNCTION'],
recurrent_initializer=params['INNER_INIT'],
trainable=params.get('TRAINABLE_ENCODER', True),
+reset_after=params.get('GRU_RESET_AFTER', False),
return_sequences=True),
trainable=params.get('TRAINABLE_ENCODER', True),
name='bidirectional_encoder_' + params['ENCODER_RNN_TYPE'],
@@ -498,6 +499,7 @@ def AttentionRNNEncoderDecoder(self, params):
bias_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
kernel_initializer=params['INIT_FUNCTION'],
recurrent_initializer=params['INNER_INIT'],
+reset_after=params.get('GRU_RESET_AFTER', False),
trainable=params.get('TRAINABLE_ENCODER', True),
return_sequences=True,
name='encoder_' + params['ENCODER_RNN_TYPE'])(src_embedding)
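Both hunks apply the same pattern: the flag is read with params.get() and a default of False, so configuration files written before this commit keep the previous behaviour. A simplified sketch of that pattern (the real layers in model_zoo.py take many more arguments):

from keras.layers import GRU, Bidirectional

params = {'ENCODER_RNN_TYPE': 'GRU', 'GRU_RESET_AFTER': True}

# params.get() falls back to False when GRU_RESET_AFTER is absent from the
# config, preserving pre-commit behaviour for old configuration files.
encoder = Bidirectional(
    GRU(32,  # hidden size is illustrative only
        reset_after=params.get('GRU_RESET_AFTER', False),
        return_sequences=True),
    name='bidirectional_encoder_' + params['ENCODER_RNN_TYPE'])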
