Add config options: GRU_RESET_AFTER and MAX_PLOT_Y
lvapeab committed May 3, 2020
1 parent 57978ab commit 2854b7e
Showing 2 changed files with 9 additions and 9 deletions.
16 changes: 7 additions & 9 deletions config.py
@@ -28,7 +28,7 @@ def load_parameters():
OUTPUTS_TYPES_DATASET = ['text-features'] # They are equivalent, only differ on how the data is loaded.

# Evaluation params
-METRICS = ['sacrebleu', 'perplexity']  # Metric used for evaluating the model.
+METRICS = ['sacrebleu', 'perplexity']           # Metric used for evaluating the model.
KERAS_METRICS = ['perplexity'] # Metrics to be logged by Keras during training (in addition to the loss).
EVAL_ON_SETS = ['val'] # Possible values: 'train', 'val' and 'test' (external evaluator).
START_EVAL_ON_EPOCH = 1 # First epoch to start the model evaluation.
@@ -177,7 +177,6 @@ def load_parameters():
# Supported architectures: 'AttentionRNNEncoderDecoder' and 'Transformer'.

# Common hyperparameters for all models
-# # # # # # # # # # # # # # # # # # # # # # # #
TRAINABLE_ENCODER = True # Whether the encoder's weights should be modified during training.
TRAINABLE_DECODER = True # Whether the decoder's weights should be modified during training.

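An aside on the two flags above: in Keras, freezing one side of the model comes down to the layer-level trainable flag. A minimal sketch (the layer type and size are illustrative only, not the repository's code):

from keras.layers import Dense

# trainable=False excludes the layer's weights from gradient updates;
# TRAINABLE_ENCODER / TRAINABLE_DECODER toggle this per model side.
frozen_layer = Dense(16, trainable=False)
trainable_layer = Dense(16)  # trainable=True is the default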
@@ -210,12 +209,13 @@ def load_parameters():
# Here we should specify the activation function and the output dimension.
# (e.g DEEP_OUTPUT_LAYERS = [('tanh', 600), ('relu', 400), ('relu', 200)])
DEEP_OUTPUT_LAYERS = [('linear', TARGET_TEXT_EMBEDDING_SIZE)]
-# # # # # # # # # # # # # # # # # # # # # # # #

# AttentionRNNEncoderDecoder model hyperparameters
-# # # # # # # # # # # # # # # # # # # # # # # #
ENCODER_RNN_TYPE = 'LSTM'  # Encoder's RNN unit type ('LSTM' and 'GRU' supported).
-USE_CUDNN = False  # Use CuDNN's implementation of GRU and LSTM (only for Tensorflow backend).
+USE_CUDNN = False            # Use CuDNN's implementation of GRU and LSTM (only for Tensorflow backend).
+GRU_RESET_AFTER = True       # GRU convention (whether to apply reset gate after or before matrix multiplication).
+# False = "before", True = "after" (CuDNN compatible).


DECODER_RNN_TYPE = 'ConditionalLSTM' # Decoder's RNN unit type.
# ('LSTM', 'GRU', 'ConditionalLSTM' and 'ConditionalGRU' supported).
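The new GRU_RESET_AFTER option maps onto the reset_after argument of Keras' GRU layer. A minimal sketch of the two conventions (the unit count is illustrative only):

from keras.layers import GRU

# reset_after=False: the reset gate multiplies the hidden state before the
# recurrent matrix multiplication (the original GRU formulation).
# reset_after=True: the reset gate is applied after the matrix multiplication,
# matching the CuDNN kernels, so trained weights stay CuDNN-compatible.
gru_before = GRU(64, reset_after=False, return_sequences=True)
gru_after = GRU(64, reset_after=True, return_sequences=True)  # CuDNN-compatible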
@@ -240,15 +240,12 @@ def load_parameters():
SKIP_VECTORS_HIDDEN_SIZE = TARGET_TEXT_EMBEDDING_SIZE # Hidden size.
ADDITIONAL_OUTPUT_MERGE_MODE = 'Add' # Merge mode for the skip-connections (see keras.layers.merge.py).
SKIP_VECTORS_SHARED_ACTIVATION = 'tanh' # Activation for the skip vectors.
-# # # # # # # # # # # # # # # # # # # # # # # #

# Transformer model hyperparameters
-# # # # # # # # # # # # # # # # # # # # # # # #
MODEL_SIZE = 32  # Transformer model size (d_{model} in the paper).
MULTIHEAD_ATTENTION_ACTIVATION = 'linear'  # Activation for the input projections in the Multi-Head Attention blocks.
FF_SIZE = MODEL_SIZE * 4 # Size of the feed-forward layers of the Transformer model.
N_HEADS = 8 # Number of parallel attention layers of the Transformer model.
-# # # # # # # # # # # # # # # # # # # # # # # #

# Regularizers
REGULARIZATION_FN = 'L2' # Regularization function. 'L1', 'L2' and 'L1_L2' supported.
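For the Transformer hyperparameters in the hunk above, multi-head attention splits the model dimension across the heads, so MODEL_SIZE must be divisible by N_HEADS. A quick sanity check (an illustrative snippet, not part of the repository):

MODEL_SIZE = 32
N_HEADS = 8
FF_SIZE = MODEL_SIZE * 4  # = 128, the position-wise feed-forward size

# Each head attends over MODEL_SIZE // N_HEADS dimensions: 32 // 8 = 4 here.
assert MODEL_SIZE % N_HEADS == 0, 'MODEL_SIZE must be divisible by N_HEADS'
head_dim = MODEL_SIZE // N_HEADS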
@@ -308,7 +305,8 @@ def load_parameters():

SAMPLING_SAVE_MODE = 'list' # 'list': Store in a text file, one sentence per line.
PLOT_EVALUATION = False # If True, the evaluation will be plotted into the model folder.

+MAX_PLOT_Y = 1. if 'coco' in METRICS else 100.  # Max value of axis Y in the plot.

VERBOSE = 1 # Verbosity level.
RELOAD = 0  # If 0, start training from scratch; otherwise, the model
# saved on epoch 'RELOAD' will be used.
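MAX_PLOT_Y picks the Y-axis cap for the evaluation plot: COCO-style metrics are reported in [0, 1], while sacreBLEU scores run from 0 to 100. A rough sketch of how such a cap could be applied with matplotlib (hypothetical data, not the repository's actual plotting code):

import matplotlib.pyplot as plt

epochs = [1, 2, 3, 4]            # hypothetical evaluation checkpoints
bleu = [12.3, 18.7, 22.1, 24.5]  # hypothetical sacreBLEU scores (0-100 scale)

max_plot_y = 100.                # would be 1. for 'coco'-style metrics
plt.plot(epochs, bleu, marker='o')
plt.ylim(0., max_plot_y)         # the MAX_PLOT_Y-style cap on axis Y
plt.xlabel('Epoch')
plt.ylabel('sacreBLEU')
plt.savefig('evaluation_plot.png')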
2 changes: 2 additions & 0 deletions nmt_keras/model_zoo.py
@@ -487,6 +487,7 @@ def AttentionRNNEncoderDecoder(self, params):
kernel_initializer=params['INIT_FUNCTION'],
recurrent_initializer=params['INNER_INIT'],
trainable=params.get('TRAINABLE_ENCODER', True),
+reset_after=params.get('GRU_RESET_AFTER', False),
return_sequences=True),
trainable=params.get('TRAINABLE_ENCODER', True),
name='bidirectional_encoder_' + params['ENCODER_RNN_TYPE'],
@@ -498,6 +499,7 @@ def AttentionRNNEncoderDecoder(self, params):
bias_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
kernel_initializer=params['INIT_FUNCTION'],
recurrent_initializer=params['INNER_INIT'],
+reset_after=params.get('GRU_RESET_AFTER', False),
trainable=params.get('TRAINABLE_ENCODER', True),
return_sequences=True,
name='encoder_' + params['ENCODER_RNN_TYPE'])(src_embedding)
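Both hunks apply the same pattern: the flag is read with params.get() and a default of False, so configuration files written before this commit keep the previous behaviour. A simplified sketch of that pattern (the real layers in model_zoo.py take many more arguments):

from keras.layers import GRU, Bidirectional

params = {'ENCODER_RNN_TYPE': 'GRU', 'GRU_RESET_AFTER': True}

# params.get() falls back to False when GRU_RESET_AFTER is absent from the
# config, preserving pre-commit behaviour for old configuration files.
encoder = Bidirectional(
    GRU(32,  # hidden size is illustrative only
        reset_after=params.get('GRU_RESET_AFTER', False),
        return_sequences=True),
    name='bidirectional_encoder_' + params['ENCODER_RNN_TYPE'])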
