From d27d62e2f58543a9c15efcbfdb00475d39b5abdb Mon Sep 17 00:00:00 2001 From: Aflah <72096386+aflah02@users.noreply.github.com> Date: Sat, 19 Mar 2022 13:41:33 +0530 Subject: [PATCH 01/16] Added Kernel and Bias Initializer to decoder --- keras_nlp/layers/transformer_decoder.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/keras_nlp/layers/transformer_decoder.py b/keras_nlp/layers/transformer_decoder.py index 5bea0697a0..33ccf10a38 100644 --- a/keras_nlp/layers/transformer_decoder.py +++ b/keras_nlp/layers/transformer_decoder.py @@ -16,6 +16,7 @@ import tensorflow as tf from tensorflow import keras +from tensorflow.keras import initializers from keras_nlp.layers.transformer_layer_utils import ( # isort:skip compute_causal_mask, @@ -40,6 +41,10 @@ class TransformerDecoder(keras.layers.Layer): layer_norm_epsilon: float, defaults to 1e-5. The eps value in layer normalization components. name: string, defaults to None. The name of the layer. + kernel_initializer: tf.keras.initializers initializer, defaults to None. Sets the + kernel initializer for the dense and multiheaded attention layers + bias_initializer: tf.keras.initializers initializer, defaults to None. Sets the + bias initializer for the dense and multiheaded attention layers **kwargs: other keyword arguments. Examples: @@ -75,6 +80,8 @@ def __init__( activation="relu", layer_norm_epsilon=1e-05, name=None, + kernel_initializer=None, + bias_initializer=None, **kwargs, ): super().__init__(name=name, **kwargs) @@ -84,7 +91,8 @@ def __init__( self.activation = activation self.layer_norm_epsilon = layer_norm_epsilon self._built = False - + self.kernel_initializer = kernel_initializer + self.bias_initializer = bias_initializer def _build(self, input_shape): # Create layers based on input shape. self._built = True @@ -95,12 +103,16 @@ def _build(self, input_shape): key_dim=self._attention_head_size, value_dim=self._attention_head_size, dropout=self.dropout, + kernel_initializer=self.kernel_initializer, + bias_initializer=self.bias_initializer, ) self._encoder_decoder_attention_layer = keras.layers.MultiHeadAttention( num_heads=self.num_heads, key_dim=self._attention_head_size, value_dim=feature_size, dropout=self.dropout, + kernel_initializer=self.kernel_initializer, + bias_initializer=self.bias_initializer, ) self._decoder_attention_layernorm = keras.layers.LayerNormalization() @@ -114,11 +126,13 @@ def _build(self, input_shape): # First dense layer in the feedforward network, which maps input # feauture size to dimension `self.intermediate_dim`. self._intermediate_dense = keras.layers.Dense( - self.intermediate_dim, activation=self.activation + self.intermediate_dim, activation=self.activation, + kernel_initializer=self.kernel_initializer, bias_initializer=self.bias_initializer, ) # Second dense layer in the feedforward network, which maps input # feature size back to the input feature size. 
- self._output_dense = keras.layers.Dense(feature_size) + self._output_dense = keras.layers.Dense(feature_size, + kernel_initializer=self.kernel_initializer, bias_initializer=self.bias_initializer) self._outputdropout = keras.layers.Dropout(rate=self.dropout) def _add_and_norm(self, input1, input2, norm_layer): @@ -221,6 +235,8 @@ def get_config(self): "dropout": self.dropout, "activation": self.activation, "layer_norm_epsilon": self.layer_norm_epsilon, + "kernel_initializer": self.kernel_initializer, + "bias_initializer": self.bias_initializer, } ) return config From 6f4bbc87777c55218025747e1a738e054ed71367 Mon Sep 17 00:00:00 2001 From: Aflah <72096386+aflah02@users.noreply.github.com> Date: Sat, 19 Mar 2022 13:45:35 +0530 Subject: [PATCH 02/16] Added Initializer to Encoder and Decoder --- keras_nlp/layers/transformer_decoder.py | 3 ++- keras_nlp/layers/transformer_encoder.py | 19 +++++++++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/keras_nlp/layers/transformer_decoder.py b/keras_nlp/layers/transformer_decoder.py index 33ccf10a38..3bcc44c9bc 100644 --- a/keras_nlp/layers/transformer_decoder.py +++ b/keras_nlp/layers/transformer_decoder.py @@ -16,7 +16,7 @@ import tensorflow as tf from tensorflow import keras -from tensorflow.keras import initializers +from keras import initializers from keras_nlp.layers.transformer_layer_utils import ( # isort:skip compute_causal_mask, @@ -93,6 +93,7 @@ def __init__( self._built = False self.kernel_initializer = kernel_initializer self.bias_initializer = bias_initializer + def _build(self, input_shape): # Create layers based on input shape. self._built = True diff --git a/keras_nlp/layers/transformer_encoder.py b/keras_nlp/layers/transformer_encoder.py index 68a3de7fc2..c3e9238109 100644 --- a/keras_nlp/layers/transformer_encoder.py +++ b/keras_nlp/layers/transformer_encoder.py @@ -15,6 +15,7 @@ """Transformer encoder block implementation based on `keras.layers.Layer`.""" from tensorflow import keras +from keras import initializers from keras_nlp.layers.transformer_layer_utils import ( # isort:skip merge_padding_and_attention_mask, @@ -38,6 +39,10 @@ class TransformerEncoder(keras.layers.Layer): layer_norm_epsilon: float, defaults to 1e-5. The epsilon value in layer normalization components. name: string, defaults to None. The name of the layer. + kernel_initializer: tf.keras.initializers initializer, defaults to None. Sets the + kernel initializer for the dense and multiheaded attention layers + bias_initializer: tf.keras.initializers initializer, defaults to None. Sets the + bias initializer for the dense and multiheaded attention layers **kwargs: other keyword arguments. Examples: @@ -70,6 +75,8 @@ def __init__( activation="relu", layer_norm_epsilon=1e-05, name=None, + kernel_initializer=None, + bias_initializer=None, **kwargs ): super().__init__(name=name, **kwargs) @@ -79,6 +86,8 @@ def __init__( self.activation = activation self.layer_norm_epsilon = layer_norm_epsilon self._built = False + self.kernel_initializer = kernel_initializer + self.bias_initializer = bias_initializer def _build(self, input_shape): # Create layers based on input shape. 
@@ -90,6 +99,8 @@ def _build(self, input_shape): key_dim=self._attention_head_size, value_dim=self._attention_head_size, dropout=self.dropout, + kernel_initializer=self.kernel_initializer, + bias_initializer=self.bias_initializer, ) self._attention_layernorm = keras.layers.LayerNormalization() @@ -98,9 +109,11 @@ def _build(self, input_shape): self._attentiondropout = keras.layers.Dropout(rate=self.dropout) self._intermediate_dense = keras.layers.Dense( - self.intermediate_dim, activation=self.activation + self.intermediate_dim, activation=self.activation, + kernel_initializer=self.kernel_initializer, bias_initializer=self.bias_initializer, ) - self._output_dense = keras.layers.Dense(feature_size) + self._output_dense = keras.layers.Dense(feature_size, + kernel_initializer=self.kernel_initializer, bias_initializer=self.bias_initializer,) self._outputdropout = keras.layers.Dropout(rate=self.dropout) def _add_and_norm(self, input1, input2, norm_layer): @@ -163,6 +176,8 @@ def get_config(self): "dropout": self.dropout, "activation": self.activation, "layer_norm_epsilon": self.layer_norm_epsilon, + "kernel_initializer": self.kernel_initializer, + "bias_initializer": self.bias_initializer, } ) return config From e407607fa215110aca430023f9a9dd581edff9c3 Mon Sep 17 00:00:00 2001 From: Aflah <72096386+aflah02@users.noreply.github.com> Date: Sat, 19 Mar 2022 13:54:49 +0530 Subject: [PATCH 03/16] Added Initializers to Expected Test Config --- keras_nlp/layers/transformer_decoder_test.py | 2 ++ keras_nlp/layers/transformer_encoder_test.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/keras_nlp/layers/transformer_decoder_test.py b/keras_nlp/layers/transformer_decoder_test.py index 1b3a0ac9a5..3a97eccd8e 100644 --- a/keras_nlp/layers/transformer_decoder_test.py +++ b/keras_nlp/layers/transformer_decoder_test.py @@ -68,6 +68,8 @@ def test_get_config_and_from_config(self): "dropout": 0, "activation": "relu", "layer_norm_epsilon": 1e-05, + "kernel_initializer": None, + "bias_initializer": None, } self.assertEqual(config, {**config, **expected_config_subset}) diff --git a/keras_nlp/layers/transformer_encoder_test.py b/keras_nlp/layers/transformer_encoder_test.py index b7a2a142f9..1b45d089e7 100644 --- a/keras_nlp/layers/transformer_encoder_test.py +++ b/keras_nlp/layers/transformer_encoder_test.py @@ -58,6 +58,8 @@ def test_get_config_and_from_config(self): "dropout": 0, "activation": "relu", "layer_norm_epsilon": 1e-05, + "kernel_initializer": None, + "bias_initializer": None, } self.assertEqual(config, {**config, **expected_config_subset}) From 7f4b06e397f9fa263acb32e3093f6f5d2b51f4d6 Mon Sep 17 00:00:00 2001 From: Aflah <72096386+aflah02@users.noreply.github.com> Date: Sat, 19 Mar 2022 14:18:31 +0530 Subject: [PATCH 04/16] Added Serialized Version to Config, Added New Test --- keras_nlp/layers/transformer_decoder.py | 6 +-- keras_nlp/layers/transformer_decoder_test.py | 43 ++++++++++++++++---- keras_nlp/layers/transformer_encoder.py | 4 +- keras_nlp/layers/transformer_encoder_test.py | 43 ++++++++++++++++---- 4 files changed, 75 insertions(+), 21 deletions(-) diff --git a/keras_nlp/layers/transformer_decoder.py b/keras_nlp/layers/transformer_decoder.py index 3bcc44c9bc..3ffbf74299 100644 --- a/keras_nlp/layers/transformer_decoder.py +++ b/keras_nlp/layers/transformer_decoder.py @@ -93,7 +93,7 @@ def __init__( self._built = False self.kernel_initializer = kernel_initializer self.bias_initializer = bias_initializer - + def _build(self, input_shape): # Create layers based on input shape. 
self._built = True @@ -236,8 +236,8 @@ def get_config(self): "dropout": self.dropout, "activation": self.activation, "layer_norm_epsilon": self.layer_norm_epsilon, - "kernel_initializer": self.kernel_initializer, - "bias_initializer": self.bias_initializer, + "kernel_initializer": initializers.serialize(self.kernel_initializer), + "bias_initializer": initializers.serialize(self.bias_initializer), } ) return config diff --git a/keras_nlp/layers/transformer_decoder_test.py b/keras_nlp/layers/transformer_decoder_test.py index 3a97eccd8e..9c7b8a0095 100644 --- a/keras_nlp/layers/transformer_decoder_test.py +++ b/keras_nlp/layers/transformer_decoder_test.py @@ -17,7 +17,7 @@ import tensorflow as tf from tensorflow import keras - +from keras import initializers from keras_nlp.layers import transformer_decoder @@ -57,12 +57,21 @@ def test_valid_call_with_mask(self): ) def test_get_config_and_from_config(self): - decoder = transformer_decoder.TransformerDecoder( + decoder1 = transformer_decoder.TransformerDecoder( + intermediate_dim=4, + num_heads=2, + ) + decoder2 = transformer_decoder.TransformerDecoder( intermediate_dim=4, num_heads=2, + kernel_initializer=initializers.HeNormal(), + bias_initializer=initializers.Constant(value=2) ) - config = decoder.get_config() - expected_config_subset = { + + config1 = decoder1.get_config() + config2 = decoder2.get_config() + + expected_config_subset1 = { "intermediate_dim": 4, "num_heads": 2, "dropout": 0, @@ -71,13 +80,31 @@ def test_get_config_and_from_config(self): "kernel_initializer": None, "bias_initializer": None, } - self.assertEqual(config, {**config, **expected_config_subset}) + expected_config_subset2 = { + "intermediate_dim": 4, + "num_heads": 2, + "dropout": 0, + "activation": "relu", + "layer_norm_epsilon": 1e-05, + "kernel_initializer": initializers.serialize(initializers.HeNormal()), + "bias_initializer": initializers.serialize(initializers.Constant(value=2)), + } - restored_decoder = transformer_decoder.TransformerDecoder.from_config( - config, + self.assertEqual(config1, {**config1, **expected_config_subset1}) + self.assertEqual(config2, {**config2, **expected_config_subset2}) + + restored_decoder1 = transformer_decoder.TransformerDecoder.from_config( + config1, + ) + restored_decoder2 = transformer_decoder.TransformerDecoder.from_config( + config2, + ) + + self.assertEqual( + restored_decoder1.get_config(), {**config1, **expected_config_subset1} ) self.assertEqual( - restored_decoder.get_config(), {**config, **expected_config_subset} + restored_decoder2.get_config(), {**config2, **expected_config_subset2} ) def test_one_training_step_of_transformer_encoder(self): diff --git a/keras_nlp/layers/transformer_encoder.py b/keras_nlp/layers/transformer_encoder.py index c3e9238109..29891ef560 100644 --- a/keras_nlp/layers/transformer_encoder.py +++ b/keras_nlp/layers/transformer_encoder.py @@ -176,8 +176,8 @@ def get_config(self): "dropout": self.dropout, "activation": self.activation, "layer_norm_epsilon": self.layer_norm_epsilon, - "kernel_initializer": self.kernel_initializer, - "bias_initializer": self.bias_initializer, + "kernel_initializer": initializers.serialize(self.kernel_initializer), + "bias_initializer": initializers.serialize(self.bias_initializer), } ) return config diff --git a/keras_nlp/layers/transformer_encoder_test.py b/keras_nlp/layers/transformer_encoder_test.py index 1b45d089e7..69eda66665 100644 --- a/keras_nlp/layers/transformer_encoder_test.py +++ b/keras_nlp/layers/transformer_encoder_test.py @@ -17,7 +17,7 @@ import 
tensorflow as tf from tensorflow import keras - +from keras import initializers from keras_nlp.layers import transformer_encoder @@ -47,12 +47,21 @@ def test_valid_call_with_mask(self): encoder(input, mask) def test_get_config_and_from_config(self): - encoder = transformer_encoder.TransformerEncoder( + encoder1 = transformer_encoder.TransformerEncoder( + intermediate_dim=4, + num_heads=2, + ) + encoder2 = transformer_encoder.TransformerDecoder( intermediate_dim=4, num_heads=2, + kernel_initializer=initializers.HeNormal(), + bias_initializer=initializers.Constant(value=2) ) - config = encoder.get_config() - expected_config_subset = { + + config1 = encoder1.get_config() + config2 = encoder2.get_config() + + expected_config_subset1 = { "intermediate_dim": 4, "num_heads": 2, "dropout": 0, @@ -61,13 +70,31 @@ def test_get_config_and_from_config(self): "kernel_initializer": None, "bias_initializer": None, } - self.assertEqual(config, {**config, **expected_config_subset}) + expected_config_subset1 = { + "intermediate_dim": 4, + "num_heads": 2, + "dropout": 0, + "activation": "relu", + "layer_norm_epsilon": 1e-05, + "kernel_initializer": initializers.serialize(initializers.HeNormal()), + "bias_initializer": initializers.serialize(initializers.Constant(value=2)), + } + + self.assertEqual(config1, {**config1, **expected_config_subset1}) + self.assertEqual(config2, {**config2, **expected_config_subset1}) - restored_encoder = transformer_encoder.TransformerEncoder.from_config( - config, + restored_encoder1 = transformer_encoder.TransformerEncoder.from_config( + config1, + ) + restored_encoder2 = transformer_encoder.TransformerEncoder.from_config( + config2, + ) + + self.assertEqual( + restored_encoder1.get_config(), {**config1, **expected_config_subset1} ) self.assertEqual( - restored_encoder.get_config(), {**config, **expected_config_subset} + restored_encoder2.get_config(), {**config2, **expected_config_subset1} ) def test_one_training_step_of_transformer_encoder(self): From 7c5d4a7cdf7724b89208323d6c736cfa847cf9f3 Mon Sep 17 00:00:00 2001 From: Aflah <72096386+aflah02@users.noreply.github.com> Date: Tue, 22 Mar 2022 15:21:50 +0530 Subject: [PATCH 05/16] Fixed Docstring for Encoder and Decoder --- keras_nlp/layers/transformer_decoder.py | 8 ++++---- keras_nlp/layers/transformer_encoder.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/keras_nlp/layers/transformer_decoder.py b/keras_nlp/layers/transformer_decoder.py index 3ffbf74299..bd97ca2d9c 100644 --- a/keras_nlp/layers/transformer_decoder.py +++ b/keras_nlp/layers/transformer_decoder.py @@ -41,10 +41,10 @@ class TransformerDecoder(keras.layers.Layer): layer_norm_epsilon: float, defaults to 1e-5. The eps value in layer normalization components. name: string, defaults to None. The name of the layer. - kernel_initializer: tf.keras.initializers initializer, defaults to None. Sets the - kernel initializer for the dense and multiheaded attention layers - bias_initializer: tf.keras.initializers initializer, defaults to None. Sets the - bias initializer for the dense and multiheaded attention layers + kernel_initializer: tf.keras.initializers initializer, defaults to None. + The kernel initializer for the dense and multiheaded attention layers. + bias_initializer: tf.keras.initializers initializer, defaults to None. + The bias initializer for the dense and multiheaded attention layers. **kwargs: other keyword arguments. 
Examples: diff --git a/keras_nlp/layers/transformer_encoder.py b/keras_nlp/layers/transformer_encoder.py index 29891ef560..0587892315 100644 --- a/keras_nlp/layers/transformer_encoder.py +++ b/keras_nlp/layers/transformer_encoder.py @@ -39,10 +39,10 @@ class TransformerEncoder(keras.layers.Layer): layer_norm_epsilon: float, defaults to 1e-5. The epsilon value in layer normalization components. name: string, defaults to None. The name of the layer. - kernel_initializer: tf.keras.initializers initializer, defaults to None. Sets the - kernel initializer for the dense and multiheaded attention layers - bias_initializer: tf.keras.initializers initializer, defaults to None. Sets the - bias initializer for the dense and multiheaded attention layers + kernel_initializer: tf.keras.initializers initializer, defaults to None. + The kernel initializer for the dense and multiheaded attention layers. + bias_initializer: tf.keras.initializers initializer, defaults to None. + The bias initializer for the dense and multiheaded attention layers. **kwargs: other keyword arguments. Examples: From 2cac3299ffa9a69e51fce6b6414f798f07d21768 Mon Sep 17 00:00:00 2001 From: Aflah <72096386+aflah02@users.noreply.github.com> Date: Tue, 22 Mar 2022 15:29:25 +0530 Subject: [PATCH 06/16] Changed initializer import to keras.initializer --- keras_nlp/layers/transformer_decoder.py | 9 ++++----- keras_nlp/layers/transformer_encoder.py | 9 ++++----- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/keras_nlp/layers/transformer_decoder.py b/keras_nlp/layers/transformer_decoder.py index bd97ca2d9c..8c21eabbfd 100644 --- a/keras_nlp/layers/transformer_decoder.py +++ b/keras_nlp/layers/transformer_decoder.py @@ -16,7 +16,6 @@ import tensorflow as tf from tensorflow import keras -from keras import initializers from keras_nlp.layers.transformer_layer_utils import ( # isort:skip compute_causal_mask, @@ -40,11 +39,11 @@ class TransformerDecoder(keras.layers.Layer): activation function of feedforward network. layer_norm_epsilon: float, defaults to 1e-5. The eps value in layer normalization components. - name: string, defaults to None. The name of the layer. kernel_initializer: tf.keras.initializers initializer, defaults to None. The kernel initializer for the dense and multiheaded attention layers. bias_initializer: tf.keras.initializers initializer, defaults to None. The bias initializer for the dense and multiheaded attention layers. + name: string, defaults to None. The name of the layer. **kwargs: other keyword arguments. 
Examples: @@ -79,9 +78,9 @@ def __init__( dropout=0, activation="relu", layer_norm_epsilon=1e-05, - name=None, kernel_initializer=None, bias_initializer=None, + name=None, **kwargs, ): super().__init__(name=name, **kwargs) @@ -236,8 +235,8 @@ def get_config(self): "dropout": self.dropout, "activation": self.activation, "layer_norm_epsilon": self.layer_norm_epsilon, - "kernel_initializer": initializers.serialize(self.kernel_initializer), - "bias_initializer": initializers.serialize(self.bias_initializer), + "kernel_initializer": keras.initializers.serialize(self.kernel_initializer), + "bias_initializer": keras.initializers.serialize(self.bias_initializer), } ) return config diff --git a/keras_nlp/layers/transformer_encoder.py b/keras_nlp/layers/transformer_encoder.py index 0587892315..e6d94828bb 100644 --- a/keras_nlp/layers/transformer_encoder.py +++ b/keras_nlp/layers/transformer_encoder.py @@ -15,7 +15,6 @@ """Transformer encoder block implementation based on `keras.layers.Layer`.""" from tensorflow import keras -from keras import initializers from keras_nlp.layers.transformer_layer_utils import ( # isort:skip merge_padding_and_attention_mask, @@ -38,11 +37,11 @@ class TransformerEncoder(keras.layers.Layer): activation function of feedforward network. layer_norm_epsilon: float, defaults to 1e-5. The epsilon value in layer normalization components. - name: string, defaults to None. The name of the layer. kernel_initializer: tf.keras.initializers initializer, defaults to None. The kernel initializer for the dense and multiheaded attention layers. bias_initializer: tf.keras.initializers initializer, defaults to None. The bias initializer for the dense and multiheaded attention layers. + name: string, defaults to None. The name of the layer. **kwargs: other keyword arguments. 
Examples: @@ -74,9 +73,9 @@ def __init__( dropout=0, activation="relu", layer_norm_epsilon=1e-05, - name=None, kernel_initializer=None, bias_initializer=None, + name=None, **kwargs ): super().__init__(name=name, **kwargs) @@ -176,8 +175,8 @@ def get_config(self): "dropout": self.dropout, "activation": self.activation, "layer_norm_epsilon": self.layer_norm_epsilon, - "kernel_initializer": initializers.serialize(self.kernel_initializer), - "bias_initializer": initializers.serialize(self.bias_initializer), + "kernel_initializer": keras.initializers.serialize(self.kernel_initializer), + "bias_initializer": keras.initializers.serialize(self.bias_initializer), } ) return config From 15881ba35de769882343be34c7acbe656ed7ee94 Mon Sep 17 00:00:00 2001 From: Aflah <72096386+aflah02@users.noreply.github.com> Date: Tue, 22 Mar 2022 15:33:55 +0530 Subject: [PATCH 07/16] Removed Redudant Test From Encoder and Decoder --- keras_nlp/layers/transformer_decoder_test.py | 36 +++++--------------- keras_nlp/layers/transformer_encoder_test.py | 35 ++++--------------- 2 files changed, 15 insertions(+), 56 deletions(-) diff --git a/keras_nlp/layers/transformer_decoder_test.py b/keras_nlp/layers/transformer_decoder_test.py index 9c7b8a0095..743bfa13f3 100644 --- a/keras_nlp/layers/transformer_decoder_test.py +++ b/keras_nlp/layers/transformer_decoder_test.py @@ -57,30 +57,16 @@ def test_valid_call_with_mask(self): ) def test_get_config_and_from_config(self): - decoder1 = transformer_decoder.TransformerDecoder( - intermediate_dim=4, - num_heads=2, - ) - decoder2 = transformer_decoder.TransformerDecoder( + decoder = transformer_decoder.TransformerDecoder( intermediate_dim=4, num_heads=2, kernel_initializer=initializers.HeNormal(), bias_initializer=initializers.Constant(value=2) ) - config1 = decoder1.get_config() - config2 = decoder2.get_config() + config = decoder.get_config() - expected_config_subset1 = { - "intermediate_dim": 4, - "num_heads": 2, - "dropout": 0, - "activation": "relu", - "layer_norm_epsilon": 1e-05, - "kernel_initializer": None, - "bias_initializer": None, - } - expected_config_subset2 = { + expected_config_subset = { "intermediate_dim": 4, "num_heads": 2, "dropout": 0, @@ -90,21 +76,15 @@ def test_get_config_and_from_config(self): "bias_initializer": initializers.serialize(initializers.Constant(value=2)), } - self.assertEqual(config1, {**config1, **expected_config_subset1}) - self.assertEqual(config2, {**config2, **expected_config_subset2}) + self.assertEqual(config, {**config, **expected_config_subset}) + self.assertEqual(config, {**config, **expected_config_subset}) - restored_decoder1 = transformer_decoder.TransformerDecoder.from_config( - config1, - ) - restored_decoder2 = transformer_decoder.TransformerDecoder.from_config( - config2, + restored_decoder = transformer_decoder.TransformerDecoder.from_config( + config, ) self.assertEqual( - restored_decoder1.get_config(), {**config1, **expected_config_subset1} - ) - self.assertEqual( - restored_decoder2.get_config(), {**config2, **expected_config_subset2} + restored_decoder.get_config(), {**config, **expected_config_subset} ) def test_one_training_step_of_transformer_encoder(self): diff --git a/keras_nlp/layers/transformer_encoder_test.py b/keras_nlp/layers/transformer_encoder_test.py index 69eda66665..1326893b71 100644 --- a/keras_nlp/layers/transformer_encoder_test.py +++ b/keras_nlp/layers/transformer_encoder_test.py @@ -47,30 +47,16 @@ def test_valid_call_with_mask(self): encoder(input, mask) def test_get_config_and_from_config(self): - 
encoder1 = transformer_encoder.TransformerEncoder( - intermediate_dim=4, - num_heads=2, - ) - encoder2 = transformer_encoder.TransformerDecoder( + encoder = transformer_encoder.TransformerDecoder( intermediate_dim=4, num_heads=2, kernel_initializer=initializers.HeNormal(), bias_initializer=initializers.Constant(value=2) ) - config1 = encoder1.get_config() - config2 = encoder2.get_config() + config = encoder.get_config() - expected_config_subset1 = { - "intermediate_dim": 4, - "num_heads": 2, - "dropout": 0, - "activation": "relu", - "layer_norm_epsilon": 1e-05, - "kernel_initializer": None, - "bias_initializer": None, - } - expected_config_subset1 = { + expected_config_subset = { "intermediate_dim": 4, "num_heads": 2, "dropout": 0, @@ -80,21 +66,14 @@ def test_get_config_and_from_config(self): "bias_initializer": initializers.serialize(initializers.Constant(value=2)), } - self.assertEqual(config1, {**config1, **expected_config_subset1}) - self.assertEqual(config2, {**config2, **expected_config_subset1}) + self.assertEqual(config, {**config, **expected_config_subset}) - restored_encoder1 = transformer_encoder.TransformerEncoder.from_config( - config1, - ) - restored_encoder2 = transformer_encoder.TransformerEncoder.from_config( - config2, + restored_encoder= transformer_encoder.TransformerEncoder.from_config( + config, ) self.assertEqual( - restored_encoder1.get_config(), {**config1, **expected_config_subset1} - ) - self.assertEqual( - restored_encoder2.get_config(), {**config2, **expected_config_subset1} + restored_encoder.get_config(), {**config, **expected_config_subset} ) def test_one_training_step_of_transformer_encoder(self): From 2cf82aa20c30177b953a66f70842254a22dd5df8 Mon Sep 17 00:00:00 2001 From: Aflah <72096386+aflah02@users.noreply.github.com> Date: Tue, 22 Mar 2022 15:39:39 +0530 Subject: [PATCH 08/16] Changed Default to Glorot Uniform and Zeros --- keras_nlp/layers/transformer_decoder.py | 12 ++++++------ keras_nlp/layers/transformer_decoder_test.py | 9 ++++----- keras_nlp/layers/transformer_encoder.py | 12 ++++++------ keras_nlp/layers/transformer_encoder_test.py | 9 ++++----- 4 files changed, 20 insertions(+), 22 deletions(-) diff --git a/keras_nlp/layers/transformer_decoder.py b/keras_nlp/layers/transformer_decoder.py index 8c21eabbfd..0e831bbd53 100644 --- a/keras_nlp/layers/transformer_decoder.py +++ b/keras_nlp/layers/transformer_decoder.py @@ -39,9 +39,9 @@ class TransformerDecoder(keras.layers.Layer): activation function of feedforward network. layer_norm_epsilon: float, defaults to 1e-5. The eps value in layer normalization components. - kernel_initializer: tf.keras.initializers initializer, defaults to None. + kernel_initializer: tf.keras.initializers initializer, defaults to "glorot_uniform". The kernel initializer for the dense and multiheaded attention layers. - bias_initializer: tf.keras.initializers initializer, defaults to None. + bias_initializer: tf.keras.initializers initializer, defaults to "zeros". The bias initializer for the dense and multiheaded attention layers. name: string, defaults to None. The name of the layer. **kwargs: other keyword arguments. 
@@ -78,8 +78,8 @@ def __init__( dropout=0, activation="relu", layer_norm_epsilon=1e-05, - kernel_initializer=None, - bias_initializer=None, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", name=None, **kwargs, ): @@ -90,8 +90,8 @@ def __init__( self.activation = activation self.layer_norm_epsilon = layer_norm_epsilon self._built = False - self.kernel_initializer = kernel_initializer - self.bias_initializer = bias_initializer + self.kernel_initializer = keras.initializers.get(kernel_initializer) + self.bias_initializer = keras.initializers.get(bias_initializer) def _build(self, input_shape): # Create layers based on input shape. diff --git a/keras_nlp/layers/transformer_decoder_test.py b/keras_nlp/layers/transformer_decoder_test.py index 743bfa13f3..4bb53d8794 100644 --- a/keras_nlp/layers/transformer_decoder_test.py +++ b/keras_nlp/layers/transformer_decoder_test.py @@ -17,7 +17,6 @@ import tensorflow as tf from tensorflow import keras -from keras import initializers from keras_nlp.layers import transformer_decoder @@ -60,8 +59,8 @@ def test_get_config_and_from_config(self): decoder = transformer_decoder.TransformerDecoder( intermediate_dim=4, num_heads=2, - kernel_initializer=initializers.HeNormal(), - bias_initializer=initializers.Constant(value=2) + kernel_initializer=keras.initializers.HeNormal(), + bias_initializer=keras.initializers.Constant(value=2) ) config = decoder.get_config() @@ -72,8 +71,8 @@ def test_get_config_and_from_config(self): "dropout": 0, "activation": "relu", "layer_norm_epsilon": 1e-05, - "kernel_initializer": initializers.serialize(initializers.HeNormal()), - "bias_initializer": initializers.serialize(initializers.Constant(value=2)), + "kernel_initializer": keras.initializers.serialize(keras.initializers.HeNormal()), + "bias_initializer": keras.initializers.serialize(keras.initializers.Constant(value=2)), } self.assertEqual(config, {**config, **expected_config_subset}) diff --git a/keras_nlp/layers/transformer_encoder.py b/keras_nlp/layers/transformer_encoder.py index e6d94828bb..21d5a13386 100644 --- a/keras_nlp/layers/transformer_encoder.py +++ b/keras_nlp/layers/transformer_encoder.py @@ -37,9 +37,9 @@ class TransformerEncoder(keras.layers.Layer): activation function of feedforward network. layer_norm_epsilon: float, defaults to 1e-5. The epsilon value in layer normalization components. - kernel_initializer: tf.keras.initializers initializer, defaults to None. + kernel_initializer: tf.keras.initializers initializer, defaults to "glorot_uniform". The kernel initializer for the dense and multiheaded attention layers. - bias_initializer: tf.keras.initializers initializer, defaults to None. + bias_initializer: tf.keras.initializers initializer, defaults to "zeros". The bias initializer for the dense and multiheaded attention layers. name: string, defaults to None. The name of the layer. **kwargs: other keyword arguments. 
@@ -73,8 +73,8 @@ def __init__( dropout=0, activation="relu", layer_norm_epsilon=1e-05, - kernel_initializer=None, - bias_initializer=None, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", name=None, **kwargs ): @@ -85,8 +85,8 @@ def __init__( self.activation = activation self.layer_norm_epsilon = layer_norm_epsilon self._built = False - self.kernel_initializer = kernel_initializer - self.bias_initializer = bias_initializer + self.kernel_initializer = keras.initializers.get(kernel_initializer) + self.bias_initializer = keras.initializers.get(bias_initializer) def _build(self, input_shape): # Create layers based on input shape. diff --git a/keras_nlp/layers/transformer_encoder_test.py b/keras_nlp/layers/transformer_encoder_test.py index 1326893b71..945f34e07c 100644 --- a/keras_nlp/layers/transformer_encoder_test.py +++ b/keras_nlp/layers/transformer_encoder_test.py @@ -17,7 +17,6 @@ import tensorflow as tf from tensorflow import keras -from keras import initializers from keras_nlp.layers import transformer_encoder @@ -50,8 +49,8 @@ def test_get_config_and_from_config(self): encoder = transformer_encoder.TransformerDecoder( intermediate_dim=4, num_heads=2, - kernel_initializer=initializers.HeNormal(), - bias_initializer=initializers.Constant(value=2) + kernel_initializer=keras.initializers.HeNormal(), + bias_initializer=keras.initializers.Constant(value=2) ) config = encoder.get_config() @@ -62,8 +61,8 @@ def test_get_config_and_from_config(self): "dropout": 0, "activation": "relu", "layer_norm_epsilon": 1e-05, - "kernel_initializer": initializers.serialize(initializers.HeNormal()), - "bias_initializer": initializers.serialize(initializers.Constant(value=2)), + "kernel_initializer": keras.initializers.serialize(keras.initializers.HeNormal()), + "bias_initializer": keras.initializers.serialize(keras.initializers.Constant(value=2)), } self.assertEqual(config, {**config, **expected_config_subset}) From d13ac7a6f2a35282f1cff4b981db90a77af63afd Mon Sep 17 00:00:00 2001 From: Aflah <72096386+aflah02@users.noreply.github.com> Date: Tue, 22 Mar 2022 15:42:00 +0530 Subject: [PATCH 09/16] Ensure friendly error if bad arg on layer creation --- keras_nlp/layers/transformer_decoder.py | 2 +- keras_nlp/layers/transformer_encoder.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/keras_nlp/layers/transformer_decoder.py b/keras_nlp/layers/transformer_decoder.py index 0e831bbd53..47fb2fd13b 100644 --- a/keras_nlp/layers/transformer_decoder.py +++ b/keras_nlp/layers/transformer_decoder.py @@ -87,7 +87,7 @@ def __init__( self.intermediate_dim = intermediate_dim self.num_heads = num_heads self.dropout = dropout - self.activation = activation + self.activation = keras.activations.get(activation) self.layer_norm_epsilon = layer_norm_epsilon self._built = False self.kernel_initializer = keras.initializers.get(kernel_initializer) diff --git a/keras_nlp/layers/transformer_encoder.py b/keras_nlp/layers/transformer_encoder.py index 21d5a13386..e211f28ad5 100644 --- a/keras_nlp/layers/transformer_encoder.py +++ b/keras_nlp/layers/transformer_encoder.py @@ -82,7 +82,7 @@ def __init__( self.intermediate_dim = intermediate_dim self.num_heads = num_heads self.dropout = dropout - self.activation = activation + self.activation = keras.activations.get(activation) self.layer_norm_epsilon = layer_norm_epsilon self._built = False self.kernel_initializer = keras.initializers.get(kernel_initializer) From c754ea6a94155051349cf0e17c56d79a254efa4e Mon Sep 17 00:00:00 2001 From: Aflah 
<72096386+aflah02@users.noreply.github.com> Date: Tue, 22 Mar 2022 15:51:26 +0530 Subject: [PATCH 10/16] Ran Black Formatter --- keras_nlp/layers/transformer_decoder.py | 21 +++++++++++++------- keras_nlp/layers/transformer_decoder_test.py | 10 +++++++--- keras_nlp/layers/transformer_encoder.py | 21 +++++++++++++------- keras_nlp/layers/transformer_encoder_test.py | 12 +++++++---- 4 files changed, 43 insertions(+), 21 deletions(-) diff --git a/keras_nlp/layers/transformer_decoder.py b/keras_nlp/layers/transformer_decoder.py index 47fb2fd13b..e1aced9d3a 100644 --- a/keras_nlp/layers/transformer_decoder.py +++ b/keras_nlp/layers/transformer_decoder.py @@ -39,9 +39,9 @@ class TransformerDecoder(keras.layers.Layer): activation function of feedforward network. layer_norm_epsilon: float, defaults to 1e-5. The eps value in layer normalization components. - kernel_initializer: tf.keras.initializers initializer, defaults to "glorot_uniform". + kernel_initializer: tf.keras.initializers initializer, defaults to "glorot_uniform". The kernel initializer for the dense and multiheaded attention layers. - bias_initializer: tf.keras.initializers initializer, defaults to "zeros". + bias_initializer: tf.keras.initializers initializer, defaults to "zeros". The bias initializer for the dense and multiheaded attention layers. name: string, defaults to None. The name of the layer. **kwargs: other keyword arguments. @@ -126,13 +126,18 @@ def _build(self, input_shape): # First dense layer in the feedforward network, which maps input # feauture size to dimension `self.intermediate_dim`. self._intermediate_dense = keras.layers.Dense( - self.intermediate_dim, activation=self.activation, - kernel_initializer=self.kernel_initializer, bias_initializer=self.bias_initializer, + self.intermediate_dim, + activation=self.activation, + kernel_initializer=self.kernel_initializer, + bias_initializer=self.bias_initializer, ) # Second dense layer in the feedforward network, which maps input # feature size back to the input feature size. 
- self._output_dense = keras.layers.Dense(feature_size, - kernel_initializer=self.kernel_initializer, bias_initializer=self.bias_initializer) + self._output_dense = keras.layers.Dense( + feature_size, + kernel_initializer=self.kernel_initializer, + bias_initializer=self.bias_initializer, + ) self._outputdropout = keras.layers.Dropout(rate=self.dropout) def _add_and_norm(self, input1, input2, norm_layer): @@ -235,7 +240,9 @@ def get_config(self): "dropout": self.dropout, "activation": self.activation, "layer_norm_epsilon": self.layer_norm_epsilon, - "kernel_initializer": keras.initializers.serialize(self.kernel_initializer), + "kernel_initializer": keras.initializers.serialize( + self.kernel_initializer + ), "bias_initializer": keras.initializers.serialize(self.bias_initializer), } ) diff --git a/keras_nlp/layers/transformer_decoder_test.py b/keras_nlp/layers/transformer_decoder_test.py index 4bb53d8794..f33dc2f133 100644 --- a/keras_nlp/layers/transformer_decoder_test.py +++ b/keras_nlp/layers/transformer_decoder_test.py @@ -60,7 +60,7 @@ def test_get_config_and_from_config(self): intermediate_dim=4, num_heads=2, kernel_initializer=keras.initializers.HeNormal(), - bias_initializer=keras.initializers.Constant(value=2) + bias_initializer=keras.initializers.Constant(value=2), ) config = decoder.get_config() @@ -71,8 +71,12 @@ def test_get_config_and_from_config(self): "dropout": 0, "activation": "relu", "layer_norm_epsilon": 1e-05, - "kernel_initializer": keras.initializers.serialize(keras.initializers.HeNormal()), - "bias_initializer": keras.initializers.serialize(keras.initializers.Constant(value=2)), + "kernel_initializer": keras.initializers.serialize( + keras.initializers.HeNormal() + ), + "bias_initializer": keras.initializers.serialize( + keras.initializers.Constant(value=2) + ), } self.assertEqual(config, {**config, **expected_config_subset}) diff --git a/keras_nlp/layers/transformer_encoder.py b/keras_nlp/layers/transformer_encoder.py index e211f28ad5..08fdb32234 100644 --- a/keras_nlp/layers/transformer_encoder.py +++ b/keras_nlp/layers/transformer_encoder.py @@ -37,9 +37,9 @@ class TransformerEncoder(keras.layers.Layer): activation function of feedforward network. layer_norm_epsilon: float, defaults to 1e-5. The epsilon value in layer normalization components. - kernel_initializer: tf.keras.initializers initializer, defaults to "glorot_uniform". + kernel_initializer: tf.keras.initializers initializer, defaults to "glorot_uniform". The kernel initializer for the dense and multiheaded attention layers. - bias_initializer: tf.keras.initializers initializer, defaults to "zeros". + bias_initializer: tf.keras.initializers initializer, defaults to "zeros". The bias initializer for the dense and multiheaded attention layers. name: string, defaults to None. The name of the layer. **kwargs: other keyword arguments. 
@@ -108,11 +108,16 @@ def _build(self, input_shape): self._attentiondropout = keras.layers.Dropout(rate=self.dropout) self._intermediate_dense = keras.layers.Dense( - self.intermediate_dim, activation=self.activation, - kernel_initializer=self.kernel_initializer, bias_initializer=self.bias_initializer, + self.intermediate_dim, + activation=self.activation, + kernel_initializer=self.kernel_initializer, + bias_initializer=self.bias_initializer, + ) + self._output_dense = keras.layers.Dense( + feature_size, + kernel_initializer=self.kernel_initializer, + bias_initializer=self.bias_initializer, ) - self._output_dense = keras.layers.Dense(feature_size, - kernel_initializer=self.kernel_initializer, bias_initializer=self.bias_initializer,) self._outputdropout = keras.layers.Dropout(rate=self.dropout) def _add_and_norm(self, input1, input2, norm_layer): @@ -175,7 +180,9 @@ def get_config(self): "dropout": self.dropout, "activation": self.activation, "layer_norm_epsilon": self.layer_norm_epsilon, - "kernel_initializer": keras.initializers.serialize(self.kernel_initializer), + "kernel_initializer": keras.initializers.serialize( + self.kernel_initializer + ), "bias_initializer": keras.initializers.serialize(self.bias_initializer), } ) diff --git a/keras_nlp/layers/transformer_encoder_test.py b/keras_nlp/layers/transformer_encoder_test.py index 945f34e07c..edb951a820 100644 --- a/keras_nlp/layers/transformer_encoder_test.py +++ b/keras_nlp/layers/transformer_encoder_test.py @@ -50,7 +50,7 @@ def test_get_config_and_from_config(self): intermediate_dim=4, num_heads=2, kernel_initializer=keras.initializers.HeNormal(), - bias_initializer=keras.initializers.Constant(value=2) + bias_initializer=keras.initializers.Constant(value=2), ) config = encoder.get_config() @@ -61,13 +61,17 @@ def test_get_config_and_from_config(self): "dropout": 0, "activation": "relu", "layer_norm_epsilon": 1e-05, - "kernel_initializer": keras.initializers.serialize(keras.initializers.HeNormal()), - "bias_initializer": keras.initializers.serialize(keras.initializers.Constant(value=2)), + "kernel_initializer": keras.initializers.serialize( + keras.initializers.HeNormal() + ), + "bias_initializer": keras.initializers.serialize( + keras.initializers.Constant(value=2) + ), } self.assertEqual(config, {**config, **expected_config_subset}) - restored_encoder= transformer_encoder.TransformerEncoder.from_config( + restored_encoder = transformer_encoder.TransformerEncoder.from_config( config, ) From 5b8789cf63c736740236728c89e679aee6e32796 Mon Sep 17 00:00:00 2001 From: Aflah <72096386+aflah02@users.noreply.github.com> Date: Tue, 22 Mar 2022 16:02:21 +0530 Subject: [PATCH 11/16] Fixed Serialization Bug and Reran Black --- keras_nlp/layers/transformer_decoder.py | 2 +- keras_nlp/layers/transformer_decoder_test.py | 6 +++--- keras_nlp/layers/transformer_encoder.py | 2 +- keras_nlp/layers/transformer_encoder_test.py | 8 ++++---- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/keras_nlp/layers/transformer_decoder.py b/keras_nlp/layers/transformer_decoder.py index e1aced9d3a..9a4234a174 100644 --- a/keras_nlp/layers/transformer_decoder.py +++ b/keras_nlp/layers/transformer_decoder.py @@ -238,7 +238,7 @@ def get_config(self): "intermediate_dim": self.intermediate_dim, "num_heads": self.num_heads, "dropout": self.dropout, - "activation": self.activation, + "activation": keras.activations.serialize(self.activation), "layer_norm_epsilon": self.layer_norm_epsilon, "kernel_initializer": keras.initializers.serialize( self.kernel_initializer 
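The `get_config()` change just above is needed because, after patch 09, the constructor stores `keras.activations.get(activation)`, i.e. a callable rather than the original string; serializing it back restores a plain identifier so the config stays JSON-friendly and still matches the `"activation": "relu"` entry the tests compare against. A rough standalone sketch of that round trip, assuming TF 2.x `tf.keras` behaviour (not part of the patch itself):

```python
from tensorflow import keras

# A string identifier resolves to the callable the layer will actually use.
activation_fn = keras.activations.get("relu")

# Serializing the callable recovers a config-friendly identifier.
print(keras.activations.serialize(activation_fn))  # -> "relu"

# Initializers follow the same get()/serialize() pattern, but serialize to a dict.
init = keras.initializers.get("glorot_uniform")
print(keras.initializers.serialize(init))
# -> something like {"class_name": "GlorotUniform", "config": {"seed": None}}
```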
diff --git a/keras_nlp/layers/transformer_decoder_test.py b/keras_nlp/layers/transformer_decoder_test.py index f33dc2f133..1d99b604c8 100644 --- a/keras_nlp/layers/transformer_decoder_test.py +++ b/keras_nlp/layers/transformer_decoder_test.py @@ -59,8 +59,8 @@ def test_get_config_and_from_config(self): decoder = transformer_decoder.TransformerDecoder( intermediate_dim=4, num_heads=2, - kernel_initializer=keras.initializers.HeNormal(), - bias_initializer=keras.initializers.Constant(value=2), + kernel_initializer="HeNormal", + bias_initializer="Zeros", ) config = decoder.get_config() @@ -75,7 +75,7 @@ def test_get_config_and_from_config(self): keras.initializers.HeNormal() ), "bias_initializer": keras.initializers.serialize( - keras.initializers.Constant(value=2) + keras.initializers.Zeros() ), } diff --git a/keras_nlp/layers/transformer_encoder.py b/keras_nlp/layers/transformer_encoder.py index 08fdb32234..525b3d164f 100644 --- a/keras_nlp/layers/transformer_encoder.py +++ b/keras_nlp/layers/transformer_encoder.py @@ -178,7 +178,7 @@ def get_config(self): "intermediate_dim": self.intermediate_dim, "num_heads": self.num_heads, "dropout": self.dropout, - "activation": self.activation, + "activation": keras.activations.serialize(self.activation), "layer_norm_epsilon": self.layer_norm_epsilon, "kernel_initializer": keras.initializers.serialize( self.kernel_initializer diff --git a/keras_nlp/layers/transformer_encoder_test.py b/keras_nlp/layers/transformer_encoder_test.py index edb951a820..ac41565d31 100644 --- a/keras_nlp/layers/transformer_encoder_test.py +++ b/keras_nlp/layers/transformer_encoder_test.py @@ -46,11 +46,11 @@ def test_valid_call_with_mask(self): encoder(input, mask) def test_get_config_and_from_config(self): - encoder = transformer_encoder.TransformerDecoder( + encoder = transformer_encoder.TransformerEncoder( intermediate_dim=4, num_heads=2, - kernel_initializer=keras.initializers.HeNormal(), - bias_initializer=keras.initializers.Constant(value=2), + kernel_initializer="HeNormal", + bias_initializer="Zeros", ) config = encoder.get_config() @@ -65,7 +65,7 @@ def test_get_config_and_from_config(self): keras.initializers.HeNormal() ), "bias_initializer": keras.initializers.serialize( - keras.initializers.Constant(value=2) + keras.initializers.Zeros() ), } From 680811c1c435d511eebd1b47dbbdeace5f648566 Mon Sep 17 00:00:00 2001 From: Aflah <72096386+aflah02@users.noreply.github.com> Date: Tue, 22 Mar 2022 16:25:52 +0530 Subject: [PATCH 12/16] Added Additional Tests for Testing Value Error --- keras_nlp/layers/transformer_decoder_test.py | 9 +++++++++ keras_nlp/layers/transformer_encoder_test.py | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/keras_nlp/layers/transformer_decoder_test.py b/keras_nlp/layers/transformer_decoder_test.py index 1d99b604c8..e3594e3a82 100644 --- a/keras_nlp/layers/transformer_decoder_test.py +++ b/keras_nlp/layers/transformer_decoder_test.py @@ -90,6 +90,15 @@ def test_get_config_and_from_config(self): restored_decoder.get_config(), {**config, **expected_config_subset} ) + self.assertRaises( + ValueError, + transformer_decoder.TransformerDecoder, + intermediate_dim=4, + num_heads=2, + dropout=0.5, + kernel_initializer="Invalid", + ) + def test_one_training_step_of_transformer_encoder(self): class MyModel(keras.Model): def __init__(self): diff --git a/keras_nlp/layers/transformer_encoder_test.py b/keras_nlp/layers/transformer_encoder_test.py index ac41565d31..60dfbee0e6 100644 --- a/keras_nlp/layers/transformer_encoder_test.py +++ 
b/keras_nlp/layers/transformer_encoder_test.py @@ -79,6 +79,15 @@ def test_get_config_and_from_config(self): restored_encoder.get_config(), {**config, **expected_config_subset} ) + self.assertRaises( + ValueError, + transformer_encoder.TransformerEncoder, + intermediate_dim=4, + num_heads=2, + dropout=0.5, + kernel_initializer="Invalid", + ) + def test_one_training_step_of_transformer_encoder(self): encoder = transformer_encoder.TransformerEncoder( intermediate_dim=4, From e18c4ff270a21af7e6be4b85ff153c4af9dcaa02 Mon Sep 17 00:00:00 2001 From: Aflah <72096386+aflah02@users.noreply.github.com> Date: Tue, 22 Mar 2022 17:45:41 +0530 Subject: [PATCH 13/16] Keeping Attribute Set. From Const. Arg. Together --- keras_nlp/layers/transformer_decoder.py | 2 +- keras_nlp/layers/transformer_encoder.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/keras_nlp/layers/transformer_decoder.py b/keras_nlp/layers/transformer_decoder.py index 9a4234a174..e0c983c1d9 100644 --- a/keras_nlp/layers/transformer_decoder.py +++ b/keras_nlp/layers/transformer_decoder.py @@ -89,9 +89,9 @@ def __init__( self.dropout = dropout self.activation = keras.activations.get(activation) self.layer_norm_epsilon = layer_norm_epsilon - self._built = False self.kernel_initializer = keras.initializers.get(kernel_initializer) self.bias_initializer = keras.initializers.get(bias_initializer) + self._built = False def _build(self, input_shape): # Create layers based on input shape. diff --git a/keras_nlp/layers/transformer_encoder.py b/keras_nlp/layers/transformer_encoder.py index 525b3d164f..46ab3d8b2c 100644 --- a/keras_nlp/layers/transformer_encoder.py +++ b/keras_nlp/layers/transformer_encoder.py @@ -84,9 +84,9 @@ def __init__( self.dropout = dropout self.activation = keras.activations.get(activation) self.layer_norm_epsilon = layer_norm_epsilon - self._built = False self.kernel_initializer = keras.initializers.get(kernel_initializer) self.bias_initializer = keras.initializers.get(bias_initializer) + self._built = False def _build(self, input_shape): # Create layers based on input shape. 
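The ValueError tests added in patch 12 (and reworked below in patch 14) work because the constructor changes from patches 08–09 route every argument through `keras.initializers.get()` and `keras.activations.get()`, which validate identifiers at layer-construction time. A minimal, illustrative-only sketch of that behaviour plus the config round trip the other tests exercise, assuming TF 2.x `tf.keras` and the `TransformerEncoder` defined in this patch series:

```python
from tensorflow import keras

from keras_nlp.layers import transformer_encoder

# Strings, class names, and initializer instances all resolve to instances.
keras.initializers.get("glorot_uniform")                # -> a GlorotUniform instance
keras.initializers.get(keras.initializers.HeNormal())   # -> returned unchanged

# Unknown identifiers fail immediately, which is what the new
# invalid-initializer tests rely on.
try:
    keras.initializers.get("Invalid")
except ValueError as err:
    print("rejected:", err)

# The serialized config produced by get_config() can rebuild an equivalent layer,
# mirroring test_get_config_and_from_config.
encoder = transformer_encoder.TransformerEncoder(
    intermediate_dim=4,
    num_heads=2,
    kernel_initializer="HeNormal",
    bias_initializer="Zeros",
)
restored = transformer_encoder.TransformerEncoder.from_config(encoder.get_config())
```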
From 190404a4f27bb67fe655bdf8790d3bad0b8f5d82 Mon Sep 17 00:00:00 2001 From: Aflah <72096386+aflah02@users.noreply.github.com> Date: Tue, 22 Mar 2022 23:55:39 +0530 Subject: [PATCH 14/16] New test for Value Error if Invalid Initializer --- keras_nlp/layers/transformer_decoder_test.py | 16 ++++++++-------- keras_nlp/layers/transformer_encoder_test.py | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/keras_nlp/layers/transformer_decoder_test.py b/keras_nlp/layers/transformer_decoder_test.py index e3594e3a82..dc142a90e0 100644 --- a/keras_nlp/layers/transformer_decoder_test.py +++ b/keras_nlp/layers/transformer_decoder_test.py @@ -90,14 +90,14 @@ def test_get_config_and_from_config(self): restored_decoder.get_config(), {**config, **expected_config_subset} ) - self.assertRaises( - ValueError, - transformer_decoder.TransformerDecoder, - intermediate_dim=4, - num_heads=2, - dropout=0.5, - kernel_initializer="Invalid", - ) + def test_value_error_when_invalid_kernel_inititalizer(self): + with self.assertRaises(ValueError): + transformer_decoder.TransformerDecoder( + intermediate_dim=4, + num_heads=2, + dropout=0.5, + kernel_initializer="Invalid", + ) def test_one_training_step_of_transformer_encoder(self): class MyModel(keras.Model): diff --git a/keras_nlp/layers/transformer_encoder_test.py b/keras_nlp/layers/transformer_encoder_test.py index 60dfbee0e6..dd3ae88e53 100644 --- a/keras_nlp/layers/transformer_encoder_test.py +++ b/keras_nlp/layers/transformer_encoder_test.py @@ -79,14 +79,14 @@ def test_get_config_and_from_config(self): restored_encoder.get_config(), {**config, **expected_config_subset} ) - self.assertRaises( - ValueError, - transformer_encoder.TransformerEncoder, - intermediate_dim=4, - num_heads=2, - dropout=0.5, - kernel_initializer="Invalid", - ) + def test_value_error_when_invalid_kernel_inititalizer(self): + with self.assertRaises(ValueError): + transformer_encoder.TransformerEncoder( + intermediate_dim=4, + num_heads=2, + dropout=0.5, + kernel_initializer="Invalid", + ) def test_one_training_step_of_transformer_encoder(self): encoder = transformer_encoder.TransformerEncoder( From d16effb3563d2a593b9fa8b7504fe2d3f790736e Mon Sep 17 00:00:00 2001 From: Aflah <72096386+aflah02@users.noreply.github.com> Date: Wed, 23 Mar 2022 00:45:50 +0530 Subject: [PATCH 15/16] Ran format and lint --- keras_nlp/layers/transformer_decoder.py | 4 +++- keras_nlp/layers/transformer_decoder_test.py | 1 + keras_nlp/layers/transformer_encoder.py | 4 +++- keras_nlp/layers/transformer_encoder_test.py | 1 + 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/keras_nlp/layers/transformer_decoder.py b/keras_nlp/layers/transformer_decoder.py index e0c983c1d9..87f695218d 100644 --- a/keras_nlp/layers/transformer_decoder.py +++ b/keras_nlp/layers/transformer_decoder.py @@ -243,7 +243,9 @@ def get_config(self): "kernel_initializer": keras.initializers.serialize( self.kernel_initializer ), - "bias_initializer": keras.initializers.serialize(self.bias_initializer), + "bias_initializer": keras.initializers.serialize( + self.bias_initializer + ), } ) return config diff --git a/keras_nlp/layers/transformer_decoder_test.py b/keras_nlp/layers/transformer_decoder_test.py index dc142a90e0..6cbfd5fd5b 100644 --- a/keras_nlp/layers/transformer_decoder_test.py +++ b/keras_nlp/layers/transformer_decoder_test.py @@ -17,6 +17,7 @@ import tensorflow as tf from tensorflow import keras + from keras_nlp.layers import transformer_decoder diff --git 
a/keras_nlp/layers/transformer_encoder.py b/keras_nlp/layers/transformer_encoder.py index 46ab3d8b2c..01e6005c1c 100644 --- a/keras_nlp/layers/transformer_encoder.py +++ b/keras_nlp/layers/transformer_encoder.py @@ -183,7 +183,9 @@ def get_config(self): "kernel_initializer": keras.initializers.serialize( self.kernel_initializer ), - "bias_initializer": keras.initializers.serialize(self.bias_initializer), + "bias_initializer": keras.initializers.serialize( + self.bias_initializer + ), } ) return config diff --git a/keras_nlp/layers/transformer_encoder_test.py b/keras_nlp/layers/transformer_encoder_test.py index dd3ae88e53..e131898949 100644 --- a/keras_nlp/layers/transformer_encoder_test.py +++ b/keras_nlp/layers/transformer_encoder_test.py @@ -17,6 +17,7 @@ import tensorflow as tf from tensorflow import keras + from keras_nlp.layers import transformer_encoder From 065d871429a8c5f8432f6a0d2c816e285c0376af Mon Sep 17 00:00:00 2001 From: Aflah <72096386+aflah02@users.noreply.github.com> Date: Thu, 24 Mar 2022 03:33:22 +0530 Subject: [PATCH 16/16] Fixed typo and also lines exceeding max length --- keras_nlp/layers/transformer_decoder.py | 12 +++++++----- keras_nlp/layers/transformer_encoder.py | 12 +++++++----- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/keras_nlp/layers/transformer_decoder.py b/keras_nlp/layers/transformer_decoder.py index 87f695218d..65e02fbc76 100644 --- a/keras_nlp/layers/transformer_decoder.py +++ b/keras_nlp/layers/transformer_decoder.py @@ -39,17 +39,19 @@ class TransformerDecoder(keras.layers.Layer): activation function of feedforward network. layer_norm_epsilon: float, defaults to 1e-5. The eps value in layer normalization components. - kernel_initializer: tf.keras.initializers initializer, defaults to "glorot_uniform". - The kernel initializer for the dense and multiheaded attention layers. - bias_initializer: tf.keras.initializers initializer, defaults to "zeros". - The bias initializer for the dense and multiheaded attention layers. + kernel_initializer: string or tf.keras.initializers initializer, + defaults to "glorot_uniform". The kernel initializer for + the dense and multiheaded attention layers. + bias_initializer: string or tf.keras.initializers initializer, + defaults to "zeros". The bias initializer for + the dense and multiheaded attention layers. name: string, defaults to None. The name of the layer. **kwargs: other keyword arguments. Examples: ```python # Create a single transformer decoder layer. - decoder = keras_nlp.layer.TransformerDecoder( + decoder = keras_nlp.layers.TransformerDecoder( intermediate_dim=64, num_heads=8) # Create a simple model containing the decoder. diff --git a/keras_nlp/layers/transformer_encoder.py b/keras_nlp/layers/transformer_encoder.py index 01e6005c1c..17366429bc 100644 --- a/keras_nlp/layers/transformer_encoder.py +++ b/keras_nlp/layers/transformer_encoder.py @@ -37,10 +37,12 @@ class TransformerEncoder(keras.layers.Layer): activation function of feedforward network. layer_norm_epsilon: float, defaults to 1e-5. The epsilon value in layer normalization components. - kernel_initializer: tf.keras.initializers initializer, defaults to "glorot_uniform". - The kernel initializer for the dense and multiheaded attention layers. - bias_initializer: tf.keras.initializers initializer, defaults to "zeros". - The bias initializer for the dense and multiheaded attention layers. + kernel_initializer: string or tf.keras.initializers initializer, + defaults to "glorot_uniform". 
The kernel initializer for + the dense and multiheaded attention layers. + bias_initializer: string or tf.keras.initializers initializer, + defaults to "zeros". The bias initializer for + the dense and multiheaded attention layers. name: string, defaults to None. The name of the layer. **kwargs: other keyword arguments. @@ -48,7 +50,7 @@ class TransformerEncoder(keras.layers.Layer): ```python # Create a single transformer encoder layer. - encoder = keras_nlp.layer.TransformerEncoder( + encoder = keras_nlp.layers.TransformerEncoder( intermediate_dim=64, num_heads=8) # Create a simple model containing the encoder.