diff --git a/keras_nlp/models/roberta/roberta_backbone_test.py b/keras_nlp/models/roberta/roberta_backbone_test.py
index 2247f06508..203d3474e3 100644
--- a/keras_nlp/models/roberta/roberta_backbone_test.py
+++ b/keras_nlp/models/roberta/roberta_backbone_test.py
@@ -26,22 +26,18 @@ class RobertaBackboneTest(tf.test.TestCase, parameterized.TestCase):
     def setUp(self):
-        self.model = RobertaBackbone(
-            vocabulary_size=1000,
+        self.backbone = RobertaBackbone(
+            vocabulary_size=10,
             num_layers=2,
             num_heads=2,
-            hidden_dim=64,
-            intermediate_dim=128,
-            max_sequence_length=128,
+            hidden_dim=2,
+            intermediate_dim=4,
+            max_sequence_length=5,
         )
         self.batch_size = 8
         self.input_batch = {
-            "token_ids": tf.ones(
-                (self.batch_size, self.model.max_sequence_length), dtype="int32"
-            ),
-            "padding_mask": tf.ones(
-                (self.batch_size, self.model.max_sequence_length), dtype="int32"
-            ),
+            "token_ids": tf.ones((2, 5), dtype="int32"),
+            "padding_mask": tf.ones((2, 5), dtype="int32"),
         }

         self.input_dataset = tf.data.Dataset.from_tensor_slices(
@@ -49,49 +45,46 @@ def setUp(self):
         ).batch(2)

     def test_valid_call_roberta(self):
-        self.model(self.input_batch)
+        self.backbone(self.input_batch)

-        # Check default name passed through
-        self.assertRegexpMatches(self.model.name, "roberta_backbone")
+    def test_token_embedding(self):
+        output = self.backbone.token_embedding(self.input_batch["token_ids"])
+        self.assertEqual(output.shape, (2, 5, 2))

-    @parameterized.named_parameters(
-        ("jit_compile_false", False), ("jit_compile_true", True)
-    )
-    def test_roberta_compile(self, jit_compile):
-        self.model.compile(jit_compile=jit_compile)
-        self.model.predict(self.input_batch)
+    def test_name(self):
+        self.assertRegexpMatches(self.backbone.name, "roberta_backbone")

-    @parameterized.named_parameters(
-        ("jit_compile_false", False), ("jit_compile_true", True)
-    )
-    def test_roberta_compile_batched_ds(self, jit_compile):
-        self.model.compile(jit_compile=jit_compile)
-        self.model.predict(self.input_dataset)
+    def test_predict(self):
+        self.backbone.predict(self.input_batch)
+        self.backbone.predict(self.input_dataset)
+
+    def test_serialization(self):
+        new_backbone = keras.utils.deserialize_keras_object(
+            keras.utils.serialize_keras_object(self.backbone)
+        )
+        self.assertEqual(new_backbone.get_config(), self.backbone.get_config())

     def test_variable_sequence_length_call_roberta(self):
-        for seq_length in (25, 50, 75):
+        for seq_length in (2, 3, 4):
             input_data = {
-                "token_ids": tf.ones(
-                    (self.batch_size, seq_length), dtype="int32"
-                ),
-                "padding_mask": tf.ones(
-                    (self.batch_size, seq_length), dtype="int32"
-                ),
+                "token_ids": tf.ones((2, seq_length), dtype="int32"),
+                "padding_mask": tf.ones((2, seq_length), dtype="int32"),
             }
-            output = self.model(input_data)
+            output = self.backbone(input_data)
             self.assertAllEqual(
                 tf.shape(output),
-                [self.batch_size, seq_length, self.model.hidden_dim],
+                [2, seq_length, self.backbone.hidden_dim],
             )

     @parameterized.named_parameters(
         ("tf_format", "tf", "model"),
         ("keras_format", "keras_v3", "model.keras"),
     )
+    @pytest.mark.large  # Saving is slow, so mark these large.
     def test_saved_model(self, save_format, filename):
-        model_output = self.model(self.input_batch)
+        model_output = self.backbone(self.input_batch)
         save_path = os.path.join(self.get_temp_dir(), filename)
-        self.model.save(save_path, save_format=save_format)
+        self.backbone.save(save_path, save_format=save_format)
         restored_model = keras.models.load_model(save_path)

         # Check we got the real object back.
@@ -107,7 +100,7 @@ def test_saved_model(self, save_format, filename):

 class RobertaBackboneTPUTest(tf.test.TestCase, parameterized.TestCase):
     def setUp(self):
         with self.tpu_strategy.scope():
-            self.model = RobertaBackbone(
+            self.backbone = RobertaBackbone(
                 vocabulary_size=1000,
                 num_layers=2,
                 num_heads=2,
@@ -124,5 +117,5 @@ def setUp(self):
         ).batch(2)

     def test_predict(self):
-        self.model.compile()
-        self.model.predict(self.input_dataset)
+        self.backbone.compile()
+        self.backbone.predict(self.input_dataset)
diff --git a/keras_nlp/models/roberta/roberta_classifier_test.py b/keras_nlp/models/roberta/roberta_classifier_test.py
index 13701e44d6..d0fbd67364 100644
--- a/keras_nlp/models/roberta/roberta_classifier_test.py
+++ b/keras_nlp/models/roberta/roberta_classifier_test.py
@@ -15,6 +15,7 @@

 import os

+import pytest
 import tensorflow as tf
 from absl.testing import parameterized
 from tensorflow import keras
@@ -27,14 +28,6 @@ class RobertaClassifierTest(tf.test.TestCase, parameterized.TestCase):
     def setUp(self):
-        self.backbone = RobertaBackbone(
-            vocabulary_size=1000,
-            num_layers=2,
-            num_heads=2,
-            hidden_dim=64,
-            intermediate_dim=128,
-            max_sequence_length=128,
-        )
         self.vocab = {
             "<s>": 0,
             "<pad>": 1,
@@ -58,77 +51,69 @@ def setUp(self):
         self.merges = merges
         self.preprocessor = RobertaPreprocessor(
             RobertaTokenizer(vocabulary=self.vocab, merges=self.merges),
-            sequence_length=8,
+            sequence_length=5,
+        )
+        self.backbone = RobertaBackbone(
+            vocabulary_size=self.preprocessor.tokenizer.vocabulary_size(),
+            num_layers=2,
+            num_heads=2,
+            hidden_dim=2,
+            intermediate_dim=4,
+            max_sequence_length=self.preprocessor.packer.sequence_length,
         )
         self.classifier = RobertaClassifier(
             self.backbone,
             4,
             preprocessor=self.preprocessor,
         )
-        self.classifier_no_preprocessing = RobertaClassifier(
-            self.backbone,
-            4,
-            preprocessor=None,
-        )
+        # Setup data.
         self.raw_batch = tf.constant(
             [
                 " airplane at airport",
                 " the airplane is the best",
-                " the best airport",
-                " kohli is the best",
             ]
         )
         self.preprocessed_batch = self.preprocessor(self.raw_batch)
         self.raw_dataset = tf.data.Dataset.from_tensor_slices(
-            (self.raw_batch, tf.ones((4,)))
+            (self.raw_batch, tf.ones((2,)))
         ).batch(2)
         self.preprocessed_dataset = self.raw_dataset.map(self.preprocessor)

     def test_valid_call_classifier(self):
         self.classifier(self.preprocessed_batch)

-    @parameterized.named_parameters(
-        ("jit_compile_false", False), ("jit_compile_true", True)
-    )
-    def test_roberta_classifier_predict(self, jit_compile):
-        self.classifier.compile(jit_compile=jit_compile)
+    def test_classifier_predict(self):
         self.classifier.predict(self.raw_batch)
+        self.classifier.preprocessor = None
+        self.classifier.predict(self.preprocessed_batch)

-    @parameterized.named_parameters(
-        ("jit_compile_false", False), ("jit_compile_true", True)
-    )
-    def test_roberta_classifier_predict_no_preprocessing(self, jit_compile):
-        self.classifier_no_preprocessing.compile(jit_compile=jit_compile)
-        self.classifier_no_preprocessing.predict(self.preprocessed_batch)
-
-    def test_roberta_classifier_fit_default_compile(self):
+    def test_classifier_fit(self):
         self.classifier.fit(self.raw_dataset)
+        self.classifier.preprocessor = None
+        self.classifier.fit(self.preprocessed_dataset)

-    @parameterized.named_parameters(
-        ("jit_compile_false", False), ("jit_compile_true", True)
-    )
-    def test_roberta_classifier_fit(self, jit_compile):
+    def test_classifier_fit_no_xla(self):
+        self.classifier.preprocessor = None
         self.classifier.compile(
-            loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
-            jit_compile=jit_compile,
+            loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
+            jit_compile=False,
         )
-        self.classifier.fit(self.raw_dataset)
-
-    @parameterized.named_parameters(
-        ("jit_compile_false", False), ("jit_compile_true", True)
-    )
-    def test_roberta_classifier_fit_no_preprocessing(self, jit_compile):
-        self.classifier_no_preprocessing.compile(
-            loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
-            jit_compile=jit_compile,
+        self.classifier.fit(self.preprocessed_dataset)
+
+    def test_serialization(self):
+        config = keras.utils.serialize_keras_object(self.classifier)
+        new_classifier = keras.utils.deserialize_keras_object(config)
+        self.assertEqual(
+            new_classifier.get_config(),
+            self.classifier.get_config(),
         )
-        self.classifier_no_preprocessing.fit(self.preprocessed_dataset)

     @parameterized.named_parameters(
         ("tf_format", "tf", "model"),
         ("keras_format", "keras_v3", "model.keras"),
     )
+    @pytest.mark.large  # Saving is slow, so mark these large.
     def test_saved_model(self, save_format, filename):
         model_output = self.classifier.predict(self.raw_batch)
         save_path = os.path.join(self.get_temp_dir(), filename)
diff --git a/keras_nlp/models/roberta/roberta_masked_lm_preprocessor_test.py b/keras_nlp/models/roberta/roberta_masked_lm_preprocessor_test.py
index 4795cbbe02..97719d63f5 100644
--- a/keras_nlp/models/roberta/roberta_masked_lm_preprocessor_test.py
+++ b/keras_nlp/models/roberta/roberta_masked_lm_preprocessor_test.py
@@ -16,6 +16,7 @@

 import os

+import pytest
 import tensorflow as tf
 from absl.testing import parameterized
 from tensorflow import keras
@@ -140,10 +141,19 @@ def test_no_masking_zero_rate(self):
         self.assertAllEqual(y, [0, 0, 0, 0, 0])
         self.assertAllEqual(sw, [0.0, 0.0, 0.0, 0.0, 0.0])

+    def test_serialization(self):
+        config = keras.utils.serialize_keras_object(self.preprocessor)
+        new_preprocessor = keras.utils.deserialize_keras_object(config)
+        self.assertEqual(
+            new_preprocessor.get_config(),
+            self.preprocessor.get_config(),
+        )
+
     @parameterized.named_parameters(
         ("tf_format", "tf", "model"),
         ("keras_format", "keras_v3", "model.keras"),
     )
+    @pytest.mark.large  # Saving is slow, so mark these large.
     def test_saved_model(self, save_format, filename):
         input_data = tf.constant([" airplane at airport"])
diff --git a/keras_nlp/models/roberta/roberta_masked_lm_test.py b/keras_nlp/models/roberta/roberta_masked_lm_test.py
index a80c83086b..315a22e72d 100644
--- a/keras_nlp/models/roberta/roberta_masked_lm_test.py
+++ b/keras_nlp/models/roberta/roberta_masked_lm_test.py
@@ -15,6 +15,7 @@

 import os

+import pytest
 import tensorflow as tf
 from absl.testing import parameterized
 from tensorflow import keras
@@ -29,14 +30,6 @@ class RobertaMaskedLMTest(tf.test.TestCase, parameterized.TestCase):
     def setUp(self):
-        self.backbone = RobertaBackbone(
-            vocabulary_size=1000,
-            num_layers=2,
-            num_heads=2,
-            hidden_dim=64,
-            intermediate_dim=128,
-            max_sequence_length=128,
-        )
         self.vocab = {
             "<s>": 0,
             "<pad>": 1,
@@ -60,9 +53,17 @@ def setUp(self):
         self.merges = merges
         self.preprocessor = RobertaMaskedLMPreprocessor(
             RobertaTokenizer(vocabulary=self.vocab, merges=self.merges),
-            sequence_length=8,
+            sequence_length=5,
             mask_selection_length=2,
         )
+        self.backbone = RobertaBackbone(
+            vocabulary_size=self.preprocessor.tokenizer.vocabulary_size(),
+            num_layers=2,
+            num_heads=2,
+            hidden_dim=2,
+            intermediate_dim=4,
+            max_sequence_length=self.preprocessor.packer.sequence_length,
+        )
         self.masked_lm = RobertaMaskedLM(
             self.backbone,
             preprocessor=self.preprocessor,
@@ -76,61 +77,50 @@ def setUp(self):
             [
                 " airplane at airport",
                 " the airplane is the best",
-                " the best airport",
-                " kohli is the best",
             ]
         )
-        self.preprocessed_batch = self.preprocessor(self.raw_batch)[0]
+        self.preprocessed_batch = self.preprocessor(self.raw_batch)
         self.raw_dataset = tf.data.Dataset.from_tensor_slices(
             self.raw_batch
         ).batch(2)
         self.preprocessed_dataset = self.raw_dataset.map(self.preprocessor)

     def test_valid_call_masked_lm(self):
-        self.masked_lm(self.preprocessed_batch)
+        self.masked_lm(self.preprocessed_batch[0])

-    @parameterized.named_parameters(
-        ("jit_compile_false", False), ("jit_compile_true", True)
-    )
-    def test_roberta_masked_lm_predict(self, jit_compile):
-        self.masked_lm.compile(jit_compile=jit_compile)
+    def test_masked_lm_predict(self):
         self.masked_lm.predict(self.raw_batch)
+        self.masked_lm.preprocessor = None
+        self.masked_lm.predict(self.preprocessed_batch[0])

-    @parameterized.named_parameters(
("jit_compile_false", False), ("jit_compile_true", True) - ) - def test_roberta_masked_lm_predict_no_preprocessing(self, jit_compile): - self.masked_lm_no_preprocessing.compile(jit_compile=jit_compile) - self.masked_lm_no_preprocessing.predict(self.preprocessed_batch) - - def test_roberta_masked_lm_fit_default_compile(self): + def test_classifier_fit(self): self.masked_lm.fit(self.raw_dataset) - - @parameterized.named_parameters( - ("jit_compile_false", False), ("jit_compile_true", True) - ) - def test_roberta_masked_lm_fit(self, jit_compile): - self.masked_lm.compile( - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), - jit_compile=jit_compile, + self.masked_lm.preprocessor = None + self.masked_lm.fit(self.preprocessed_dataset) + + def test_serialization(self): + config = keras.utils.serialize_keras_object(self.masked_lm) + new_classifier = keras.utils.deserialize_keras_object(config) + self.assertEqual( + new_classifier.get_config(), + self.masked_lm.get_config(), ) - self.masked_lm.fit(self.raw_dataset) - @parameterized.named_parameters( - ("jit_compile_false", False), ("jit_compile_true", True) - ) - def test_roberta_masked_lm_fit_no_preprocessing(self, jit_compile): - self.masked_lm_no_preprocessing.compile( - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), - jit_compile=jit_compile, + def test_classifier_fit_no_xla(self): + self.masked_lm.preprocessor = None + self.masked_lm.compile( + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False), + jit_compile=False, ) - self.masked_lm_no_preprocessing.fit(self.preprocessed_dataset) + self.masked_lm.fit(self.preprocessed_dataset) @parameterized.named_parameters( ("tf_format", "tf", "model"), ("keras_format", "keras_v3", "model.keras"), ) + @pytest.mark.large def test_saved_model(self, save_format, filename): + model_output = self.masked_lm.predict(self.raw_batch) save_path = os.path.join(self.get_temp_dir(), filename) self.masked_lm.save(save_path, save_format=save_format) restored_model = keras.models.load_model(save_path) @@ -138,7 +128,6 @@ def test_saved_model(self, save_format, filename): # Check we got the real object back. self.assertIsInstance(restored_model, RobertaMaskedLM) - model_output = self.masked_lm(self.preprocessed_batch) - restored_output = restored_model(self.preprocessed_batch) - + # Check that output matches. 
+        restored_output = restored_model.predict(self.raw_batch)
         self.assertAllClose(model_output, restored_output)
diff --git a/keras_nlp/models/roberta/roberta_preprocessor_test.py b/keras_nlp/models/roberta/roberta_preprocessor_test.py
index b6021b86f1..b12a344324 100644
--- a/keras_nlp/models/roberta/roberta_preprocessor_test.py
+++ b/keras_nlp/models/roberta/roberta_preprocessor_test.py
@@ -16,6 +16,7 @@

 import os

+import pytest
 import tensorflow as tf
 from absl.testing import parameterized
 from tensorflow import keras
@@ -139,10 +140,19 @@ def test_errors_for_2d_list_input(self):
         with self.assertRaises(ValueError):
             self.preprocessor(ambiguous_input)

+    def test_serialization(self):
+        config = keras.utils.serialize_keras_object(self.preprocessor)
+        new_preprocessor = keras.utils.deserialize_keras_object(config)
+        self.assertEqual(
+            new_preprocessor.get_config(),
+            self.preprocessor.get_config(),
+        )
+
     @parameterized.named_parameters(
         ("tf_format", "tf", "model"),
         ("keras_format", "keras_v3", "model.keras"),
     )
+    @pytest.mark.large
     def test_saved_model(self, save_format, filename):
         input_data = tf.constant([" airplane at airport"])
diff --git a/keras_nlp/models/roberta/roberta_tokenizer_test.py b/keras_nlp/models/roberta/roberta_tokenizer_test.py
index 5da219b946..0e73285d88 100644
--- a/keras_nlp/models/roberta/roberta_tokenizer_test.py
+++ b/keras_nlp/models/roberta/roberta_tokenizer_test.py
@@ -16,6 +16,7 @@

 import os

+import pytest
 import tensorflow as tf
 from absl.testing import parameterized
 from tensorflow import keras
@@ -70,10 +71,19 @@ def test_errors_missing_special_tokens(self):
         with self.assertRaises(ValueError):
             RobertaTokenizer(vocabulary=["a", "b", "c"], merges=[])

+    def test_serialization(self):
+        config = keras.utils.serialize_keras_object(self.tokenizer)
+        new_tokenizer = keras.utils.deserialize_keras_object(config)
+        self.assertEqual(
+            new_tokenizer.get_config(),
+            self.tokenizer.get_config(),
+        )
+
     @parameterized.named_parameters(
         ("tf_format", "tf", "model"),
         ("keras_format", "keras_v3", "model.keras"),
     )
+    @pytest.mark.large
     def test_saved_model(self, save_format, filename):
         input_data = tf.constant([" airplane at airport"])