keras-team · mattdangerw · Jul 10, 2023 · May 17, 2023 · Jul 7, 2023 · Jul 7, 2023
diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml
@@ -102,7 +102,7 @@ jobs:
       env:
         KERAS_BACKEND: ${{ matrix.backend }}
       run: |
-        pytest --run_large keras_nlp/layers/modeling keras_nlp/samplers keras_nlp/tokenizers keras_nlp/metrics
+        pytest keras_nlp/
   format:
     name: Check the code format
     runs-on: ubuntu-latest

diff --git a/keras_nlp/conftest.py b/keras_nlp/conftest.py
@@ -122,7 +122,12 @@ def pytest_collection_modifyitems(config, items):
             item.add_marker(skip_tf_only)
 
 
+# Disable traceback filtering for quicker debugging of test failures.
+tf.debugging.disable_traceback_filtering()
 if backend_config.multi_backend():
     keras.config.disable_traceback_filtering()
 
-tf.debugging.disable_traceback_filtering()
+# One-off setup for dtensor tests.
+if not backend_config.multi_backend():
+    keras.backend.experimental.enable_tf_random_generator()
+    keras.utils.set_random_seed(1337)
diff --git a/keras_nlp/metrics/rouge_l.py b/keras_nlp/metrics/rouge_l.py
@@ -102,14 +102,13 @@ class RougeL(RougeBase):
 
     3. Pass the metric to `model.compile()`.
     >>> inputs = keras.Input(shape=(), dtype='string')
-    >>> outputs = tf.strings.lower(inputs)
+    >>> outputs = keras.layers.Identity()(inputs)
     >>> model = keras.Model(inputs, outputs)
     >>> model.compile(metrics=[keras_nlp.metrics.RougeL()])
-    >>> x = tf.constant(["HELLO THIS IS FUN"])
+    >>> y_pred = x = tf.constant(["hello this is fun"])
     >>> y = tf.constant(["hello this is awesome"])
-    >>> metric_dict = model.evaluate(x, y, return_dict=True)
-    >>> metric_dict["f1_score"]
-     0.75
+    >>> model.compute_metrics(x, y, y_pred, sample_weight=None)["f1_score"]
+    0.75
     """
 
     def __init__(

diff --git a/keras_nlp/metrics/rouge_n.py b/keras_nlp/metrics/rouge_n.py
@@ -121,13 +121,12 @@ class RougeN(RougeBase):
 
     3. Pass the metric to `model.compile()`.
     >>> inputs = keras.Input(shape=(), dtype='string')
-    >>> outputs = tf.strings.lower(inputs)
+    >>> outputs = keras.layers.Identity()(inputs)
     >>> model = keras.Model(inputs, outputs)
     >>> model.compile(metrics=[keras_nlp.metrics.RougeN()])
-    >>> x = tf.constant(["HELLO THIS IS FUN"])
+    >>> y_pred = x = tf.constant(["hello this is fun"])
     >>> y = tf.constant(["hello this is awesome"])
-    >>> metric_dict = model.evaluate(x, y, return_dict=True)
-    >>> metric_dict["f1_score"]
+    >>> model.compute_metrics(x, y, y_pred, sample_weight=None)["f1_score"]
     0.6666666865348816
     """
 

diff --git a/keras_nlp/models/albert/albert_backbone_test.py b/keras_nlp/models/albert/albert_backbone_test.py
@@ -19,6 +19,7 @@
 import tensorflow as tf
 
 from keras_nlp.backend import keras
+from keras_nlp.backend import ops
 from keras_nlp.models.albert.albert_backbone import AlbertBackbone
 from keras_nlp.tests.test_case import TestCase
 
@@ -38,9 +39,9 @@ def setUp(self):
         )
         self.batch_size = 8
         self.input_batch = {
-            "token_ids": tf.ones((2, 5), dtype="int32"),
-            "segment_ids": tf.ones((2, 5), dtype="int32"),
-            "padding_mask": tf.ones((2, 5), dtype="int32"),
+            "token_ids": ops.ones((2, 5), dtype="int32"),
+            "segment_ids": ops.ones((2, 5), dtype="int32"),
+            "padding_mask": ops.ones((2, 5), dtype="int32"),
         }
 
         self.input_dataset = tf.data.Dataset.from_tensor_slices(
@@ -57,9 +58,9 @@ def test_name(self):
     def test_variable_sequence_length_call_albert(self):
         for seq_length in (2, 3, 4):
             input_data = {
-                "token_ids": tf.ones((2, seq_length), dtype="int32"),
-                "segment_ids": tf.ones((2, seq_length), dtype="int32"),
-                "padding_mask": tf.ones((2, seq_length), dtype="int32"),
+                "token_ids": ops.ones((2, seq_length), dtype="int32"),
+                "segment_ids": ops.ones((2, seq_length), dtype="int32"),
+                "padding_mask": ops.ones((2, seq_length), dtype="int32"),
             }
             self.backbone(input_data)
 
@@ -121,9 +122,9 @@ def setUp(self):
             )
 
         self.input_batch = {
-            "token_ids": tf.ones((8, 128), dtype="int32"),
-            "segment_ids": tf.ones((8, 128), dtype="int32"),
-            "padding_mask": tf.ones((8, 128), dtype="int32"),
+            "token_ids": ops.ones((8, 128), dtype="int32"),
+            "segment_ids": ops.ones((8, 128), dtype="int32"),
+            "padding_mask": ops.ones((8, 128), dtype="int32"),
         }
         self.input_dataset = tf.data.Dataset.from_tensor_slices(
             self.input_batch

diff --git a/keras_nlp/models/albert/albert_classifier.py b/keras_nlp/models/albert/albert_classifier.py
@@ -22,7 +22,6 @@
 from keras_nlp.models.albert.albert_preprocessor import AlbertPreprocessor
 from keras_nlp.models.albert.albert_presets import backbone_presets
 from keras_nlp.models.task import Task
-from keras_nlp.utils.keras_utils import is_xla_compatible
 from keras_nlp.utils.python_utils import classproperty
 
 
@@ -192,7 +191,7 @@ def __init__(
             ),
             optimizer=keras.optimizers.Adam(5e-5),
             metrics=[keras.metrics.SparseCategoricalAccuracy()],
-            jit_compile=is_xla_compatible(self),
+            jit_compile=True,
         )
 
     def get_config(self):

diff --git a/keras_nlp/models/albert/albert_classifier_test.py b/keras_nlp/models/albert/albert_classifier_test.py
@@ -21,6 +21,7 @@
 import tensorflow as tf
 
 from keras_nlp.backend import keras
+from keras_nlp.backend import ops
 from keras_nlp.models.albert.albert_backbone import AlbertBackbone
 from keras_nlp.models.albert.albert_classifier import AlbertClassifier
 from keras_nlp.models.albert.albert_preprocessor import AlbertPreprocessor
@@ -77,15 +78,13 @@ def setUp(self):
             activation=keras.activations.softmax,
         )
 
-        self.raw_batch = tf.constant(
-            [
-                "the quick brown fox.",
-                "the slow brown fox.",
-            ]
-        )
+        self.raw_batch = [
+            "the quick brown fox.",
+            "the slow brown fox.",
+        ]
         self.preprocessed_batch = self.preprocessor(self.raw_batch)
         self.raw_dataset = tf.data.Dataset.from_tensor_slices(
-            (self.raw_batch, tf.ones((2,)))
+            (self.raw_batch, ops.ones((2,)))
         ).batch(2)
         self.preprocessed_dataset = self.raw_dataset.map(self.preprocessor)
 
@@ -99,7 +98,7 @@ def test_classifier_predict(self):
         # Assert predictions match.
         self.assertAllClose(preds1, preds2)
         # Assert valid softmax output.
-        self.assertAllClose(tf.reduce_sum(preds2, axis=-1), [1.0, 1.0])
+        self.assertAllClose(ops.sum(preds2, axis=-1), [1.0, 1.0])
 
     def test_classifier_fit(self):
         self.classifier.fit(self.raw_dataset)

diff --git a/keras_nlp/models/albert/albert_masked_lm.py b/keras_nlp/models/albert/albert_masked_lm.py
@@ -26,7 +26,6 @@
 )
 from keras_nlp.models.albert.albert_presets import backbone_presets
 from keras_nlp.models.task import Task
-from keras_nlp.utils.keras_utils import is_xla_compatible
 from keras_nlp.utils.python_utils import classproperty
 
 
@@ -135,7 +134,7 @@ def __init__(self, backbone, preprocessor=None, **kwargs):
             loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
             optimizer=keras.optimizers.Adam(5e-5),
             weighted_metrics=[keras.metrics.SparseCategoricalAccuracy()],
-            jit_compile=is_xla_compatible(self),
+            jit_compile=True,
         )
 
     @classproperty

diff --git a/keras_nlp/models/albert/albert_masked_lm_preprocessor_test.py b/keras_nlp/models/albert/albert_masked_lm_preprocessor_test.py
@@ -152,6 +152,7 @@ def test_serialization(self):
         )
 
     @pytest.mark.large
+    @pytest.mark.tf_only
     def test_saved_model(self):
         input_data = tf.constant(["the quick brown fox"])
 

diff --git a/keras_nlp/models/albert/albert_masked_lm_test.py b/keras_nlp/models/albert/albert_masked_lm_test.py
@@ -85,14 +85,12 @@ def setUp(self):
             preprocessor=None,
         )
 
-        self.raw_batch = tf.constant(
-            [
-                "quick brown fox",
-                "eagle flew over fox",
-                "the eagle flew quick",
-                "a brown eagle",
-            ]
-        )
+        self.raw_batch = [
+            "quick brown fox",
+            "eagle flew over fox",
+            "the eagle flew quick",
+            "a brown eagle",
+        ]
         self.preprocessed_batch = self.preprocessor(self.raw_batch)[0]
         self.raw_dataset = tf.data.Dataset.from_tensor_slices(
             self.raw_batch

diff --git a/keras_nlp/models/albert/albert_preprocessor_test.py b/keras_nlp/models/albert/albert_preprocessor_test.py
@@ -166,6 +166,7 @@ def test_serialization(self):
         )
 
     @pytest.mark.large
+    @pytest.mark.tf_only
     def test_saved_model(self):
         input_data = tf.constant(["the quick brown fox"])
         inputs = keras.Input(dtype="string", shape=())

diff --git a/keras_nlp/models/albert/albert_presets_test.py b/keras_nlp/models/albert/albert_presets_test.py
@@ -14,9 +14,9 @@
 """Tests for loading pretrained model presets."""
 
 import pytest
-import tensorflow as tf
 from absl.testing import parameterized
 
+from keras_nlp.backend import ops
 from keras_nlp.models.albert.albert_backbone import AlbertBackbone
 from keras_nlp.models.albert.albert_classifier import AlbertClassifier
 from keras_nlp.models.albert.albert_preprocessor import AlbertPreprocessor
@@ -53,7 +53,7 @@ def test_preprocessor_output(self):
         ("load_weights", True), ("no_load_weights", False)
     )
     def test_classifier_output(self, load_weights):
-        input_data = tf.constant(["The quick brown fox."])
+        input_data = ["The quick brown fox."]
         model = AlbertClassifier.from_preset(
             "albert_base_en_uncased",
             num_classes=2,
@@ -67,9 +67,9 @@ def test_classifier_output(self, load_weights):
     )
     def test_classifier_output_without_preprocessing(self, load_weights):
         input_data = {
-            "token_ids": tf.constant([[101, 1996, 4248, 102]]),
-            "segment_ids": tf.constant([[0, 0, 0, 0]]),
-            "padding_mask": tf.constant([[1, 1, 1, 1]]),
+            "token_ids": ops.array([[101, 1996, 4248, 102]]),
+            "segment_ids": ops.array([[0, 0, 0, 0]]),
+            "padding_mask": ops.array([[1, 1, 1, 1]]),
         }
         model = AlbertClassifier.from_preset(
             "albert_base_en_uncased",
@@ -85,9 +85,9 @@ def test_classifier_output_without_preprocessing(self, load_weights):
     )
     def test_backbone_output(self, load_weights):
         input_data = {
-            "token_ids": tf.constant([[2, 13, 1, 3]]),
-            "segment_ids": tf.constant([[0, 0, 0, 0]]),
-            "padding_mask": tf.constant([[1, 1, 1, 1]]),
+            "token_ids": ops.array([[2, 13, 1, 3]]),
+            "segment_ids": ops.array([[0, 0, 0, 0]]),
+            "padding_mask": ops.array([[1, 1, 1, 1]]),
         }
         model = AlbertBackbone.from_preset(
             "albert_base_en_uncased", load_weights=load_weights
@@ -139,13 +139,11 @@ def test_load_albert(self, load_weights):
                 preset, load_weights=load_weights
             )
             input_data = {
-                "token_ids": tf.random.uniform(
+                "token_ids": ops.random.uniform(
                     shape=(1, 512), dtype="int64", maxval=model.vocabulary_size
                 ),
-                "segment_ids": tf.constant(
-                    [0] * 200 + [1] * 312, shape=(1, 512)
-                ),
-                "padding_mask": tf.constant([1] * 512, shape=(1, 512)),
+                "segment_ids": ops.array([0] * 200 + [1] * 312, shape=(1, 512)),
+                "padding_mask": ops.array([1] * 512, shape=(1, 512)),
             }
             model(input_data)
 
@@ -159,7 +157,7 @@ def test_load_albert_classifier(self, load_weights):
                 num_classes=2,
                 load_weights=load_weights,
             )
-            input_data = tf.constant(["This quick brown fox"])
+            input_data = ["This quick brown fox."]
             classifier.predict(input_data)
 
     @parameterized.named_parameters(
@@ -174,15 +172,13 @@ def test_load_albert_classifier_without_preprocessing(self, load_weights):
                 load_weights=load_weights,
             )
             input_data = {
-                "token_ids": tf.random.uniform(
+                "token_ids": ops.random.uniform(
                     shape=(1, 512),
                     dtype="int64",
                     maxval=classifier.backbone.vocabulary_size,
                 ),
-                "segment_ids": tf.constant(
-                    [0] * 200 + [1] * 312, shape=(1, 512)
-                ),
-                "padding_mask": tf.constant([1] * 512, shape=(1, 512)),
+                "segment_ids": ops.array([0] * 200 + [1] * 312, shape=(1, 512)),
+                "padding_mask": ops.array([1] * 512, shape=(1, 512)),
             }
             classifier.predict(input_data)
 

diff --git a/keras_nlp/models/albert/albert_tokenizer_test.py b/keras_nlp/models/albert/albert_tokenizer_test.py
@@ -56,14 +56,14 @@ def test_tokenize(self):
         self.assertAllEqual(output, [5, 10, 6, 8])
 
     def test_tokenize_batch(self):
-        input_data = tf.constant(["the quick brown fox", "the earth is round"])
+        input_data = ["the quick brown fox", "the earth is round"]
         output = self.tokenizer(input_data)
         self.assertAllEqual(output, [[5, 10, 6, 8], [5, 7, 9, 11]])
 
     def test_detokenize(self):
-        input_data = tf.constant([[5, 10, 6, 8]])
+        input_data = [[5, 10, 6, 8]]
         output = self.tokenizer.detokenize(input_data)
-        self.assertEqual(output, tf.constant(["the quick brown fox"]))
+        self.assertEqual(output, ["the quick brown fox"])
 
     def test_vocabulary_size(self):
         tokenizer = AlbertTokenizer(proto=self.proto)
@@ -91,6 +91,7 @@ def test_serialization(self):
         )
 
     @pytest.mark.large
+    @pytest.mark.tf_only
     def test_saved_model(self):
         input_data = tf.constant(["the quick brown fox"])