diff --git a/keras_nlp/models/albert/albert_backbone.py b/keras_nlp/models/albert/albert_backbone.py index a65df56453..36424faf85 100644 --- a/keras_nlp/models/albert/albert_backbone.py +++ b/keras_nlp/models/albert/albert_backbone.py @@ -76,13 +76,9 @@ class AlbertBackbone(Backbone): Examples: ```python input_data = { - "token_ids": tf.ones(shape=(1, 12), dtype="int64"), - "segment_ids": tf.constant( - [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0], shape=(1, 12) - ), - "padding_mask": tf.constant( - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], shape=(1, 12) - ), + "token_ids": np.ones(shape=(1, 12), dtype="int32"), + "segment_ids": np.array([[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]]), + "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]]), } # Randomly initialized ALBERT encoder diff --git a/keras_nlp/models/albert/albert_classifier.py b/keras_nlp/models/albert/albert_classifier.py index fe2bd7c7ff..e884d05d88 100644 --- a/keras_nlp/models/albert/albert_classifier.py +++ b/keras_nlp/models/albert/albert_classifier.py @@ -85,13 +85,9 @@ class AlbertClassifier(Task): Preprocessed integer data. ```python features = { - "token_ids": tf.ones(shape=(2, 12), dtype="int64"), - "segment_ids": tf.constant( - [[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) - ), - "padding_mask": tf.constant( - [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) - ), + "token_ids": np.ones(shape=(2, 12), dtype="int32"), + "segment_ids": np.array([[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]] * 2), + "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2), } labels = [0, 3] diff --git a/keras_nlp/models/albert/albert_masked_lm.py b/keras_nlp/models/albert/albert_masked_lm.py index 9843282c5a..fa04b83b09 100644 --- a/keras_nlp/models/albert/albert_masked_lm.py +++ b/keras_nlp/models/albert/albert_masked_lm.py @@ -81,14 +81,10 @@ class AlbertMaskedLM(Task): ```python # Create preprocessed batch where 0 is the mask token. 
features = { - "token_ids": tf.constant( - [[1, 2, 0, 4, 0, 6, 7, 8]] * 2, shape=(2, 8) - ), - "padding_mask": tf.constant( - [[1, 1, 1, 1, 1, 1, 1, 1]] * 2, shape=(2, 8) - ), - "mask_positions": tf.constant([[2, 4]] * 2, shape=(2, 2)), - "segment_ids": tf.constant([[0, 0, 0, 0, 0, 0, 0, 0]] * 2, shape=(2, 8)) + "token_ids": np.array([[1, 2, 0, 4, 0, 6, 7, 8]] * 2), + "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1]] * 2), + "mask_positions": np.array([[2, 4]] * 2), + "segment_ids": np.array([[0, 0, 0, 0, 0, 0, 0, 0]] * 2), } # Labels are the original masked values. labels = [[3, 5]] * 2 diff --git a/keras_nlp/models/bart/bart_backbone.py b/keras_nlp/models/bart/bart_backbone.py index 3e6020421e..594f2467a0 100644 --- a/keras_nlp/models/bart/bart_backbone.py +++ b/keras_nlp/models/bart/bart_backbone.py @@ -65,13 +65,13 @@ class BartBackbone(Backbone): Examples: ```python input_data = { - "encoder_token_ids": tf.ones(shape=(1, 12), dtype="int64"), - "encoder_padding_mask": tf.constant( - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], shape=(1, 12) + "encoder_token_ids": np.ones(shape=(1, 12), dtype="int32"), + "encoder_padding_mask": np.array( + [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] ), - "decoder_token_ids": tf.ones(shape=(1, 12), dtype="int64"), - "decoder_padding_mask": tf.constant( - [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], shape=(1, 12) + "decoder_token_ids": np.ones(shape=(1, 12), dtype="int32"), + "decoder_padding_mask": np.array( + [[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]] ), } diff --git a/keras_nlp/models/bart/bart_seq_2_seq_lm.py b/keras_nlp/models/bart/bart_seq_2_seq_lm.py index 50868c4810..17c02e8e80 100644 --- a/keras_nlp/models/bart/bart_seq_2_seq_lm.py +++ b/keras_nlp/models/bart/bart_seq_2_seq_lm.py @@ -109,12 +109,12 @@ class BartSeq2SeqLM(GenerativeTask): # "The quick brown fox", and the decoder inputs to "The fast". Use # `"padding_mask"` to indicate values that should not be overridden. 
prompt = { - "encoder_token_ids": tf.constant([[0, 133, 2119, 6219, 23602, 2, 1, 1]]), - "encoder_padding_mask": tf.constant( + "encoder_token_ids": np.array([[0, 133, 2119, 6219, 23602, 2, 1, 1]]), + "encoder_padding_mask": np.array( [[True, True, True, True, True, True, False, False]] ), - "decoder_token_ids": tf.constant([[2, 0, 133, 1769, 2, 1, 1]]), - "decoder_padding_mask": tf.constant([[True, True, True, True, False, False]]) + "decoder_token_ids": np.array([[2, 0, 133, 1769, 2, 1, 1]]), + "decoder_padding_mask": np.array([[True, True, True, True, False, False, False]]) } bart_lm = keras_nlp.models.BartSeq2SeqLM.from_preset( @@ -137,13 +137,13 @@ class BartSeq2SeqLM(GenerativeTask): Call `fit()` without preprocessing. ```python x = { - "encoder_token_ids": tf.constant([[0, 133, 2119, 2, 1]] * 2), - "encoder_padding_mask": tf.constant([[1, 1, 1, 1, 0]] * 2), - "decoder_token_ids": tf.constant([[2, 0, 133, 1769, 2]] * 2), - "decoder_padding_mask": tf.constant([[1, 1, 1, 1, 1]] * 2), + "encoder_token_ids": np.array([[0, 133, 2119, 2, 1]] * 2), + "encoder_padding_mask": np.array([[1, 1, 1, 1, 0]] * 2), + "decoder_token_ids": np.array([[2, 0, 133, 1769, 2]] * 2), + "decoder_padding_mask": np.array([[1, 1, 1, 1, 1]] * 2), } - y = tf.constant([[0, 133, 1769, 2, 1]] * 2) - sw = tf.constant([[1, 1, 1, 1, 0]] * 2) + y = np.array([[0, 133, 1769, 2, 1]] * 2) + sw = np.array([[1, 1, 1, 1, 0]] * 2) bart_lm = keras_nlp.models.BartSeq2SeqLM.from_preset( "bart_base_en", diff --git a/keras_nlp/models/bert/bert_backbone.py b/keras_nlp/models/bert/bert_backbone.py index 056086fe28..8bb6057acb 100644 --- a/keras_nlp/models/bert/bert_backbone.py +++ b/keras_nlp/models/bert/bert_backbone.py @@ -65,13 +65,9 @@ class BertBackbone(Backbone): Examples: ```python input_data = { - "token_ids": tf.ones(shape=(1, 12), dtype="int64"), - "segment_ids": tf.constant( - [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0], shape=(1, 12) - ), - "padding_mask": tf.constant( - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], 
shape=(1, 12) - ), + "token_ids": np.ones(shape=(1, 12), dtype="int32"), + "segment_ids": np.array([[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]]), + "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]]), } # Pretrained BERT encoder. diff --git a/keras_nlp/models/bert/bert_classifier.py b/keras_nlp/models/bert/bert_classifier.py index 3dff53ed8d..719dab5dfc 100644 --- a/keras_nlp/models/bert/bert_classifier.py +++ b/keras_nlp/models/bert/bert_classifier.py @@ -86,13 +86,9 @@ class BertClassifier(Task): Preprocessed integer data. ```python features = { - "token_ids": tf.ones(shape=(2, 12), dtype="int64"), - "segment_ids": tf.constant( - [[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) - ), - "padding_mask": tf.constant( - [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) - ), + "token_ids": np.ones(shape=(2, 12), dtype="int32"), + "segment_ids": np.array([[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]] * 2), + "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2), } labels = [0, 3] diff --git a/keras_nlp/models/bert/bert_masked_lm.py b/keras_nlp/models/bert/bert_masked_lm.py index 1b1cd4853f..f99314e91b 100644 --- a/keras_nlp/models/bert/bert_masked_lm.py +++ b/keras_nlp/models/bert/bert_masked_lm.py @@ -80,14 +80,10 @@ class BertMaskedLM(Task): ```python # Create preprocessed batch where 0 is the mask token. features = { - "token_ids": tf.constant( - [[1, 2, 0, 4, 0, 6, 7, 8]] * 2, shape=(2, 8) - ), - "padding_mask": tf.constant( - [[1, 1, 1, 1, 1, 1, 1, 1]] * 2, shape=(2, 8) - ), - "mask_positions": tf.constant([[2, 4]] * 2, shape=(2, 2)), - "segment_ids": tf.constant([[0, 0, 0, 0, 0, 0, 0, 0]] * 2, shape=(2, 8)) + "token_ids": np.array([[1, 2, 0, 4, 0, 6, 7, 8]] * 2), + "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1]] * 2), + "mask_positions": np.array([[2, 4]] * 2), + "segment_ids": np.array([[0, 0, 0, 0, 0, 0, 0, 0]] * 2) } # Labels are the original masked values. 
labels = [[3, 5]] * 2 diff --git a/keras_nlp/models/deberta_v3/deberta_v3_backbone.py b/keras_nlp/models/deberta_v3/deberta_v3_backbone.py index 1c889fc36c..1707e6d3fd 100644 --- a/keras_nlp/models/deberta_v3/deberta_v3_backbone.py +++ b/keras_nlp/models/deberta_v3/deberta_v3_backbone.py @@ -73,9 +73,8 @@ class DebertaV3Backbone(Backbone): Example usage: ```python input_data = { - "token_ids": tf.ones(shape=(1, 12), dtype="int64"), - "padding_mask": tf.constant( - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], shape=(1, 12)), + "token_ids": np.ones(shape=(1, 12), dtype="int32"), + "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]]), } # Pretrained DeBERTa encoder. diff --git a/keras_nlp/models/deberta_v3/deberta_v3_classifier.py b/keras_nlp/models/deberta_v3/deberta_v3_classifier.py index 2d15a95ab9..dc2e8b1db8 100644 --- a/keras_nlp/models/deberta_v3/deberta_v3_classifier.py +++ b/keras_nlp/models/deberta_v3/deberta_v3_classifier.py @@ -95,10 +95,8 @@ class DebertaV3Classifier(Task): Preprocessed integer data. ```python features = { - "token_ids": tf.ones(shape=(2, 12), dtype="int64"), - "padding_mask": tf.constant( - [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) - ), + "token_ids": np.ones(shape=(2, 12), dtype="int32"), + "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2), } labels = [0, 3] diff --git a/keras_nlp/models/deberta_v3/deberta_v3_masked_lm.py b/keras_nlp/models/deberta_v3/deberta_v3_masked_lm.py index 8fe6e0c919..d79453a4e1 100644 --- a/keras_nlp/models/deberta_v3/deberta_v3_masked_lm.py +++ b/keras_nlp/models/deberta_v3/deberta_v3_masked_lm.py @@ -84,13 +84,9 @@ class DebertaV3MaskedLM(Task): ```python # Create preprocessed batch where 0 is the mask token. 
features = { - "token_ids": tf.constant( - [[1, 2, 0, 4, 0, 6, 7, 8]] * 2, shape=(2, 8) - ), - "padding_mask": tf.constant( - [[1, 1, 1, 1, 1, 1, 1, 1]] * 2, shape=(2, 8) - ), - "mask_positions": tf.constant([[2, 4]] * 2, shape=(2, 2)), + "token_ids": np.array([[1, 2, 0, 4, 0, 6, 7, 8]] * 2), + "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1]] * 2), + "mask_positions": np.array([[2, 4]] * 2), } # Labels are the original masked values. labels = [[3, 5]] * 2 diff --git a/keras_nlp/models/distil_bert/distil_bert_backbone.py b/keras_nlp/models/distil_bert/distil_bert_backbone.py index 87097acb2b..23f9cfbfc2 100644 --- a/keras_nlp/models/distil_bert/distil_bert_backbone.py +++ b/keras_nlp/models/distil_bert/distil_bert_backbone.py @@ -68,10 +68,8 @@ class DistilBertBackbone(Backbone): Examples: ```python input_data = { - "token_ids": tf.ones(shape=(1, 12), dtype="int64"), - "padding_mask": tf.constant( - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], shape=(1, 12) - ), + "token_ids": np.ones(shape=(1, 12), dtype="int32"), + "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]]), } # Pretrained DistilBERT encoder. diff --git a/keras_nlp/models/distil_bert/distil_bert_classifier.py b/keras_nlp/models/distil_bert/distil_bert_classifier.py index 1ae94cb2fa..22a68dda8a 100644 --- a/keras_nlp/models/distil_bert/distil_bert_classifier.py +++ b/keras_nlp/models/distil_bert/distil_bert_classifier.py @@ -97,10 +97,8 @@ class DistilBertClassifier(Task): Preprocessed integer data. 
```python features = { - "token_ids": tf.ones(shape=(2, 12), dtype="int64"), - "padding_mask": tf.constant( - [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) - ), + "token_ids": np.ones(shape=(2, 12), dtype="int32"), + "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2) } labels = [0, 3] diff --git a/keras_nlp/models/distil_bert/distil_bert_masked_lm.py b/keras_nlp/models/distil_bert/distil_bert_masked_lm.py index 1bc7f0dbf1..b307ea1031 100644 --- a/keras_nlp/models/distil_bert/distil_bert_masked_lm.py +++ b/keras_nlp/models/distil_bert/distil_bert_masked_lm.py @@ -84,13 +84,9 @@ class DistilBertMaskedLM(Task): ```python # Create preprocessed batch where 0 is the mask token. features = { - "token_ids": tf.constant( - [[1, 2, 0, 4, 0, 6, 7, 8]] * 2, shape=(2, 8) - ), - "padding_mask": tf.constant( - [[1, 1, 1, 1, 1, 1, 1, 1]] * 2, shape=(2, 8) - ), - "mask_positions": tf.constant([[2, 4]] * 2, shape=(2, 2)) + "token_ids": np.array([[1, 2, 0, 4, 0, 6, 7, 8]] * 2), + "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1]] * 2), + "mask_positions": np.array([[2, 4]] * 2) } # Labels are the original masked values. labels = [[3, 5]] * 2 diff --git a/keras_nlp/models/f_net/f_net_backbone.py b/keras_nlp/models/f_net/f_net_backbone.py index 2965676066..166dae56c8 100644 --- a/keras_nlp/models/f_net/f_net_backbone.py +++ b/keras_nlp/models/f_net/f_net_backbone.py @@ -70,10 +70,8 @@ class FNetBackbone(Backbone): Examples: ```python input_data = { - "token_ids": tf.ones(shape=(1, 12), dtype="int64"), - "segment_ids": tf.constant( - [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0], shape=(1, 12) - ), + "token_ids": np.ones(shape=(1, 12), dtype="int32"), + "segment_ids": np.array([[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]]), } # Pretrained BERT encoder. 
diff --git a/keras_nlp/models/f_net/f_net_classifier.py b/keras_nlp/models/f_net/f_net_classifier.py index ca15a52392..3bb812090a 100644 --- a/keras_nlp/models/f_net/f_net_classifier.py +++ b/keras_nlp/models/f_net/f_net_classifier.py @@ -87,10 +87,8 @@ class FNetClassifier(Task): Preprocessed integer data. ```python features = { - "token_ids": tf.ones(shape=(2, 12), dtype="int64"), - "segment_ids": tf.constant( - [[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) - ), + "token_ids": np.ones(shape=(2, 12), dtype="int32"), + "segment_ids": np.array([[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]] * 2), } labels = [0, 3] diff --git a/keras_nlp/models/f_net/f_net_masked_lm.py b/keras_nlp/models/f_net/f_net_masked_lm.py index f6ecd549d8..cc8d44eb62 100644 --- a/keras_nlp/models/f_net/f_net_masked_lm.py +++ b/keras_nlp/models/f_net/f_net_masked_lm.py @@ -79,13 +79,9 @@ class FNetMaskedLM(Task): ```python # Create a preprocessed dataset where 0 is the mask token. features = { - "token_ids": tf.constant( - [[1, 2, 0, 4, 0, 6, 7, 8]] * 2, shape=(2, 8) - ), - "segment_ids": tf.constant( - [[0, 0, 0, 1, 1, 1, 0, 0]] * 2, shape=(2, 8) - ), - "mask_positions": tf.constant([[2, 4]] * 2, shape=(2, 2)) + "token_ids": np.array([[1, 2, 0, 4, 0, 6, 7, 8]] * 2), + "segment_ids": np.array([[0, 0, 0, 1, 1, 1, 0, 0]] * 2), + "mask_positions": np.array([[2, 4]] * 2) } # Labels are the original masked values. 
labels = [[3, 5]] * 2 diff --git a/keras_nlp/models/gpt2/gpt2_backbone.py b/keras_nlp/models/gpt2/gpt2_backbone.py index 40aefda88b..6a00e6ecc5 100644 --- a/keras_nlp/models/gpt2/gpt2_backbone.py +++ b/keras_nlp/models/gpt2/gpt2_backbone.py @@ -69,10 +69,8 @@ class GPT2Backbone(Backbone): Example usage: ```python input_data = { - "token_ids": tf.ones(shape=(1, 12), dtype="int64"), - "padding_mask": tf.constant( - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], shape=(1, 12) - ), + "token_ids": np.ones(shape=(1, 12), dtype="int32"), + "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]]), } # Pretrained GPT-2 decoder. diff --git a/keras_nlp/models/gpt2/gpt2_causal_lm.py b/keras_nlp/models/gpt2/gpt2_causal_lm.py index 2bc67088e7..bd99128f00 100644 --- a/keras_nlp/models/gpt2/gpt2_causal_lm.py +++ b/keras_nlp/models/gpt2/gpt2_causal_lm.py @@ -99,8 +99,8 @@ class GPT2CausalLM(GenerativeTask): # Prompt the model with `5338, 318` (the token ids for `"Who is"`). # Use `"padding_mask"` to indicate values that should not be overridden. prompt = { - "token_ids": tf.constant([[5338, 318, 0, 0, 0]] * 2), - "padding_mask": tf.constant([[1, 1, 0, 0, 0]] * 2), + "token_ids": np.array([[5338, 318, 0, 0, 0]] * 2), + "padding_mask": np.array([[1, 1, 0, 0, 0]] * 2), } gpt2_lm = keras_nlp.models.GPT2CausalLM.from_preset( @@ -120,11 +120,11 @@ class GPT2CausalLM(GenerativeTask): Call `fit()` without preprocessing. 
```python x = { - "token_ids": tf.constant([[50256, 1, 2, 3, 4]] * 2), - "padding_mask": tf.constant([[1, 1, 1, 1, 1]] * 2), + "token_ids": np.array([[50256, 1, 2, 3, 4]] * 2), + "padding_mask": np.array([[1, 1, 1, 1, 1]] * 2), } - y = tf.constant([[1, 2, 3, 4, 50256]] * 2) - sw = tf.constant([[1, 1, 1, 1, 1]] * 2) + y = np.array([[1, 2, 3, 4, 50256]] * 2) + sw = np.array([[1, 1, 1, 1, 1]] * 2) gpt2_lm = keras_nlp.models.GPT2CausalLM.from_preset( "gpt2_base_en", diff --git a/keras_nlp/models/opt/opt_backbone.py b/keras_nlp/models/opt/opt_backbone.py index fb7bb0bd70..996511f66d 100644 --- a/keras_nlp/models/opt/opt_backbone.py +++ b/keras_nlp/models/opt/opt_backbone.py @@ -67,10 +67,8 @@ class OPTBackbone(Backbone): Examples: ```python input_data = { - "token_ids": tf.ones(shape=(1, 12), dtype="int64"), - "padding_mask": tf.constant( - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], shape=(1, 12) - ), + "token_ids": np.ones(shape=(1, 12), dtype="int32"), + "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]]), } # Pretrained OPT decoder diff --git a/keras_nlp/models/opt/opt_causal_lm.py b/keras_nlp/models/opt/opt_causal_lm.py index 0bdedf1ed9..06730f9010 100644 --- a/keras_nlp/models/opt/opt_causal_lm.py +++ b/keras_nlp/models/opt/opt_causal_lm.py @@ -99,8 +99,8 @@ class OPTCausalLM(GenerativeTask): # Prompt the model with `5338, 318` (the token ids for `"Who is"`). # Use `"padding_mask"` to indicate values that should not be overridden. prompt = { - "token_ids": tf.constant([[5338, 318, 0, 0, 0]] * 2), - "padding_mask": tf.constant([[1, 1, 0, 0, 0]] * 2), + "token_ids": np.array([[5338, 318, 0, 0, 0]] * 2), + "padding_mask": np.array([[1, 1, 0, 0, 0]] * 2), } opt_lm = keras_nlp.models.OPTCausalLM.from_preset( @@ -120,11 +120,11 @@ class OPTCausalLM(GenerativeTask): Call `fit()` without preprocessing. 
```python x = { - "token_ids": tf.constant([[1, 2, 3, 4, 5]] * 2), - "padding_mask": tf.constant([[1, 1, 1, 1, 1]] * 2), + "token_ids": np.array([[1, 2, 3, 4, 5]] * 2), + "padding_mask": np.array([[1, 1, 1, 1, 1]] * 2), } - y = tf.constant([[2, 3, 4, 5, 0]] * 2) - sw = tf.constant([[1, 1, 1, 1, 1]] * 2) + y = np.array([[2, 3, 4, 5, 0]] * 2) + sw = np.array([[1, 1, 1, 1, 1]] * 2) opt_lm = keras_nlp.models.OPTCausalLM.from_preset( "opt_base_en", diff --git a/keras_nlp/models/roberta/roberta_backbone.py b/keras_nlp/models/roberta/roberta_backbone.py index 0166fb66c0..f8440b0c02 100644 --- a/keras_nlp/models/roberta/roberta_backbone.py +++ b/keras_nlp/models/roberta/roberta_backbone.py @@ -67,8 +67,7 @@ class RobertaBackbone(Backbone): Examples: ```python input_data = { - "token_ids": tf.ones(shape=(1, 12), dtype="int64"), - "padding_mask": tf.constant( - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], shape=(1, 12)), + "token_ids": np.ones(shape=(1, 12), dtype="int32"), + "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]]), } diff --git a/keras_nlp/models/roberta/roberta_classifier.py b/keras_nlp/models/roberta/roberta_classifier.py index 3f838c1d4c..c5e9f53de1 100644 --- a/keras_nlp/models/roberta/roberta_classifier.py +++ b/keras_nlp/models/roberta/roberta_classifier.py @@ -87,10 +87,8 @@ class RobertaClassifier(Task): Preprocessed integer data. 
```python features = { - "token_ids": tf.ones(shape=(2, 12), dtype="int64"), - "padding_mask": tf.constant( - [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) - ), + "token_ids": np.ones(shape=(2, 12), dtype="int32"), + "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2), } labels = [0, 3] diff --git a/keras_nlp/models/roberta/roberta_masked_lm.py b/keras_nlp/models/roberta/roberta_masked_lm.py index 659e6c2e00..37f5dc3920 100644 --- a/keras_nlp/models/roberta/roberta_masked_lm.py +++ b/keras_nlp/models/roberta/roberta_masked_lm.py @@ -82,13 +82,9 @@ class RobertaMaskedLM(Task): ```python # Create a preprocessed dataset where 0 is the mask token. features = { - "token_ids": tf.constant( - [[1, 2, 0, 4, 0, 6, 7, 8]] * 2, shape=(2, 8) - ), - "padding_mask": tf.constant( - [[1, 1, 1, 1, 1, 1, 1, 1]] * 2, shape=(2, 8) - ), - "mask_positions": tf.constant([[2, 4]] * 2, shape=(2, 2)) + "token_ids": np.array([[1, 2, 0, 4, 0, 6, 7, 8]] * 2), + "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1]] * 2), + "mask_positions": np.array([[2, 4]] * 2) } # Labels are the original masked values. 
labels = [[3, 5]] * 2 diff --git a/keras_nlp/models/whisper/whisper_backbone.py b/keras_nlp/models/whisper/whisper_backbone.py index 1f68853ba1..d13961f1d8 100644 --- a/keras_nlp/models/whisper/whisper_backbone.py +++ b/keras_nlp/models/whisper/whisper_backbone.py @@ -78,10 +78,10 @@ class WhisperBackbone(Backbone): ```python input_data = { - "encoder_features": tf.ones(shape=(1, 12, 80), dtype="int64"), - "decoder_token_ids": tf.ones(shape=(1, 12), dtype="int64"), - "decoder_padding_mask": tf.constant( - [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], shape=(1, 12) + "encoder_features": np.ones(shape=(1, 12, 80), dtype="int32"), + "decoder_token_ids": np.ones(shape=(1, 12), dtype="int32"), + "decoder_padding_mask": np.array( + [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] ), } diff --git a/keras_nlp/models/xlm_roberta/xlm_roberta_backbone.py b/keras_nlp/models/xlm_roberta/xlm_roberta_backbone.py index bd25322394..81a069cb18 100644 --- a/keras_nlp/models/xlm_roberta/xlm_roberta_backbone.py +++ b/keras_nlp/models/xlm_roberta/xlm_roberta_backbone.py @@ -58,9 +58,8 @@ class XLMRobertaBackbone(roberta_backbone.RobertaBackbone): Examples: ```python input_data = { - "token_ids": tf.ones(shape=(1, 12), dtype="int64"), - "padding_mask": tf.constant( - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], shape=(1, 12)), + "token_ids": np.ones(shape=(1, 12), dtype="int32"), + "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]]), } # Pretrained XLM-R encoder. diff --git a/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py b/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py index 344d19b8a6..1eec0588e7 100644 --- a/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py +++ b/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py @@ -89,10 +89,8 @@ class XLMRobertaClassifier(Task): Preprocessed integer data. 
```python features = { - "token_ids": tf.ones(shape=(2, 12), dtype="int64"), - "padding_mask": tf.constant( - [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) - ), + "token_ids": np.ones(shape=(2, 12), dtype="int32"), + "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2), } labels = [0, 3] diff --git a/keras_nlp/models/xlm_roberta/xlm_roberta_masked_lm.py b/keras_nlp/models/xlm_roberta/xlm_roberta_masked_lm.py index eeeb051366..80715624f5 100644 --- a/keras_nlp/models/xlm_roberta/xlm_roberta_masked_lm.py +++ b/keras_nlp/models/xlm_roberta/xlm_roberta_masked_lm.py @@ -85,13 +85,9 @@ class XLMRobertaMaskedLM(Task): ```python # Create a preprocessed dataset where 0 is the mask token. features = { - "token_ids": tf.constant( - [[1, 2, 0, 4, 0, 6, 7, 8]] * 2, shape=(2, 8) - ), - "padding_mask": tf.constant( - [[1, 1, 1, 1, 1, 1, 1, 1]] * 2, shape=(2, 8) - ), - "mask_positions": tf.constant([[2, 4]] * 2, shape=(2, 2)) + "token_ids": np.array([[1, 2, 0, 4, 0, 6, 7, 8]] * 2), + "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1]] * 2), + "mask_positions": np.array([[2, 4]] * 2) } # Labels are the original masked values. labels = [[3, 5]] * 2 diff --git a/keras_nlp/samplers/beam_sampler.py b/keras_nlp/samplers/beam_sampler.py index 0e4ac64668..077f250b91 100644 --- a/keras_nlp/samplers/beam_sampler.py +++ b/keras_nlp/samplers/beam_sampler.py @@ -53,14 +53,14 @@ class BeamSampler(Sampler): def next(prompt, cache, index): prompt_batch_size = tf.shape(prompt)[0] - hidden_states = tf.ones((prompt_batch_size, 10)) + hidden_states = np.ones((prompt_batch_size, 10)) # A uniform distribution over our alphabet. 
- logits = tf.ones((prompt_batch_size, vocab_size)) + logits = np.ones((prompt_batch_size, vocab_size)) return logits, hidden_states, cache output = keras_nlp.samplers.BeamSampler()( next=next, - prompt=tf.fill((batch_size, length), char_lookup["z"]), + prompt=np.full((batch_size, length), char_lookup["z"], dtype="int32"), index=5, ) print(["".join([int_lookup[i] for i in s]) for s in output.numpy()]) @@ -76,14 +76,14 @@ def next(prompt, cache, index): def next(prompt, cache, index): prompt_batch_size = tf.shape(prompt)[0] - hidden_states = tf.ones((prompt_batch_size, 10)) + hidden_states = np.ones((prompt_batch_size, 10)) # A uniform distribution over our alphabet. - logits = tf.ones((batch_size, vocab_size)) + logits = np.ones((batch_size, vocab_size)) return logits, hidden_states, cache beams, probs = keras_nlp.samplers.BeamSampler(return_all_beams=True)( next=next, - prompt=tf.fill((batch_size, length,), char_lookup['z']), + prompt=np.full((batch_size, length,), char_lookup['z'], dtype="int32"), index=5, ) diff --git a/keras_nlp/samplers/contrastive_sampler.py b/keras_nlp/samplers/contrastive_sampler.py index ea5d668c54..5a5f1fd543 100644 --- a/keras_nlp/samplers/contrastive_sampler.py +++ b/keras_nlp/samplers/contrastive_sampler.py @@ -56,16 +56,16 @@ class ContrastiveSampler(Sampler): def next(prompt, cache, index): prompt_batch_size = tf.shape(prompt)[0] - hidden_states = tf.ones((prompt_batch_size, hidden_size)) + hidden_states = np.ones((prompt_batch_size, hidden_size)) # A uniform distribution over our alphabet. 
- logits = tf.ones((prompt_batch_size, vocab_size)) + logits = np.ones((prompt_batch_size, vocab_size)) return logits, hidden_states, cache output = keras_nlp.samplers.ContrastiveSampler()( next=next, - prompt=tf.fill((batch_size, length), char_lookup["z"]), + prompt=np.full((batch_size, length), char_lookup["z"], dtype="int32"), index=index, - hidden_states=tf.ones([batch_size, index, hidden_size]), + hidden_states=np.ones([batch_size, index, hidden_size]), ) print(["".join([int_lookup[i] for i in s]) for s in output.numpy()]) # >>> "zzzzzeeeeeee" diff --git a/keras_nlp/samplers/greedy_sampler.py b/keras_nlp/samplers/greedy_sampler.py index ed087f4c4f..62bbb06ed3 100644 --- a/keras_nlp/samplers/greedy_sampler.py +++ b/keras_nlp/samplers/greedy_sampler.py @@ -39,14 +39,14 @@ class GreedySampler(Sampler): batch_size, length, vocab_size = 1, 12, len(int_lookup) def next(prompt, cache, index): - hidden_states = tf.ones((batch_size, 10)) + hidden_states = np.ones((batch_size, 10)) # A uniform distribution over our alphabet. - logits = tf.ones((batch_size, vocab_size)) + logits = np.ones((batch_size, vocab_size)) return logits, hidden_states, cache output = keras_nlp.samplers.GreedySampler()( next=next, - prompt=tf.fill((batch_size, length,), char_lookup['z']), + prompt=np.full((batch_size, length,), char_lookup['z'], dtype="int32"), index=5, ) print(["".join([int_lookup[i] for i in s]) for s in output.numpy()]) diff --git a/keras_nlp/samplers/random_sampler.py b/keras_nlp/samplers/random_sampler.py index baf13cf875..e18de34f2d 100644 --- a/keras_nlp/samplers/random_sampler.py +++ b/keras_nlp/samplers/random_sampler.py @@ -44,14 +44,14 @@ class RandomSampler(Sampler): batch_size, length, vocab_size = 1, 12, len(int_lookup) def next(prompt, state, index): - hidden_states = tf.ones((batch_size, 10)) + hidden_states = np.ones((batch_size, 10)) # A uniform distribution over our alphabet. 
- logits = tf.ones((batch_size, vocab_size)) + logits = np.ones((batch_size, vocab_size)) return logits, hidden_states, state output = keras_nlp.samplers.RandomSampler()( next=next, - prompt=tf.fill((batch_size, length,), char_lookup['z']), + prompt=np.full((batch_size, length,), char_lookup['z'], dtype="int32"), index=5, ) print(["".join([int_lookup[i] for i in s]) for s in output.numpy()]) diff --git a/keras_nlp/samplers/top_k_sampler.py b/keras_nlp/samplers/top_k_sampler.py index 39d50d2ad4..bceacd0a00 100644 --- a/keras_nlp/samplers/top_k_sampler.py +++ b/keras_nlp/samplers/top_k_sampler.py @@ -45,14 +45,14 @@ class TopKSampler(Sampler): batch_size, length, vocab_size = 1, 12, len(int_lookup) def next(prompt, cache, index): - hidden_states = tf.ones((batch_size, 10)) + hidden_states = np.ones((batch_size, 10)) # A uniform distribution over our alphabet. - logits = tf.ones((batch_size, vocab_size)) + logits = np.ones((batch_size, vocab_size)) return logits, hidden_states, cache output = keras_nlp.samplers.TopKSampler(k=3)( next=next, - prompt=tf.fill((batch_size, length,), char_lookup['z']), + prompt=np.full((batch_size, length,), char_lookup['z'], dtype="int32"), index=5, ) print(["".join([int_lookup[i] for i in s]) for s in output.numpy()]) diff --git a/keras_nlp/samplers/top_p_sampler.py b/keras_nlp/samplers/top_p_sampler.py index 8ff3147b72..608f227121 100644 --- a/keras_nlp/samplers/top_p_sampler.py +++ b/keras_nlp/samplers/top_p_sampler.py @@ -53,14 +53,14 @@ class TopPSampler(Sampler): batch_size, length, vocab_size = 1, 12, len(int_lookup) def next(prompt, cache, index): - hidden_states = tf.ones((batch_size, 10)) + hidden_states = np.ones((batch_size, 10)) # A uniform distribution over our alphabet. 
- logits = tf.ones((batch_size, vocab_size)) + logits = np.ones((batch_size, vocab_size)) return logits, hidden_states, cache output = keras_nlp.samplers.TopPSampler(p=0.1)( next=next, - prompt=tf.fill((batch_size, length,), char_lookup['z']), + prompt=np.full((batch_size, length,), char_lookup['z'], dtype="int32"), index=5, ) print(["".join([int_lookup[i] for i in s]) for s in output.numpy()])