diff --git a/keras_nlp/models/deberta_v3/deberta_v3_backbone.py b/keras_nlp/models/deberta_v3/deberta_v3_backbone.py
index 9d3e36bb04..9aaa3bc83c 100644
--- a/keras_nlp/models/deberta_v3/deberta_v3_backbone.py
+++ b/keras_nlp/models/deberta_v3/deberta_v3_backbone.py
@@ -79,11 +79,11 @@ class DebertaV3Backbone(Backbone):
             [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], shape=(1, 12)),
     }

-    # Pretrained DeBERTa encoder
+    # Pretrained DeBERTa encoder.
     model = keras_nlp.models.DebertaV3Backbone.from_preset(
-        "deberta_base_en",
+        "deberta_v3_base_en",
     )
-    output = model(input_data)
+    model(input_data)

     # Randomly initialized DeBERTa encoder with custom config
     model = keras_nlp.models.DebertaV3Backbone(
@@ -96,7 +96,7 @@ class DebertaV3Backbone(Backbone):
         bucket_size=256,
     )
     # Call the model on the input data.
-    output = model(input_data)
+    model(input_data)
     ```
     """

diff --git a/keras_nlp/models/deberta_v3/deberta_v3_classifier.py b/keras_nlp/models/deberta_v3/deberta_v3_classifier.py
index 7283a4c039..a179b248f1 100644
--- a/keras_nlp/models/deberta_v3/deberta_v3_classifier.py
+++ b/keras_nlp/models/deberta_v3/deberta_v3_classifier.py
@@ -68,84 +68,34 @@ class DebertaV3Classifier(Task):
     Examples:

-    Example usage.
+    Raw string data.
     ```python
-    # Define the preprocessed inputs.
-    preprocessed_features = {
-        "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64),
-        "padding_mask": tf.constant(
-            [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(1, 12)),
-    }
-    labels = [0, 3]
-
-    # Randomly initialized DeBERTa encoder
-    backbone = keras_nlp.models.DebertaV3Backbone(
-        vocabulary_size=128100,
-        num_layers=12,
-        num_heads=12,
-        hidden_dim=768,
-        intermediate_dim=3072,
-        max_sequence_length=12,
-        bucket_size=6,
-    )
-
-    # Create a DeBERTa classifier and fit your data.
-    classifier = keras_nlp.models.DebertaV3Classifier(
-        backbone,
-        num_classes=4,
-        preprocessor=None,
-    )
-    classifier.fit(x=preprocessed_features, y=labels, batch_size=2)
-
-    # Access backbone programatically (e.g., to change `trainable`)
-    classifier.backbone.trainable = False
-    ```
-
-    Raw string inputs.
-    ```python
-    # Create a dataset with raw string features in an `(x, y)` format.
     features = ["The quick brown fox jumped.", "I forgot my homework."]
     labels = [0, 3]

-    # Create a DebertaV3Classifier and fit your data.
+    # Pretrained classifier.
     classifier = keras_nlp.models.DebertaV3Classifier.from_preset(
         "deberta_v3_base_en",
         num_classes=4,
     )
-    classifier.compile(
-        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
-    )
     classifier.fit(x=features, y=labels, batch_size=2)
-    ```
+    classifier.predict(x=features, batch_size=2)

-    Raw string inputs with customized preprocessing.
-    ```python
-    # Create a dataset with raw string features in an `(x, y)` format.
-    features = ["The quick brown fox jumped.", "I forgot my homework."]
-    labels = [0, 3]
-
-    # Use a shorter sequence length.
-    preprocessor = keras_nlp.models.DebertaV3Preprocessor.from_preset(
-        "deberta_v3_base_en",
-        sequence_length=128,
-    )
-
-    # Create a DebertaV3Classifier and fit your data.
-    classifier = keras_nlp.models.DebertaV3Classifier.from_preset(
-        "deberta_v3_base_en",
-        num_classes=4,
-        preprocessor=preprocessor,
-    )
+    # Re-compile (e.g., with a new learning rate).
     classifier.compile(
         loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+        optimizer=keras.optimizers.Adam(5e-5),
+        jit_compile=True,
     )
+    # Access backbone programmatically (e.g., to change `trainable`).
+    classifier.backbone.trainable = False
+    # Fit again.
     classifier.fit(x=features, y=labels, batch_size=2)
     ```

-    Preprocessed inputs.
+    Preprocessed integer data.
     ```python
-    # Create a dataset with preprocessed features in an `(x, y)` format.
-    preprocessed_features = {
+    features = {
         "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64),
         "padding_mask": tf.constant(
             [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)
@@ -153,16 +103,57 @@ class DebertaV3Classifier(Task):
     }
     labels = [0, 3]

-    # Create a DebertaV3Classifier and fit your data.
+    # Pretrained classifier without preprocessing.
     classifier = keras_nlp.models.DebertaV3Classifier.from_preset(
         "deberta_v3_base_en",
         num_classes=4,
         preprocessor=None,
     )
-    classifier.compile(
-        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+    classifier.fit(x=features, y=labels, batch_size=2)
+    ```
+
+    Custom backbone and vocabulary.
+    ```python
+    features = ["The quick brown fox jumped.", "I forgot my homework."]
+    labels = [0, 3]
+
+    bytes_io = io.BytesIO()
+    ds = tf.data.Dataset.from_tensor_slices(features)
+    sentencepiece.SentencePieceTrainer.train(
+        sentence_iterator=ds.as_numpy_iterator(),
+        model_writer=bytes_io,
+        vocab_size=10,
+        model_type="WORD",
+        pad_id=0,
+        bos_id=1,
+        eos_id=2,
+        unk_id=3,
+        pad_piece="[PAD]",
+        bos_piece="[CLS]",
+        eos_piece="[SEP]",
+        unk_piece="[UNK]",
+    )
+    tokenizer = keras_nlp.models.DebertaV3Tokenizer(
+        proto=bytes_io.getvalue(),
     )
-    classifier.fit(x=preprocessed_features, y=labels, batch_size=2)
+    preprocessor = keras_nlp.models.DebertaV3Preprocessor(
+        tokenizer=tokenizer,
+        sequence_length=128,
+    )
+    backbone = keras_nlp.models.DebertaV3Backbone(
+        vocabulary_size=30552,
+        num_layers=4,
+        num_heads=4,
+        hidden_dim=256,
+        intermediate_dim=512,
+        max_sequence_length=128,
+    )
+    classifier = keras_nlp.models.DebertaV3Classifier(
+        backbone=backbone,
+        preprocessor=preprocessor,
+        num_classes=4,
+    )
+    classifier.fit(x=features, y=labels, batch_size=2)
     ```
     """

diff --git a/keras_nlp/models/deberta_v3/deberta_v3_masked_lm.py b/keras_nlp/models/deberta_v3/deberta_v3_masked_lm.py
index aa4c58b646..a7ecc2192a 100644
--- a/keras_nlp/models/deberta_v3/deberta_v3_masked_lm.py
+++ b/keras_nlp/models/deberta_v3/deberta_v3_masked_lm.py
@@ -60,55 +60,48 @@ class DebertaV3MaskedLM(Task):
     Example usage:

-    Raw string inputs and pretrained backbone.
+    Raw string data.
     ```python
-    # Create a dataset with raw string features. Labels are inferred.
     features = ["The quick brown fox jumped.", "I forgot my homework."]

-    # Create a DebertaV3MaskedLM with a pretrained backbone and further train
-    # on an MLM task.
+    # Pretrained language model.
     masked_lm = keras_nlp.models.DebertaV3MaskedLM.from_preset(
         "deberta_v3_base_en",
     )
+    masked_lm.fit(x=features, batch_size=2)
+
+    # Re-compile (e.g., with a new learning rate).
     masked_lm.compile(
         loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+        optimizer=keras.optimizers.Adam(5e-5),
+        jit_compile=True,
     )
+    # Access backbone programmatically (e.g., to change `trainable`).
+    masked_lm.backbone.trainable = False
+    # Fit again.
     masked_lm.fit(x=features, batch_size=2)
     ```

-    Preprocessed inputs and custom backbone.
+    Preprocessed integer data.
     ```python
-    # Create a preprocessed dataset where 0 is the mask token.
-    preprocessed_features = {
+    # Create a preprocessed batch where 0 is the mask token.
+    features = {
         "token_ids": tf.constant(
             [[1, 2, 0, 4, 0, 6, 7, 8]] * 2, shape=(2, 8)
         ),
         "padding_mask": tf.constant(
             [[1, 1, 1, 1, 1, 1, 1, 1]] * 2, shape=(2, 8)
         ),
-        "mask_positions": tf.constant([[2, 4]] * 2, shape=(2, 2))
+        "mask_positions": tf.constant([[2, 4]] * 2, shape=(2, 2)),
     }
     # Labels are the original masked values.
     labels = [[3, 5]] * 2

-    # Randomly initialize a DeBERTaV3 encoder
-    backbone = keras_nlp.models.DebertaV3Backbone(
-        vocabulary_size=50265,
-        num_layers=12,
-        num_heads=12,
-        hidden_dim=768,
-        intermediate_dim=3072,
-        max_sequence_length=12
-    )
-    # Create a DeBERTaV3 masked_lm and fit the data.
-    masked_lm = keras_nlp.models.DebertaV3MaskedLM(
-        backbone,
+    masked_lm = keras_nlp.models.DebertaV3MaskedLM.from_preset(
+        "deberta_v3_base_en",
         preprocessor=None,
     )
-    masked_lm.compile(
-        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
-    )
-    masked_lm.fit(x=preprocessed_features, y=labels, batch_size=2)
+    masked_lm.fit(x=features, y=labels, batch_size=2)
     ```
     """

diff --git a/keras_nlp/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py b/keras_nlp/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py
index 6f3a675545..59c8aeeb12 100644
--- a/keras_nlp/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py
+++ b/keras_nlp/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py
@@ -71,78 +71,47 @@ class DebertaV3MaskedLMPreprocessor(DebertaV3Preprocessor):
     out of budget. It supports an arbitrary number of segments.

     Examples:
+
+    Directly calling the layer on data.
     ```python
-    # Load the preprocessor from a preset.
     preprocessor = keras_nlp.models.DebertaV3MaskedLMPreprocessor.from_preset(
         "deberta_v3_base_en"
     )

-    # Tokenize and pack a single sentence.
-    sentence = tf.constant("The quick brown fox jumped.")
-    preprocessor(sentence)
-    # Same output.
+    # Tokenize and mask a single sentence.
     preprocessor("The quick brown fox jumped.")

-    # Tokenize and a batch of single sentences.
-    sentences = tf.constant(
-        ["The quick brown fox jumped.", "Call me Ishmael."]
-    )
-    preprocessor(sentences)
-    # Same output.
-    preprocessor(
-        ["The quick brown fox jumped.", "Call me Ishmael."]
-    )
+    # Tokenize and mask a batch of single sentences.
+    preprocessor(["The quick brown fox jumped.", "Call me Ishmael."])

-    # Tokenize and pack a sentence pair.
-    first_sentence = tf.constant("The quick brown fox jumped.")
-    second_sentence = tf.constant("The fox tripped.")
-    preprocessor((first_sentence, second_sentence))
+    # Tokenize and mask sentence pairs.
+    # In this case, always convert input to tensors before calling the layer.
+    first = tf.constant(["The quick brown fox jumped.", "Call me Ishmael."])
+    second = tf.constant(["The fox tripped.", "Oh look, a whale."])
+    preprocessor((first, second))
+    ```

-    # Map a dataset to preprocess a single sentence.
-    features = tf.constant(
-        ["The quick brown fox jumped.", "Call me Ishmael."]
+    Mapping with `tf.data.Dataset`.
+    ```python
+    preprocessor = keras_nlp.models.DebertaV3MaskedLMPreprocessor.from_preset(
+        "deberta_v3_base_en"
     )
-    labels = tf.constant([0, 1])
-    ds = tf.data.Dataset.from_tensor_slices((features, labels))
-    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)

-    # Map a dataset to preprocess sentence pairs.
-    first_sentences = tf.constant(
-        ["The quick brown fox jumped.", "Call me Ishmael."]
-    )
-    second_sentences = tf.constant(
-        ["The fox tripped.", "Oh look, a whale."]
-    )
-    labels = tf.constant([1, 1])
-    ds = tf.data.Dataset.from_tensor_slices(
-        (
-            (first_sentences, second_sentences), labels
-        )
-    )
+    first = tf.constant(["The quick brown fox jumped.", "Call me Ishmael."])
+    second = tf.constant(["The fox tripped.", "Oh look, a whale."])
+
+    # Map single sentences.
+    ds = tf.data.Dataset.from_tensor_slices(first)
     ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)

-    # Map a dataset to preprocess unlabeled sentence pairs.
-    first_sentences = tf.constant(
-        ["The quick brown fox jumped.", "Call me Ishmael."]
-    )
-    second_sentences = tf.constant(
-        ["The fox tripped.", "Oh look, a whale."]
-    )
-    ds = tf.data.Dataset.from_tensor_slices((first_sentences, second_sentences))
+    # Map sentence pairs.
+    ds = tf.data.Dataset.from_tensor_slices((first, second))
     # Watch out for tf.data's default unpacking of tuples here!
     # Best to invoke the `preprocessor` directly in this case.
     ds = ds.map(
-        lambda s1, s2: preprocessor(x=(s1, s2)),
+        lambda first, second: preprocessor(x=(first, second)),
         num_parallel_calls=tf.data.AUTOTUNE,
     )
-
-    # Alternatively, you can create a preprocessor from your own vocabulary.
-    # The usage is the exactly same as above.
-    tokenizer = keras_nlp.models.DebertaV3MaskedLMTokenizer(proto="model.spm")
-    preprocessor = keras_nlp.models.DebertaV3MaskedLMPreprocessor(
-        tokenizer=tokenizer,
-        sequence_length=10,
-    )
+    ```
     """

     def __init__(
diff --git a/keras_nlp/models/deberta_v3/deberta_v3_preprocessor.py b/keras_nlp/models/deberta_v3/deberta_v3_preprocessor.py
index b87fb968ed..6b5e870c13 100644
--- a/keras_nlp/models/deberta_v3/deberta_v3_preprocessor.py
+++ b/keras_nlp/models/deberta_v3/deberta_v3_preprocessor.py
@@ -71,76 +71,78 @@ class DebertaV3Preprocessor(Preprocessor):
     out of budget. It supports an arbitrary number of segments.

     Examples:
+
+    Directly calling the layer on data.
     ```python
-    # Load the preprocessor from a preset.
-    preprocessor = keras_nlp.models.DebertaV3Preprocessor.from_preset("deberta_v3_base_en")
+    preprocessor = keras_nlp.models.DebertaV3Preprocessor.from_preset(
+        "deberta_v3_base_en"
+    )

     # Tokenize and pack a single sentence.
-    sentence = tf.constant("The quick brown fox jumped.")
-    preprocessor(sentence)
-    # Same output.
     preprocessor("The quick brown fox jumped.")

-    # Tokenize and a batch of single sentences.
-    sentences = tf.constant(
-        ["The quick brown fox jumped.", "Call me Ishmael."]
+    # Tokenize a batch of single sentences.
+    preprocessor(["The quick brown fox jumped.", "Call me Ishmael."])
+
+    # Preprocess a batch of sentence pairs.
+    # When handling multiple sequences, always convert to tensors first!
+    first = tf.constant(["The quick brown fox jumped.", "Call me Ishmael."])
+    second = tf.constant(["The fox tripped.", "Oh look, a whale."])
+    preprocessor((first, second))
+
+    # Custom vocabulary.
+    bytes_io = io.BytesIO()
+    ds = tf.data.Dataset.from_tensor_slices(["The quick brown fox jumped."])
+    sentencepiece.SentencePieceTrainer.train(
+        sentence_iterator=ds.as_numpy_iterator(),
+        model_writer=bytes_io,
+        vocab_size=9,
+        model_type="WORD",
+        pad_id=0,
+        bos_id=1,
+        eos_id=2,
+        unk_id=3,
+        pad_piece="[PAD]",
+        bos_piece="[CLS]",
+        eos_piece="[SEP]",
+        unk_piece="[UNK]",
     )
-    preprocessor(sentences)
-    # Same output.
-    preprocessor(
-        ["The quick brown fox jumped.", "Call me Ishmael."]
+    tokenizer = keras_nlp.models.DebertaV3Tokenizer(
+        proto=bytes_io.getvalue(),
     )
+    preprocessor = keras_nlp.models.DebertaV3Preprocessor(tokenizer)
+    preprocessor("The quick brown fox jumped.")
+    ```

-    # Tokenize and pack a sentence pair.
-    first_sentence = tf.constant("The quick brown fox jumped.")
-    second_sentence = tf.constant("The fox tripped.")
-    preprocessor((first_sentence, second_sentence))
-
-    # Map a dataset to preprocess a single sentence.
-    features = tf.constant(
-        ["The quick brown fox jumped.", "Call me Ishmael."]
+    Mapping with `tf.data.Dataset`.
+    ```python
+    preprocessor = keras_nlp.models.DebertaV3Preprocessor.from_preset(
+        "deberta_v3_base_en"
     )
-    labels = tf.constant([0, 1])
-    ds = tf.data.Dataset.from_tensor_slices((features, labels))
+
+    first = tf.constant(["The quick brown fox jumped.", "Call me Ishmael."])
+    second = tf.constant(["The fox tripped.", "Oh look, a whale."])
+    label = tf.constant([1, 1])
+
+    # Map labeled single sentences.
+    ds = tf.data.Dataset.from_tensor_slices((first, label))
     ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)

-    # Map a dataset to preprocess sentence pairs.
-    first_sentences = tf.constant(
-        ["The quick brown fox jumped.", "Call me Ishmael."]
-    )
-    second_sentences = tf.constant(
-        ["The fox tripped.", "Oh look, a whale."]
-    )
-    labels = tf.constant([1, 1])
-    ds = tf.data.Dataset.from_tensor_slices(
-        (
-            (first_sentences, second_sentences), labels
-        )
-    )
+    # Map unlabeled single sentences.
+    ds = tf.data.Dataset.from_tensor_slices(first)
     ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)

-    # Map a dataset to preprocess unlabeled sentence pairs.
-    first_sentences = tf.constant(
-        ["The quick brown fox jumped.", "Call me Ishmael."]
-    )
-    second_sentences = tf.constant(
-        ["The fox tripped.", "Oh look, a whale."]
-    )
-    ds = tf.data.Dataset.from_tensor_slices((first_sentences, second_sentences))
+    # Map labeled sentence pairs.
+    ds = tf.data.Dataset.from_tensor_slices(((first, second), label))
+    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
+
+    # Map unlabeled sentence pairs.
+    ds = tf.data.Dataset.from_tensor_slices((first, second))
     # Watch out for tf.data's default unpacking of tuples here!
     # Best to invoke the `preprocessor` directly in this case.
     ds = ds.map(
-        lambda s1, s2: preprocessor(x=(s1, s2)),
+        lambda first, second: preprocessor(x=(first, second)),
         num_parallel_calls=tf.data.AUTOTUNE,
     )
-
-    # Alternatively, you can create a preprocessor from your own vocabulary.
-    # The usage is the exactly same as above.
-    tokenizer = keras_nlp.models.DebertaV3Tokenizer(proto="model.spm")
-    preprocessor = keras_nlp.models.DebertaV3Preprocessor(
-        tokenizer=tokenizer,
-        sequence_length=10,
-    )
     ```
     """

diff --git a/keras_nlp/models/deberta_v3/deberta_v3_tokenizer.py b/keras_nlp/models/deberta_v3/deberta_v3_tokenizer.py
index c5400e7a7f..f04b78aa0f 100644
--- a/keras_nlp/models/deberta_v3/deberta_v3_tokenizer.py
+++ b/keras_nlp/models/deberta_v3/deberta_v3_tokenizer.py
@@ -58,16 +58,39 @@ class DebertaV3Tokenizer(SentencePieceTokenizer):
     Examples:

     ```python
-    tokenizer = keras_nlp.models.DebertaV3Tokenizer(proto="model.spm")
+    # Unbatched input.
+    tokenizer = keras_nlp.models.DebertaV3Tokenizer.from_preset(
+        "deberta_v3_base_en",
+    )
+    tokenizer("The quick brown fox jumped.")

     # Batched inputs.
     tokenizer(["the quick brown fox", "the earth is round"])

-    # Unbatched inputs.
-    tokenizer("the quick brown fox")
-
     # Detokenization.
-    tokenizer.detokenize(tf.constant([[1, 4, 9, 5, 7, 2]]))
+    tokenizer.detokenize(tokenizer("The quick brown fox jumped."))
+
+    # Custom vocabulary.
+    bytes_io = io.BytesIO()
+    ds = tf.data.Dataset.from_tensor_slices(["The quick brown fox jumped."])
+    sentencepiece.SentencePieceTrainer.train(
+        sentence_iterator=ds.as_numpy_iterator(),
+        model_writer=bytes_io,
+        vocab_size=9,
+        model_type="WORD",
+        pad_id=0,
+        bos_id=1,
+        eos_id=2,
+        unk_id=3,
+        pad_piece="[PAD]",
+        bos_piece="[CLS]",
+        eos_piece="[SEP]",
+        unk_piece="[UNK]",
+    )
+    tokenizer = keras_nlp.models.DebertaV3Tokenizer(
+        proto=bytes_io.getvalue(),
+    )
+    tokenizer("The quick brown fox jumped.")
     ```
     """

diff --git a/keras_nlp/models/deberta_v3/disentangled_attention_encoder.py b/keras_nlp/models/deberta_v3/disentangled_attention_encoder.py
index 005d482174..a605fdeb11 100644
--- a/keras_nlp/models/deberta_v3/disentangled_attention_encoder.py
+++ b/keras_nlp/models/deberta_v3/disentangled_attention_encoder.py
@@ -58,23 +58,6 @@ class DisentangledAttentionEncoder(keras.layers.Layer):
         bias_initializer: string or `keras.initializers` initializer,
             defaults to "zeros". The bias initializer for the dense and
             disentangled self-attention layers.
-
-    Examples:
-
-    ```python
-    # Create a single disentangled attention encoder layer.
-    encoder = keras_nlp.layers.DisentangledAttentionEncoder(
-        intermediate_dim=64, num_heads=8)
-
-    # Create a simple model containing the encoder.
-    input = keras.Input(shape=[10, 64])
-    output = encoder(input)
-    model = keras.Model(inputs=input, outputs=output)
-
-    # Call encoder on the inputs.
-    input_data = tf.random.uniform(shape=[2, 10, 64])
-    output = model(input_data)
-    ```
     """

     def __init__(
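The updated docstrings assume `io`, `sentencepiece`, `tensorflow`, and `keras_nlp` are in scope. For a quick local sanity check of the custom-vocabulary path that several of these docstrings now share (no preset download required), a minimal sketch along these lines should run as-is; it reuses the toy `vocab_size=9` SentencePiece setup trained in the docstrings:

```python
import io

import sentencepiece
import tensorflow as tf

import keras_nlp

# Train a toy SentencePiece model, mirroring the docstring examples above.
bytes_io = io.BytesIO()
ds = tf.data.Dataset.from_tensor_slices(["The quick brown fox jumped."])
sentencepiece.SentencePieceTrainer.train(
    sentence_iterator=ds.as_numpy_iterator(),
    model_writer=bytes_io,
    vocab_size=9,
    model_type="WORD",
    pad_id=0,
    bos_id=1,
    eos_id=2,
    unk_id=3,
    pad_piece="[PAD]",
    bos_piece="[CLS]",
    eos_piece="[SEP]",
    unk_piece="[UNK]",
)

# Tokenize, then round-trip through detokenize.
tokenizer = keras_nlp.models.DebertaV3Tokenizer(proto=bytes_io.getvalue())
token_ids = tokenizer("The quick brown fox jumped.")
print(tokenizer.detokenize(token_ids))

# The preprocessor packs the ids into the dict of dense tensors that the
# backbone docstring shows as input.
preprocessor = keras_nlp.models.DebertaV3Preprocessor(tokenizer)
features = preprocessor("The quick brown fox jumped.")
print(sorted(features.keys()))  # ['padding_mask', 'token_ids']
```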