diff --git a/keras_nlp/models/distil_bert/distil_bert_backbone.py b/keras_nlp/models/distil_bert/distil_bert_backbone.py index 9d5138ece5..63df398583 100644 --- a/keras_nlp/models/distil_bert/distil_bert_backbone.py +++ b/keras_nlp/models/distil_bert/distil_bert_backbone.py @@ -35,7 +35,7 @@ def distilbert_kernel_initializer(stddev=0.02): @keras_nlp_export("keras_nlp.models.DistilBertBackbone") class DistilBertBackbone(Backbone): - """DistilBERT encoder network. + """A DistilBERT encoder network. This network implements a bi-directional Transformer-based encoder as described in ["DistilBERT, a distilled version of BERT: smaller, faster, @@ -45,8 +45,8 @@ class DistilBertBackbone(Backbone): The default constructor gives a fully customizable, randomly initialized DistilBERT encoder with any number of layers, heads, and embedding - dimensions. To load preset architectures and weights, use the `from_preset` - constructor. + dimensions. To load preset architectures and weights, use the + `from_preset()` constructor. Disclaimer: Pre-trained models are provided on an "as is" basis, without warranties or conditions of any kind. The underlying model is provided by a @@ -76,22 +76,22 @@ class DistilBertBackbone(Backbone): ), } - # Pretrained DistilBERT encoder + # Pretrained DistilBERT encoder. model = keras_nlp.models.DistilBertBackbone.from_preset( "distil_bert_base_en_uncased" ) - output = model(input_data) + model(input_data) - # Randomly initialized DistilBERT encoder with custom config + # Randomly initialized DistilBERT encoder with custom config. model = keras_nlp.models.DistilBertBackbone( vocabulary_size=30552, - num_layers=6, - num_heads=12, - hidden_dim=768, - intermediate_dim=3072, - max_sequence_length=12, + num_layers=4, + num_heads=4, + hidden_dim=256, + intermediate_dim=512, + max_sequence_length=128, ) - output = model(input_data) + model(input_data) ``` """ diff --git a/keras_nlp/models/distil_bert/distil_bert_classifier.py b/keras_nlp/models/distil_bert/distil_bert_classifier.py index 805a1d74bd..538abf7788 100644 --- a/keras_nlp/models/distil_bert/distil_bert_classifier.py +++ b/keras_nlp/models/distil_bert/distil_bert_classifier.py @@ -36,9 +36,9 @@ class DistilBertClassifier(Task): """An end-to-end DistilBERT model for classification tasks. This model attaches a classification head to a - `keras_nlp.model.DistilBertBackbone` model, mapping from the backbone - outputs to logit output suitable for a classification task. For usage of - this model with pre-trained weights, see the `from_preset()` method. + `keras_nlp.model.DistilBertBackbone` instance, mapping from the backbone + outputs to logits suitable for a classification task. For usage of + this model with pre-trained weights, see the `from_preset()` constructor. This model can optionally be configured with a `preprocessor` layer, in which case it will automatically apply preprocessing to raw inputs during @@ -62,60 +62,8 @@ class DistilBertClassifier(Task): Examples: - Example usage. + Raw string data. ```python - preprocessed_features = { - "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64), - "padding_mask": tf.constant( - [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)), - } - labels = [0, 3] - - # Randomly initialized DistilBertBackbone - backbone = keras_nlp.models.DistilBertBackbone( - vocabulary_size=30552, - num_layers=6, - num_heads=12, - hidden_dim=768, - intermediate_dim=3072, - max_sequence_length=512 - ) - - # Create a DistilBertClassifier and fit your data. 
- classifier = keras_nlp.models.DistilBertClassifier( - backbone, - num_classes=4, - preprocessor=None, - ) - classifier.compile( - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), - ) - classifier.fit(x=preprocessed_features, y=labels, batch_size=2) - - # Access backbone programatically (e.g., to change `trainable`) - classifier.backbone.trainable = False - ``` - - Raw string inputs. - ```python - # Create a dataset with raw string features in an `(x, y)` format. - features = ["The quick brown fox jumped.", "I forgot my homework."] - labels = [0, 3] - - # Create a DistilBertClassifier and fit your data. - classifier = keras_nlp.models.DistilBertClassifier.from_preset( - "distil_bert_base_en_uncased", - num_classes=4, - ) - classifier.compile( - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), - ) - classifier.fit(x=features, y=labels, batch_size=2) - ``` - - Raw string inputs with customized preprocessing. - ```python - # Create a dataset with raw string features in an `(x, y)` format. features = ["The quick brown fox jumped.", "I forgot my homework."] labels = [0, 3] @@ -124,43 +72,72 @@ class DistilBertClassifier(Task): "distil_bert_base_en_uncased", sequence_length=128, ) - # Create a DistilBertClassifier and fit your data. + # Pretrained classifier. classifier = keras_nlp.models.DistilBertClassifier.from_preset( "distil_bert_base_en_uncased", num_classes=4, preprocessor=preprocessor, ) + classifier.fit(x=features, y=labels, batch_size=2) + + # Re-compile (e.g., with a new learning rate). classifier.compile( loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + optimizer=keras.optimizers.Adam(5e-5), + jit_compile=True, ) + # Access backbone programmatically (e.g., to change `trainable`). + classifier.backbone.trainable = False + # Fit again. classifier.fit(x=features, y=labels, batch_size=2) ``` - Preprocessed inputs. + Preprocessed integer data. ```python - # Create a dataset with preprocessed features in an `(x, y)` format. - preprocessed_features = { + features = { "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64), - "segment_ids": tf.constant( - [[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) - ), "padding_mask": tf.constant( [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) ), } labels = [0, 3] - # Create a DistilBERT classifier and fit your data. + # Pretrained classifier without preprocessing. classifier = keras_nlp.models.DistilBertClassifier.from_preset( "distil_bert_base_en_uncased", num_classes=4, preprocessor=None, ) - classifier.compile( - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), - ) - classifier.fit(x=preprocessed_features, y=labels, batch_size=2) + classifier.fit(x=features, y=labels, batch_size=2) ``` + + Custom backbone and vocabulary.
+ ```python + features = ["The quick brown fox jumped.", "I forgot my homework."] + labels = [0, 3] + vocab = ["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"] + vocab += ["The", "quick", "brown", "fox", "jumped", "."] + tokenizer = keras_nlp.models.DistilBertTokenizer( + vocabulary=vocab, + ) + preprocessor = keras_nlp.models.DistilBertPreprocessor( + tokenizer=tokenizer, + sequence_length=128, + ) + backbone = keras_nlp.models.DistilBertBackbone( + vocabulary_size=30552, + num_layers=4, + num_heads=4, + hidden_dim=256, + intermediate_dim=512, + max_sequence_length=128, + ) + classifier = keras_nlp.models.DistilBertClassifier( + backbone=backbone, + preprocessor=preprocessor, + num_classes=4, + ) + classifier.fit(x=features, y=labels, batch_size=2) + ``` """ def __init__( diff --git a/keras_nlp/models/distil_bert/distil_bert_masked_lm.py b/keras_nlp/models/distil_bert/distil_bert_masked_lm.py index 37f3edc15c..86ac5b38ec 100644 --- a/keras_nlp/models/distil_bert/distil_bert_masked_lm.py +++ b/keras_nlp/models/distil_bert/distil_bert_masked_lm.py @@ -39,7 +39,7 @@ class DistilBertMaskedLM(Task): This model will train DistilBERT on a masked language modeling task. The model will predict labels for a number of masked tokens in the input data. For usage of this model with pre-trained weights, see the - `from_preset()` method. + `from_preset()` constructor. This model can optionally be configured with a `preprocessor` layer, in which case inputs can be raw string features during `fit()`, `predict()`, @@ -60,26 +60,32 @@ class DistilBertMaskedLM(Task): Example usage: - Raw string inputs and pretrained backbone. + Raw string data. ```python - # Create a dataset with raw string features. Labels are inferred. features = ["The quick brown fox jumped.", "I forgot my homework."] - # Create a DistilBertMaskedLM with a pretrained backbone and further train - # on an MLM task. + # Pretrained language model. masked_lm = keras_nlp.models.DistilBertMaskedLM.from_preset( - "distil_bert_base_en", + "distil_bert_base_en_uncased", ) + masked_lm.fit(x=features, batch_size=2) + + # Re-compile (e.g., with a new learning rate). masked_lm.compile( loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + optimizer=keras.optimizers.Adam(5e-5), + jit_compile=True, ) + # Access backbone programmatically (e.g., to change `trainable`). + masked_lm.backbone.trainable = False + # Fit again. masked_lm.fit(x=features, batch_size=2) ``` - Preprocessed inputs and custom backbone. + Preprocessed integer data. ```python - # Create a preprocessed dataset where 0 is the mask token. - preprocessed_features = { + # Create preprocessed batch where 0 is the mask token. + features = { "token_ids": tf.constant( [[1, 2, 0, 4, 0, 6, 7, 8]] * 2, shape=(2, 8) ), @@ -91,24 +97,11 @@ class DistilBertMaskedLM(Task): # Labels are the original masked values. labels = [[3, 5]] * 2 - # Randomly initialize a DistilBERT encoder - backbone = keras_nlp.models.DistilBertBackbone( - vocabulary_size=50265, - num_layers=12, - num_heads=12, - hidden_dim=768, - intermediate_dim=3072, - max_sequence_length=12 - ) - # Create a DistilBERT masked_lm and fit the data.
- masked_lm = keras_nlp.models.DistilBertMaskedLM( - backbone, + masked_lm = keras_nlp.models.DistilBertMaskedLM.from_preset( + "distil_bert_base_en_uncased", preprocessor=None, ) - masked_lm.compile( - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), - ) - masked_lm.fit(x=preprocessed_features, y=labels, batch_size=2) + masked_lm.fit(x=features, y=labels, batch_size=2) ``` """ diff --git a/keras_nlp/models/distil_bert/distil_bert_masked_lm_preprocessor.py b/keras_nlp/models/distil_bert/distil_bert_masked_lm_preprocessor.py index 0fc647b88d..4ecc569d73 100644 --- a/keras_nlp/models/distil_bert/distil_bert_masked_lm_preprocessor.py +++ b/keras_nlp/models/distil_bert/distil_bert_masked_lm_preprocessor.py @@ -33,47 +33,89 @@ class DistilBertMaskedLMPreprocessor(DistilBertPreprocessor): `keras_nlp.models.DistilBertMaskedLM` task model. Preprocessing will occur in multiple steps. - - Tokenize any number of input segments using the `tokenizer`. - - Pack the inputs together using a `keras_nlp.layers.MultiSegmentPacker`. + 1. Tokenize any number of input segments using the `tokenizer`. + 2. Pack the inputs together using a `keras_nlp.layers.MultiSegmentPacker` with the appropriate `"[CLS]"`, `"[SEP]"` and `"[PAD]"` tokens. - - Randomly select non-special tokens to mask, controlled by + 3. Randomly select non-special tokens to mask, controlled by `mask_selection_rate`. - - Construct a `(x, y, sample_weight)` tuple suitable for training with a + 4. Construct a `(x, y, sample_weight)` tuple suitable for training with a `keras_nlp.models.DistilBertMaskedLM` task model. + Args: + tokenizer: A `keras_nlp.models.DistilBertTokenizer` instance. + sequence_length: int. The length of the packed inputs. + truncate: string. The algorithm to truncate a list of batched segments + to fit within `sequence_length`. The value can be either + `round_robin` or `waterfall`: + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. + - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. + mask_selection_rate: float. The probability an input token will be + dynamically masked. + mask_selection_length: int. The maximum number of masked tokens + in a given sample. + mask_token_rate: float. The probability that a selected token will be + replaced with the mask token. + random_token_rate: float. The probability that a selected token will be + replaced with a random token from the vocabulary. A selected token + will be left as is with probability + `1 - mask_token_rate - random_token_rate`. + + Call arguments: + x: A tensor of single string sequences, or a tuple of multiple + tensor sequences to be packed together. Inputs may be batched or + unbatched. For single sequences, raw python inputs will be converted + to tensors. For multiple sequences, pass tensors directly. + y: Label data. Should always be `None` as the layer generates labels. + sample_weight: Label weights. Should always be `None` as the layer + generates label weights. + Examples: + + Directly calling the layer on data. ```python - # Load the preprocessor from a preset. preprocessor = keras_nlp.models.DistilBertMaskedLMPreprocessor.from_preset( - "distil_bert_base_en" + "distil_bert_base_en_uncased" ) # Tokenize and mask a single sentence.
- sentence = tf.constant("The quick brown fox jumped.") - preprocessor(sentence) + preprocessor("The quick brown fox jumped.") - # Tokenize and mask a batch of sentences. - sentences = tf.constant( - ["The quick brown fox jumped.", "Call me Ishmael."] - ) - preprocessor(sentences) + # Tokenize and mask a batch of single sentences. + preprocessor(["The quick brown fox jumped.", "Call me Ishmael."]) + + # Tokenize and mask sentence pairs. + # In this case, always convert input to tensors before calling the layer. + first = tf.constant(["The quick brown fox jumped.", "Call me Ishmael."]) + second = tf.constant(["The fox tripped.", "Oh look, a whale."]) + preprocessor((first, second)) + ``` - # Tokenize and mask a dataset of sentences. - features = tf.constant( - ["The quick brown fox jumped.", "Call me Ishmael."] + Mapping with `tf.data.Dataset`. + ```python + preprocessor = keras_nlp.models.DistilBertMaskedLMPreprocessor.from_preset( + "distil_bert_base_en_uncased" ) - ds = tf.data.Dataset.from_tensor_slices((features)) + + first = tf.constant(["The quick brown fox jumped.", "Call me Ishmael."]) + second = tf.constant(["The fox tripped.", "Oh look, a whale."]) + + # Map single sentences. + ds = tf.data.Dataset.from_tensor_slices(first) ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE) - # Alternatively, you can create a preprocessor from your own vocabulary. - # The usage is exactly the same as above. - vocab = ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"] - vocab += ["The", "qu", "##ick", "br", "##own", "fox", "tripped"] - vocab += ["Call", "me", "Ish", "##mael", "."] - vocab += ["Oh", "look", "a", "whale"] - vocab += ["I", "forgot", "my", "home", "##work"] - tokenizer = keras_nlp.models.DistilBertTokenizer(vocabulary=vocab) - preprocessor = keras_nlp.models.DistilBertMaskedLMPreprocessor(tokenizer) + # Map sentence pairs. + ds = tf.data.Dataset.from_tensor_slices((first, second)) + # Watch out for tf.data's default unpacking of tuples here! + # Best to invoke the `preprocessor` directly in this case. + ds = ds.map( + lambda first, second: preprocessor(x=(first, second)), + num_parallel_calls=tf.data.AUTOTUNE, + ) ``` """ diff --git a/keras_nlp/models/distil_bert/distil_bert_preprocessor.py b/keras_nlp/models/distil_bert/distil_bert_preprocessor.py index 725279288e..d65bf7582f 100644 --- a/keras_nlp/models/distil_bert/distil_bert_preprocessor.py +++ b/keras_nlp/models/distil_bert/distil_bert_preprocessor.py @@ -35,29 +35,16 @@ class DistilBertPreprocessor(Preprocessor): This preprocessing layer will do three things: - - Tokenize any number of input segments using the `tokenizer`. - - Pack the inputs together using a `keras_nlp.layers.MultiSegmentPacker`. + 1. Tokenize any number of input segments using the `tokenizer`. + 2. Pack the inputs together using a `keras_nlp.layers.MultiSegmentPacker` with the appropriate `"[CLS]"`, `"[SEP]"` and `"[PAD]"` tokens. - - Construct a dictionary of with keys `"token_ids"` and `"padding_mask"`, + 3. Construct a dictionary with keys `"token_ids"` and `"padding_mask"`, that can be passed directly to a DistilBERT model. This layer can be used directly with `tf.data.Dataset.map` to preprocess string data in the `(x, y, sample_weight)` format used by `keras.Model.fit`. - The call method of this layer accepts three arguments, `x`, `y`, and - `sample_weight`.
`x` can be a python string or tensor representing a single - segment, a list of python strings representing a batch of single segments, - or a list of tensors representing multiple segments to be packed together. - `y` and `sample_weight` are both optional, can have any format, and will be - passed through unaltered. - - Special care should be taken when using `tf.data` to map over an unlabeled - tuple of string segments. `tf.data.Dataset.map` will unpack this tuple - directly into the call arguments of this layer, rather than forward all - argument to `x`. To handle this case, it is recommended to explicitly call - the layer, e.g. `ds.map(lambda seg1, seg2: preprocessor(x=(seg1, seg2)))`. - Args: tokenizer: A `keras_nlp.models.DistilBertTokenizer` instance. sequence_length: The length of the packed inputs. @@ -72,79 +59,61 @@ class DistilBertPreprocessor(Preprocessor): left-to-right manner and fills up the buckets until we run out of budget. It supports an arbitrary number of segments. + Call arguments: + x: A tensor of single string sequences, or a tuple of multiple + tensor sequences to be packed together. Inputs may be batched or + unbatched. For single sequences, raw python inputs will be converted + to tensors. For multiple sequences, pass tensors directly. + y: Any label data. Will be passed through unaltered. + sample_weight: Any label weight data. Will be passed through unaltered. + Examples: + + Directly calling the layer on data. ```python - # Load the preprocessor from a preset. - preprocessor = keras_nlp.models.DistilBertPreprocessor.from_preset("distil_bert_base_en_uncased") + preprocessor = keras_nlp.models.DistilBertPreprocessor.from_preset( + "distil_bert_base_en_uncased" + ) + preprocessor(["The quick brown fox jumped.", "Call me Ishmael."]) - # Tokenize and pack a single sentence. - sentence = tf.constant("The quick brown fox jumped.") - preprocessor(sentence) - # Same output. + # Custom vocabulary. + vocab = ["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"] + vocab += ["The", "quick", "brown", "fox", "jumped", "."] + tokenizer = keras_nlp.models.DistilBertTokenizer(vocabulary=vocab) + preprocessor = keras_nlp.models.DistilBertPreprocessor(tokenizer) preprocessor("The quick brown fox jumped.") + ``` - # Tokenize and a batch of single sentences. - sentences = tf.constant( - ["The quick brown fox jumped.", "Call me Ishmael."] - ) - preprocessor(sentences) - # Same output. - preprocessor( - ["The quick brown fox jumped.", "Call me Ishmael."] + Mapping with `tf.data.Dataset`. + ```python + preprocessor = keras_nlp.models.DistilBertPreprocessor.from_preset( + "distil_bert_base_en_uncased" ) - # Tokenize and pack a sentence pair. - first_sentence = tf.constant("The quick brown fox jumped.") - second_sentence = tf.constant("The fox tripped.") - preprocessor((first_sentence, second_sentence)) + first = tf.constant(["The quick brown fox jumped.", "Call me Ishmael."]) + second = tf.constant(["The fox tripped.", "Oh look, a whale."]) + label = tf.constant([1, 1]) + # Map labeled single sentences. + ds = tf.data.Dataset.from_tensor_slices((first, label)) + ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE) + - # Map a dataset to preprocess a single sentence. - features = tf.constant( - ["The quick brown fox jumped.", "Call me Ishmael."] - ) - labels = tf.constant([0, 1]) - ds = tf.data.Dataset.from_tensor_slices((features, labels)) + # Map unlabeled single sentences. 
+ ds = tf.data.Dataset.from_tensor_slices(first) ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE) - # Map a dataset to preprocess sentence pairs. - first_sentences = tf.constant( - ["The quick brown fox jumped.", "Call me Ishmael."] - ) - second_sentences = tf.constant( - ["The fox tripped.", "Oh look, a whale."] - ) - labels = tf.constant([1, 1]) - ds = tf.data.Dataset.from_tensor_slices( - ( - (first_sentences, second_sentences), labels - ) - ) + # Map labeled sentence pairs. + ds = tf.data.Dataset.from_tensor_slices(((first, second), label)) ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE) + # Map unlabeled sentence pairs. + ds = tf.data.Dataset.from_tensor_slices((first, second)) - # Map a dataset to preprocess unlabeled sentence pairs. - first_sentences = tf.constant( - ["The quick brown fox jumped.", "Call me Ishmael."] - ) - second_sentences = tf.constant( - ["The fox tripped.", "Oh look, a whale."] - ) - ds = tf.data.Dataset.from_tensor_slices((first_sentences, second_sentences)) # Watch out for tf.data's default unpacking of tuples here! # Best to invoke the `preprocessor` directly in this case. ds = ds.map( - lambda s1, s2: preprocessor(x=(s1, s2)), + lambda first, second: preprocessor(x=(first, second)), num_parallel_calls=tf.data.AUTOTUNE, ) - - # Alternatively, you can create a preprocessor from your own vocabulary. - # The usage is exactly the same as above. - vocab = ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"] - vocab += ["The", "qu", "##ick", "br", "##own", "fox", "tripped"] - vocab += ["Call", "me", "Ish", "##mael", "."] - vocab += ["Oh", "look", "a", "whale"] - vocab += ["I", "forgot", "my", "home", "##work"] - tokenizer = keras_nlp.models.DistilBertTokenizer(vocabulary=vocab) - preprocessor = keras_nlp.models.DistilBertPreprocessor(tokenizer) ``` """ diff --git a/keras_nlp/models/distil_bert/distil_bert_tokenizer.py b/keras_nlp/models/distil_bert/distil_bert_tokenizer.py index 6a706a7316..f954925ce3 100644 --- a/keras_nlp/models/distil_bert/distil_bert_tokenizer.py +++ b/keras_nlp/models/distil_bert/distil_bert_tokenizer.py @@ -50,29 +50,22 @@ class DistilBertTokenizer(WordPieceTokenizer): Examples: - Batched input. - >>> vocab = ["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"] - >>> vocab += ["The", "qu", "##ick", "brown", "fox", "."] - >>> inputs = ["The quick brown fox.", "The fox."] - >>> tokenizer = keras_nlp.models.DistilBertTokenizer(vocabulary=vocab) - >>> tokenizer(inputs) - - - Unbatched input. - >>> vocab = ["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"] - >>> vocab += ["The", "qu", "##ick", "brown", "fox", "."] - >>> inputs = "The fox." - >>> tokenizer = keras_nlp.models.DistilBertTokenizer(vocabulary=vocab) - >>> tokenizer(inputs) - - - Detokenization. - >>> vocab = ["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"] - >>> vocab += ["The", "qu", "##ick", "brown", "fox", "."] - >>> inputs = "The quick brown fox." - >>> tokenizer = keras_nlp.models.DistilBertTokenizer(vocabulary=vocab) - >>> tokenizer.detokenize(tokenizer.tokenize(inputs)).numpy().decode('utf-8') - 'The quick brown fox .' + ```python + # Unbatched input. + tokenizer = keras_nlp.models.DistilBertTokenizer.from_preset( + "distil_bert_base_en_uncased", + ) + tokenizer("The quick brown fox jumped.") + # Batched input. + tokenizer(["The quick brown fox jumped.", "The fox slept."]) + # Detokenization. + tokenizer.detokenize(tokenizer("The quick brown fox jumped.")) + # Custom vocabulary. 
+ vocab = ["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"] + vocab += ["The", "quick", "brown", "fox", "jumped", "."] + tokenizer = keras_nlp.models.DistilBertTokenizer(vocabulary=vocab) + tokenizer("The quick brown fox jumped.") + ``` """ def __init__(