24 changes: 12 additions & 12 deletions keras_nlp/models/distil_bert/distil_bert_backbone.py
@@ -35,7 +35,7 @@ def distilbert_kernel_initializer(stddev=0.02):
 
 @keras_nlp_export("keras_nlp.models.DistilBertBackbone")
 class DistilBertBackbone(Backbone):
-    """DistilBERT encoder network.
+    """A DistilBERT encoder network.
 
     This network implements a bi-directional Transformer-based encoder as
     described in ["DistilBERT, a distilled version of BERT: smaller, faster,
@@ -45,8 +45,8 @@ class DistilBertBackbone(Backbone):
 
     The default constructor gives a fully customizable, randomly initialized
     DistilBERT encoder with any number of layers, heads, and embedding
-    dimensions. To load preset architectures and weights, use the `from_preset`
-    constructor.
+    dimensions. To load preset architectures and weights, use the
+    `from_preset()` constructor.
 
     Disclaimer: Pre-trained models are provided on an "as is" basis, without
     warranties or conditions of any kind. The underlying model is provided by a
@@ -76,22 +76,22 @@ class DistilBertBackbone(Backbone):
         ),
     }
 
-    # Pretrained DistilBERT encoder
+    # Pretrained DistilBERT encoder.
     model = keras_nlp.models.DistilBertBackbone.from_preset(
         "distil_bert_base_en_uncased"
     )
-    output = model(input_data)
+    model(input_data)
 
-    # Randomly initialized DistilBERT encoder with custom config
+    # Randomly initialized DistilBERT encoder with custom config.
     model = keras_nlp.models.DistilBertBackbone(
         vocabulary_size=30552,
-        num_layers=6,
-        num_heads=12,
-        hidden_dim=768,
-        intermediate_dim=3072,
-        max_sequence_length=12,
+        num_layers=4,
+        num_heads=4,
+        hidden_dim=256,
+        intermediate_dim=512,
+        max_sequence_length=128,
     )
-    output = model(input_data)
+    model(input_data)
     ```
     """

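Note: the head of the `input_data` example above is collapsed in the diff; only its closing lines are visible. A minimal runnable sketch of the updated backbone example, assuming the collapsed portion builds DistilBERT's two standard inputs (`token_ids` and `padding_mask`; unlike BERT there are no segment ids):

```python
import tensorflow as tf
import keras_nlp

# Assumed reconstruction of the collapsed `input_data` dict.
input_data = {
    "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64),
    "padding_mask": tf.constant(
        [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)
    ),
}

# Randomly initialized encoder with the small config from the updated
# docstring above.
model = keras_nlp.models.DistilBertBackbone(
    vocabulary_size=30552,
    num_layers=4,
    num_heads=4,
    hidden_dim=256,
    intermediate_dim=512,
    max_sequence_length=128,
)
# Returns the final hidden states, shape `(2, 12, 256)` here.
sequence_output = model(input_data)
```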
113 changes: 45 additions & 68 deletions keras_nlp/models/distil_bert/distil_bert_classifier.py
@@ -36,9 +36,9 @@ class DistilBertClassifier(Task):
     """An end-to-end DistilBERT model for classification tasks.
 
     This model attaches a classification head to a
-    `keras_nlp.model.DistilBertBackbone` model, mapping from the backbone
-    outputs to logit output suitable for a classification task. For usage of
-    this model with pre-trained weights, see the `from_preset()` method.
+    `keras_nlp.model.DistilBertBackbone` instance, mapping from the backbone
+    outputs to logits suitable for a classification task. For usage of
+    this model with pre-trained weights, see the `from_preset()` constructor.
 
     This model can optionally be configured with a `preprocessor` layer, in
     which case it will automatically apply preprocessing to raw inputs during
@@ -62,60 +62,8 @@ class DistilBertClassifier(Task):
 
     Examples:
 
-    Example usage.
+    Raw string data.
     ```python
-    preprocessed_features = {
-        "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64),
-        "padding_mask": tf.constant(
-            [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)),
-    }
-    labels = [0, 3]
-
-    # Randomly initialized DistilBertBackbone
-    backbone = keras_nlp.models.DistilBertBackbone(
-        vocabulary_size=30552,
-        num_layers=6,
-        num_heads=12,
-        hidden_dim=768,
-        intermediate_dim=3072,
-        max_sequence_length=512
-    )
-
-    # Create a DistilBertClassifier and fit your data.
-    classifier = keras_nlp.models.DistilBertClassifier(
-        backbone,
-        num_classes=4,
-        preprocessor=None,
-    )
-    classifier.compile(
-        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
-    )
-    classifier.fit(x=preprocessed_features, y=labels, batch_size=2)
-
-    # Access backbone programatically (e.g., to change `trainable`)
-    classifier.backbone.trainable = False
-    ```
-
-    Raw string inputs.
-    ```python
-    # Create a dataset with raw string features in an `(x, y)` format.
     features = ["The quick brown fox jumped.", "I forgot my homework."]
     labels = [0, 3]
 
-    # Create a DistilBertClassifier and fit your data.
-    classifier = keras_nlp.models.DistilBertClassifier.from_preset(
-        "distil_bert_base_en_uncased",
-        num_classes=4,
-    )
-    classifier.compile(
-        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
-    )
-    classifier.fit(x=features, y=labels, batch_size=2)
-    ```
-
-    Raw string inputs with customized preprocessing.
-    ```python
-    # Create a dataset with raw string features in an `(x, y)` format.
-    features = ["The quick brown fox jumped.", "I forgot my homework."]
-    labels = [0, 3]
-
@@ -124,43 +72,72 @@ class DistilBertClassifier(Task):
         "distil_bert_base_en_uncased",
         sequence_length=128,
     )
-    # Create a DistilBertClassifier and fit your data.
+    # Pretrained classifier.
    classifier = keras_nlp.models.DistilBertClassifier.from_preset(
         "distil_bert_base_en_uncased",
         num_classes=4,
         preprocessor=preprocessor,
     )
+    classifier.fit(x=features, y=labels, batch_size=2)
+
+    # Re-compile (e.g., with a new learning rate)
     classifier.compile(
         loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+        optimizer=keras.optimizers.Adam(5e-5),
+        jit_compile=True,
     )
+    # Access backbone programmatically (e.g., to change `trainable`).
+    classifier.backbone.trainable = False
+    # Fit again.
     classifier.fit(x=features, y=labels, batch_size=2)
     ```
 
-    Preprocessed inputs.
+    Preprocessed integer data.
     ```python
-    # Create a dataset with preprocessed features in an `(x, y)` format.
-    preprocessed_features = {
+    features = {
         "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64),
-        "segment_ids": tf.constant(
-            [[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)
-        ),
         "padding_mask": tf.constant(
             [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)
         ),
     }
     labels = [0, 3]
 
-    # Create a DistilBERT classifier and fit your data.
+    # Pretrained classifier without preprocessing.
     classifier = keras_nlp.models.DistilBertClassifier.from_preset(
         "distil_bert_base_en_uncased",
         num_classes=4,
         preprocessor=None,
     )
-    classifier.compile(
-        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
-    )
-    classifier.fit(x=preprocessed_features, y=labels, batch_size=2)
+    classifier.fit(x=features, y=labels, batch_size=2)
     ```
 
+    Custom backbone and vocabulary.
+    ```python
+    features = ["The quick brown fox jumped.", "I forgot my homework."]
+    labels = [0, 3]
+    vocab = ["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"]
+    vocab += ["The", "quick", "brown", "fox", "jumped", "."]
+    tokenizer = keras_nlp.models.DistilBertTokenizer(
+        vocabulary=vocab,
+    )
+    preprocessor = keras_nlp.models.DistilBertPreprocessor(
+        tokenizer=tokenizer,
+        sequence_length=128,
+    )
+    backbone = keras_nlp.models.DistilBertBackbone(
+        vocabulary_size=30552,
+        num_layers=4,
+        num_heads=4,
+        hidden_dim=256,
+        intermediate_dim=512,
+        max_sequence_length=128,
+    )
+    classifier = keras_nlp.models.DistilBertClassifier(
+        backbone=backbone,
+        preprocessor=preprocessor,
+        num_classes=4,
+    )
+    classifier.fit(x=features, y=labels, batch_size=2)
+    ```
     """
 
     def __init__(
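Note: the updated docstring stresses that the head maps backbone outputs to logits, but none of the examples show turning those logits into class predictions. A minimal sketch of that last step, assuming the same `distil_bert_base_en_uncased` preset used above (`tf.nn.softmax` and `tf.argmax` are standard TensorFlow, not part of this diff):

```python
import tensorflow as tf
import keras_nlp

features = ["The quick brown fox jumped.", "I forgot my homework."]

# Pretrained classifier with the default attached preprocessor, so raw
# strings can be passed directly.
classifier = keras_nlp.models.DistilBertClassifier.from_preset(
    "distil_bert_base_en_uncased",
    num_classes=4,
)

# The head returns logits, so map them to probabilities and class ids
# explicitly.
logits = classifier.predict(features, batch_size=2)
probabilities = tf.nn.softmax(logits, axis=-1)
predicted_classes = tf.argmax(probabilities, axis=-1)  # shape `(2,)`
```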
43 changes: 18 additions & 25 deletions keras_nlp/models/distil_bert/distil_bert_masked_lm.py
@@ -39,7 +39,7 @@ class DistilBertMaskedLM(Task):
     This model will train DistilBERT on a masked language modeling task.
     The model will predict labels for a number of masked tokens in the
     input data. For usage of this model with pre-trained weights, see the
-    `from_preset()` method.
+    `from_preset()` constructor.
 
     This model can optionally be configured with a `preprocessor` layer, in
     which case inputs can be raw string features during `fit()`, `predict()`,
@@ -60,26 +60,32 @@ class DistilBertMaskedLM(Task):
 
     Example usage:
 
-    Raw string inputs and pretrained backbone.
+    Raw string data.

[Review comment from a member on the line above: "This still needs some updates to match the new style."]

     ```python
-    # Create a dataset with raw string features. Labels are inferred.
     features = ["The quick brown fox jumped.", "I forgot my homework."]
 
-    # Create a DistilBertMaskedLM with a pretrained backbone and further train
-    # on an MLM task.
+    # Pretrained language model.
     masked_lm = keras_nlp.models.DistilBertMaskedLM.from_preset(
-        "distil_bert_base_en",
+        "distil_bert_base_en_uncased",
     )
     masked_lm.fit(x=features, batch_size=2)
+
+    # Re-compile (e.g., with a new learning rate).
+    masked_lm.compile(
+        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+        optimizer=keras.optimizers.Adam(5e-5),
+        jit_compile=True,
+    )
+    # Access backbone programmatically (e.g., to change `trainable`).
+    masked_lm.backbone.trainable = False
+    # Fit again.
+    masked_lm.fit(x=features, batch_size=2)
     ```
 
-    Preprocessed inputs and custom backbone.
+    Preprocessed integer data.
     ```python
-    # Create a preprocessed dataset where 0 is the mask token.
-    preprocessed_features = {
+    # Create preprocessed batch where 0 is the mask token.
+    features = {
         "token_ids": tf.constant(
             [[1, 2, 0, 4, 0, 6, 7, 8]] * 2, shape=(2, 8)
         ),
@@ -91,24 +97,11 @@ class DistilBertMaskedLM(Task):
     # Labels are the original masked values.
     labels = [[3, 5]] * 2
 
-    # Randomly initialize a DistilBERT encoder
-    backbone = keras_nlp.models.DistilBertBackbone(
-        vocabulary_size=50265,
-        num_layers=12,
-        num_heads=12,
-        hidden_dim=768,
-        intermediate_dim=3072,
-        max_sequence_length=12
-    )
-    # Create a DistilBERT masked_lm and fit the data.
-    masked_lm = keras_nlp.models.DistilBertMaskedLM(
-        backbone,
+    masked_lm = keras_nlp.models.DistilBertMaskedLM.from_preset(
+        "distil_bert_base_en_uncased",
         preprocessor=None,
     )
-    masked_lm.compile(
-        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
-    )
-    masked_lm.fit(x=preprocessed_features, y=labels, batch_size=2)
+    masked_lm.fit(x=features, y=labels, batch_size=2)
     ```
     """
 
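Note: the middle of the preprocessed `features` dict above is collapsed in the diff. A minimal sketch of the full batch, assuming the `mask_positions` and `padding_mask` entries that the masked LM preprocessor produces (those two keys are inferred from context, not visible in this diff):

```python
import tensorflow as tf
import keras_nlp

# Token id 0 stands in for the mask token; the original ids 3 and 5 at
# positions 2 and 4 were masked out, which is why the labels are [3, 5].
features = {
    "token_ids": tf.constant([[1, 2, 0, 4, 0, 6, 7, 8]] * 2, shape=(2, 8)),
    "mask_positions": tf.constant([[2, 4]] * 2, shape=(2, 2)),
    "padding_mask": tf.constant([[1] * 8] * 2, shape=(2, 8)),
}
# Labels are the original masked values.
labels = tf.constant([[3, 5]] * 2, shape=(2, 2))

# Pretrained model with preprocessing disabled, matching the diff above.
masked_lm = keras_nlp.models.DistilBertMaskedLM.from_preset(
    "distil_bert_base_en_uncased",
    preprocessor=None,
)
masked_lm.fit(x=features, y=labels, batch_size=2)
```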