26 changes: 13 additions & 13 deletions keras_nlp/models/bert/bert_backbone.py
@@ -33,17 +33,17 @@ def bert_kernel_initializer(stddev=0.02):

@keras_nlp_export("keras_nlp.models.BertBackbone")
class BertBackbone(Backbone):
"""BERT encoder network.
"""A BERT encoder network.

This class implements a bi-directional Transformer-based encoder as
described in ["BERT: Pre-training of Deep Bidirectional Transformers for
Language Understanding"](https://arxiv.org/abs/1810.04805). It includes the
embedding lookups and transformer layers, but not the masked language model
or next sentence prediction heads.

The default constructor gives a fully customizable, randomly initialized BERT
encoder with any number of layers, heads, and embedding dimensions. To load
preset architectures and weights, use the `from_preset` constructor.
The default constructor gives a fully customizable, randomly initialized
BERT encoder with any number of layers, heads, and embedding dimensions. To
load preset architectures and weights, use the `from_preset()` constructor.

Disclaimer: Pre-trained models are provided on an "as is" basis, without
warranties or conditions of any kind.
@@ -76,20 +76,20 @@ class BertBackbone(Backbone):
),
}

# Pretrained BERT encoder
# Pretrained BERT encoder.
model = keras_nlp.models.BertBackbone.from_preset("bert_base_en_uncased")
output = model(input_data)
model(input_data)

# Randomly initialized BERT encoder with a custom config
# Randomly initialized BERT encoder with a custom config.
model = keras_nlp.models.BertBackbone(
vocabulary_size=30552,
num_layers=12,
num_heads=12,
hidden_dim=768,
intermediate_dim=3072,
max_sequence_length=12,
num_layers=4,
num_heads=4,
hidden_dim=256,
intermediate_dim=512,
max_sequence_length=128,
)
output = model(input_data)
model(input_data)
```
"""

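For review context, a minimal end-to-end sketch of calling the backbone, reconstructing the `input_data` dict that the hunk above elides; the key names mirror the docstring, while the preset name and the output keys (`sequence_output`, `pooled_output`) noted in the comments are assumptions based on the library's usual conventions rather than part of this diff.

```python
import tensorflow as tf
import keras_nlp

# Batch of two sequences, length 12, in the three-input format shown above.
input_data = {
    "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64),
    "segment_ids": tf.constant(
        [[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)
    ),
    "padding_mask": tf.constant(
        [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)
    ),
}

# Load preset weights and run a forward pass.
model = keras_nlp.models.BertBackbone.from_preset("bert_base_en_uncased")
outputs = model(input_data)

# The backbone returns a dict of dense features (shapes shown for bert_base).
print(outputs["sequence_output"].shape)  # (2, 12, 768)
print(outputs["pooled_output"].shape)    # (2, 768)
```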
123 changes: 47 additions & 76 deletions keras_nlp/models/bert/bert_classifier.py
@@ -30,12 +30,12 @@

@keras_nlp_export("keras_nlp.models.BertClassifier")
class BertClassifier(Task):
"""An end-to-end BERT model for classification tasks
"""An end-to-end BERT model for classification tasks.

This model attaches a classification head to a `keras_nlp.model.BertBackbone`
backbone, mapping from the backbone outputs to logit output suitable for
a classification task. For usage of this model with pre-trained weights, see
the `from_preset()` method.
This model attaches a classification head to a
`keras_nlp.model.BertBackbone` instance, mapping from the backbone outputs
to logits suitable for a classification task. For usage of this model with
pre-trained weights, use the `from_preset()` constructor.

This model can optionally be configured with a `preprocessor` layer, in
which case it will automatically apply preprocessing to raw inputs during
@@ -56,90 +56,34 @@ class BertClassifier(Task):

Examples:

Example usage.
Raw string data.
```python
# Define the preprocessed inputs.
preprocessed_features = {
"token_ids": tf.ones(shape=(2, 12), dtype=tf.int64),
"segment_ids": tf.constant(
[[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)
),
"padding_mask": tf.constant(
[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)
),
}
labels = [0, 3]

# Randomly initialize a BERT backbone.
backbone = keras_nlp.models.BertBackbone(
vocabulary_size=30552,
num_layers=12,
num_heads=12,
hidden_dim=768,
intermediate_dim=3072,
max_sequence_length=12
)

# Create a BERT classifier and fit your data.
classifier = keras_nlp.models.BertClassifier(
backbone,
num_classes=4,
preprocessor=None,
)
classifier.compile(
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)
classifier.fit(x=preprocessed_features, y=labels, batch_size=2)

# Access backbone programmatically (e.g., to change `trainable`)
classifier.backbone.trainable = False
```

Raw string inputs.
```python
# Create a dataset with raw string features in an `(x, y)` format.
features = ["The quick brown fox jumped.", "I forgot my homework."]
labels = [0, 3]

# Create a BertClassifier and fit your data.
# Pretrained classifier.
classifier = keras_nlp.models.BertClassifier.from_preset(
"bert_base_en_uncased",
num_classes=4,
)
classifier.compile(
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)
classifier.fit(x=features, y=labels, batch_size=2)
```

Raw string inputs with customized preprocessing.
```python
# Create a dataset with raw string features in an `(x, y)` format.
features = ["The quick brown fox jumped.", "I forgot my homework."]
labels = [0, 3]

# Use a shorter sequence length.
preprocessor = keras_nlp.models.BertPreprocessor.from_preset(
"bert_base_en_uncased",
sequence_length=128,
)
classifier.predict(x=features, batch_size=2)

# Create a BertClassifier and fit your data.
classifier = keras_nlp.models.BertClassifier.from_preset(
"bert_base_en_uncased",
num_classes=4,
preprocessor=preprocessor,
)
# Re-compile (e.g., with a new learning rate).
classifier.compile(
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
optimizer=keras.optimizers.Adam(5e-5),
jit_compile=True,
)
# Access backbone programmatically (e.g., to change `trainable`).
classifier.backbone.trainable = False
# Fit again.
classifier.fit(x=features, y=labels, batch_size=2)
```

Preprocessed inputs.
Preprocessed integer data.
```python
# Create a dataset with preprocessed features in an `(x, y)` format.
preprocessed_features = {
features = {
"token_ids": tf.ones(shape=(2, 12), dtype=tf.int64),
"segment_ids": tf.constant(
[[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)
@@ -150,16 +94,43 @@ class BertClassifier(Task):
}
labels = [0, 3]

# Create a BERT classifier and fit your data.
# Pretrained classifier without preprocessing.
classifier = keras_nlp.models.BertClassifier.from_preset(
"bert_base_en_uncased",
num_classes=4,
preprocessor=None,
)
classifier.compile(
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
classifier.fit(x=features, y=labels, batch_size=2)
```

Custom backbone and vocabulary.
```python
features = ["The quick brown fox jumped.", "I forgot my homework."]
labels = [0, 3]

vocab = ["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"]
vocab += ["The", "quick", "brown", "fox", "jumped", "."]
tokenizer = keras_nlp.models.BertTokenizer(
vocabulary=vocab,
)
preprocessor = keras_nlp.models.BertPreprocessor(
tokenizer=tokenizer,
sequence_length=128,
)
classifier.fit(x=preprocessed_features, y=labels, batch_size=2)
backbone = keras_nlp.models.BertBackbone(
vocabulary_size=30552,
num_layers=4,
num_heads=4,
hidden_dim=256,
intermediate_dim=512,
max_sequence_length=128,
)
classifier = keras_nlp.models.BertClassifier(
backbone=backbone,
preprocessor=preprocessor,
num_classes=4,
)
classifier.fit(x=features, y=labels, batch_size=2)
```
"""

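Complementing the docstring's training examples, a short inference sketch showing how to turn the classifier's logit output into class predictions; it assumes the preset attaches its default preprocessor, and the classification head here is untrained, so the predictions are only illustrative.

```python
import tensorflow as tf
import keras_nlp

features = ["The quick brown fox jumped.", "I forgot my homework."]

# Pretrained encoder with a freshly initialized 4-class head; fine-tune with
# `fit()` before relying on its outputs.
classifier = keras_nlp.models.BertClassifier.from_preset(
    "bert_base_en_uncased",
    num_classes=4,
)

# `predict()` returns logits with shape (num_samples, num_classes).
logits = classifier.predict(features, batch_size=2)

# Convert logits to probabilities and predicted class ids.
probs = tf.nn.softmax(logits, axis=-1)
class_ids = tf.argmax(probs, axis=-1)
```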
43 changes: 18 additions & 25 deletions keras_nlp/models/bert/bert_masked_lm.py
@@ -37,7 +37,7 @@ class BertMaskedLM(Task):
This model will train BERT on a masked language modeling task.
The model will predict labels for a number of masked tokens in the
input data. For usage of this model with pre-trained weights, see the
`from_preset()` method.
`from_preset()` constructor.

This model can optionally be configured with a `preprocessor` layer, in
which case inputs can be raw string features during `fit()`, `predict()`,
@@ -56,26 +56,32 @@ class BertMaskedLM(Task):

Example usage:

Raw string inputs and pretrained backbone.
Raw string data.
```python
# Create a dataset with raw string features. Labels are inferred.
features = ["The quick brown fox jumped.", "I forgot my homework."]

# Create a BertMaskedLM with a pretrained backbone and further train
# on an MLM task.
# Pretrained language model.
masked_lm = keras_nlp.models.BertMaskedLM.from_preset(
"bert_base_en",
"bert_base_en_uncased",
)
masked_lm.fit(x=features, batch_size=2)

# Re-compile (e.g., with a new learning rate).
masked_lm.compile(
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
optimizer=keras.optimizers.Adam(5e-5),
jit_compile=True,
)
# Access backbone programmatically (e.g., to change `trainable`).
masked_lm.backbone.trainable = False
# Fit again.
masked_lm.fit(x=features, batch_size=2)
```

Preprocessed inputs and custom backbone.
Preprocessed integer data.
```python
# Create a preprocessed dataset where 0 is the mask token.
preprocessed_features = {
# Create preprocessed batch where 0 is the mask token.
features = {
"token_ids": tf.constant(
[[1, 2, 0, 4, 0, 6, 7, 8]] * 2, shape=(2, 8)
),
@@ -88,24 +94,11 @@
# Labels are the original masked values.
labels = [[3, 5]] * 2

# Randomly initialize a BERT encoder
backbone = keras_nlp.models.BertBackbone(
vocabulary_size=50265,
num_layers=12,
num_heads=12,
hidden_dim=768,
intermediate_dim=3072,
max_sequence_length=12
)
# Create a BERT masked LM model and fit the data.
masked_lm = keras_nlp.models.BertMaskedLM(
backbone,
masked_lm = keras_nlp.models.BertMaskedLM.from_preset(
"bert_base_en_uncased",
preprocessor=None,
)
masked_lm.compile(
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)
masked_lm.fit(x=preprocessed_features, y=labels, batch_size=2)
masked_lm.fit(x=features, y=labels, batch_size=2)
```
"""

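To round out the masked language modeling examples, a sketch of running inference on a preprocessed batch; the extra feature keys (`padding_mask`, `segment_ids`, `mask_positions`) and the logit shape noted in the comments are assumptions about the task's usual input/output format rather than part of this diff.

```python
import tensorflow as tf
import keras_nlp

# Preprocessed batch where 0 is the mask token, mirroring the format above.
features = {
    "token_ids": tf.constant([[1, 2, 0, 4, 0, 6, 7, 8]] * 2, shape=(2, 8)),
    "padding_mask": tf.constant([[1, 1, 1, 1, 1, 1, 1, 1]] * 2, shape=(2, 8)),
    "segment_ids": tf.constant([[0, 0, 0, 0, 0, 0, 0, 0]] * 2, shape=(2, 8)),
    "mask_positions": tf.constant([[2, 4]] * 2, shape=(2, 2)),
}

masked_lm = keras_nlp.models.BertMaskedLM.from_preset(
    "bert_base_en_uncased",
    preprocessor=None,
)

# Logits over the vocabulary at each masked position, assumed shape
# (batch_size, num_mask_positions, vocabulary_size).
logits = masked_lm.predict(features)

# The most likely replacement token id for each masked position.
predicted_token_ids = tf.argmax(logits, axis=-1)
```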