Improve docstring of Albert #862
@@ -31,10 +31,10 @@
 class AlbertClassifier(Task):
     """An end-to-end ALBERT model for classification tasks

-    This model attaches a classification head to a `keras_nlp.model.AlbertBackbone`
-    backbone, mapping from the backbone outputs to logit output suitable for
-    a classification task. For usage of this model with pre-trained weights, see
-    the `from_preset()` method.
+    This model attaches a classification head to a
+    `keras_nlp.model.AlbertBackbone` instance, mapping from the backbone outputs to logit output suitable for
+    a classification task. For usage of this model with pre-trained weights, use
+    the `from_preset()` constructor.

     This model can optionally be configured with a `preprocessor` layer, in
     which case it will automatically apply preprocessing to raw inputs during
@@ -55,49 +55,8 @@ class AlbertClassifier(Task):

     Examples:

-    Example usage.
+    Raw string data.
     ```python
-    # Define the preprocessed inputs.
-    preprocessed_features = {
-        "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64),
-        "segment_ids": tf.constant(
-            [[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)
-        ),
-        "padding_mask": tf.constant(
-            [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)
-        ),
-    }
-    labels = [0, 3]
-
-    # Randomly initialize a ALBERT backbone.
-    backbone = AlbertBackbone(
-        vocabulary_size=1000,
-        num_layers=2,
-        num_heads=2,
-        embedding_dim=8,
-        hidden_dim=64,
-        intermediate_dim=128,
-        max_sequence_length=128,
-        name="encoder",
-    )
-
-    # Create a ALBERT classifier and fit your data.
-    classifier = keras_nlp.models.AlbertClassifier(
-        backbone,
-        num_classes=4,
-        preprocessor=None,
-    )
-    classifier.compile(
-        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
-    )
-    classifier.fit(x=preprocessed_features, y=labels, batch_size=2)
-
-    # Access backbone programatically (e.g., to change `trainable`)
-    classifier.backbone.trainable = False
-
-    Raw string inputs with customized preprocessing.
-    ```python
-    # Create a dataset with raw string features in an `(x, y)` format.
     features = ["The quick brown fox jumped.", "I forgot my homework."]
     labels = [0, 3]

@@ -107,19 +66,25 @@ class AlbertClassifier(Task):
         sequence_length=128,
     )

-    # Create a AlbertClassifier and fit your data.
+    # Pretrained classifier.
     classifier = keras_nlp.models.AlbertClassifier.from_preset(
         "albert_base_en_uncased",
         num_classes=4,
         preprocessor=preprocessor,
     )
     classifier.fit(x=features, y=labels, batch_size=2)
+    classifier.predict(x=features, batch_size=2)

+    # Re-compile (e.g., with a new learning rate).

Review comment: I think you are missing in the bert classifier example where we run fit before "re compiling".

     classifier.compile(
         loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+        optimizer=keras.optimizers.Adam(5e-5),
+        jit_compile=True,
     )
+    classifier.fit(x=features, y=labels, batch_size=2)
     ```

-    Preprocessed inputs.
+    Preprocessed integer data.
     ```python
     # Create a dataset with preprocessed features in an `(x, y)` format.
     preprocessed_features = {
@@ -133,17 +98,42 @@ class AlbertClassifier(Task):
     }
     labels = [0, 3]

-    # Create a ALBERT classifier and fit your data.
+    # Pretrained classifier without preprocessing.
     classifier = keras_nlp.models.AlbertClassifier.from_preset(
         "albert_base_en_uncased",
         num_classes=4,
         preprocessor=None,
     )
     classifier.compile(
         loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
     )
     classifier.fit(x=preprocessed_features, y=labels, batch_size=2)
     ```

+    Custom backbone and vocabulary.
+    ```python
+    features = ["The quick brown fox jumped.", "I forgot my homework."]
+    labels = [0, 3]
+    vocab = ["[CLS]", "[SEP]","[UNK]", "[PAD]", "[MASK]"]
+    vocab += ["The", "quick", "brown", "fox", "jumped", "."]
+    tokenizer = keras_nlp.models.AlbertTokenizer(
+        vocabulary=vocab,
+    )
+    preprocessor = keras_nlp.models.AlbertPreprocessor(
+        tokenizer=tokenizer,
+        sequence_length=128,
+    )
+    backbone = keras_nlp.models.AlbertBackbone(
+        vocabulary_size=30552,
+        num_layers=4,
+        num_heads=4,
+        hidden_dim=256,
+        intermediate_dim=512,
+        max_sequence_length=128,
+    )
+    classifier = keras_nlp.models.AlbertClassifier(
+        backbone=backbone,
+        preprocessor=preprocessor,
+        num_classes=4,
+    )
+    classifier.fit(x=features, y=labels, batch_size=2)
     """

     def __init__(
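A note for readers following the review thread above: the comment on the re-compile example asks for a `fit` call before re-compiling, mirroring the BERT classifier docstring. A minimal sketch of that flow, assuming the same `albert_base_en_uncased` preset and toy data used elsewhere in this diff (illustrative only, not part of the PR):

```python
import keras_nlp
from tensorflow import keras

features = ["The quick brown fox jumped.", "I forgot my homework."]
labels = [0, 3]

# Pretrained classifier; preset name taken from the examples in this diff.
classifier = keras_nlp.models.AlbertClassifier.from_preset(
    "albert_base_en_uncased",
    num_classes=4,
)

# Fit once with the task's default compilation.
classifier.fit(x=features, y=labels, batch_size=2)

# Re-compile (e.g., with a new learning rate) and fit again.
classifier.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(5e-5),
    jit_compile=True,
)
classifier.fit(x=features, y=labels, batch_size=2)
```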
@@ -31,14 +31,14 @@ class AlbertMaskedLMPreprocessor(AlbertPreprocessor):
     `keras_nlp.models.AlbertMaskedLM` task model. Preprocessing will occur in
     multiple steps.

-    - Tokenize any number of input segments using the `tokenizer`.
-    - Pack the inputs together with the appropriate `"<s>"`, `"</s>"` and
+    1. Tokenize any number of input segments using the `tokenizer`.
+    2. Pack the inputs together with the appropriate `"<s>"`, `"</s>"` and
       `"<pad>"` tokens, i.e., adding a single `"<s>"` at the start of the
       entire sequence, `"</s></s>"` between each segment,
       and a `"</s>"` at the end of the entire sequence.
-    - Randomly select non-special tokens to mask, controlled by
+    3. Randomly select non-special tokens to mask, controlled by
       `mask_selection_rate`.
-    - Construct a `(x, y, sample_weight)` tuple suitable for training with a
+    4. Construct a `(x, y, sample_weight)` tuple suitable for training with a
       `keras_nlp.models.AlbertMaskedLM` task model.

     Args:
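As an illustration of the four numbered steps above, a minimal usage sketch. The preset name is taken from the examples in this diff; the exact keys of the returned feature dictionary are an assumption, not something this PR spells out:

```python
import keras_nlp

preprocessor = keras_nlp.models.AlbertMaskedLMPreprocessor.from_preset(
    "albert_base_en_uncased"
)

# Steps 1-4 in one call: tokenize, pack with special tokens, randomly mask,
# and assemble the `(x, y, sample_weight)` training tuple.
x, y, sample_weight = preprocessor("The quick brown fox jumped.")

# `x` is a dict of model inputs (token ids, padding mask, mask positions, ...),
# `y` holds the ids of the masked tokens, and `sample_weight` their weights.
print(sorted(x.keys()))
```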
@@ -68,6 +68,15 @@ class AlbertMaskedLMPreprocessor(AlbertPreprocessor):
             left-to-right manner and fills up the buckets until we run
             out of budget. It supports an arbitrary number of segments.

+    Call arguments:

Review comment: remove indent (should align with Args)
Review comment (follow-up): This comment has not been addressed, please fix it, thanks!

+        x: A tensor of single string sequences, or a tuple of multiple
+            tensor sequences to be packed together. Inputs may be batched or
+            unbatched. For single sequences, raw python inputs will be converted
+            to tensors. For multiple sequences, pass tensors directly.
+        y: Label data. Should always be `None` as the layer generates labels.
+        sample_weight: Label weights. Should always be `None` as the layer
+            generates label weights.

     Examples:
     ```python
     # Load the preprocessor from a preset.
@@ -76,54 +85,37 @@ class AlbertMaskedLMPreprocessor(AlbertPreprocessor):
     )

     # Tokenize and mask a single sentence.
-    sentence = tf.constant("The quick brown fox jumped.")
-    preprocessor(sentence)
+    preprocessor("The quick brown fox jumped.")

     # Tokenize and mask a batch of sentences.
-    sentences = tf.constant(
-        ["The quick brown fox jumped.", "Call me Ishmael."]
-    )
-    preprocessor(sentences)
+    preprocessor("The quick brown fox jumped.", "Call me Ishmael.")

Review comment: inner args should be in a list
Review comment (follow-up): this one too

-    # Tokenize and mask a dataset of sentences.
-    features = tf.constant(
-        ["The quick brown fox jumped.", "Call me Ishmael."]
-    )
-    ds = tf.data.Dataset.from_tensor_slices((features))
-    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)

-    # Alternatively, you can create a preprocessor from your own vocabulary.
-    vocab_data = tf.data.Dataset.from_tensor_slices(
-        ["the quick brown fox", "the earth is round"]
-    )
+    # Tokenize and mask sentence pairs.
+    # In this case, always convert input to tensors before calling the layer.
+    first = tf.constant(["The quick brown fox jumped.", "Call me Ishmael."])
+    second = tf.constant(["The fox tripped.", "Oh look, a whale."])
+    preprocessor((first, second))
+    ```

-    # Creating sentencepiece tokenizer for ALBERT LM preprocessor
-    bytes_io = io.BytesIO()
-
-    sentencepiece.SentencePieceTrainer.train(
-        sentence_iterator=vocab_data.as_numpy_iterator(),
-        model_writer=bytes_io,
-        vocab_size=12,
-        model_type="WORD",
-        pad_id=0,
-        unk_id=1,
-        bos_id=2,
-        eos_id=3,
-        pad_piece="<pad>",
-        unk_piece="<unk>",
-        bos_piece="[CLS]",
-        eos_piece="[SEP]",
-        user_defined_symbols="[MASK]"
+    Mapping with `tf.data.Dataset`.
+    ```python
+    preprocessor = keras_nlp.models.BertMaskedLMPreprocessor.from_preset(
+        "albert_base_en_uncased"
     )

-    proto = bytes_io.getvalue()
-
-    tokenizer = keras_nlp.models.AlbertTokenizer(proto=proto)
-
-    preprocessor = keras_nlp.models.AlbertMaskedLMPreprocessor(
-        tokenizer=tokenizer
+    first = tf.constant(["The quick brown fox jumped.", "Call me Ishmael."])
+    second = tf.constant(["The fox tripped.", "Oh look, a whale."])
+    # Map single sentences.

Review comment: take a look at the source example, but you removed all the empty newlines, please add them back

+    ds = tf.data.Dataset.from_tensor_slices(first)
+    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
+    # Map sentence pairs.
+    ds = tf.data.Dataset.from_tensor_slices((first, second))
+    # Watch out for tf.data's default unpacking of tuples here!
+    # Best to invoke the `preprocessor` directly in this case.
+    ds = ds.map(
+        lambda first, second: preprocessor(x=(first, second)),
+        num_parallel_calls=tf.data.AUTOTUNE,
+    )

     ```
     """
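Regarding the review note above that the inner arguments "should be in a list": a minimal sketch of the batched call form that comment appears to ask for, again assuming the `albert_base_en_uncased` preset used in the surrounding examples (illustrative only, not part of the PR):

```python
import keras_nlp

preprocessor = keras_nlp.models.AlbertMaskedLMPreprocessor.from_preset(
    "albert_base_en_uncased"
)

# A batch of sentences is passed as one list (a single positional argument),
# rather than as separate positional arguments.
x, y, sample_weight = preprocessor(
    ["The quick brown fox jumped.", "Call me Ishmael."]
)
```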
Review comment: Let's remove the linebreak here, it's okay to exceed the limit if it's a hyperlink.