diff --git a/keras_nlp/src/layers/preprocessing/multi_segment_packer.py b/keras_nlp/src/layers/preprocessing/multi_segment_packer.py index 47b1ad756c..3ca11723ff 100644 --- a/keras_nlp/src/layers/preprocessing/multi_segment_packer.py +++ b/keras_nlp/src/layers/preprocessing/multi_segment_packer.py @@ -72,13 +72,13 @@ class MultiSegmentPacker(PreprocessingLayer): truncate: str. The algorithm to truncate a list of batched segments to fit a per-example length limit. The value can be either `"round_robin"` or `"waterfall"`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It support arbitrary number of segments. + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. + - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. Returns: A tuple with two elements. The first is the dense, packed token diff --git a/keras_nlp/src/models/albert/albert_masked_lm_preprocessor.py b/keras_nlp/src/models/albert/albert_masked_lm_preprocessor.py index f40c6031fb..19312b2dd1 100644 --- a/keras_nlp/src/models/albert/albert_masked_lm_preprocessor.py +++ b/keras_nlp/src/models/albert/albert_masked_lm_preprocessor.py @@ -61,13 +61,13 @@ class AlbertMaskedLMPreprocessor(AlbertPreprocessor): truncate: string. The algorithm to truncate a list of batched segments to fit within `sequence_length`. 
The value can be either `round_robin` or `waterfall`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It supports an arbitrary number of segments. + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. + - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. Examples: diff --git a/keras_nlp/src/models/albert/albert_preprocessor.py b/keras_nlp/src/models/albert/albert_preprocessor.py index e52144dc67..983cfb12e2 100644 --- a/keras_nlp/src/models/albert/albert_preprocessor.py +++ b/keras_nlp/src/models/albert/albert_preprocessor.py @@ -61,13 +61,13 @@ class AlbertPreprocessor(Preprocessor): truncate: string. The algorithm to truncate a list of batched segments to fit within `sequence_length`. The value can be either `round_robin` or `waterfall`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It supports an arbitrary number of segments. + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. 
+ - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. Examples: Directly calling the layer on data. diff --git a/keras_nlp/src/models/bert/bert_masked_lm_preprocessor.py b/keras_nlp/src/models/bert/bert_masked_lm_preprocessor.py index 3af5b70561..704fdd9751 100644 --- a/keras_nlp/src/models/bert/bert_masked_lm_preprocessor.py +++ b/keras_nlp/src/models/bert/bert_masked_lm_preprocessor.py @@ -45,13 +45,13 @@ class BertMaskedLMPreprocessor(BertPreprocessor): truncate: string. The algorithm to truncate a list of batched segments to fit within `sequence_length`. The value can be either `round_robin` or `waterfall`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It supports an arbitrary number of segments. + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. + - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. mask_selection_rate: float. The probability an input token will be dynamically masked. mask_selection_length: int. 
The maximum number of masked tokens diff --git a/keras_nlp/src/models/bert/bert_preprocessor.py b/keras_nlp/src/models/bert/bert_preprocessor.py index 64b0acbeb6..3ea2af9edf 100644 --- a/keras_nlp/src/models/bert/bert_preprocessor.py +++ b/keras_nlp/src/models/bert/bert_preprocessor.py @@ -47,13 +47,13 @@ class BertPreprocessor(Preprocessor): truncate: string. The algorithm to truncate a list of batched segments to fit within `sequence_length`. The value can be either `round_robin` or `waterfall`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It supports an arbitrary number of segments. + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. + - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. Call arguments: x: A tensor of single string sequences, or a tuple of multiple diff --git a/keras_nlp/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py b/keras_nlp/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py index 31df4b894c..92b5f4ebf2 100644 --- a/keras_nlp/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py +++ b/keras_nlp/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py @@ -63,13 +63,13 @@ class DebertaV3MaskedLMPreprocessor(DebertaV3Preprocessor): truncate: string. The algorithm to truncate a list of batched segments to fit within `sequence_length`. 
The value can be either `round_robin` or `waterfall`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It supports an arbitrary number of segments. + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. + - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. Examples: Directly calling the layer on data. diff --git a/keras_nlp/src/models/deberta_v3/deberta_v3_preprocessor.py b/keras_nlp/src/models/deberta_v3/deberta_v3_preprocessor.py index c60cf25402..d99500eb05 100644 --- a/keras_nlp/src/models/deberta_v3/deberta_v3_preprocessor.py +++ b/keras_nlp/src/models/deberta_v3/deberta_v3_preprocessor.py @@ -63,13 +63,13 @@ class DebertaV3Preprocessor(Preprocessor): truncate: string. The algorithm to truncate a list of batched segments to fit within `sequence_length`. The value can be either `round_robin` or `waterfall`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It supports an arbitrary number of segments. + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. 
+ - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. Examples: Directly calling the layer on data. diff --git a/keras_nlp/src/models/electra/electra_preprocessor.py b/keras_nlp/src/models/electra/electra_preprocessor.py index 7cac1a43b7..70148ce2ad 100644 --- a/keras_nlp/src/models/electra/electra_preprocessor.py +++ b/keras_nlp/src/models/electra/electra_preprocessor.py @@ -47,13 +47,13 @@ class ElectraPreprocessor(Preprocessor): truncate: string. The algorithm to truncate a list of batched segments to fit within `sequence_length`. The value can be either `round_robin` or `waterfall`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It supports an arbitrary number of segments. + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. + - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. 
Call arguments: x: A tensor of single string sequences, or a tuple of multiple diff --git a/keras_nlp/src/models/f_net/f_net_masked_lm_preprocessor.py b/keras_nlp/src/models/f_net/f_net_masked_lm_preprocessor.py index 6b1cafd3d4..9408213d01 100644 --- a/keras_nlp/src/models/f_net/f_net_masked_lm_preprocessor.py +++ b/keras_nlp/src/models/f_net/f_net_masked_lm_preprocessor.py @@ -60,13 +60,13 @@ class FNetMaskedLMPreprocessor(FNetPreprocessor): truncate: string. The algorithm to truncate a list of batched segments to fit within `sequence_length`. The value can be either `round_robin` or `waterfall`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It supports an arbitrary number of segments. + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. + - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. Examples: diff --git a/keras_nlp/src/models/f_net/f_net_preprocessor.py b/keras_nlp/src/models/f_net/f_net_preprocessor.py index da1becb9d8..184ef43bea 100644 --- a/keras_nlp/src/models/f_net/f_net_preprocessor.py +++ b/keras_nlp/src/models/f_net/f_net_preprocessor.py @@ -48,13 +48,13 @@ class FNetPreprocessor(Preprocessor): truncate: string. The algorithm to truncate a list of batched segments to fit within `sequence_length`. 
The value can be either `round_robin` or `waterfall`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It supports an arbitrary number of segments. + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. + - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. Call arguments: x: A tensor of single string sequences, or a tuple of multiple diff --git a/keras_nlp/src/models/roberta/roberta_masked_lm_preprocessor.py b/keras_nlp/src/models/roberta/roberta_masked_lm_preprocessor.py index 1a14ded94c..4470873007 100644 --- a/keras_nlp/src/models/roberta/roberta_masked_lm_preprocessor.py +++ b/keras_nlp/src/models/roberta/roberta_masked_lm_preprocessor.py @@ -49,13 +49,13 @@ class RobertaMaskedLMPreprocessor(RobertaPreprocessor): truncate: string. The algorithm to truncate a list of batched segments to fit within `sequence_length`. The value can be either `round_robin` or `waterfall`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It supports an arbitrary number of segments. 
+ - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. + - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. mask_selection_rate: float. The probability an input token will be dynamically masked. mask_selection_length: int. The maximum number of masked tokens diff --git a/keras_nlp/src/models/roberta/roberta_preprocessor.py b/keras_nlp/src/models/roberta/roberta_preprocessor.py index 428acf8f2a..84ff9491ae 100644 --- a/keras_nlp/src/models/roberta/roberta_preprocessor.py +++ b/keras_nlp/src/models/roberta/roberta_preprocessor.py @@ -50,13 +50,13 @@ class RobertaPreprocessor(Preprocessor): truncate: string. The algorithm to truncate a list of batched segments to fit within `sequence_length`. The value can be either `round_robin` or `waterfall`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It supports an arbitrary number of segments. + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. + - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. Call arguments: x: A tensor of single string sequences, or a tuple of multiple