diff --git a/keras_nlp/src/layers/preprocessing/multi_segment_packer.py b/keras_nlp/src/layers/preprocessing/multi_segment_packer.py index 47b1ad756c..3ca11723ff 100644 --- a/keras_nlp/src/layers/preprocessing/multi_segment_packer.py +++ b/keras_nlp/src/layers/preprocessing/multi_segment_packer.py @@ -72,13 +72,13 @@ class MultiSegmentPacker(PreprocessingLayer): truncate: str. The algorithm to truncate a list of batched segments to fit a per-example length limit. The value can be either `"round_robin"` or `"waterfall"`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It support arbitrary number of segments. + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. + - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. Returns: A tuple with two elements. The first is the dense, packed token diff --git a/keras_nlp/src/models/albert/albert_masked_lm_preprocessor.py b/keras_nlp/src/models/albert/albert_masked_lm_preprocessor.py index f40c6031fb..19312b2dd1 100644 --- a/keras_nlp/src/models/albert/albert_masked_lm_preprocessor.py +++ b/keras_nlp/src/models/albert/albert_masked_lm_preprocessor.py @@ -61,13 +61,13 @@ class AlbertMaskedLMPreprocessor(AlbertPreprocessor): truncate: string. The algorithm to truncate a list of batched segments to fit within `sequence_length`. 
The value can be either `round_robin` or `waterfall`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It supports an arbitrary number of segments. + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. + - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. Examples: diff --git a/keras_nlp/src/models/albert/albert_preprocessor.py b/keras_nlp/src/models/albert/albert_preprocessor.py index e52144dc67..983cfb12e2 100644 --- a/keras_nlp/src/models/albert/albert_preprocessor.py +++ b/keras_nlp/src/models/albert/albert_preprocessor.py @@ -61,13 +61,13 @@ class AlbertPreprocessor(Preprocessor): truncate: string. The algorithm to truncate a list of batched segments to fit within `sequence_length`. The value can be either `round_robin` or `waterfall`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It supports an arbitrary number of segments. + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. 
+ - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. Examples: Directly calling the layer on data. diff --git a/keras_nlp/src/models/bert/bert_masked_lm_preprocessor.py b/keras_nlp/src/models/bert/bert_masked_lm_preprocessor.py index 3af5b70561..704fdd9751 100644 --- a/keras_nlp/src/models/bert/bert_masked_lm_preprocessor.py +++ b/keras_nlp/src/models/bert/bert_masked_lm_preprocessor.py @@ -45,13 +45,13 @@ class BertMaskedLMPreprocessor(BertPreprocessor): truncate: string. The algorithm to truncate a list of batched segments to fit within `sequence_length`. The value can be either `round_robin` or `waterfall`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It supports an arbitrary number of segments. + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. + - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. mask_selection_rate: float. The probability an input token will be dynamically masked. mask_selection_length: int. 
The maximum number of masked tokens diff --git a/keras_nlp/src/models/bert/bert_preprocessor.py b/keras_nlp/src/models/bert/bert_preprocessor.py index 64b0acbeb6..3ea2af9edf 100644 --- a/keras_nlp/src/models/bert/bert_preprocessor.py +++ b/keras_nlp/src/models/bert/bert_preprocessor.py @@ -47,13 +47,13 @@ class BertPreprocessor(Preprocessor): truncate: string. The algorithm to truncate a list of batched segments to fit within `sequence_length`. The value can be either `round_robin` or `waterfall`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It supports an arbitrary number of segments. + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. + - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. Call arguments: x: A tensor of single string sequences, or a tuple of multiple diff --git a/keras_nlp/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py b/keras_nlp/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py index 31df4b894c..92b5f4ebf2 100644 --- a/keras_nlp/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py +++ b/keras_nlp/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py @@ -63,13 +63,13 @@ class DebertaV3MaskedLMPreprocessor(DebertaV3Preprocessor): truncate: string. The algorithm to truncate a list of batched segments to fit within `sequence_length`. 
The value can be either `round_robin` or `waterfall`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It supports an arbitrary number of segments. + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. + - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. Examples: Directly calling the layer on data. diff --git a/keras_nlp/src/models/deberta_v3/deberta_v3_preprocessor.py b/keras_nlp/src/models/deberta_v3/deberta_v3_preprocessor.py index c60cf25402..d99500eb05 100644 --- a/keras_nlp/src/models/deberta_v3/deberta_v3_preprocessor.py +++ b/keras_nlp/src/models/deberta_v3/deberta_v3_preprocessor.py @@ -63,13 +63,13 @@ class DebertaV3Preprocessor(Preprocessor): truncate: string. The algorithm to truncate a list of batched segments to fit within `sequence_length`. The value can be either `round_robin` or `waterfall`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It supports an arbitrary number of segments. + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. 
+ - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. Examples: Directly calling the layer on data. diff --git a/keras_nlp/src/models/electra/electra_preprocessor.py b/keras_nlp/src/models/electra/electra_preprocessor.py index 7cac1a43b7..70148ce2ad 100644 --- a/keras_nlp/src/models/electra/electra_preprocessor.py +++ b/keras_nlp/src/models/electra/electra_preprocessor.py @@ -47,13 +47,13 @@ class ElectraPreprocessor(Preprocessor): truncate: string. The algorithm to truncate a list of batched segments to fit within `sequence_length`. The value can be either `round_robin` or `waterfall`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It supports an arbitrary number of segments. + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. + - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. 
Call arguments: x: A tensor of single string sequences, or a tuple of multiple diff --git a/keras_nlp/src/models/f_net/f_net_masked_lm_preprocessor.py b/keras_nlp/src/models/f_net/f_net_masked_lm_preprocessor.py index 6b1cafd3d4..9408213d01 100644 --- a/keras_nlp/src/models/f_net/f_net_masked_lm_preprocessor.py +++ b/keras_nlp/src/models/f_net/f_net_masked_lm_preprocessor.py @@ -60,13 +60,13 @@ class FNetMaskedLMPreprocessor(FNetPreprocessor): truncate: string. The algorithm to truncate a list of batched segments to fit within `sequence_length`. The value can be either `round_robin` or `waterfall`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It supports an arbitrary number of segments. + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. + - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. Examples: diff --git a/keras_nlp/src/models/f_net/f_net_preprocessor.py b/keras_nlp/src/models/f_net/f_net_preprocessor.py index da1becb9d8..184ef43bea 100644 --- a/keras_nlp/src/models/f_net/f_net_preprocessor.py +++ b/keras_nlp/src/models/f_net/f_net_preprocessor.py @@ -48,13 +48,13 @@ class FNetPreprocessor(Preprocessor): truncate: string. The algorithm to truncate a list of batched segments to fit within `sequence_length`. 
The value can be either `round_robin` or `waterfall`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It supports an arbitrary number of segments. + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. + - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. Call arguments: x: A tensor of single string sequences, or a tuple of multiple diff --git a/keras_nlp/src/models/roberta/roberta_masked_lm_preprocessor.py b/keras_nlp/src/models/roberta/roberta_masked_lm_preprocessor.py index 1a14ded94c..4470873007 100644 --- a/keras_nlp/src/models/roberta/roberta_masked_lm_preprocessor.py +++ b/keras_nlp/src/models/roberta/roberta_masked_lm_preprocessor.py @@ -49,13 +49,13 @@ class RobertaMaskedLMPreprocessor(RobertaPreprocessor): truncate: string. The algorithm to truncate a list of batched segments to fit within `sequence_length`. The value can be either `round_robin` or `waterfall`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It supports an arbitrary number of segments. 
+ - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. + - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. mask_selection_rate: float. The probability an input token will be dynamically masked. mask_selection_length: int. The maximum number of masked tokens diff --git a/keras_nlp/src/models/roberta/roberta_preprocessor.py b/keras_nlp/src/models/roberta/roberta_preprocessor.py index 428acf8f2a..84ff9491ae 100644 --- a/keras_nlp/src/models/roberta/roberta_preprocessor.py +++ b/keras_nlp/src/models/roberta/roberta_preprocessor.py @@ -50,13 +50,13 @@ class RobertaPreprocessor(Preprocessor): truncate: string. The algorithm to truncate a list of batched segments to fit within `sequence_length`. The value can be either `round_robin` or `waterfall`: - - `"round_robin"`: Available space is assigned one token at a - time in a round-robin fashion to the inputs that still need - some, until the limit is reached. - - `"waterfall"`: The allocation of the budget is done using a - "waterfall" algorithm that allocates quota in a - left-to-right manner and fills up the buckets until we run - out of budget. It supports an arbitrary number of segments. + - `"round_robin"`: Available space is assigned one token at a + time in a round-robin fashion to the inputs that still need + some, until the limit is reached. + - `"waterfall"`: The allocation of the budget is done using a + "waterfall" algorithm that allocates quota in a + left-to-right manner and fills up the buckets until we run + out of budget. It supports an arbitrary number of segments. Call arguments: x: A tensor of single string sequences, or a tuple of multiple