From 9cad896d634ac1fe73909f02a73dbc0688427d5d Mon Sep 17 00:00:00 2001
From: Warlord-K
Date: Mon, 20 Mar 2023 21:16:37 +0530
Subject: [PATCH 1/3] Custom vocab from true vocab

---
 keras_nlp/models/opt/opt_backbone.py  | 18 +++++++++---------
 keras_nlp/models/opt/opt_tokenizer.py | 10 +++++-----
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/keras_nlp/models/opt/opt_backbone.py b/keras_nlp/models/opt/opt_backbone.py
index 6efd867069..5938b4ca27 100644
--- a/keras_nlp/models/opt/opt_backbone.py
+++ b/keras_nlp/models/opt/opt_backbone.py
@@ -35,13 +35,13 @@ def opt_kernel_initializer(stddev=0.02):
 
 @keras_nlp_export("keras_nlp.models.OPTBackbone")
 class OPTBackbone(Backbone):
-    """OPT decoder network.
+    """A OPT decoder network.
 
     This class implements a Transformer-based decoder model as described in
     ["OPT: Open Pre-trained Transformer Language Models"](https://arxiv.org/abs/2205.01068).
     The default constructor gives a fully customizable, randomly initialized OPT
     model with any number of layers, heads, and embedding dimensions. To load
-    preset architectures and weights, use the `from_preset` constructor.
+    preset architectures and weights, use the `from_preset()` constructor.
 
     Disclaimer: Pre-trained models are provided on an "as is" basis, without
     warranties or conditions of any kind. The underlying model is provided by a
@@ -73,18 +73,18 @@ class OPTBackbone(Backbone):
 
     # Pretrained OPT decoder
     model = keras_nlp.models.OPTBackbone.from_preset("opt_125m_en")
-    output = model(input_data)
+    model(input_data)
 
     # Randomly initialized OPT decoder model with a custom config
     model = keras_nlp.models.OPTBackbone(
         vocabulary_size=50265,
-        num_layers=6,
-        num_heads=12,
-        hidden_dim=768,
-        intermediate_dim=3072,
-        max_sequence_length=12,
+        num_layers=4,
+        num_heads=4,
+        hidden_dim=256,
+        intermediate_dim=512,
+        max_sequence_length=128,
     )
-    output = model(input_data)
+    model(input_data)
     ```
     """
 
diff --git a/keras_nlp/models/opt/opt_tokenizer.py b/keras_nlp/models/opt/opt_tokenizer.py
index f03329796e..79e978c32a 100644
--- a/keras_nlp/models/opt/opt_tokenizer.py
+++ b/keras_nlp/models/opt/opt_tokenizer.py
@@ -50,7 +50,7 @@ class OPTTokenizer(BytePairTokenizer):
     Examples:
 
     Batched inputs.
-    >>> vocab = {"<pad>": 1, "</s>": 2, "a": 3, "Ġquick": 4, "Ġfox": 5}
+    >>> vocab = {"<pad>": 1, "</s>": 2, "a": 102, "Ġquick": 2119, "Ġfox": 23602}
     >>> merges = ["Ġ q", "u i", "c k", "ui ck", "Ġq uick"]
     >>> merges += ["Ġ f", "o x", "Ġf ox"]
     >>> tokenizer = keras_nlp.models.OPTTokenizer(
@@ -58,10 +58,10 @@ class OPTTokenizer(BytePairTokenizer):
     ...     merges=merges,
     ... )
     >>> tokenizer(["a quick fox", "a fox quick"])
-    <tf.RaggedTensor [[3, 4, 5], [3, 5, 4]]>
+    <tf.RaggedTensor [[102, 2119, 23602], [102, 23602, 2119]]>
 
     Unbatched input.
-    >>> vocab = {"<pad>": 1, "</s>": 2, "a": 3, "Ġquick": 4, "Ġfox": 5}
+    >>> vocab = {"<pad>": 1, "</s>": 2, "a": 102, "Ġquick": 2119, "Ġfox": 23602}
     >>> merges = ["Ġ q", "u i", "c k", "ui ck", "Ġq uick"]
     >>> merges += ["Ġ f", "o x", "Ġf ox"]
     >>> tokenizer = keras_nlp.models.OPTTokenizer(
@@ -69,10 +69,10 @@ class OPTTokenizer(BytePairTokenizer):
     ...     merges=merges,
     ... )
     >>> tokenizer("a quick fox")
-    <tf.Tensor: shape=(3,), dtype=int32, numpy=array([3, 4, 5], dtype=int32)>
+    <tf.Tensor: shape=(3,), dtype=int32, numpy=array([  102,  2119, 23602], dtype=int32)>
 
     Detokenization.
-    >>> vocab = {"<pad>": 1, "</s>": 2, "Ġquick": 4, "Ġfox": 5}
+    >>> vocab = {"<pad>": 1, "</s>": 2, "Ġquick": 2119, "Ġfox": 23602}
     >>> merges = ["Ġ q", "u i", "c k", "ui ck", "Ġq uick"]
     >>> merges += ["Ġ f", "o x", "Ġf ox"]
     >>> tokenizer = keras_nlp.models.OPTTokenizer(

From 3423e525bdedafd6e7ab1adf06cee15a9438401d Mon Sep 17 00:00:00 2001
From: Warlord-K
Date: Mon, 20 Mar 2023 21:22:10 +0530
Subject: [PATCH 2/3] Custom Vocab Changes reverted

---
 keras_nlp/models/opt/opt_tokenizer.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/keras_nlp/models/opt/opt_tokenizer.py b/keras_nlp/models/opt/opt_tokenizer.py
index 79e978c32a..f03329796e 100644
--- a/keras_nlp/models/opt/opt_tokenizer.py
+++ b/keras_nlp/models/opt/opt_tokenizer.py
@@ -50,7 +50,7 @@ class OPTTokenizer(BytePairTokenizer):
     Examples:
 
     Batched inputs.
-    >>> vocab = {"<pad>": 1, "</s>": 2, "a": 102, "Ġquick": 2119, "Ġfox": 23602}
+    >>> vocab = {"<pad>": 1, "</s>": 2, "a": 3, "Ġquick": 4, "Ġfox": 5}
     >>> merges = ["Ġ q", "u i", "c k", "ui ck", "Ġq uick"]
     >>> merges += ["Ġ f", "o x", "Ġf ox"]
     >>> tokenizer = keras_nlp.models.OPTTokenizer(
@@ -58,10 +58,10 @@ class OPTTokenizer(BytePairTokenizer):
     ...     merges=merges,
     ... )
     >>> tokenizer(["a quick fox", "a fox quick"])
-    <tf.RaggedTensor [[102, 2119, 23602], [102, 23602, 2119]]>
+    <tf.RaggedTensor [[3, 4, 5], [3, 5, 4]]>
 
     Unbatched input.
-    >>> vocab = {"<pad>": 1, "</s>": 2, "a": 102, "Ġquick": 2119, "Ġfox": 23602}
+    >>> vocab = {"<pad>": 1, "</s>": 2, "a": 3, "Ġquick": 4, "Ġfox": 5}
     >>> merges = ["Ġ q", "u i", "c k", "ui ck", "Ġq uick"]
     >>> merges += ["Ġ f", "o x", "Ġf ox"]
     >>> tokenizer = keras_nlp.models.OPTTokenizer(
@@ -69,10 +69,10 @@ class OPTTokenizer(BytePairTokenizer):
     ...     merges=merges,
     ... )
     >>> tokenizer("a quick fox")
-    <tf.Tensor: shape=(3,), dtype=int32, numpy=array([  102,  2119, 23602], dtype=int32)>
+    <tf.Tensor: shape=(3,), dtype=int32, numpy=array([3, 4, 5], dtype=int32)>
 
     Detokenization.
-    >>> vocab = {"<pad>": 1, "</s>": 2, "Ġquick": 2119, "Ġfox": 23602}
+    >>> vocab = {"<pad>": 1, "</s>": 2, "Ġquick": 4, "Ġfox": 5}
     >>> merges = ["Ġ q", "u i", "c k", "ui ck", "Ġq uick"]
     >>> merges += ["Ġ f", "o x", "Ġf ox"]
     >>> tokenizer = keras_nlp.models.OPTTokenizer(

From a63236b3d719e80988e097b7414f797c756d6b65 Mon Sep 17 00:00:00 2001
From: Matt Watson <1389937+mattdangerw@users.noreply.github.com>
Date: Mon, 20 Mar 2023 20:06:31 -0700
Subject: [PATCH 3/3] Minor docstring edit

---
 keras_nlp/models/opt/opt_backbone.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/keras_nlp/models/opt/opt_backbone.py b/keras_nlp/models/opt/opt_backbone.py
index 5938b4ca27..4fa442f11a 100644
--- a/keras_nlp/models/opt/opt_backbone.py
+++ b/keras_nlp/models/opt/opt_backbone.py
@@ -35,7 +35,7 @@ def opt_kernel_initializer(stddev=0.02):
 
 @keras_nlp_export("keras_nlp.models.OPTBackbone")
 class OPTBackbone(Backbone):
-    """A OPT decoder network.
+    """An OPT decoder network.
 
     This class implements a Transformer-based decoder model as described in
     ["OPT: Open Pre-trained Transformer Language Models"](https://arxiv.org/abs/2205.01068).
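To exercise the tokenizer docstring examples outside of doctest, here is a minimal sketch. It is not part of the patch: it assumes a `keras_nlp` install that ships `OPTTokenizer` and a TensorFlow backend, and it reuses the toy vocab/merges from the diff above.

```python
# Sanity check for the OPTTokenizer docstring examples above.
# Not part of the patch; assumes keras_nlp (with OPTTokenizer) and
# TensorFlow are installed. Vocab/merges are the toy values from the diff.
import keras_nlp

# Toy byte-pair vocabulary. Ids 1 and 2 mirror OPT's real special
# tokens, "<pad>" and "</s>"; "Ġ" marks a leading space in BPE.
vocab = {"<pad>": 1, "</s>": 2, "a": 3, "Ġquick": 4, "Ġfox": 5}
merges = ["Ġ q", "u i", "c k", "ui ck", "Ġq uick"]
merges += ["Ġ f", "o x", "Ġf ox"]

tokenizer = keras_nlp.models.OPTTokenizer(
    vocabulary=vocab,
    merges=merges,
)

# Batched input -> ragged tensor of token ids, one row per string.
print(tokenizer(["a quick fox", "a fox quick"]))
# Expected: <tf.RaggedTensor [[3, 4, 5], [3, 5, 4]]>

# Unbatched input -> dense 1D tensor of token ids.
print(tokenizer("a quick fox"))
# Expected ids: [3, 4, 5]

# Detokenization round-trips the ids back to text.
print(tokenizer.detokenize(tokenizer(["a quick fox"])))
# Expected: a string tensor holding b"a quick fox"
```

The toy ids (3, 4, 5) keep the doctest self-contained, which appears to be why the second commit reverts the true-vocab ids (102, 2119, 23602): matching the real OPT outputs would require the full pretrained vocabulary rather than a five-entry dict.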