diff --git a/keras_nlp/models/gpt2/gpt2_presets.py b/keras_nlp/models/gpt2/gpt2_presets.py index 60bb48bf2e..90c0ce7dff 100644 --- a/keras_nlp/models/gpt2/gpt2_presets.py +++ b/keras_nlp/models/gpt2/gpt2_presets.py @@ -35,9 +35,6 @@ "max_sequence_length": 1024, }, "preprocessor_config": {}, - "description": ( - "Base size of GPT-2 with 124M parameters. Trained on WebText." - ), "weights_url": "https://storage.googleapis.com/keras-nlp/models/gpt2_base_en/v1/model.h5", "weights_hash": "f4ea6e1b214516dd7de452461ee6e16e", "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/gpt2_base_en/v1/vocab.json", @@ -65,9 +62,6 @@ "max_sequence_length": 1024, }, "preprocessor_config": {}, - "description": ( - "Medium size of GPT-2 with 355M parameters. Trained on WebText." - ), "weights_url": "https://storage.googleapis.com/keras-nlp/models/gpt2_medium_en/v1/model.h5", "weights_hash": "580ff9b79c04fc90e6d6f47e975c5afe", "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/gpt2_medium_en/v1/vocab.json", @@ -95,9 +89,6 @@ "max_sequence_length": 1024, }, "preprocessor_config": {}, - "description": ( - "Large size of GPT-2 with 774M parameters. Trained on WebText." - ), "weights_url": "https://storage.googleapis.com/keras-nlp/models/gpt2_large_en/v1/model.h5", "weights_hash": "67957cb3dfc9e965960dabe068811e1a", "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/gpt2_large_en/v1/vocab.json", @@ -125,10 +116,6 @@ "max_sequence_length": 1024, }, "preprocessor_config": {}, - "description": ( - "Extra large size of GPT-2 with 1558M parameters. " - "Trained on WebText." - ), "weights_url": "https://storage.googleapis.com/keras-nlp/models/gpt2_extra_large_en/v1/model.h5", "weights_hash": "d093c1ee0d9705d845c0190909aa2917", "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/gpt2_extra_large_en/v1/vocab.json", @@ -136,4 +123,31 @@ "merges_url": "https://storage.googleapis.com/keras-nlp/models/gpt2_extra_large_en/v1/merges.txt", "merges_hash": "75a37753dd7a28a2c5df80c28bf06e4e", }, + "gpt2_base_en_cnn_dailymail": { + "metadata": { + "description": ( + "12-layer GPT-2 model where case is maintained. " + "Finetuned on the CNN/DailyMail summarization dataset." + ), + "params": 124439808, + "official_name": "GPT-2", + "path": "gpt2", + }, + "config": { + "vocabulary_size": 50257, + "num_layers": 12, + "num_heads": 12, + "hidden_dim": 768, + "intermediate_dim": 3072, + "dropout": 0.1, + "max_sequence_length": 1024, + }, + "preprocessor_config": {}, + "weights_url": "https://storage.googleapis.com/keras-nlp/models/gpt2_base_en_news/v1/model.h5", + "weights_hash": "09d86ca6e1b4213886b720a1392f2a70", + "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/gpt2_base_en_news/v1/vocab.json", + "vocabulary_hash": "dffec25a898b1f5e569bec4dffd7e5c0", + "merges_url": "https://storage.googleapis.com/keras-nlp/models/gpt2_base_en_news/v1/merges.txt", + "merges_hash": "75a37753dd7a28a2c5df80c28bf06e4e", + }, }