38 changes: 35 additions & 3 deletions keras_nlp/src/models/llama/llama_presets.py
@@ -17,27 +17,59 @@
 backbone_presets = {
     "llama2_7b_en": {
         "metadata": {
-            "description": "LLaMA 2 7B Base model",
+            "description": "7 billion parameter, 32-layer, base LLaMA 2 model.",
             "params": 6738415616,
             "official_name": "LLaMA 2",
             "path": "llama2",
             "model_card": "https://github.com/meta-llama/llama",
         },
         "kaggle_handle": "kaggle://keras/llama2/keras/llama2_7b_en/1",
     },
+    "llama2_7b_en_int8": {
+        "metadata": {
+            "description": (
+                "7 billion parameter, 32-layer, base LLaMA 2 model with "
+                "activation and weights quantized to int8."
+            ),
+            "params": 6739839488,
+            "official_name": "LLaMA 2",
+            "path": "llama2",
+            "model_card": "https://github.com/meta-llama/llama",
+        },
+        "kaggle_handle": "kaggle://keras/llama2/keras/llama2_7b_en_int8/1",
+    },
     "llama2_instruct_7b_en": {
         "metadata": {
-            "description": "LLaMA 2 7B Chat model",
+            "description": (
+                "7 billion parameter, 32-layer, instruction tuned LLaMA 2 "
+                "model."
+            ),
             "params": 6738415616,
             "official_name": "LLaMA 2",
             "path": "llama2",
             "model_card": "https://github.com/meta-llama/llama",
         },
         "kaggle_handle": "kaggle://keras/llama2/keras/llama2_instruct_7b_en/1",
     },
+    "llama2_instruct_7b_en_int8": {
+        "metadata": {
+            "description": (
+                "7 billion parameter, 32-layer, instruction tuned LLaMA 2 "
+                "model with activation and weights quantized to int8."
+            ),
+            "params": 6739839488,
+            "official_name": "LLaMA 2",
+            "path": "llama2",
+            "model_card": "https://github.com/meta-llama/llama",
+        },
+        "kaggle_handle": "kaggle://keras/llama2/keras/llama2_instruct_7b_en_int8/1",
+    },
     "vicuna_1.5_7b_en": {
         "metadata": {
-            "description": "Vicuna v1.5 7B Chat model",
+            "description": (
+                "7 billion parameter, 32-layer, instruction tuned Vicuna v1.5 "
+                "model."
+            ),
             "params": 6738415616,
             "official_name": "Vicuna",
             "path": "vicuna",
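With these entries registered, the new int8 presets should load through the same from_preset path as the existing float presets. A minimal sketch, assuming the standard keras_nlp LlamaCausalLM API and that the quantized Kaggle assets referenced above are published:

import keras_nlp

# "llama2_7b_en_int8" is one of the presets registered in this change;
# it resolves to the kaggle_handle shown in the diff above.
causal_lm = keras_nlp.models.LlamaCausalLM.from_preset("llama2_7b_en_int8")

# Quick sanity check that the quantized weights generate sensible text.
print(causal_lm.generate("Keras is", max_length=32))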
35 changes: 33 additions & 2 deletions keras_nlp/src/models/llama3/llama3_presets.py
@@ -17,22 +17,53 @@
 backbone_presets = {
     "llama3_8b_en": {
         "metadata": {
-            "description": "LLaMA 3 8B Base model",
+            "description": "8 billion parameter, 32-layer, base LLaMA 3 model.",
             "params": 8030261248,
             "official_name": "LLaMA 3",
             "path": "llama3",
             "model_card": "https://github.com/meta-llama/llama3",
         },
         "kaggle_handle": "kaggle://keras/llama3/keras/llama3_8b_en/3",
     },
+    "llama3_8b_en_int8": {
+        "metadata": {
+            "description": (
+                "8 billion parameter, 32-layer, base LLaMA 3 model with "
+                "activation and weights quantized to int8."
+            ),
+            "params": 8031894016,
+            "official_name": "LLaMA 3",
+            "path": "llama3",
+            "model_card": "https://github.com/meta-llama/llama3",
+        },
+        "kaggle_handle": "kaggle://keras/llama3/keras/llama3_8b_en_int8/1",
+    },
     "llama3_instruct_8b_en": {
         "metadata": {
-            "description": "LLaMA 3 8B Instruct model",
+            "description": (
+                "8 billion parameter, 32-layer, instruction tuned LLaMA 3 "
+                "model."
+            ),
             "params": 8030261248,
             "official_name": "LLaMA 3",
             "path": "llama3",
             "model_card": "https://github.com/meta-llama/llama3",
         },
         "kaggle_handle": "kaggle://keras/llama3/keras/llama3_instruct_8b_en/3",
     },
+    "llama3_instruct_8b_en_int8": {
+        "metadata": {
+            "description": (
+                "8 billion parameter, 32-layer, instruction tuned LLaMA 3 "
+                "model with activation and weights quantized to int8."
+            ),
+            "params": 8031894016,
+            "official_name": "LLaMA 3",
+            "path": "llama3",
+            "model_card": "https://github.com/meta-llama/llama3",
+        },
+        "kaggle_handle": (
+            "kaggle://keras/llama3/keras/llama3_instruct_8b_en_int8/1"
+        ),
+    },
 }
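The Llama 3 entries follow the same naming scheme, with an _int8 suffix for the quantized variants. A minimal sketch, assuming the keras_nlp Llama3Backbone preset API, that loads only the quantized backbone and checks its size:

import keras_nlp

# Load the quantized backbone without the tokenizer or generation wiring.
backbone = keras_nlp.models.Llama3Backbone.from_preset("llama3_8b_en_int8")

# This should correspond to the "params" value recorded in the preset
# metadata (8031894016); the int8 count is slightly higher than the float
# preset, presumably because quantization stores extra scale variables.
print(backbone.count_params())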