diff --git a/keras_hub/src/models/pali_gemma/pali_gemma_presets.py b/keras_hub/src/models/pali_gemma/pali_gemma_presets.py
index 615cc6c5ec..7426d3d49c 100644
--- a/keras_hub/src/models/pali_gemma/pali_gemma_presets.py
+++ b/keras_hub/src/models/pali_gemma/pali_gemma_presets.py
@@ -83,6 +83,96 @@
         },
         "kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_ft_docci_10b_448/2",
     },
+    "pali_gemma2_mix_3b_224": {
+        "metadata": {
+            "description": (
+                "3 billion parameter, image size 224, 27-layer for "
+                "SigLIP-So400m vision encoder and 26-layer Gemma2 2B language "
+                "model. This model has been fine-tuned on a wide range of "
+                "vision-language tasks and domains."
+            ),
+            "params": 3032094960,
+            "official_name": "PaliGemma2",
+            "path": "pali_gemma2",
+            "model_card": "https://www.kaggle.com/models/google/paligemma-2",
+        },
+        "kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_mix_3b_224/2",
+    },
+    "pali_gemma2_mix_3b_448": {
+        "metadata": {
+            "description": (
+                "3 billion parameter, image size 448, 27-layer for "
+                "SigLIP-So400m vision encoder and 26-layer Gemma2 2B language "
+                "model. This model has been fine-tuned on a wide range of "
+                "vision-language tasks and domains."
+            ),
+            "params": 3032979696,
+            "official_name": "PaliGemma2",
+            "path": "pali_gemma2",
+            "model_card": "https://www.kaggle.com/models/google/paligemma-2",
+        },
+        "kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_mix_3b_448/2",
+    },
+    "pali_gemma2_mix_10b_224": {
+        "metadata": {
+            "description": (
+                "10 billion parameter, image size 224, 27-layer for "
+                "SigLIP-So400m vision encoder and 42-layer Gemma2 9B language "
+                "model. This model has been fine-tuned on a wide range of "
+                "vision-language tasks and domains."
+            ),
+            "params": 9662409456,
+            "official_name": "PaliGemma2",
+            "path": "pali_gemma2",
+            "model_card": "https://www.kaggle.com/models/google/paligemma-2",
+        },
+        "kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_mix_10b_224/2",
+    },
+    "pali_gemma2_mix_10b_448": {
+        "metadata": {
+            "description": (
+                "10 billion parameter, image size 448, 27-layer for "
+                "SigLIP-So400m vision encoder and 42-layer Gemma2 9B language "
+                "model. This model has been fine-tuned on a wide range of "
+                "vision-language tasks and domains."
+            ),
+            "params": 9663294192,
+            "official_name": "PaliGemma2",
+            "path": "pali_gemma2",
+            "model_card": "https://www.kaggle.com/models/google/paligemma-2",
+        },
+        "kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_mix_10b_448/2",
+    },
+    "pali_gemma2_mix_28b_224": {
+        "metadata": {
+            "description": (
+                "28 billion parameter, image size 224, 27-layer for "
+                "SigLIP-So400m vision encoder and 46-layer Gemma2 27B language "
+                "model. This model has been fine-tuned on a wide range of "
+                "vision-language tasks and domains."
+            ),
+            "params": 27650192112,
+            "official_name": "PaliGemma2",
+            "path": "pali_gemma2",
+            "model_card": "https://www.kaggle.com/models/google/paligemma-2",
+        },
+        "kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_28b_mix_224/2",
+    },
+    "pali_gemma2_mix_28b_448": {
+        "metadata": {
+            "description": (
+                "28 billion parameter, image size 448, 27-layer for "
+                "SigLIP-So400m vision encoder and 46-layer Gemma2 27B language "
+                "model. This model has been fine-tuned on a wide range of "
+                "vision-language tasks and domains."
+            ),
+            "params": 27650192112,
+            "official_name": "PaliGemma2",
+            "path": "pali_gemma2",
+            "model_card": "https://www.kaggle.com/models/google/paligemma-2",
+        },
+        "kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_28b_mix_448/2",
+    },
     "pali_gemma2_pt_3b_224": {
         "metadata": {
             "description": (
@@ -181,7 +271,7 @@
                 "model. This model has been pre-trained on a mixture of "
                 "datasets."
             ),
-            "params": 9662409456,
+            "params": 27650192112,
             "official_name": "PaliGemma2",
             "path": "pali_gemma2",
             "model_card": "https://www.kaggle.com/models/google/paligemma-2",
@@ -196,7 +286,7 @@
                 "model. This model has been pre-trained on a mixture of "
                 "datasets."
             ),
-            "params": 9663294192,
+            "params": 27650192112,
             "official_name": "PaliGemma2",
             "path": "pali_gemma2",
             "model_card": "https://www.kaggle.com/models/google/paligemma-2",
@@ -211,7 +301,7 @@
                 "model. This model has been pre-trained on a mixture of "
                 "datasets."
             ),
-            "params": 9666833136,
+            "params": 27650192112,
             "official_name": "PaliGemma2",
             "path": "pali_gemma2",
             "model_card": "https://www.kaggle.com/models/google/paligemma-2",
diff --git a/tools/checkpoint_conversion/convert_pali_gemma2_checkpoints.py b/tools/checkpoint_conversion/convert_pali_gemma2_checkpoints.py
index cf8beb1902..23bda97b72 100644
--- a/tools/checkpoint_conversion/convert_pali_gemma2_checkpoints.py
+++ b/tools/checkpoint_conversion/convert_pali_gemma2_checkpoints.py
@@ -69,6 +69,12 @@
     "pali_gemma2_10b_ft_docci_448": (
         "google/paligemma-2/jax/paligemma2-10b-ft-docci-448"
    ),
+    "pali_gemma2_3b_mix_224": "google/paligemma-2/jax/paligemma2-3b-mix-224",
+    "pali_gemma2_3b_mix_448": "google/paligemma-2/jax/paligemma2-3b-mix-448",
+    "pali_gemma2_10b_mix_224": "google/paligemma-2/jax/paligemma2-10b-mix-224",
+    "pali_gemma2_10b_mix_448": "google/paligemma-2/jax/paligemma2-10b-mix-448",
+    "pali_gemma2_28b_mix_224": "google/paligemma-2/jax/paligemma2-28b-mix-224",
+    "pali_gemma2_28b_mix_448": "google/paligemma-2/jax/paligemma2-28b-mix-448",
     "pali_gemma2_3b_pt_224": "google/paligemma-2/jax/paligemma2-3b-pt-224",
     "pali_gemma2_3b_pt_448": "google/paligemma-2/jax/paligemma2-3b-pt-448",
     "pali_gemma2_3b_pt_896": "google/paligemma-2/jax/paligemma2-3b-pt-896",
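For reviewers, a minimal sketch of exercising one of the new presets through the existing `PaliGemmaCausalLM` task API. It assumes the corresponding Kaggle artifact (`kaggle://keras/paligemma2/keras/pali_gemma2_mix_3b_224/2`) has been published; the dummy image and the prompt string are placeholders, not part of this change.

```python
# Sketch: load one of the new mix presets and run a single generation.
# Assumes the Kaggle checkpoint referenced by the preset is available.
import numpy as np
import keras_hub

pali_gemma_lm = keras_hub.models.PaliGemmaCausalLM.from_preset(
    "pali_gemma2_mix_3b_224"
)

# Dummy 224x224 RGB image matching the preset's image size.
image = np.zeros((224, 224, 3), dtype="float32")

# PaliGemma prompts are task prefixes; "describe en" is just an example.
output = pali_gemma_lm.generate(
    {"images": image, "prompts": "describe en\n"},
    max_length=64,
)
print(output)
```

The same snippet with the other preset names (and image sizes 448) can serve as a smoke test that each new `kaggle_handle` resolves and loads.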