diff --git a/requirements.dev.txt b/requirements.dev.txt
index e8409a724..1692fa00c 100644
--- a/requirements.dev.txt
+++ b/requirements.dev.txt
@@ -40,4 +40,5 @@ certifi==2019.11.28
 idna==2.8
 six==1.14.0
 typing-extensions==4.5.0
-urllib3==1.25.8
\ No newline at end of file
+urllib3==1.25.8
+marqo-commons @ git+https://github.com/marqo-ai/marqo-commons
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 1c808ef97..f3bd2f0e0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,3 +9,4 @@ fastapi-utils==0.2.1
 jsonschema==4.17.1
 typing-extensions==4.5.0
 urllib3==1.25.8
+marqo-commons @ git+https://github.com/marqo-ai/marqo-commons
\ No newline at end of file
diff --git a/src/marqo/s2_inference/model_loaders.py b/src/marqo/s2_inference/model_loaders.py
new file mode 100644
index 000000000..486e44507
--- /dev/null
+++ b/src/marqo/s2_inference/model_loaders.py
@@ -0,0 +1,25 @@
+from marqo.s2_inference.hf_utils import HF_MODEL
+from marqo.s2_inference.sbert_onnx_utils import SBERT_ONNX
+from marqo.s2_inference.sbert_utils import SBERT, TEST
+from marqo.s2_inference.random_utils import Random
+from marqo.s2_inference.clip_utils import CLIP, OPEN_CLIP, MULTILINGUAL_CLIP, FP16_CLIP, get_multilingual_clip_properties
+from marqo.s2_inference.types import Any, Dict, List, Optional, Union, FloatTensor
+from marqo.s2_inference.onnx_clip_utils import CLIP_ONNX
+from marqo.s2_inference.no_model_utils import NO_MODEL
+
+# we need to keep track of the embed dim and model load functions/classes
+# we can use this as a registry
+
+def get_model_loaders() -> Dict:
+    return {'clip': CLIP,
+            'open_clip': OPEN_CLIP,
+            'sbert': SBERT,
+            'test': TEST,
+            'sbert_onnx': SBERT_ONNX,
+            'clip_onnx': CLIP_ONNX,
+            "multilingual_clip": MULTILINGUAL_CLIP,
+            "fp16_clip": FP16_CLIP,
+            'random': Random,
+            'hf': HF_MODEL,
+            'no_model': NO_MODEL
+            }
diff --git a/src/marqo/s2_inference/model_registry.py b/src/marqo/s2_inference/model_registry.py
deleted file mode 100644
index 0223f7673..000000000
--- a/src/marqo/s2_inference/model_registry.py
+++ /dev/null
@@ -1,1784 +0,0 @@
-from marqo.s2_inference.hf_utils import HF_MODEL
-from marqo.s2_inference.sbert_onnx_utils import SBERT_ONNX
-from marqo.s2_inference.sbert_utils import SBERT, TEST
-from marqo.s2_inference.random_utils import Random
-from marqo.s2_inference.clip_utils import CLIP, OPEN_CLIP, MULTILINGUAL_CLIP, FP16_CLIP, get_multilingual_clip_properties
-from marqo.s2_inference.types import Any, Dict, List, Optional, Union, FloatTensor
-from marqo.s2_inference.onnx_clip_utils import CLIP_ONNX
-from marqo.s2_inference.no_model_utils import NO_MODEL
-
-# we need to keep track of the embed dim and model load functions/classes
-# we can use this as a registry
-def _get_clip_properties() -> Dict:
-    CLIP_MODEL_PROPERTIES = {
-        'RN50':
-            {"name": "RN50",
-             "dimensions": 1024,
-             "notes": "CLIP resnet50",
-             "type": "clip",
-             },
-        'RN101':
-            {"name": "RN101",
-             "dimensions": 512,
-             "notes": "CLIP resnet101",
-             "type": "clip",
-             },
-        'RN50x4':
-            {"name": "RN50x4",
-             "dimensions": 640,
-             "notes": "CLIP resnet50x4",
-             "type": "clip",
-             },
-        'RN50x16':
-            {"name": "RN50x16",
-             "dimensions": 768,
-             "notes": "CLIP resnet50x16",
-             "type": "clip",
-             },
-        'RN50x64':
-            {"name": "RN50x64",
-             "dimensions": 1024,
-             "notes": "CLIP resnet50x64",
-             "type": "clip",
-             },
-        'ViT-B/32':
-            {"name": "ViT-B/32",
-             "dimensions": 512,
-             "notes": "CLIP ViT-B/32",
-             "type": "clip",
-             },
-        'ViT-B/16':
-            {"name": "ViT-B/16",
-             "dimensions": 512,
-             "notes": "CLIP ViT-B/16",
-
"type":"clip", - }, - 'ViT-L/14': - {"name": "ViT-L/14", - "dimensions": 768, - "notes": "CLIP ViT-L/14", - "type":"clip", - }, - 'ViT-L/14@336px': - {"name": "ViT-L/14@336px", - "dimensions": 768, - "notes": "CLIP ViT-L/14@336px", - "type":"clip", - }, - - } - return CLIP_MODEL_PROPERTIES - - -def _get_open_clip_properties() -> Dict: - # use this link to find all the model_configs - # https://github.com/mlfoundations/open_clip/tree/main/src/open_clip/model_configs - OPEN_CLIP_MODEL_PROPERTIES = { - 'open_clip/RN50/openai': {'name': 'open_clip/RN50/openai', - 'dimensions': 1024, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'openai'}, - 'open_clip/RN50/yfcc15m': {'name': 'open_clip/RN50/yfcc15m', - 'dimensions': 1024, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'yfcc15m'}, - 'open_clip/RN50/cc12m': {'name': 'open_clip/RN50/cc12m', 'dimensions': 1024, 'note': 'open_clip models', - 'type': 'open_clip', 'pretrained': 'cc12m'}, - 'open_clip/RN50-quickgelu/openai': {'name': 'open_clip/RN50-quickgelu/openai', - 'dimensions': 1024, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'openai'}, - 'open_clip/RN50-quickgelu/yfcc15m': {'name': 'open_clip/RN50-quickgelu/yfcc15m', - 'dimensions': 1024, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'yfcc15m'}, - 'open_clip/RN50-quickgelu/cc12m': {'name': 'open_clip/RN50-quickgelu/cc12m', - 'dimensions': 1024, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'cc12m'}, - 'open_clip/RN101/openai': {'name': 'open_clip/RN101/openai', - 'dimensions': 512, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'openai'}, - 'open_clip/RN101/yfcc15m': {'name': 'open_clip/RN101/yfcc15m', - 'dimensions': 512, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'yfcc15m'}, - 'open_clip/RN101-quickgelu/openai': {'name': 'open_clip/RN101-quickgelu/openai', - 'dimensions': 512, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'openai'}, - 'open_clip/RN101-quickgelu/yfcc15m': {'name': 'open_clip/RN101-quickgelu/yfcc15m', - 'dimensions': 512, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'yfcc15m'}, - 'open_clip/RN50x4/openai': {'name': 'open_clip/RN50x4/openai', - 'dimensions': 640, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'openai'}, - 'open_clip/RN50x16/openai': {'name': 'open_clip/RN50x16/openai', - 'dimensions': 768, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'openai'}, - 'open_clip/RN50x64/openai': {'name': 'open_clip/RN50x64/openai', - 'dimensions': 1024, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'openai'}, - 'open_clip/ViT-B-32/openai': {'name': 'open_clip/ViT-B-32/openai', - 'dimensions': 512, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'openai'}, - 'open_clip/ViT-B-32/laion400m_e31': {'name': 'open_clip/ViT-B-32/laion400m_e31', - 'dimensions': 512, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion400m_e31'}, - 'open_clip/ViT-B-32/laion400m_e32': {'name': 'open_clip/ViT-B-32/laion400m_e32', - 'dimensions': 512, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion400m_e32'}, - 'open_clip/ViT-B-32/laion2b_e16': {'name': 'open_clip/ViT-B-32/laion2b_e16', - 'dimensions': 512, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion2b_e16'}, - 'open_clip/ViT-B-32/laion2b_s34b_b79k': {'name': 
'open_clip/ViT-B-32/laion2b_s34b_b79k', - 'dimensions': 512, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion2b_s34b_b79k'}, - 'open_clip/ViT-B-32-quickgelu/openai': {'name': 'open_clip/ViT-B-32-quickgelu/openai', - 'dimensions': 512, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'openai'}, - 'open_clip/ViT-B-32-quickgelu/laion400m_e31': {'name': 'open_clip/ViT-B-32-quickgelu/laion400m_e31', - 'dimensions': 512, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion400m_e31'}, - 'open_clip/ViT-B-32-quickgelu/laion400m_e32': {'name': 'open_clip/ViT-B-32-quickgelu/laion400m_e32', - 'dimensions': 512, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion400m_e32'}, - 'open_clip/ViT-B-16/openai': {'name': 'open_clip/ViT-B-16/openai', - 'dimensions': 512, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'openai'}, - 'open_clip/ViT-B-16/laion400m_e31': {'name': 'open_clip/ViT-B-16/laion400m_e31', - 'dimensions': 512, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion400m_e31'}, - 'open_clip/ViT-B-16/laion400m_e32': {'name': 'open_clip/ViT-B-16/laion400m_e32', - 'dimensions': 512, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion400m_e32'}, - 'open_clip/ViT-B-16/laion2b_s34b_b88k': {'name': 'open_clip/ViT-B-16/laion2b_s34b_b88k', - 'dimensions': 512, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion2b_s34b_b88k'}, - 'open_clip/ViT-B-16-plus-240/laion400m_e31': {'name': 'open_clip/ViT-B-16-plus-240/laion400m_e31', - 'dimensions': 640, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion400m_e31'}, - 'open_clip/ViT-B-16-plus-240/laion400m_e32': {'name': 'open_clip/ViT-B-16-plus-240/laion400m_e32', - 'dimensions': 640, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion400m_e32'}, - 'open_clip/ViT-L-14/openai': {'name': 'open_clip/ViT-L-14/openai', - 'dimensions': 768, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'openai'}, - 'open_clip/ViT-L-14/laion400m_e31': {'name': 'open_clip/ViT-L-14/laion400m_e31', - 'dimensions': 768, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion400m_e31'}, - 'open_clip/ViT-L-14/laion400m_e32': {'name': 'open_clip/ViT-L-14/laion400m_e32', - 'dimensions': 768, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion400m_e32'}, - 'open_clip/ViT-L-14/laion2b_s32b_b82k': {'name': 'open_clip/ViT-L-14/laion2b_s32b_b82k', - 'dimensions': 768, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion2b_s32b_b82k'}, - 'open_clip/ViT-L-14-336/openai': {'name': 'open_clip/ViT-L-14-336/openai', - 'dimensions': 768, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'openai'}, - 'open_clip/ViT-H-14/laion2b_s32b_b79k': {'name': 'open_clip/ViT-H-14/laion2b_s32b_b79k', - 'dimensions': 1024, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion2b_s32b_b79k'}, - 'open_clip/ViT-g-14/laion2b_s12b_b42k': {'name': 'open_clip/ViT-g-14/laion2b_s12b_b42k', - 'dimensions': 1024, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion2b_s12b_b42k'}, - 'open_clip/ViT-g-14/laion2b_s34b_b88k': {'name': 'open_clip/ViT-g-14/laion2b_s34b_b88k', - 'dimensions': 1024, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion2b_s34b_b88k'}, - 'open_clip/ViT-bigG-14/laion2b_s39b_b160k': {'name': 
'open_clip/ViT-bigG-14/laion2b_s39b_b160k', - 'dimensions': 1280, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion2b_s39b_b160k'}, - 'open_clip/roberta-ViT-B-32/laion2b_s12b_b32k': {'name': 'open_clip/roberta-ViT-B-32/laion2b_s12b_b32k', - 'dimensions': 512, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion2b_s12b_b32k'}, - 'open_clip/xlm-roberta-base-ViT-B-32/laion5b_s13b_b90k': { - 'name': 'open_clip/xlm-roberta-base-ViT-B-32/laion5b_s13b_b90k', - 'dimensions': 512, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion5b_s13b_b90k'}, - 'open_clip/xlm-roberta-large-ViT-H-14/frozen_laion5b_s13b_b90k': { - 'name': 'open_clip/xlm-roberta-large-ViT-H-14/frozen_laion5b_s13b_b90k', - 'dimensions': 1024, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'frozen_laion5b_s13b_b90k'}, - 'open_clip/convnext_base/laion400m_s13b_b51k': {'name': 'open_clip/convnext_base/laion400m_s13b_b51k', - 'dimensions': 512, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion400m_s13b_b51k'}, - 'open_clip/convnext_base_w/laion2b_s13b_b82k': {'name': 'open_clip/convnext_base_w/laion2b_s13b_b82k', - 'dimensions': 640, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion2b_s13b_b82k'}, - 'open_clip/convnext_base_w/laion2b_s13b_b82k_augreg': { - 'name': 'open_clip/convnext_base_w/laion2b_s13b_b82k_augreg', - 'dimensions': 640, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion2b_s13b_b82k_augreg'}, - 'open_clip/convnext_base_w/laion_aesthetic_s13b_b82k': { - 'name': 'open_clip/convnext_base_w/laion_aesthetic_s13b_b82k', - 'dimensions': 640, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion_aesthetic_s13b_b82k'}, - 'open_clip/convnext_base_w_320/laion_aesthetic_s13b_b82k': { - 'name': 'open_clip/convnext_base_w_320/laion_aesthetic_s13b_b82k', - 'dimensions': 640, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion_aesthetic_s13b_b82k'}, - 'open_clip/convnext_base_w_320/laion_aesthetic_s13b_b82k_augreg': { - 'name': 'open_clip/convnext_base_w_320/laion_aesthetic_s13b_b82k_augreg', - 'dimensions': 640, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion_aesthetic_s13b_b82k_augreg'}, - 'open_clip/convnext_large_d/laion2b_s26b_b102k_augreg': { - 'name': 'open_clip/convnext_large_d/laion2b_s26b_b102k_augreg', - 'dimensions': 768, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion2b_s26b_b102k_augreg'}, - 'open_clip/convnext_large_d_320/laion2b_s29b_b131k_ft': { - 'name': 'open_clip/convnext_large_d_320/laion2b_s29b_b131k_ft', - 'dimensions': 768, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion2b_s29b_b131k_ft'}, - 'open_clip/convnext_large_d_320/laion2b_s29b_b131k_ft_soup': { - 'name': 'open_clip/convnext_large_d_320/laion2b_s29b_b131k_ft_soup', - 'dimensions': 768, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion2b_s29b_b131k_ft_soup'}, - # Comment out as they are not currently available in open_clip release 2.18.1 - # It is discussed here https: // github.com / mlfoundations / open_clip / issues / 477 - # 'open_clip/convnext_xxlarge/laion2b_s34b_b82k_augreg': { - # 'name': 'open_clip/convnext_xxlarge/laion2b_s34b_b82k_augreg', - # 'dimensions': 1024, - # 'note': 'open_clip models', - # 'type': 'open_clip', - # 'pretrained': 'laion2b_s34b_b82k_augreg'}, - # 
'open_clip/convnext_xxlarge/laion2b_s34b_b82k_augreg_rewind': { - # 'name': 'open_clip/convnext_xxlarge/laion2b_s34b_b82k_augreg_rewind', - # 'dimensions': 1024, - # 'note': 'open_clip models', - # 'type': 'open_clip', - # 'pretrained': 'laion2b_s34b_b82k_augreg_rewind'}, - # 'open_clip/convnext_xxlarge/laion2b_s34b_b82k_augreg_soup': { - # 'name': 'open_clip/convnext_xxlarge/laion2b_s34b_b82k_augreg_soup', - # 'dimensions': 1024, - # 'note': 'open_clip models', - # 'type': 'open_clip', - # 'pretrained': 'laion2b_s34b_b82k_augreg_soup'}, - 'open_clip/coca_ViT-B-32/laion2b_s13b_b90k': {'name': 'open_clip/coca_ViT-B-32/laion2b_s13b_b90k', - 'dimensions': 512, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion2b_s13b_b90k'}, - 'open_clip/coca_ViT-B-32/mscoco_finetuned_laion2b_s13b_b90k': { - 'name': 'open_clip/coca_ViT-B-32/mscoco_finetuned_laion2b_s13b_b90k', - 'dimensions': 512, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'mscoco_finetuned_laion2b_s13b_b90k'}, - 'open_clip/coca_ViT-L-14/laion2b_s13b_b90k': {'name': 'open_clip/coca_ViT-L-14/laion2b_s13b_b90k', - 'dimensions': 768, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'laion2b_s13b_b90k'}, - 'open_clip/coca_ViT-L-14/mscoco_finetuned_laion2b_s13b_b90k': { - 'name': 'open_clip/coca_ViT-L-14/mscoco_finetuned_laion2b_s13b_b90k', - 'dimensions': 768, - 'note': 'open_clip models', - 'type': 'open_clip', - 'pretrained': 'mscoco_finetuned_laion2b_s13b_b90k'} - } - return OPEN_CLIP_MODEL_PROPERTIES - - -def _get_sbert_properties() -> Dict: - SBERT_MODEL_PROPERTIES = { - "sentence-transformers/all-MiniLM-L6-v1": - {"name": "sentence-transformers/all-MiniLM-L6-v1", - "dimensions": 384, - "tokens":128, - "type":"sbert", - "notes": ""}, - "sentence-transformers/all-MiniLM-L6-v2": - {"name": "sentence-transformers/all-MiniLM-L6-v2", - "dimensions": 384, - "tokens":256, - "type":"sbert", - "notes": ""}, - "sentence-transformers/all-mpnet-base-v1": - {"name": "sentence-transformers/all-mpnet-base-v1", - "dimensions": 768, - "tokens":128, - "type":"sbert", - "notes": ""}, - "sentence-transformers/all-mpnet-base-v2": - {"name": "sentence-transformers/all-mpnet-base-v2", - "dimensions": 768, - "tokens":128, - "type":"sbert", - "notes": ""}, - 'sentence-transformers/stsb-xlm-r-multilingual': - {"name": 'sentence-transformers/stsb-xlm-r-multilingual', - "dimensions": 768, - "tokens": 128, - "type": "sbert", - "notes": ""}, - "flax-sentence-embeddings/all_datasets_v3_MiniLM-L12": - {"name": "flax-sentence-embeddings/all_datasets_v3_MiniLM-L12", - "dimensions": 384, - "tokens":128, - "type":"sbert", - "notes": ""}, - "flax-sentence-embeddings/all_datasets_v3_MiniLM-L6": - {"name": "flax-sentence-embeddings/all_datasets_v3_MiniLM-L6", - "dimensions": 384, - "tokens":128, - "type":"sbert", - "notes": ""}, - "flax-sentence-embeddings/all_datasets_v4_MiniLM-L12": - {"name": "flax-sentence-embeddings/all_datasets_v4_MiniLM-L12", - "dimensions": 384, - "tokens":128, - "type":"sbert", - "notes": ""}, - "flax-sentence-embeddings/all_datasets_v4_MiniLM-L6": - {"name": "flax-sentence-embeddings/all_datasets_v4_MiniLM-L6", - "dimensions": 384, - "tokens":128, - "type":"sbert", - "notes": ""}, - - "flax-sentence-embeddings/all_datasets_v3_mpnet-base": - {"name": "flax-sentence-embeddings/all_datasets_v3_mpnet-base", - "dimensions": 768, - "tokens":128, - "type":"sbert", - "notes": ""}, - "flax-sentence-embeddings/all_datasets_v4_mpnet-base": - {"name": 
"flax-sentence-embeddings/all_datasets_v4_mpnet-base", - "dimensions": 768, - "tokens":128, - "type":"sbert", - "notes": ""}, - } - return SBERT_MODEL_PROPERTIES - -def _get_hf_properties() -> Dict: - HF_MODEL_PROPERTIES = { - "hf/all-MiniLM-L6-v1": - {"name": "sentence-transformers/all-MiniLM-L6-v1", - "dimensions": 384, - "tokens":128, - "type":"hf", - "notes": ""}, - "hf/all-MiniLM-L6-v2": - {"name": "sentence-transformers/all-MiniLM-L6-v2", - "dimensions": 384, - "tokens":256, - "type":"hf", - "notes": ""}, - "hf/all-mpnet-base-v1": - {"name": "sentence-transformers/all-mpnet-base-v1", - "dimensions": 768, - "tokens":128, - "type":"hf", - "notes": ""}, - "hf/all-mpnet-base-v2": - {"name": "sentence-transformers/all-mpnet-base-v2", - "dimensions": 768, - "tokens":128, - "type":"hf", - "notes": ""}, - - "hf/all_datasets_v3_MiniLM-L12": - {"name": "flax-sentence-embeddings/all_datasets_v3_MiniLM-L12", - "dimensions": 384, - "tokens":128, - "type":"hf", - "notes": ""}, - "hf/all_datasets_v3_MiniLM-L6": - {"name": "flax-sentence-embeddings/all_datasets_v3_MiniLM-L6", - "dimensions": 384, - "tokens":128, - "type":"hf", - "notes": ""}, - "hf/all_datasets_v4_MiniLM-L12": - {"name": "flax-sentence-embeddings/all_datasets_v4_MiniLM-L12", - "dimensions": 384, - "tokens":128, - "type":"hf", - "notes": ""}, - "hf/all_datasets_v4_MiniLM-L6": - {"name": "flax-sentence-embeddings/all_datasets_v4_MiniLM-L6", - "dimensions": 384, - "tokens":128, - "type":"hf", - "notes": ""}, - - "hf/all_datasets_v3_mpnet-base": - {"name": "flax-sentence-embeddings/all_datasets_v3_mpnet-base", - "dimensions": 768, - "tokens":128, - "type":"hf", - "notes": ""}, - "hf/all_datasets_v4_mpnet-base": - {"name": "flax-sentence-embeddings/all_datasets_v4_mpnet-base", - "dimensions": 768, - "tokens":128, - "type":"hf", - "notes": ""}, - - "hf/e5-small": - {"name": 'intfloat/e5-small', - "dimensions": 384, - "tokens": 192, - "type": "hf", - "model_size": 0.1342, - "notes": ""}, - "hf/e5-base": - {"name": 'intfloat/e5-base', - "dimensions": 768, - "tokens": 192, - "type": "hf", - "model_size": 0.438, - "notes": ""}, - "hf/e5-large": - {"name": 'intfloat/e5-large', - "dimensions": 1024, - "tokens": 192, - "type": "hf", - "model_size": 1.3, - "notes": ""}, - "hf/e5-large-unsupervised": - {"name": 'intfloat/e5-large-unsupervised', - "dimensions": 1024, - "tokens": 128, - "type": "hf", - "model_size": 1.3, - "notes": ""}, - "hf/e5-base-unsupervised": - {"name": 'intfloat/e5-base-unsupervised', - "dimensions": 768, - "tokens": 128, - "type": "hf", - "model_size": 0.438, - "notes": ""}, - "hf/e5-small-unsupervised": - {"name": 'intfloat/e5-small-unsupervised', - "dimensions": 384, - "tokens": 128, - "type": "hf", - "model_size": 0.134, - "notes": ""}, - "hf/multilingual-e5-small": - {"name": 'intfloat/multilingual-e5-small', - "dimensions": 384, - "tokens": 512, - "type": "hf", - "model_size": 0.471, - "notes": ""}, - "hf/multilingual-e5-base": - {"name": 'intfloat/multilingual-e5-base', - "dimensions": 768, - "tokens": 512, - "type": "hf", - "model_size": 1.11, - "notes": ""}, - "hf/multilingual-e5-large": - {"name": 'intfloat/multilingual-e5-large', - "dimensions": 1024, - "tokens": 512, - "type": "hf", - "model_size": 2.24, - "notes": ""}, - "hf/e5-small-v2": - {"name": 'intfloat/e5-small-v2', - "dimensions": 384, - "tokens": 512, - "type": "hf", - "model_size": 0.134, - "notes": ""}, - "hf/e5-base-v2": - {"name": 'intfloat/e5-base-v2', - "dimensions": 768, - "tokens": 512, - "type": "hf", - "model_size": 0.438, - "notes": ""}, - 
"hf/e5-large-v2": - {"name": 'intfloat/e5-large-v2', - "dimensions": 1024, - "tokens": 512, - "type": "hf", - "model_size": 1.34, - "notes": ""}, - } - return HF_MODEL_PROPERTIES - -def _get_sbert_onnx_properties() -> Dict: - SBERT_ONNX_MODEL_PROPERTIES = { - "onnx/all-MiniLM-L6-v1": - {"name": "sentence-transformers/all-MiniLM-L6-v1", - "dimensions": 384, - "tokens":128, - "type":"sbert_onnx", - "notes": ""}, - "onnx/all-MiniLM-L6-v2": - {"name": "sentence-transformers/all-MiniLM-L6-v2", - "dimensions": 384, - "tokens":256, - "type":"sbert_onnx", - "notes": ""}, - "onnx/all-mpnet-base-v1": - {"name": "sentence-transformers/all-mpnet-base-v1", - "dimensions": 768, - "tokens":128, - "type":"sbert_onnx", - "notes": ""}, - "onnx/all-mpnet-base-v2": - {"name": "sentence-transformers/all-mpnet-base-v2", - "dimensions": 768, - "tokens":128, - "type":"sbert_onnx", - "notes": ""}, - - "onnx/all_datasets_v3_MiniLM-L12": - {"name": "flax-sentence-embeddings/all_datasets_v3_MiniLM-L12", - "dimensions": 384, - "tokens":128, - "type":"sbert_onnx", - "notes": ""}, - "onnx/all_datasets_v3_MiniLM-L6": - {"name": "flax-sentence-embeddings/all_datasets_v3_MiniLM-L6", - "dimensions": 384, - "tokens":128, - "type":"sbert_onnx", - "notes": ""}, - "onnx/all_datasets_v4_MiniLM-L12": - {"name": "flax-sentence-embeddings/all_datasets_v4_MiniLM-L12", - "dimensions": 384, - "tokens":128, - "type":"sbert_onnx", - "notes": ""}, - "onnx/all_datasets_v4_MiniLM-L6": - {"name": "flax-sentence-embeddings/all_datasets_v4_MiniLM-L6", - "dimensions": 384, - "tokens":128, - "type":"sbert_onnx", - "notes": ""}, - - "onnx/all_datasets_v3_mpnet-base": - {"name": "flax-sentence-embeddings/all_datasets_v3_mpnet-base", - "dimensions": 768, - "tokens":128, - "type":"sbert_onnx", - "notes": ""}, - "onnx/all_datasets_v4_mpnet-base": - {"name": "flax-sentence-embeddings/all_datasets_v4_mpnet-base", - "dimensions": 768, - "tokens":128, - "type":"sbert_onnx", - "notes": ""}, - } - return SBERT_ONNX_MODEL_PROPERTIES - - -def _get_sbert_test_properties() -> Dict: - TEST_MODEL_PROPERTIES = { - "sentence-transformers/test": - {"name": "sentence-transformers/all-MiniLM-L6-v1", - "dimensions": 16, - "tokens":128, - "type":"test", - "notes": ""}, - "test": - {"name": "sentence-transformers/all-MiniLM-L6-v1", - "dimensions": 16, - "tokens":128, - "type":"test", - "notes": ""}, - } - return TEST_MODEL_PROPERTIES - -def _get_onnx_clip_properties() -> Dict: - ONNX_CLIP_MODEL_PROPERTIES = { - "onnx32/openai/ViT-L/14": - { - "name":"onnx32/openai/ViT-L/14", - "dimensions" : 768, - "type":"clip_onnx", - "note":"the onnx float32 version of openai ViT-L/14", - "repo_id": "Marqo/onnx-openai-ViT-L-14", - "visual_file": "onnx32-openai-ViT-L-14-visual.onnx", - "textual_file": "onnx32-openai-ViT-L-14-textual.onnx", - "token": None, - "resolution" : 224, - }, - "onnx16/openai/ViT-L/14": - { - "name": "onnx16/openai/ViT-L/14", - "dimensions": 768, - "type": "clip_onnx", - "note": "the onnx float16 version of openai ViT-L/14", - "repo_id": "Marqo/onnx-openai-ViT-L-14", - "visual_file": "onnx16-openai-ViT-L-14-visual.onnx", - "textual_file": "onnx16-openai-ViT-L-14-textual.onnx", - "token": None, - "resolution" : 224, - }, - "onnx32/open_clip/ViT-L-14/openai": - { - "name": "onnx32/open_clip/ViT-L-14/openai", - "dimensions": 768, - "type": "clip_onnx", - "note": "the onnx float32 version of open_clip ViT-L-14/openai", - "repo_id": "Marqo/onnx-open_clip-ViT-L-14", - "visual_file": "onnx32-open_clip-ViT-L-14-openai-visual.onnx", - "textual_file": 
"onnx32-open_clip-ViT-L-14-openai-textual.onnx", - "token": None, - "resolution": 224, - "pretrained": "openai" - }, - "onnx16/open_clip/ViT-L-14/openai": - { - "name": "onnx16/open_clip/ViT-L-14/openai", - "dimensions": 768, - "type": "clip_onnx", - "note": "the onnx float16 version of open_clip ViT-L-14/openai", - "repo_id": "Marqo/onnx-open_clip-ViT-L-14", - "visual_file": "onnx16-open_clip-ViT-L-14-openai-visual.onnx", - "textual_file": "onnx16-open_clip-ViT-L-14-openai-textual.onnx", - "token": None, - "resolution": 224, - "pretrained": "openai" - }, - "onnx32/open_clip/ViT-L-14/laion400m_e32": - { - "name" : "onnx32/open_clip/ViT-L-14/laion400m_e32", - "dimensions" : 768, - "type" : "clip_onnx", - "note": "the onnx float32 version of open_clip ViT-L-14/lainon400m_e32", - "repo_id" : "Marqo/onnx-open_clip-ViT-L-14", - "visual_file" : "onnx32-open_clip-ViT-L-14-laion400m_e32-visual.onnx", - "textual_file" : "onnx32-open_clip-ViT-L-14-laion400m_e32-textual.onnx", - "token" : None, - "resolution" : 224, - "pretrained" : "laion400m_e32" - }, - "onnx16/open_clip/ViT-L-14/laion400m_e32": - { - "name": "onnx16/open_clip/ViT-L-14/laion400m_e32", - "dimensions": 768, - "type": "clip_onnx", - "note": "the onnx float16 version of open_clip ViT-L-14/lainon400m_e32", - "repo_id": "Marqo/onnx-open_clip-ViT-L-14", - "visual_file": "onnx16-open_clip-ViT-L-14-laion400m_e32-visual.onnx", - "textual_file": "onnx16-open_clip-ViT-L-14-laion400m_e32-textual.onnx", - "token": None, - "resolution": 224, - "pretrained": "laion400m_e32" - }, - "onnx32/open_clip/ViT-L-14/laion2b_s32b_b82k": - { - "name": "onnx32/open_clip/ViT-L-14/laion2b_s32b_b82k", - "dimensions": 768, - "type": "clip_onnx", - "note": "the onnx float32 version of open_clip ViT-L-14/laion2b_s32b_b82k", - "repo_id": "Marqo/onnx-open_clip-ViT-L-14", - "visual_file": "onnx32-open_clip-ViT-L-14-laion2b_s32b_b82k-visual.onnx", - "textual_file": "onnx32-open_clip-ViT-L-14-laion2b_s32b_b82k-textual.onnx", - "token": None, - "resolution": 224, - "pretrained": "laionb_s32b_b82k", - "image_mean" : (0.5, 0.5, 0.5), - "image_std" : (0.5, 0.5, 0.5), - - }, - "onnx16/open_clip/ViT-L-14/laion2b_s32b_b82k": - { - "name": "onnx16/open_clip/ViT-L-14/laion2b_s32b_b82k", - "dimensions": 768, - "type": "clip_onnx", - "note": "the onnx float16 version of open_clip ViT-L-14/laion2b_s32b_b82k", - "repo_id": "Marqo/onnx-open_clip-ViT-L-14", - "visual_file": "onnx16-open_clip-ViT-L-14-laion2b_s32b_b82k-visual.onnx", - "textual_file": "onnx16-open_clip-ViT-L-14-laion2b_s32b_b82k-textual.onnx", - "token": None, - "resolution": 224, - "pretrained": "laionb_s32b_b82k", - "image_mean": (0.5, 0.5, 0.5), - "image_std": (0.5, 0.5, 0.5), - }, - "onnx32/open_clip/ViT-L-14-336/openai": - { - "name": "onnx32/open_clip/ViT-L-14-336/openai", - "dimensions": 768, - "type": "clip_onnx", - "note": "the onnx float32 version of open_clip ViT-L-14-336/openai", - "repo_id": "Marqo/onnx-open_clip-ViT-L-14-336", - "visual_file": "onnx32-open_clip-ViT-L-14-336-openai-visual.onnx", - "textual_file": "onnx32-open_clip-ViT-L-14-336-openai-textual.onnx", - "token": None, - "resolution": 336, - "pretrained": "openai", - "image_mean": None, - "image_std": None, - - }, - "onnx16/open_clip/ViT-L-14-336/openai": - { - "name": "onnx16/open_clip/ViT-L-14-336/openai", - "dimensions": 768, - "type": "clip_onnx", - "note": "the onnx float16 version of open_clip ViT-L-14-336/openai", - "repo_id": "Marqo/onnx-open_clip-ViT-L-14-336", - "visual_file": "onnx16-open_clip-ViT-L-14-336-openai-visual.onnx", - 
"textual_file": "onnx16-open_clip-ViT-L-14-336-openai-textual.onnx", - "token": None, - "resolution": 336, - "pretrained": "openai", - "image_mean": None, - "image_std": None, - }, - - "onnx32/open_clip/ViT-B-32/openai": - { - "name": "onnx32/open_clip/ViT-B-32/openai", - "dimensions": 512, - "type": "clip_onnx", - "note": "the onnx float32 version of open_clip ViT-B-32/openai", - "repo_id": "Marqo/onnx-open_clip-ViT-B-32", - "visual_file": "onnx32-open_clip-ViT-B-32-openai-visual.onnx", - "textual_file": "onnx32-open_clip-ViT-B-32-openai-textual.onnx", - "token": None, - "resolution": 224, - "pretrained": "openai", - "image_mean": None, - "image_std": None, - }, - - "onnx16/open_clip/ViT-B-32/openai": - { - "name": "onnx16/open_clip/ViT-B-32/openai", - "dimensions": 512, - "type": "clip_onnx", - "note": "the onnx float16 version of open_clip ViT-B-32/openai", - "repo_id": "Marqo/onnx-open_clip-ViT-B-32", - "visual_file": "onnx16-open_clip-ViT-B-32-openai-visual.onnx", - "textual_file": "onnx16-open_clip-ViT-B-32-openai-textual.onnx", - "token": None, - "resolution": 224, - "pretrained": "openai", - "image_mean": None, - "image_std": None, - }, - - "onnx32/open_clip/ViT-B-32/laion400m_e31": - { - "name": "onnx32/open_clip/ViT-B-32/laion400m_e31", - "dimensions": 512, - "type": "clip_onnx", - "note": "the onnx float32 version of open_clip ViT-B-32/laion400m_e31", - "repo_id": "Marqo/onnx-open_clip-ViT-B-32", - "visual_file": "onnx32-open_clip-ViT-B-32-laion400m_e31-visual.onnx", - "textual_file": "onnx32-open_clip-ViT-B-32-laion400m_e31-textual.onnx", - "token": None, - "resolution": 224, - "pretrained": "laion400m_e31", - "image_mean": None, - "image_std": None, - }, - - "onnx16/open_clip/ViT-B-32/laion400m_e31": - { - "name": "onnx16/open_clip/ViT-B-32/laion400m_e31", - "dimensions": 512, - "type": "clip_onnx", - "note": "the onnx float16 version of open_clip ViT-B-32/laion400m_e31", - "repo_id": "Marqo/onnx-open_clip-ViT-B-32", - "visual_file": "onnx16-open_clip-ViT-B-32-laion400m_e31-visual.onnx", - "textual_file": "onnx16-open_clip-ViT-B-32-laion400m_e31-textual.onnx", - "token": None, - "resolution": 224, - "pretrained": "laion400m_e31", - "image_mean": None, - "image_std": None, - }, - - "onnx32/open_clip/ViT-B-32/laion400m_e32": - { - "name": "onnx32/open_clip/ViT-B-32/laion400m_e32", - "dimensions": 512, - "type": "clip_onnx", - "note": "the onnx float32 version of open_clip ViT-B-32/laion400m_e32", - "repo_id": "Marqo/onnx-open_clip-ViT-B-32", - "visual_file": "onnx32-open_clip-ViT-B-32-laion400m_e32-visual.onnx", - "textual_file": "onnx32-open_clip-ViT-B-32-laion400m_e32-textual.onnx", - "token": None, - "resolution": 224, - "pretrained": "laion400m_e32", - "image_mean": None, - "image_std": None, - }, - "onnx16/open_clip/ViT-B-32/laion400m_e32": - { - "name": "onnx16/open_clip/ViT-B-32/laion400m_e32", - "dimensions": 512, - "type": "clip_onnx", - "note": "the onnx float16 version of open_clip ViT-B-32/laion400m_e32", - "repo_id": "Marqo/onnx-open_clip-ViT-B-32", - "visual_file": "onnx16-open_clip-ViT-B-32-laion400m_e32-visual.onnx", - "textual_file": "onnx16-open_clip-ViT-B-32-laion400m_e32-textual.onnx", - "token": None, - "resolution": 224, - "pretrained": "laion400m_e32", - "image_mean": None, - "image_std": None, - }, - - "onnx32/open_clip/ViT-B-32/laion2b_e16": - { - "name": "onnx32/open_clip/ViT-B-32/laion2b_e16", - "dimensions": 512, - "type": "clip_onnx", - "note": "the onnx float32 version of open_clip ViT-B-32/laion2b_e16", - "repo_id": "Marqo/onnx-open_clip-ViT-B-32", 
- "visual_file": "onnx32-open_clip-ViT-B-32-laion2b_e16-visual.onnx", - "textual_file": "onnx32-open_clip-ViT-B-32-laion2b_e16-textual.onnx", - "token": None, - "resolution": 224, - "pretrained": "laion2b_e16", - "image_mean": None, - "image_std": None, - }, - - "onnx16/open_clip/ViT-B-32/laion2b_e16": - { - "name": "onnx16/open_clip/ViT-B-32/laion2b_e16", - "dimensions": 512, - "type": "clip_onnx", - "note": "the onnx float16 version of open_clip ViT-B-32/laion2b_e16", - "repo_id": "Marqo/onnx-open_clip-ViT-B-32", - "visual_file": "onnx16-open_clip-ViT-B-32-laion2b_e16-visual.onnx", - "textual_file": "onnx16-open_clip-ViT-B-32-laion2b_e16-textual.onnx", - "token": None, - "resolution": 224, - "pretrained": "laion2b_e16", - "image_mean": None, - "image_std": None, - }, - - 'onnx32/open_clip/ViT-B-32-quickgelu/openai': - { - 'name': 'onnx32/open_clip/ViT-B-32-quickgelu/openai', - 'dimensions': 512, - 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip ViT-B-32-quickgelu/openai', - 'repo_id': 'Marqo/onnx-open_clip-ViT-B-32-quickgelu', - 'visual_file': 'onnx32-open_clip-ViT-B-32-quickgelu-openai-visual.onnx', - 'textual_file': 'onnx32-open_clip-ViT-B-32-quickgelu-openai-textual.onnx', - 'token': None, - 'resolution': 224, 'pretrained': 'openai', - 'image_mean': None, - 'image_std': None - }, - - 'onnx16/open_clip/ViT-B-32-quickgelu/openai': - { - 'name': 'onnx16/open_clip/ViT-B-32-quickgelu/openai', - 'dimensions': 512, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip ViT-B-32-quickgelu/openai', - 'repo_id': 'Marqo/onnx-open_clip-ViT-B-32-quickgelu', - 'visual_file': 'onnx16-open_clip-ViT-B-32-quickgelu-openai-visual.onnx', - 'textual_file': 'onnx16-open_clip-ViT-B-32-quickgelu-openai-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'openai', - 'image_mean': None, - 'image_std': None - }, - - 'onnx32/open_clip/ViT-B-32-quickgelu/laion400m_e31': - { - 'name': 'onnx32/open_clip/ViT-B-32-quickgelu/laion400m_e31', - 'dimensions': 512, - 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip ViT-B-32-quickgelu/laion400m_e31', - 'repo_id': 'Marqo/onnx-open_clip-ViT-B-32-quickgelu', - 'visual_file': 'onnx32-open_clip-ViT-B-32-quickgelu-laion400m_e31-visual.onnx', - 'textual_file': 'onnx32-open_clip-ViT-B-32-quickgelu-laion400m_e31-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'laion400m_e31', - 'image_mean': None, - 'image_std': None - }, - - 'onnx16/open_clip/ViT-B-32-quickgelu/laion400m_e31': - { - 'name': 'onnx16/open_clip/ViT-B-32-quickgelu/laion400m_e31', - 'dimensions': 512, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip ViT-B-32-quickgelu/laion400m_e31', - 'repo_id': 'Marqo/onnx-open_clip-ViT-B-32-quickgelu', - 'visual_file': 'onnx16-open_clip-ViT-B-32-quickgelu-laion400m_e31-visual.onnx', - 'textual_file': 'onnx16-open_clip-ViT-B-32-quickgelu-laion400m_e31-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'laion400m_e31', - 'image_mean': None, - 'image_std': None - }, - - 'onnx16/open_clip/ViT-B-32-quickgelu/laion400m_e32': - { - 'name': 'onnx16/open_clip/ViT-B-32-quickgelu/laion400m_e32', - 'dimensions': 512, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip ViT-B-32-quickgelu/laion400m_e32', - 'repo_id': 'Marqo/onnx-open_clip-ViT-B-32-quickgelu', - 'visual_file': 'onnx16-open_clip-ViT-B-32-quickgelu-laion400m_e32-visual.onnx', - 'textual_file': 'onnx16-open_clip-ViT-B-32-quickgelu-laion400m_e32-textual.onnx', - 'token': None, - 
'resolution': 224, - 'pretrained': 'laion400m_e32', - 'image_mean': None, - 'image_std': None - }, - - 'onnx32/open_clip/ViT-B-32-quickgelu/laion400m_e32': - { - 'name': 'onnx32/open_clip/ViT-B-32-quickgelu/laion400m_e32', - 'dimensions': 512, - 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip ViT-B-32-quickgelu/laion400m_e32', - 'repo_id': 'Marqo/onnx-open_clip-ViT-B-32-quickgelu', - 'visual_file': 'onnx32-open_clip-ViT-B-32-quickgelu-laion400m_e32-visual.onnx', - 'textual_file': 'onnx32-open_clip-ViT-B-32-quickgelu-laion400m_e32-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'laion400m_e32', - 'image_mean': None, - 'image_std': None - }, - - 'onnx16/open_clip/ViT-B-16/openai': - { - 'name': 'onnx16/open_clip/ViT-B-16/openai', - 'dimensions': 512, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip ViT-B-16/openai', - 'repo_id': 'Marqo/onnx-open_clip-ViT-B-16', - 'visual_file': 'onnx16-open_clip-ViT-B-16-openai-visual.onnx', - 'textual_file': 'onnx16-open_clip-ViT-B-16-openai-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'openai', - 'image_mean': None, - 'image_std': None - }, - - 'onnx32/open_clip/ViT-B-16/openai': - { - 'name': 'onnx32/open_clip/ViT-B-16/openai', - 'dimensions': 512, - 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip ViT-B-16/openai', - 'repo_id': 'Marqo/onnx-open_clip-ViT-B-16', - 'visual_file': 'onnx32-open_clip-ViT-B-16-openai-visual.onnx', - 'textual_file': 'onnx32-open_clip-ViT-B-16-openai-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'openai', - 'image_mean': None, - 'image_std': None - }, - - 'onnx16/open_clip/ViT-B-16/laion400m_e31': - { - 'name': 'onnx16/open_clip/ViT-B-16/laion400m_e31', - 'dimensions': 512, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip ViT-B-16/laion400m_e31', - 'repo_id': 'Marqo/onnx-open_clip-ViT-B-16', - 'visual_file': 'onnx16-open_clip-ViT-B-16-laion400m_e31-visual.onnx', - 'textual_file': 'onnx16-open_clip-ViT-B-16-laion400m_e31-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'laion400m_e31', - 'image_mean': None, - 'image_std': None - }, - - 'onnx32/open_clip/ViT-B-16/laion400m_e31': - { - 'name': 'onnx32/open_clip/ViT-B-16/laion400m_e31', - 'dimensions': 512, - 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip ViT-B-16/laion400m_e31', - 'repo_id': 'Marqo/onnx-open_clip-ViT-B-16', - 'visual_file': 'onnx32-open_clip-ViT-B-16-laion400m_e31-visual.onnx', - 'textual_file': 'onnx32-open_clip-ViT-B-16-laion400m_e31-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'laion400m_e31', - 'image_mean': None, - 'image_std': None - }, - - 'onnx16/open_clip/ViT-B-16/laion400m_e32': - { - 'name': 'onnx16/open_clip/ViT-B-16/laion400m_e32', - 'dimensions': 512, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip ViT-B-16/laion400m_e32', - 'repo_id': 'Marqo/onnx-open_clip-ViT-B-16', - 'visual_file': 'onnx16-open_clip-ViT-B-16-laion400m_e32-visual.onnx', - 'textual_file': 'onnx16-open_clip-ViT-B-16-laion400m_e32-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'laion400m_e32', - 'image_mean': None, - 'image_std': None - }, - - 'onnx32/open_clip/ViT-B-16/laion400m_e32': - { - 'name': 'onnx32/open_clip/ViT-B-16/laion400m_e32', - 'dimensions': 512, - 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip ViT-B-16/laion400m_e32', - 'repo_id': 'Marqo/onnx-open_clip-ViT-B-16', - 'visual_file': 
'onnx32-open_clip-ViT-B-16-laion400m_e32-visual.onnx', - 'textual_file': 'onnx32-open_clip-ViT-B-16-laion400m_e32-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'laion400m_e32', - 'image_mean': None, - 'image_std': None - }, - - 'onnx16/open_clip/ViT-B-16-plus-240/laion400m_e31': - { - 'name': 'onnx16/open_clip/ViT-B-16-plus-240/laion400m_e31', - 'dimensions': 640, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip ViT-B-16-plus-240/laion400m_e31', - 'repo_id': 'Marqo/onnx-open_clip-ViT-B-16-plus-240', - 'visual_file': 'onnx16-open_clip-ViT-B-16-plus-240-laion400m_e31-visual.onnx', - 'textual_file': 'onnx16-open_clip-ViT-B-16-plus-240-laion400m_e31-textual.onnx', - 'token': None, - 'resolution': 240, - 'pretrained': 'laion400m_e31', - 'image_mean': None, - 'image_std': None - }, - - 'onnx32/open_clip/ViT-B-16-plus-240/laion400m_e31': - { - 'name': 'onnx32/open_clip/ViT-B-16-plus-240/laion400m_e31', - 'dimensions': 640, 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip ViT-B-16-plus-240/laion400m_e31', - 'repo_id': 'Marqo/onnx-open_clip-ViT-B-16-plus-240', - 'visual_file': 'onnx32-open_clip-ViT-B-16-plus-240-laion400m_e31-visual.onnx', - 'textual_file': 'onnx32-open_clip-ViT-B-16-plus-240-laion400m_e31-textual.onnx', - 'token': None, - 'resolution': 240, - 'pretrained': 'laion400m_e31', - 'image_mean': None, - 'image_std': None - }, - - 'onnx16/open_clip/ViT-B-16-plus-240/laion400m_e32': - { - 'name': 'onnx16/open_clip/ViT-B-16-plus-240/laion400m_e32', - 'dimensions': 640, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip ViT-B-16-plus-240/laion400m_e32', - 'repo_id': 'Marqo/onnx-open_clip-ViT-B-16-plus-240', - 'visual_file': 'onnx16-open_clip-ViT-B-16-plus-240-laion400m_e32-visual.onnx', - 'textual_file': 'onnx16-open_clip-ViT-B-16-plus-240-laion400m_e32-textual.onnx', - 'token': None, - 'resolution': 240, - 'pretrained': 'laion400m_e32', - 'image_mean': None, - 'image_std': None - }, - - 'onnx32/open_clip/ViT-B-16-plus-240/laion400m_e32': - { - 'name': 'onnx32/open_clip/ViT-B-16-plus-240/laion400m_e32', - 'dimensions': 640, 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip ViT-B-16-plus-240/laion400m_e32', - 'repo_id': 'Marqo/onnx-open_clip-ViT-B-16-plus-240', - 'visual_file': 'onnx32-open_clip-ViT-B-16-plus-240-laion400m_e32-visual.onnx', - 'textual_file': 'onnx32-open_clip-ViT-B-16-plus-240-laion400m_e32-textual.onnx', - 'token': None, - 'resolution': 240, - 'pretrained': 'laion400m_e32', - 'image_mean': None, - 'image_std': None - }, - - 'onnx16/open_clip/ViT-H-14/laion2b_s32b_b79k': - { - 'name': 'onnx16/open_clip/ViT-H-14/laion2b_s32b_b79k', - 'dimensions': 1024, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip ViT-H-14/laion2b_s32b_b79k', - 'repo_id': 'Marqo/onnx-open_clip-ViT-H-14', - 'visual_file': 'onnx16-open_clip-ViT-H-14-laion2b_s32b_b79k-visual.onnx', - 'textual_file': 'onnx16-open_clip-ViT-H-14-laion2b_s32b_b79k-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'laion2b_s32b_b79k', - 'image_mean': None, - 'image_std': None, - }, - - 'onnx32/open_clip/ViT-H-14/laion2b_s32b_b79k': - { - 'name': 'onnx32/open_clip/ViT-H-14/laion2b_s32b_b79k', - 'dimensions': 1024, - 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip ViT-H-14/laion2b_s32b_b79k', - 'repo_id': 'Marqo/onnx-open_clip-ViT-H-14', - 'visual_file': 'onnx32-open_clip-ViT-H-14-laion2b_s32b_b79k-visual.zip', - 'textual_file': 
'onnx32-open_clip-ViT-H-14-laion2b_s32b_b79k-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'laion2b_s32b_b79k', - 'image_mean': None, - 'image_std': None - }, - - 'onnx16/open_clip/ViT-g-14/laion2b_s12b_b42k': - { - 'name': 'onnx16/open_clip/ViT-g-14/laion2b_s12b_b42k', - 'dimensions': 1024, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip ViT-g-14/laion2b_s12b_b42k', - 'repo_id': 'Marqo/onnx-open_clip-ViT-g-14', - 'visual_file': 'onnx16-open_clip-ViT-g-14-laion2b_s12b_b42k-visual.onnx', - 'textual_file': 'onnx16-open_clip-ViT-g-14-laion2b_s12b_b42k-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'laion2b_s12b_b42k', - 'image_mean': None, - 'image_std': None - }, - - 'onnx32/open_clip/ViT-g-14/laion2b_s12b_b42k': - { - 'name': 'onnx32/open_clip/ViT-g-14/laion2b_s12b_b42k', - 'dimensions': 1024, - 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip ViT-g-14/laion2b_s12b_b42k', - 'repo_id': 'Marqo/onnx-open_clip-ViT-g-14', - 'visual_file': 'onnx32-open_clip-ViT-g-14-laion2b_s12b_b42k-visual.zip', - 'textual_file': 'onnx32-open_clip-ViT-g-14-laion2b_s12b_b42k-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'laion2b_s12b_b42k', - 'image_mean': None, - 'image_std': None - }, - - 'onnx16/open_clip/RN50/openai': - { - 'name': 'onnx16/open_clip/RN50/openai', - 'dimensions': 1024, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip RN50/openai', - 'repo_id': 'Marqo/onnx-open_clip-RN50', - 'visual_file': 'onnx16-open_clip-RN50-openai-visual.onnx', - 'textual_file': 'onnx16-open_clip-RN50-openai-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'openai', - 'image_mean': None, - 'image_std': None - }, - - 'onnx32/open_clip/RN50/openai': - { - 'name': 'onnx32/open_clip/RN50/openai', - 'dimensions': 1024, - 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip RN50/openai', - 'repo_id': 'Marqo/onnx-open_clip-RN50', - 'visual_file': 'onnx32-open_clip-RN50-openai-visual.onnx', - 'textual_file': 'onnx32-open_clip-RN50-openai-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'openai', - 'image_mean': None, - 'image_std': None - }, - - 'onnx16/open_clip/RN50/yfcc15m': - { - 'name': 'onnx16/open_clip/RN50/yfcc15m', - 'dimensions': 1024, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip RN50/yfcc15m', - 'repo_id': 'Marqo/onnx-open_clip-RN50', - 'visual_file': 'onnx16-open_clip-RN50-yfcc15m-visual.onnx', - 'textual_file': 'onnx16-open_clip-RN50-yfcc15m-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'yfcc15m', - 'image_mean': None, - 'image_std': None - }, - - 'onnx32/open_clip/RN50/yfcc15m': - { - 'name': 'onnx32/open_clip/RN50/yfcc15m', - 'dimensions': 1024, - 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip RN50/yfcc15m', - 'repo_id': 'Marqo/onnx-open_clip-RN50', - 'visual_file': 'onnx32-open_clip-RN50-yfcc15m-visual.onnx', - 'textual_file': 'onnx32-open_clip-RN50-yfcc15m-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'yfcc15m', - 'image_mean': None, - 'image_std': None - }, - - 'onnx16/open_clip/RN50/cc12m': - { - 'name': 'onnx16/open_clip/RN50/cc12m', - 'dimensions': 1024, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip RN50/cc12m', - 'repo_id': 'Marqo/onnx-open_clip-RN50', - 'visual_file': 'onnx16-open_clip-RN50-cc12m-visual.onnx', - 'textual_file': 'onnx16-open_clip-RN50-cc12m-textual.onnx', - 'token': None, - 
'resolution': 224, - 'pretrained': 'cc12m', - 'image_mean': None, - 'image_std': None - }, - - 'onnx32/open_clip/RN50/cc12m': - { - 'name': 'onnx32/open_clip/RN50/cc12m', - 'dimensions': 1024, - 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip RN50/cc12m', - 'repo_id': 'Marqo/onnx-open_clip-RN50', - 'visual_file': 'onnx32-open_clip-RN50-cc12m-visual.onnx', - 'textual_file': 'onnx32-open_clip-RN50-cc12m-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'cc12m', - 'image_mean': None, - 'image_std': None - }, - - 'onnx16/open_clip/RN50-quickgelu/openai': - { - 'name': 'onnx16/open_clip/RN50-quickgelu/openai', - 'dimensions': 1024, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip RN50-quickgelu/openai', - 'repo_id': 'Marqo/onnx-open_clip-RN50-quickgelu', - 'visual_file': 'onnx16-open_clip-RN50-quickgelu-openai-visual.onnx', - 'textual_file': 'onnx16-open_clip-RN50-quickgelu-openai-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'openai', - 'image_mean': None, - 'image_std': None - }, - - 'onnx32/open_clip/RN50-quickgelu/openai': - { - 'name': 'onnx32/open_clip/RN50-quickgelu/openai', - 'dimensions': 1024, - 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip RN50-quickgelu/openai', - 'repo_id': 'Marqo/onnx-open_clip-RN50-quickgelu', - 'visual_file': 'onnx32-open_clip-RN50-quickgelu-openai-visual.onnx', - 'textual_file': 'onnx32-open_clip-RN50-quickgelu-openai-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'openai', - 'image_mean': None, - 'image_std': None - }, - - 'onnx16/open_clip/RN50-quickgelu/yfcc15m': - { - 'name': 'onnx16/open_clip/RN50-quickgelu/yfcc15m', - 'dimensions': 1024, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip RN50-quickgelu/yfcc15m', - 'repo_id': 'Marqo/onnx-open_clip-RN50-quickgelu', - 'visual_file': 'onnx16-open_clip-RN50-quickgelu-yfcc15m-visual.onnx', - 'textual_file': 'onnx16-open_clip-RN50-quickgelu-yfcc15m-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'yfcc15m', - 'image_mean': None, - 'image_std': None - }, - - 'onnx32/open_clip/RN50-quickgelu/yfcc15m': - { - 'name': 'onnx32/open_clip/RN50-quickgelu/yfcc15m', - 'dimensions': 1024, - 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip RN50-quickgelu/yfcc15m', - 'repo_id': 'Marqo/onnx-open_clip-RN50-quickgelu', - 'visual_file': 'onnx32-open_clip-RN50-quickgelu-yfcc15m-visual.onnx', - 'textual_file': 'onnx32-open_clip-RN50-quickgelu-yfcc15m-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'yfcc15m', - 'image_mean': None, - 'image_std': None - }, - - 'onnx16/open_clip/RN50-quickgelu/cc12m': - { - 'name': 'onnx16/open_clip/RN50-quickgelu/cc12m', - 'dimensions': 1024, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip RN50-quickgelu/cc12m', - 'repo_id': 'Marqo/onnx-open_clip-RN50-quickgelu', - 'visual_file': 'onnx16-open_clip-RN50-quickgelu-cc12m-visual.onnx', - 'textual_file': 'onnx16-open_clip-RN50-quickgelu-cc12m-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'cc12m', - 'image_mean': None, - 'image_std': None - }, - - 'onnx32/open_clip/RN50-quickgelu/cc12m': - { - 'name': 'onnx32/open_clip/RN50-quickgelu/cc12m', - 'dimensions': 1024, - 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip RN50-quickgelu/cc12m', - 'repo_id': 'Marqo/onnx-open_clip-RN50-quickgelu', - 'visual_file': 'onnx32-open_clip-RN50-quickgelu-cc12m-visual.onnx', - 'textual_file': 
'onnx32-open_clip-RN50-quickgelu-cc12m-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'cc12m', - 'image_mean': None, - 'image_std': None - }, - - 'onnx16/open_clip/RN101/openai': - { - 'name': 'onnx16/open_clip/RN101/openai', - 'dimensions': 512, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip RN101/openai', - 'repo_id': 'Marqo/onnx-open_clip-RN101', - 'visual_file': 'onnx16-open_clip-RN101-openai-visual.onnx', - 'textual_file': 'onnx16-open_clip-RN101-openai-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'openai', - 'image_mean': None, - 'image_std': None - }, - - 'onnx32/open_clip/RN101/openai': - { - 'name': 'onnx32/open_clip/RN101/openai', - 'dimensions': 512, - 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip RN101/openai', - 'repo_id': 'Marqo/onnx-open_clip-RN101', - 'visual_file': 'onnx32-open_clip-RN101-openai-visual.onnx', - 'textual_file': 'onnx32-open_clip-RN101-openai-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'openai', - 'image_mean': None, - 'image_std': None, - }, - - 'onnx16/open_clip/RN101/yfcc15m': - { - 'name': 'onnx16/open_clip/RN101/yfcc15m', - 'dimensions': 512, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip RN101/yfcc15m', - 'repo_id': 'Marqo/onnx-open_clip-RN101', - 'visual_file': 'onnx16-open_clip-RN101-yfcc15m-visual.onnx', - 'textual_file': 'onnx16-open_clip-RN101-yfcc15m-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'yfcc15m', - 'image_mean': None, - 'image_std': None, - }, - - 'onnx32/open_clip/RN101/yfcc15m': - { - 'name': 'onnx32/open_clip/RN101/yfcc15m', - 'dimensions': 512, - 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip RN101/yfcc15m', - 'repo_id': 'Marqo/onnx-open_clip-RN101', - 'visual_file': 'onnx32-open_clip-RN101-yfcc15m-visual.onnx', - 'textual_file': 'onnx32-open_clip-RN101-yfcc15m-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'yfcc15m', - 'image_mean': None, - 'image_std': None - }, - - 'onnx16/open_clip/RN101-quickgelu/openai': - { - 'name': 'onnx16/open_clip/RN101-quickgelu/openai', - 'dimensions': 512, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip RN101-quickgelu/openai', - 'repo_id': 'Marqo/onnx-open_clip-RN101-quickgelu', - 'visual_file': 'onnx16-open_clip-RN101-quickgelu-openai-visual.onnx', - 'textual_file': 'onnx16-open_clip-RN101-quickgelu-openai-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'openai', - 'image_mean': None, - 'image_std': None - }, - - 'onnx32/open_clip/RN101-quickgelu/openai': - { - 'name': 'onnx32/open_clip/RN101-quickgelu/openai', - 'dimensions': 512, - 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip RN101-quickgelu/openai', - 'repo_id': 'Marqo/onnx-open_clip-RN101-quickgelu', - 'visual_file': 'onnx32-open_clip-RN101-quickgelu-openai-visual.onnx', - 'textual_file': 'onnx32-open_clip-RN101-quickgelu-openai-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'openai', - 'image_mean': None, - 'image_std': None - }, - - 'onnx16/open_clip/RN101-quickgelu/yfcc15m': - {'name': 'onnx16/open_clip/RN101-quickgelu/yfcc15m', - 'dimensions': 512, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip RN101-quickgelu/yfcc15m', - 'repo_id': 'Marqo/onnx-open_clip-RN101-quickgelu', - 'visual_file': 'onnx16-open_clip-RN101-quickgelu-yfcc15m-visual.onnx', - 'textual_file': 'onnx16-open_clip-RN101-quickgelu-yfcc15m-textual.onnx', - 
'token': None, - 'resolution': 224, - 'pretrained': 'yfcc15m', - 'image_mean': None, - 'image_std': None - }, - - 'onnx32/open_clip/RN101-quickgelu/yfcc15m': - { - 'name': 'onnx32/open_clip/RN101-quickgelu/yfcc15m', - 'dimensions': 512, - 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip RN101-quickgelu/yfcc15m', - 'repo_id': 'Marqo/onnx-open_clip-RN101-quickgelu', - 'visual_file': 'onnx32-open_clip-RN101-quickgelu-yfcc15m-visual.onnx', - 'textual_file': 'onnx32-open_clip-RN101-quickgelu-yfcc15m-textual.onnx', - 'token': None, - 'resolution': 224, - 'pretrained': 'yfcc15m', - 'image_mean': None, - 'image_std': None - }, - - 'onnx16/open_clip/RN50x4/openai': - { - 'name': 'onnx16/open_clip/RN50x4/openai', - 'dimensions': 640, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip RN50x4/openai', - 'repo_id': 'Marqo/onnx-open_clip-RN50x4', - 'visual_file': 'onnx16-open_clip-RN50x4-openai-visual.onnx', - 'textual_file': 'onnx16-open_clip-RN50x4-openai-textual.onnx', - 'token': None, - 'resolution': 288, - 'pretrained': 'openai', - 'image_mean': None, - 'image_std': None - }, - - 'onnx32/open_clip/RN50x4/openai': - { - 'name': 'onnx32/open_clip/RN50x4/openai', - 'dimensions': 640, - 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip RN50x4/openai', - 'repo_id': 'Marqo/onnx-open_clip-RN50x4', - 'visual_file': 'onnx32-open_clip-RN50x4-openai-visual.onnx', - 'textual_file': 'onnx32-open_clip-RN50x4-openai-textual.onnx', - 'token': None, - 'resolution': 288, - 'pretrained': 'openai', - 'image_mean': None, - 'image_std': None - }, - - 'onnx16/open_clip/RN50x16/openai': - { - 'name': 'onnx16/open_clip/RN50x16/openai', - 'dimensions': 768, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip RN50x16/openai', - 'repo_id': 'Marqo/onnx-open_clip-RN50x16', - 'visual_file': 'onnx16-open_clip-RN50x16-openai-visual.onnx', - 'textual_file': 'onnx16-open_clip-RN50x16-openai-textual.onnx', - 'token': None, - 'resolution': 384, - 'pretrained': 'openai', - 'image_mean': None, - 'image_std': None - }, - - 'onnx32/open_clip/RN50x16/openai': - { - 'name': 'onnx32/open_clip/RN50x16/openai', - 'dimensions': 768, - 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip RN50x16/openai', - 'repo_id': 'Marqo/onnx-open_clip-RN50x16', - 'visual_file': 'onnx32-open_clip-RN50x16-openai-visual.onnx', - 'textual_file': 'onnx32-open_clip-RN50x16-openai-textual.onnx', - 'token': None, - 'resolution': 384, - 'pretrained': 'openai', - 'image_mean': None, - 'image_std': None - }, - - 'onnx16/open_clip/RN50x64/openai': - { - 'name': 'onnx16/open_clip/RN50x64/openai', - 'dimensions': 1024, - 'type': 'clip_onnx', - 'note': 'the onnx float16 version of open_clip RN50x64/openai', - 'repo_id': 'Marqo/onnx-open_clip-RN50x64', - 'visual_file': 'onnx16-open_clip-RN50x64-openai-visual.onnx', - 'textual_file': 'onnx16-open_clip-RN50x64-openai-textual.onnx', - 'token': None, - 'resolution': 448, - 'pretrained': 'openai', - 'image_mean': None, - 'image_std': None - }, - - 'onnx32/open_clip/RN50x64/openai': - { - 'name': 'onnx32/open_clip/RN50x64/openai', - 'dimensions': 1024, - 'type': 'clip_onnx', - 'note': 'the onnx float32 version of open_clip RN50x64/openai', - 'repo_id': 'Marqo/onnx-open_clip-RN50x64', - 'visual_file': 'onnx32-open_clip-RN50x64-openai-visual.onnx', - 'textual_file': 'onnx32-open_clip-RN50x64-openai-textual.onnx', - 'token': None, - 'resolution': 448, - 'pretrained': 'openai', - 'image_mean': None, - 'image_std': None - }, - } - return 
ONNX_CLIP_MODEL_PROPERTIES - - -def _get_fp16_clip_properties() -> Dict: - FP16_CLIP_MODEL_PROPERTIES = { - "fp16/ViT-L/14": { - "name": "fp16/ViT-L/14", - "dimensions": 768, - "type": "fp16_clip", - "notes": "The faster version (fp16, load from `cuda`) of openai clip model" - }, - 'fp16/ViT-B/32': - {"name": "fp16/ViT-B/32", - "dimensions": 512, - "notes": "The faster version (fp16, load from `cuda`) of openai clip model", - "type": "fp16_clip", - }, - 'fp16/ViT-B/16': - {"name": "fp16/ViT-B/16", - "dimensions": 512, - "notes": "The faster version (fp16, load from `cuda`) of openai clip model", - "type": "fp16_clip", - }, - } - - return FP16_CLIP_MODEL_PROPERTIES - - -def _get_random_properties() -> Dict: - RANDOM_MODEL_PROPERTIES = { - "random": - {"name": "random", - "dimensions": 384, - "tokens":128, - "type":"random", - "notes": ""}, - "random/large": - {"name": "random/large", - "dimensions": 768, - "tokens":128, - "type":"random", - "notes": ""}, - "random/small": - {"name": "random/small", - "dimensions": 32, - "tokens":128, - "type":"random", - "notes": ""}, - "random/medium": - {"name": "random/medium", - "dimensions": 128, - "tokens":128, - "type":"random", - "notes": ""}, - - } - return RANDOM_MODEL_PROPERTIES - -def _get_model_load_mappings() -> Dict: - return {'clip':CLIP, - 'open_clip': OPEN_CLIP, - 'sbert':SBERT, - 'test':TEST, - 'sbert_onnx':SBERT_ONNX, - 'clip_onnx': CLIP_ONNX, - "multilingual_clip" : MULTILINGUAL_CLIP, - "fp16_clip": FP16_CLIP, - 'random':Random, - 'hf':HF_MODEL, - 'no_model': NO_MODEL} - -def load_model_properties() -> Dict: - # also truncate the name if not already - sbert_model_properties = _get_sbert_properties() - sbert_model_properties.update({k.split('/')[-1]:v for k,v in sbert_model_properties.items()}) - - sbert_onnx_model_properties = _get_sbert_onnx_properties() - - clip_model_properties = _get_clip_properties() - test_model_properties = _get_sbert_test_properties() - random_model_properties = _get_random_properties() - hf_model_properties = _get_hf_properties() - open_clip_model_properties = _get_open_clip_properties() - onnx_clip_model_properties = _get_onnx_clip_properties() - multilingual_clip_model_properties = get_multilingual_clip_properties() - fp16_clip_model_properties = _get_fp16_clip_properties() - - # combine the above dicts - model_properties = dict(clip_model_properties.items()) - model_properties.update(sbert_model_properties) - model_properties.update(test_model_properties) - model_properties.update(sbert_onnx_model_properties) - model_properties.update(random_model_properties) - model_properties.update(hf_model_properties) - model_properties.update(open_clip_model_properties) - model_properties.update(onnx_clip_model_properties) - model_properties.update(multilingual_clip_model_properties) - model_properties.update(fp16_clip_model_properties) - - all_properties = dict() - all_properties['models'] = model_properties - - all_properties['loaders'] = dict() - for key,val in _get_model_load_mappings().items(): - all_properties['loaders'][key] = val - - return all_properties diff --git a/src/marqo/s2_inference/onnx_clip_utils.py b/src/marqo/s2_inference/onnx_clip_utils.py index 8a54a0ab7..b5963275b 100644 --- a/src/marqo/s2_inference/onnx_clip_utils.py +++ b/src/marqo/s2_inference/onnx_clip_utils.py @@ -14,7 +14,6 @@ from marqo.s2_inference.logger import get_logger import onnxruntime as ort from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize -import marqo.s2_inference.model_registry as model_registry 
 from zipfile import ZipFile
 from huggingface_hub.utils import RevisionNotFoundError,RepositoryNotFoundError, EntryNotFoundError, LocalEntryNotFoundError
 from marqo.s2_inference.errors import ModelDownloadError
@@ -60,6 +59,7 @@ class CLIP_ONNX(object):
     def __init__(self, model_name: str ="onnx32/openai/ViT-L/14", device: str = None, embedding_dim: int = None,
                  truncate: bool = True, load=True, **kwargs):
+        from marqo.s2_inference.s2_inference import get_model_properties_from_registry
         self.model_name = model_name
         self.onnx_type, self.source, self.clip_model = self.model_name.split("/", 2)
         if not device:
@@ -70,7 +70,7 @@ def __init__(self, model_name: str ="onnx32/openai/ViT-L/14", device: str = None
                                "CPUExecutionProvider"]
         self.visual_session = None
         self.textual_session = None
-        self.model_info = model_registry._get_onnx_clip_properties()[self.model_name]
+        self.model_info = get_model_properties_from_registry(self.model_name)
         self.visual_type = np.float16 if self.onnx_type == "onnx16" else np.float32
         self.textual_type = np.int64 if self.source == "open_clip" else np.int32
diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py
index d53ddc7cf..6d2706c18 100644
--- a/src/marqo/s2_inference/s2_inference.py
+++ b/src/marqo/s2_inference/s2_inference.py
@@ -2,12 +2,13 @@
 The functions defined here would have endpoints, later on.
 """
 import numpy as np
+from marqo_commons.model_registry.model_registry import get_model_properties_dict
 from marqo.errors import ModelCacheManagementError, InvalidArgError, ConfigurationError, InternalError, BadRequestError
 from marqo.s2_inference.errors import (
     VectoriseError, InvalidModelPropertiesError, ModelLoadError,
     UnknownModelError, ModelNotInCacheError, ModelDownloadError, IllegalVectoriseError)
 from PIL import UnidentifiedImageError
-from marqo.s2_inference.model_registry import load_model_properties
+from marqo.s2_inference.model_loaders import get_model_loaders
 from marqo.s2_inference.configs import get_default_normalization, get_default_seq_length
 from marqo.s2_inference.types import *
 from marqo.s2_inference.logger import get_logger
@@ -29,7 +30,8 @@ available_models = dict()
 # A lock to protect the model loading process
 lock = threading.Lock()
-MODEL_PROPERTIES = load_model_properties()
+MODEL_PROPERTIES = get_model_properties_dict()
+MODEL_LOADERS = get_model_loaders()
 
 
 def vectorise(model_name: str, content: Union[str, List[str]], model_properties: dict = None,
@@ -309,8 +311,8 @@ def get_model_size(model_name: str, model_properties: dict) -> (int, float):
     '''
     Return the model size for given model
     Note that the priorities are size_in_properties -> model_name -> model_type -> default size
     '''
-    if "model_size" in model_properties:
-        return model_properties["model_size"]
+    if "memory_size" in model_properties:
+        return model_properties["memory_size"]
 
     name_info = (model_name + model_properties.get("name", "")).lower().replace("/", "-")
     for name, size in constants.MODEL_NAME_SIZE_MAPPING.items():
@@ -384,11 +386,11 @@ def get_model_properties_from_registry(model_name: str) -> dict:
         dict: a dictionary describing properties of the model.
     """
 
-    if model_name not in MODEL_PROPERTIES['models']:
+    if model_name not in MODEL_PROPERTIES:
         raise UnknownModelError(f"Could not find model properties in model registry for model={model_name}. 
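# NOTE (editor's sketch, not part of this diff): the get_model_size() hunk above renames the
# explicit-size key from "model_size" to "memory_size" while keeping the documented fallback
# order (size in properties -> model name -> model type -> default). Here is a small,
# stand-alone illustration of that resolution order; NAME_SIZE, TYPE_SIZE, DEFAULT_SIZE and
# resolve_model_size are made-up stand-ins, not Marqo's constants or functions.
NAME_SIZE = {"vit-l-14": 1.5}                 # sizes in GB, keyed by normalised model name
TYPE_SIZE = {"open_clip": 1.0, "hf": 0.5}     # sizes in GB, keyed by model type
DEFAULT_SIZE = 1.0


def resolve_model_size(model_name: str, model_properties: dict) -> float:
    # 1. an explicit size in the model's properties always wins
    if "memory_size" in model_properties:
        return model_properties["memory_size"]
    # 2. otherwise try to match the (normalised) model name against known names
    name_info = (model_name + model_properties.get("name", "")).lower().replace("/", "-")
    for name, size in NAME_SIZE.items():
        if name in name_info:
            return size
    # 3. then fall back to the model type, and finally to a default
    return TYPE_SIZE.get(model_properties.get("type", ""), DEFAULT_SIZE)


# e.g. resolve_model_size("open_clip/ViT-L-14/openai", {"type": "open_clip"}) -> 1.5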
" f"Model is not supported by default.") - return MODEL_PROPERTIES['models'][model_name] + return MODEL_PROPERTIES[model_name] def _check_output_type(output: List[List[float]]) -> bool: @@ -520,13 +522,14 @@ def _get_model_loader(model_name: str, model_properties: dict) -> Any: # `no_model` is a special case, we use the name instead of type. if model_name == SpecialModels.no_model: - return MODEL_PROPERTIES['loaders'][model_name] + return MODEL_LOADERS[model_name] model_type = model_properties['type'] - if model_type not in MODEL_PROPERTIES['loaders']: - raise KeyError(f"model_name={model_name} for model_type={model_type} not in allowed model types") - return MODEL_PROPERTIES['loaders'][model_type] + if model_type not in MODEL_LOADERS: + raise KeyError(f"model_type={model_type} of model_name={model_name} is not in allowed model types") + + return MODEL_LOADERS[model_type] def get_available_models(): diff --git a/src/marqo/tensor_search/models/settings_object.py b/src/marqo/tensor_search/models/settings_object.py deleted file mode 100644 index 5b9393118..000000000 --- a/src/marqo/tensor_search/models/settings_object.py +++ /dev/null @@ -1,231 +0,0 @@ -""" -The settings object should be validated by JSON schema, rather than PyDantic, so that it can be used as a template for -documentation and potentially front-end validation (for usability). -""" -from marqo.tensor_search import enums as ns_enums -from marqo.tensor_search.enums import IndexSettingsField as NsFields, EnvVars, ObjectStores -from marqo.tensor_search.utils import read_env_vars_and_defaults, read_env_vars_and_defaults_ints - -settings_schema = { - "$schema": "https://json-schema.org/draft/2019-09/schema", - "type": "object", - "required": [ - NsFields.index_defaults, - NsFields.number_of_shards, - NsFields.number_of_replicas - ], - "additionalProperties": False, - "properties": { - NsFields.index_defaults: { - "type": "object", - "required": [ - NsFields.treat_urls_and_pointers_as_images, - NsFields.model, - NsFields.normalize_embeddings, - NsFields.text_preprocessing, - NsFields.image_preprocessing - ], - "additionalProperties": False, - "properties": { - NsFields.treat_urls_and_pointers_as_images: { - "type": "boolean", - "examples": [ - False - ] - }, - NsFields.model: { - "type": "string", - "examples": [ - "hf/all_datasets_v4_MiniLM-L6" - ] - }, - NsFields.model_properties: { - "type": "object", - }, - NsFields.normalize_embeddings: { - "type": "boolean", - "examples": [ - True - ] - }, - NsFields.text_preprocessing: { - "type": "object", - "required": [ - NsFields.split_length, - NsFields.split_overlap, - NsFields.split_method - ], - "properties": { - NsFields.split_length: { - "type": "integer", - "examples": [ - 2 - ] - }, - NsFields.split_overlap: { - "type": "integer", - "examples": [ - 0 - ] - }, - NsFields.split_method: { - "type": "string", - "examples": [ - "sentence" - ] - } - }, - "examples": [{ - NsFields.split_length: 2, - NsFields.split_overlap: 0, - NsFields.split_method: "sentence" - }] - }, - NsFields.image_preprocessing: { - "type": "object", - "required": [ - NsFields.patch_method - ], - "properties": { - NsFields.patch_method: { - "type": ["null", "string"], - "examples": [ - None - ] - } - }, - "examples": [{ - NsFields.patch_method: None - }] - }, - NsFields.ann_parameters: { - "type": "object", - "required": [ - # Non required for backwards compatibility - ], - "properties": { - NsFields.ann_method: { - "type": "string", - "enum": ["hnsw"], - "examples": [ - "hnsw" - ] - }, - NsFields.ann_engine: { - 
"type": "string", - "enum": ["lucene"], - "examples": [ - "lucene" - ] - }, - NsFields.ann_metric: { - "type": "string", - "enum": ["l1", "l2", "linf", "cosinesimil"], - "examples": [ - "cosinesimil" - ] - }, - NsFields.ann_method_parameters: { - "type": "object", - "required": [], - "properties": { - NsFields.hnsw_ef_construction: { - "type": "integer", - "minimum": 1, - "maximum": read_env_vars_and_defaults_ints(EnvVars.MARQO_EF_CONSTRUCTION_MAX_VALUE), - "examples": [ - 128 - ] - }, - NsFields.hnsw_m: { - "type": "integer", - "minimum": 2, - "maximum": 100, - "examples": [ - 16 - ] - }, - }, - "examples": [{ - NsFields.hnsw_ef_construction: 128, - NsFields.hnsw_m: 16 - }] - } - }, - "examples": [{ - NsFields.ann_method: "hnsw", - NsFields.ann_engine: "lucene", - NsFields.ann_metric: "cosinesimil", - NsFields.ann_method_parameters: { - NsFields.hnsw_ef_construction: 128, - NsFields.hnsw_m: 16 - } - }] - } - }, - "examples": [{ - NsFields.treat_urls_and_pointers_as_images: False, - NsFields.model: "hf/all_datasets_v4_MiniLM-L6", - NsFields.normalize_embeddings: True, - NsFields.text_preprocessing: { - NsFields.split_length: 2, - NsFields.split_overlap: 0, - NsFields.split_method: "sentence" - }, - NsFields.image_preprocessing: { - NsFields.patch_method: None - }, - NsFields.ann_parameters: { - NsFields.ann_method: "hnsw", - NsFields.ann_engine: "lucene", - NsFields.ann_metric: "cosinesimil", - NsFields.ann_method_parameters: { - NsFields.hnsw_ef_construction: 128, - NsFields.hnsw_m: 16 - } - } - }] - }, - NsFields.number_of_shards: { - "type": "integer", - "minimum": 1, - "examples": [ - 5 - ] - }, - NsFields.number_of_replicas: { - "type": "integer", - "minimum": 0, - "maximum": read_env_vars_and_defaults_ints(EnvVars.MARQO_MAX_NUMBER_OF_REPLICAS), - "examples": [ - 1 - ] - }, - }, - "examples": [{ - NsFields.index_defaults: { - NsFields.treat_urls_and_pointers_as_images: False, - NsFields.model: "hf/all_datasets_v4_MiniLM-L6", - NsFields.normalize_embeddings: True, - NsFields.text_preprocessing: { - NsFields.split_length: 2, - NsFields.split_overlap: 0, - NsFields.split_method: "sentence" - }, - NsFields.image_preprocessing: { - NsFields.patch_method: None - }, - NsFields.ann_parameters: { - NsFields.ann_method: "hnsw", - NsFields.ann_engine: "lucene", - NsFields.ann_metric: "cosinesimil", - NsFields.ann_method_parameters: { - NsFields.hnsw_ef_construction: 128, - NsFields.hnsw_m: 16 - } - } - }, - NsFields.number_of_shards: 3, - NsFields.number_of_replicas: 0 - }] -} diff --git a/src/marqo/tensor_search/tensor_search.py b/src/marqo/tensor_search/tensor_search.py index 358dafacc..e45481323 100644 --- a/src/marqo/tensor_search/tensor_search.py +++ b/src/marqo/tensor_search/tensor_search.py @@ -38,6 +38,8 @@ import functools import pprint import typing + +from marqo.errors import InvalidArgError from marqo.tensor_search.models.private_models import ModelAuth import uuid from typing import List, Optional, Union, Iterable, Sequence, Dict, Any, Tuple @@ -52,6 +54,7 @@ ) from marqo.tensor_search.enums import IndexSettingsField as NsField from marqo.tensor_search import utils, backend, validation, configs, add_docs, filtering + from marqo.tensor_search.formatting import _clean_doc from marqo.tensor_search.index_meta_cache import get_cache, get_index_info from marqo.tensor_search import index_meta_cache @@ -61,7 +64,7 @@ from marqo.tensor_search.models.external_apis.abstract_classes import ExternalAuth from marqo.tensor_search.telemetry import RequestMetricsStore from marqo.tensor_search.health 
import generate_heath_check_response
-from marqo.tensor_search.utils import add_timing
+from marqo.tensor_search.utils import add_timing, read_env_vars_and_defaults_ints
 from marqo.tensor_search import delete_docs
 from marqo.s2_inference.processing import text as text_processor
 from marqo.s2_inference.processing import image as image_processor
@@ -76,6 +79,10 @@
 from marqo.config import Config
 from marqo import errors
 from marqo.s2_inference import errors as s2_inference_errors
+
+from marqo_commons.settings_validation.settings_validation import validate_index_settings
+from marqo_commons.shared_utils.errors import InvalidSettingsArgError
+
 import threading
 from dataclasses import replace
 from marqo.tensor_search.tensor_search_logging import get_logger
@@ -143,7 +150,17 @@ def create_vector_index(
     else:
         the_index_settings = configs.get_default_index_settings()
 
-    validation.validate_settings_object(settings_object=the_index_settings)
+    try:
+        """Validate the index settings with validate_index_settings from marqo-commons.
+        On invalid settings it raises InvalidSettingsArgError; we catch that and re-raise it
+        as Marqo's native InvalidArgError so callers keep seeing the usual error type."""
+        validate_index_settings(
+            settings_to_validate=the_index_settings,
+            MAX_EF_CONSTRUCTION_VALUE=read_env_vars_and_defaults_ints(EnvVars.MARQO_EF_CONSTRUCTION_MAX_VALUE),
+            MAX_NUMBER_OF_REPLICAS=read_env_vars_and_defaults_ints(EnvVars.MARQO_MAX_NUMBER_OF_REPLICAS),
+        )
+    except InvalidSettingsArgError as e:
+        raise InvalidArgError(e)
 
     vector_index_settings = {
         "settings": {
diff --git a/src/marqo/tensor_search/validation.py b/src/marqo/tensor_search/validation.py
index a3df43ff1..ddf0e22a5 100644
--- a/src/marqo/tensor_search/validation.py
+++ b/src/marqo/tensor_search/validation.py
@@ -12,9 +12,7 @@ from marqo.tensor_search.enums import TensorField, SearchMethod
 from marqo.tensor_search import constants
 from marqo.tensor_search.models.search import SearchContext
-
 from marqo.tensor_search.models.delete_docs_objects import MqDeleteDocsRequest
-from marqo.tensor_search.models.settings_object import settings_schema
 from marqo.tensor_search.models.mappings_object import (
     mappings_schema,
     multimodal_combination_mappings_schema,
@@ -418,24 +416,7 @@ def validate_index_name(name: str) -> str:
     return name
 
 
-def validate_settings_object(settings_object):
-    """validates index settings.
-    Returns
-        The given index settings if validation has passed
-
-    Raises an InvalidArgError if the settings object is badly formatted
-    """
-    try:
-        jsonschema.validate(instance=settings_object, schema=settings_schema)
-        return settings_object
-    except jsonschema.ValidationError as e:
-        raise InvalidArgError(
-            f"Error validating index settings object. 
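# NOTE (editor's sketch, not part of this diff): create_vector_index() above now delegates
# validation to marqo-commons' validate_index_settings, passes limits read from environment
# variables, and converts InvalidSettingsArgError into Marqo's native InvalidArgError. Below
# is a self-contained sketch of that error-translation pattern; LibraryError, AppError,
# read_int_env, validate_settings and create_index are hypothetical stand-ins.
import os


class LibraryError(Exception):
    """Stand-in for the shared library's InvalidSettingsArgError."""


class AppError(Exception):
    """Stand-in for the application's native InvalidArgError."""


def read_int_env(var: str, default: int) -> int:
    """Stand-in for read_env_vars_and_defaults_ints: use the env var if set, else the default."""
    return int(os.environ.get(var, default))


def validate_settings(settings: dict, max_replicas: int) -> None:
    """Raise LibraryError when a configurable limit is exceeded, mirroring the shared validator."""
    if settings.get("number_of_replicas", 0) > max_replicas:
        raise LibraryError(f"number_of_replicas exceeds the maximum of {max_replicas}")


def create_index(settings: dict) -> None:
    try:
        validate_settings(settings, max_replicas=read_int_env("MAX_NUMBER_OF_REPLICAS", 1))
    except LibraryError as e:
        # re-raise as the application's own error type so callers keep a stable interface
        raise AppError(e) from e


# e.g. create_index({"number_of_replicas": 5})  -> raises AppError unless the env var allows it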
Reason: \n{str(e)}" - f"\nRead about the index settings object here: https://docs.marqo.ai/0.0.13/API-Reference/indexes/#body" - ) - - -def validate_dict(field: str, field_content: Dict, is_non_tensor_field: bool, mappings: Dict, index_model_dimensions: int = None): +def validate_dict(field: str, field_content: Dict, is_non_tensor_field: bool, mappings: Dict, index_model_dimensions: int = None) -> Dict: ''' Args: diff --git a/tests/s2_inference/test_clip_onnx_utils.py b/tests/s2_inference/test_clip_onnx_utils.py index e76218240..7f279f8c3 100644 --- a/tests/s2_inference/test_clip_onnx_utils.py +++ b/tests/s2_inference/test_clip_onnx_utils.py @@ -7,7 +7,6 @@ from unittest import mock import requests # NOTE: circular reference between model_registry & onnx_clip_utils -import marqo.s2_inference.model_registry as model_registry from marqo.s2_inference.onnx_clip_utils import CLIP_ONNX from marqo.tensor_search.enums import ModelProperties from marqo.tensor_search.models.private_models import ModelLocation, ModelAuth diff --git a/tests/s2_inference/test_encoding.py b/tests/s2_inference/test_encoding.py index fff246e44..146830db6 100644 --- a/tests/s2_inference/test_encoding.py +++ b/tests/s2_inference/test_encoding.py @@ -3,7 +3,7 @@ from unittest.mock import MagicMock, patch from marqo.s2_inference.types import FloatTensor from marqo.s2_inference.s2_inference import clear_loaded_models, get_model_properties_from_registry -from marqo.s2_inference.model_registry import load_model_properties, _get_open_clip_properties +from marqo_commons.model_registry.model_properties_data.open_clip_properties import _get_open_clip_properties from marqo.s2_inference.s2_inference import _convert_tensor_to_numpy import numpy as np import functools diff --git a/tests/tensor_search/test_create_index.py b/tests/tensor_search/test_create_index.py index b6115c5ba..3a576b040 100644 --- a/tests/tensor_search/test_create_index.py +++ b/tests/tensor_search/test_create_index.py @@ -11,7 +11,6 @@ from tests.marqo_test import MarqoTestCase from marqo.tensor_search.enums import IndexSettingsField as NsField, TensorField from unittest import mock -from marqo.tensor_search.models.settings_object import settings_schema from marqo import errors from marqo.errors import InvalidArgError @@ -98,7 +97,7 @@ def test_create_vector_index__invalid_settings(self): except IndexNotFoundError as s: pass - with self.assertRaises(errors.InvalidArgError): + with self.assertRaises(InvalidArgError): print(f"index settings={idx_defaults}") tensor_search.create_vector_index( config=self.config, @@ -335,8 +334,7 @@ def test_set_number_of_shards(self): def test_set_number_of_replicas(self): intended_replicas_count = 4 - from marqo.tensor_search.models.settings_object import settings_schema - with patch.dict(settings_schema["properties"][NsField.number_of_replicas], maximum=10): + with patch.dict(os.environ, {EnvVars.MARQO_MAX_NUMBER_OF_REPLICAS: "10"}): res_0 = tensor_search.create_vector_index( index_name=self.index_name_1, config=self.config, index_settings={ @@ -358,9 +356,8 @@ def test_configurable_max_number_of_replicas(self): maximum_number_of_replicas = 5 large_intended_replicas_count = 10 small_intended_replicas_count = 3 - from marqo.tensor_search.models.settings_object import settings_schema - with patch.dict(settings_schema["properties"][NsField.number_of_replicas], maximum=maximum_number_of_replicas): + with patch.dict(os.environ, {EnvVars.MARQO_MAX_NUMBER_OF_REPLICAS: str(maximum_number_of_replicas)}): # a large value exceeding limits 
should not work try: res_0 = tensor_search.create_vector_index( @@ -425,6 +422,92 @@ def test_configurable_max_number_of_replicas(self): assert maximum_number_of_replicas == int( resp.json()[self.index_name_1]['settings']['index']['number_of_replicas']) + def test_configurable_ef_construction_value(self): + maximum_ef_construction_value = 100 + large_intended_ef_construction_value = 200 + small_intended_ef_construction_value = 50 + + with patch.dict(os.environ, {EnvVars.MARQO_EF_CONSTRUCTION_MAX_VALUE: str(maximum_ef_construction_value)}): + # a large value exceeding limits should not work + try: + res_0 = tensor_search.create_vector_index( + index_name=self.index_name_1, config=self.config, + index_settings={ + "index_defaults": { + "treat_urls_and_pointers_as_images": True, + "model": "ViT-B/32", + "ann_parameters": { + "parameters": { + "ef_construction": large_intended_ef_construction_value + } + }, + }, + } + ) + raise AssertionError + except InvalidArgError as e: + pass + + try: + tensor_search.delete_index(config=self.config, index_name=self.index_name_1) + except IndexNotFoundError: + pass + + # a small value should work + res_1 = tensor_search.create_vector_index( + index_name=self.index_name_1, config=self.config, + index_settings={ + "index_defaults": { + "treat_urls_and_pointers_as_images": True, + "model": "ViT-B/32", + "ann_parameters": { + "parameters": { + "ef_construction": small_intended_ef_construction_value + } + }, + } + } + ) + resp = requests.get( + url=self.authorized_url + f"/{self.index_name_1}", + headers=self.generic_header, + verify=False + ) + assert small_intended_ef_construction_value == int( + resp.json()[self.index_name_1]["mappings"]["_meta"]["index_settings"] + ["index_defaults"]["ann_parameters"]["parameters"]["ef_construction"] + ) + + try: + tensor_search.delete_index(config=self.config, index_name=self.index_name_1) + except IndexNotFoundError: + pass + + # the same number should also work + res_1 = tensor_search.create_vector_index( + index_name=self.index_name_1, config=self.config, + index_settings={ + "index_defaults": { + "treat_urls_and_pointers_as_images": True, + "model": "ViT-B/32", + "ann_parameters": { + "parameters": { + "ef_construction": maximum_ef_construction_value + } + }, + }, + } + ) + resp = requests.get( + url=self.authorized_url + f"/{self.index_name_1}", + headers=self.generic_header, + verify=False + ) + assert maximum_ef_construction_value == int( + resp.json()[self.index_name_1]["mappings"]["_meta"]["index_settings"] + ["index_defaults"]["ann_parameters"]["parameters"]["ef_construction"] + ) + def test_default_max_number_of_replicas(self): large_intended_replicas_count = 2 small_intended_replicas_count = 0 @@ -622,7 +705,7 @@ def test_index_validation_bad(self): try: tensor_search.create_vector_index(config=self.config, index_name=self.index_name_1, index_settings=bad_settings) raise AssertionError - except errors.InvalidArgError as e: + except InvalidArgError as e: pass def test_index_validation_good(self): diff --git a/tests/tensor_search/test_validation.py b/tests/tensor_search/test_validation.py index ad70908f5..4b34bfc83 100644 --- a/tests/tensor_search/test_validation.py +++ b/tests/tensor_search/test_validation.py @@ -5,6 +5,7 @@ import unittest from unittest import mock from unittest.mock import patch + from marqo.tensor_search.models.score_modifiers_object import ScoreModifier from marqo.tensor_search.models.delete_docs_objects import MqDeleteDocsRequest from marqo.tensor_search.models.search import SearchContext @@ 
-14,6 +15,7 @@ InvalidIndexNameError ) + class TestValidation(unittest.TestCase): def setUp(self) -> None: @@ -361,293 +363,7 @@ def test_searchable_attributes_below_limit(self): search_method=enums.SearchMethod.TENSOR ) - class TestValidateIndexSettings(unittest.TestCase): - - @staticmethod - def get_good_index_settings(): - return { - "index_defaults": { - "treat_urls_and_pointers_as_images": False, - "model": "hf/all_datasets_v4_MiniLM-L6", - "normalize_embeddings": True, - "text_preprocessing": { - "split_length": 2, - "split_overlap": 0, - "split_method": "sentence" - }, - "image_preprocessing": { - "patch_method": None - } - }, - "number_of_shards": 5, - "number_of_replicas":1 - } - - def test_validate_index_settings(self): - - good_settings =[ - { - "index_defaults": { - "treat_urls_and_pointers_as_images": False, - "model": "hf/all_datasets_v4_MiniLM-L6", - "normalize_embeddings": True, - "text_preprocessing": { - "split_length": 2, - "split_overlap": 0, - "split_method": "sentence" - }, - "image_preprocessing": { - "patch_method": None - } - }, - "number_of_shards": 5, - "number_of_replicas": 1 - }, - { # extra field in text_preprocessing: OK - "index_defaults": { - "treat_urls_and_pointers_as_images": False, - "model": "hf/all_datasets_v4_MiniLM-L6", - "normalize_embeddings": True, - "text_preprocessing": { - "split_length": 2, - "split_overlap": 0, - "split_method": "sentence", - "blah blah blah": "woohoo" - }, - "image_preprocessing": { - "patch_method": None - } - }, - "number_of_shards": 5, - "number_of_replicas": 1 - }, - { # extra field in image_preprocessing: OK - "index_defaults": { - "treat_urls_and_pointers_as_images": False, - "model": "hf/all_datasets_v4_MiniLM-L6", - "normalize_embeddings": True, - "text_preprocessing": { - "split_length": 2, - "split_overlap": 0, - "split_method": "sentence", - }, - "image_preprocessing": { - "patch_method": None, - "blah blah blah": "woohoo" - } - }, - "number_of_shards": 5, - "number_of_replicas": 1 - } - ] - for settings in good_settings: - assert settings == validation.validate_settings_object(settings) - - def test_validate_index_settings_model_properties(self): - good_settings = self.get_good_index_settings() - good_settings['index_defaults']['model_properties'] = dict() - assert good_settings == validation.validate_settings_object(good_settings) - - def test_validate_index_settings_bad(self): - bad_settings = [{ - "index_defaults": { - "treat_urls_and_pointers_as_images": False, - "model": "hf/all_datasets_v4_MiniLM-L6", - "normalize_embeddings": True, - "text_preprocessing": { - "split_length": "2", - "split_overlap": "0", - "split_method": "sentence" - }, - "image_preprocessing": { - "patch_method": None - } - }, - "number_of_shards": 5, - "number_of_replicas" : -1 - }, - { - "index_defaults": { - "treat_urls_and_pointers_as_images": False, - "model": "hf/all_datasets_v4_MiniLM-L6", - "normalize_embeddings": True, - "text_preprocessing": { - "split_length": "2", - "split_overlap": "0", - "split_method": "sentence" - }, - "image_preprocessing": { - "patch_method": None - } - }, - "number_of_shards": 5 - }, - ] - for bad_setting in bad_settings: - try: - validation.validate_settings_object(bad_setting) - raise AssertionError - except InvalidArgError as e: - pass - - def test_validate_index_settings_missing_text_preprocessing(self): - settings = self.get_good_index_settings() - # base good settings should be OK - assert settings == validation.validate_settings_object(settings) - del 
settings['index_defaults']['text_preprocessing'] - try: - validation.validate_settings_object(settings) - raise AssertionError - except InvalidArgError: - pass - - def test_validate_index_settings_missing_model(self): - settings = self.get_good_index_settings() - # base good settings should be OK - assert settings == validation.validate_settings_object(settings) - del settings['index_defaults']['model'] - try: - validation.validate_settings_object(settings) - raise AssertionError - except InvalidArgError: - pass - - def test_validate_index_settings_missing_index_defaults(self): - settings = self.get_good_index_settings() - # base good settings should be OK - assert settings == validation.validate_settings_object(settings) - del settings['index_defaults'] - try: - validation.validate_settings_object(settings) - raise AssertionError - except InvalidArgError: - pass - - def test_validate_index_settings_bad_number_shards(self): - settings = self.get_good_index_settings() - # base good settings should be OK - assert settings == validation.validate_settings_object(settings) - settings['number_of_shards'] = -1 - try: - validation.validate_settings_object(settings) - raise AssertionError - except InvalidArgError as e: - pass - - def test_validate_index_settings_bad_number_replicas(self): - settings = self.get_good_index_settings() - # base good settings should be OK - assert settings == validation.validate_settings_object(settings) - settings['number_of_replicas'] = -1 - try: - validation.validate_settings_object(settings) - raise AssertionError - except InvalidArgError as e: - pass - - def test_validate_index_settings_img_preprocessing(self): - settings = self.get_good_index_settings() - # base good settings should be OK - assert settings == validation.validate_settings_object(settings) - settings['index_defaults']['image_preprocessing']["path_method"] = "frcnn" - assert settings == validation.validate_settings_object(settings) - - def test_validate_index_settings_misplaced_fields(self): - bad_settings = [ - { - "index_defaults": { - "treat_urls_and_pointers_as_images": False, - "model": "hf/all_datasets_v4_MiniLM-L6", - "normalize_embeddings": True, - "text_preprocessing": { - "split_length": 2, - "split_overlap": 0, - "split_method": "sentence" - }, - "image_preprocessing": { - "patch_method": None - } - }, - "number_of_shards": 5, - "model": "hf/all_datasets_v4_MiniLM-L6" # model is also outside, here... 
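# NOTE (editor's sketch, not part of this diff): the updated create-index tests earlier in this
# patch no longer patch the removed settings_schema; they override the configurable limit with
# patch.dict(os.environ, ...) instead. A generic, runnable illustration of that pattern follows;
# MY_LIMIT and read_limit are hypothetical names, not Marqo code.
import os
import unittest
from unittest.mock import patch


def read_limit() -> int:
    # mirrors reading a configurable limit from the environment with a default
    return int(os.environ.get("MY_LIMIT", "1"))


class TestConfigurableLimit(unittest.TestCase):
    def test_limit_can_be_raised_via_env_var(self):
        # patch.dict sets the variable for the with-block only and restores it afterwards
        with patch.dict(os.environ, {"MY_LIMIT": "10"}):
            self.assertEqual(read_limit(), 10)
        # outside the block the default applies again
        self.assertEqual(read_limit(), 1)


if __name__ == "__main__":
    unittest.main()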
- }, - { - "index_defaults": { - "image_preprocessing": { - "patch_method": None # no models here - }, - "normalize_embeddings": True, - "text_preprocessing": { - "split_length": 2, - "split_method": "sentence", - "split_overlap": 0 - }, - "treat_urls_and_pointers_as_images": False - }, - "model": "open_clip/ViT-L-14/laion2b_s32b_b82k", # model here (bad) - "number_of_shards": 5, - "treat_urls_and_pointers_as_images": True - }, - { - "index_defaults": { - "image_preprocessing": { - "patch_method": None, - "model": "open_clip/ViT-L-14/laion2b_s32b_b82k", - }, - "normalize_embeddings": True, - "text_preprocessing": { - "split_length": 2, - "split_method": "sentence", - "split_overlap": 0 - }, - "treat_urls_and_pointers_as_images": False, - "number_of_shards": 5, # shouldn't be here - }, - "treat_urls_and_pointers_as_images": True - }, - { # good, BUT extra field in index_defaults - "index_defaults": { - "number_of_shards": 5, - "treat_urls_and_pointers_as_images": False, - "model": "hf/all_datasets_v4_MiniLM-L6", - "normalize_embeddings": True, - "text_preprocessing": { - "split_length": 2, - "split_overlap": 0, - "split_method": "sentence" - }, - "image_preprocessing": { - "patch_method": None - } - }, - "number_of_shards": 5 - }, - { # good, BUT extra field in root - "model": "hf/all_datasets_v4_MiniLM-L6", - "index_defaults": { - "treat_urls_and_pointers_as_images": False, - "model": "hf/all_datasets_v4_MiniLM-L6", - "normalize_embeddings": True, - "text_preprocessing": { - "split_length": 2, - "split_overlap": 0, - "split_method": "sentence" - }, - "image_preprocessing": { - "patch_method": None - } - }, - "number_of_shards": 5 - } - ] - for bad_set in bad_settings: - try: - validation.validate_settings_object(bad_set) - raise AssertionError - except InvalidArgError as e: - pass - def test_validate_mappings(self): mappings = [ {