In [4]:
pip install torch torchvision torchaudio transformers sentencepiece

Collecting torch
  Downloading torch-2.9.1-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (30 kB)
Collecting torchvision
  Downloading torchvision-0.24.1-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (5.9 kB)
Collecting torchaudio
  Downloading torchaudio-2.9.1-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (6.9 kB)
Collecting sympy>=1.13.3 (from torch)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.8.93 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl.metadata (1.7 kB)
Collecting nvidia-cuda-runtime-cu12==12.8.90 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7 kB)
Collecting nvidia-cuda-cupti-cu12==12.8.90 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7 kB)
Collecting nvidia-cudnn-cu12==9.10.2.21 (from torch)


In [2]:
import os
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModel
from google.cloud import aiplatform

In [30]:
PROJECT_ID = "aiml-cpf-th-food-axmt"  
REGION = "asia-southeast1"          
INDEX_ENDPOINT_ID = "1435091372666257408"
QUERY_IMAGE_PATH = "datatest/AI1/20251112_112304(0).jpg"
NUM_NEIGHBORS = 5

In [27]:
aiplatform.init(project=PROJECT_ID, location=REGION)

In [3]:
try:
    processor = AutoProcessor.from_pretrained("google/siglip-base-patch16-224")
    model = AutoModel.from_pretrained("google/siglip-base-patch16-224")
    model.eval()
    print("SigLIP model loaded successfully.")
except Exception as e:
    print(f"Error loading SigLIP model: {e}")
    exit()

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


spiece.model:   0%|          | 0.00/798k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/409 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/432 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/813M [00:00<?, ?B/s]

SigLIP model loaded successfully.


In [10]:
def create_query_embedding(image_path: str) -> list:
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Query image not found at: {image_path}")

    image = Image.open(image_path).convert("RGB")
    
    # Pre-processing
    inputs = processor(images=image, return_tensors="pt")
    
    # Inference
    with torch.no_grad():
        outputs = model.get_image_features(**inputs)
        query_vector = outputs / outputs.norm(p=2, dim=-1, keepdim=True)
    
    return query_vector.squeeze(0).tolist()

In [29]:
def vector_search(query_image_path: str, endpoint_id: str, num_neighbors: int):
    try:
        query_embedding_vector = create_query_embedding(query_image_path)
        
        index_endpoint = aiplatform.IndexEndpoint(index_endpoint_name=endpoint_id)
        
        # filter_condition = [
        #     {"namespace": "metadata.label_class", "allow_list": ["AI1"]} # use namespace 'metadata.label_class'
        # ]
        filter_condition = None
        
        try:
            response = index_endpoint.find_neighbors(
                queries=[query_embedding_vector],
                num_neighbors=NUM_NEIGHBORS,
                # filter=filter_condition #
            )
            print(response)
        except Exception as e:
            print(f"ERROR: {e}")
            return
        
    except Exception as e:
        print(f"Error creating embedding: {e}")
        return
    return 

In [31]:
res = vector_search(QUERY_IMAGE_PATH, INDEX_ENDPOINT_ID, NUM_NEIGHBORS)

Error creating embedding: module 'google.cloud.aiplatform' has no attribute 'IndexEndpoint'
