In [1]:
from transformers import AutoProcessor, AutoModel
import torch
import numpy as np
import faiss
from PIL import Image
import os
import glob

import open_clip

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
import torch
from transformers import AutoModel, AutoProcessor
from PIL import Image
import requests


# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load model and processor
model_name = "Marqo/marqo-ecommerce-embeddings-L"
try:
    processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModel.from_pretrained(model_name, trust_remote_code=True, device_map=device)
    model.eval()
except Exception as e:
    print(f"Error loading model: {e}")
    # Optional: Use Hugging Face token if authentication issue
    token = ""  # Replace with your token if needed
    processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True, token=token, force_download=True)
    model = AutoModel.from_pretrained(model_name, trust_remote_code=True, token=token, device_map=device, force_download=True)
    model.eval()

  from .autonotebook import tqdm as notebook_tqdm


Using device: cpu


Fetching 1 files: 100%|██████████| 1/1 [00:00<?, ?it/s]
Fetching 1 files: 100%|██████████| 1/1 [00:00<?, ?it/s]
You are using a model of type siglip to instantiate a model of type . This is not supported for all configurations of models and can yield errors.


Error loading model: Cannot copy out of meta tensor; no data! Please use torch.nn.Module.to_empty() instead of torch.nn.Module.to() when moving module from meta to a different device.


Fetching 1 files: 100%|██████████| 1/1 [00:01<00:00,  1.24s/it]
Fetching 1 files: 100%|██████████| 1/1 [00:00<00:00,  1.18it/s]
You are using a model of type siglip to instantiate a model of type . This is not supported for all configurations of models and can yield errors.
Error while downloading from https://cdn-lfs-us-1.hf.co/repos/2b/ba/2bba274ddab7c3d74e51a70aa38841cffc67797619081eb47c9b29b09c0ea4a1/5f54e3323fc98caddba9626aa9771efd873c3cb9d63cc65b4619c2ccb6213e4e?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model.safetensors%3B+filename%3D%22model.safetensors%22%3B&Expires=1757436317&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc1NzQzNjMxN319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmhmLmNvL3JlcG9zLzJiL2JhLzJiYmEyNzRkZGFiN2MzZDc0ZTUxYTcwYWEzODg0MWNmZmM2Nzc5NzYxOTA4MWViNDdjOWIyOWIwOWMwZWE0YTEvNWY1NGUzMzIzZmM5OGNhZGRiYTk2MjZhYTk3NzFlZmQ4NzNjM2NiOWQ2M2NjNjViNDYxOWMyY2NiNjIxM2U0ZT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoi

NotImplementedError: Cannot copy out of meta tensor; no data! Please use torch.nn.Module.to_empty() instead of torch.nn.Module.to() when moving module from meta to a different device.

In [8]:
def get_embedding(image):
    inputs = processor(images=[image], padding='max_length',return_tensors="pt")
    # processor.image_processor.do_rescale = False
    with torch.no_grad():
        outputs = model(**inputs)
        print(outputs)
        # Use CLS token embedding
        # embedding = outputs.last_hidden_state[:, 0, :].squeeze()
    # return embedding.numpy()
    return outputs

image = '../data/test_output/cropped_image_2.jpg'
image = Image.open(image)

embedding = get_embedding(image)

embedding

RuntimeError: The size of tensor a (256) must match the size of tensor b (196) at non-singleton dimension 1

In [None]:
class TransformerEmbedding:
    def __init__(self, model_name):
        self.processor = AutoImageProcessor.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)
        
    def get_embedding(self, image):
        inputs = self.processor(images=image, return_tensors='pt')
        with torch.no_grad():
            outputs = self.model(**inputs)
            embed = outputs['pooler_output'].squeeze()
            # embed = embed.reshape(-1) if not use squeeze
            embed = torch.nn.functional.normalize(embed, dim=0)
        return embed.numpy()       

In [None]:
class FaissManager:
    def __init__(self, embedding_dim):
        self.index = faiss.IndexFlatIP(embedding_dim)
        
    def add_reference_images(self, embeddings, index_path):
        vector = np.array(embeddings).astype(np.float32)
        self.index.add(vector)
        faiss.write_index(self.index, index_path)
        return self.index
    
    def load_index(self, index_path):
        self.index = faiss.read_index(index_path)
        return self.index
    
    def find_similarity(self, query_embedding, index_path, top_k = 3):
        self.index = self.load_index(index_path=index_path)
        
        query_features = query_embedding.astype(np.float32).reshape(1, -1)
        
        distances, indices = self.index.search(query_features, top_k)
        
        return distances, indices

In [None]:
class DBEmbedding:
    def __init__(self, model):
        self.image_path = []
        self.embedding_lis = []
        self.model = model
    def get_db_embedding(self, db_dir):
        self.image_path = glob.glob(os.path.join(db_dir, '*/*.jpg'))
        for img in self.image_path:
            image = Image.open(img)
            embedding = self.model.get_embedding(image)
            self.embedding_lis.append(embedding)
        return self.embedding_lis

In [None]:
DB_DIR = '../data/db'

db_images = os.listdir(DB_DIR)
db_image_paths = glob.glob(os.path.join(DB_DIR, '*/*.jpg'))
emb_model = TransformerEmbedding(model_name)

embedding_db = DBEmbedding(emb_model)

embedding_lis = embedding_db.get_db_embedding(DB_DIR)

embedding_dim = len(embedding_lis[0])
embedding_dim

In [None]:
faiss_manager = FaissManager(embedding_dim=embedding_dim)

INDEX_PATH = '../data/faiss_data/vector.index'

faiss_manager.add_reference_images(embeddings=embedding_lis, index_path=INDEX_PATH)