In [2]:
from PIL import Image
import pandas as pd
import torch
from transformers import CLIPProcessor, CLIPModel

# Load the dataset: a CSV index of the images to embed
# (the embedding cell reads its 'path' column).
dataset_path = 'reverse_image_search.csv'  # Replace with your dataset path
df = pd.read_csv(dataset_path)

# Load the CLIP model and its processor from ONE checkpoint name, so the
# preprocessing and the weights cannot drift apart when the checkpoint is swapped.
CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(CLIP_CHECKPOINT)
processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)



In [3]:
# Compute one CLIP image embedding per row of `df`, in row order.
# Each entry appended to `embeddings` is a plain Python list of floats.
embeddings = []

# Inference only: disabling autograd avoids building a computation graph for
# every image, which would cost memory and time for no benefit here.
with torch.no_grad():
    for image_path in df['path']:  # Assuming the path is in a column named 'path'
        # The context manager closes the underlying file once the RGB copy exists.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')  # Ensure image is in RGB
        inputs = processor(images=image, return_tensors="pt")
        image_features = model.get_image_features(**inputs)
        # squeeze(0) removes the leading axis (one image is processed per call);
        # Tensor.tolist() converts to Python floats without a NumPy round-trip.
        embeddings.append(image_features.squeeze(0).tolist())


In [4]:
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility

# Milvus parameters
HOST = '127.0.0.1'  # server address handed to connections.connect
PORT = '19530'  # server port, kept as a string
TOPK = 13  # not used in the visible cells; presumably the number of search hits to return — TODO confirm

In [5]:
# Settings consumed by create_milvus_collection when it builds the schema and index.
dim = 512  # Dimension of the embeddings
METRIC_TYPE = 'L2'  # You can choose 'L2', 'IP', etc., based on your requirement
INDEX_TYPE = 'IVF_FLAT'  # Index type

# Connect to the Milvus server at HOST:PORT.
connections.connect(host=HOST, port=PORT)

['user_collection_id_1',
 'user_collection_id_3',
 'transformers',
 'image_based_search',
 'image_based_search_transformers',
 'text_based_search',
 'user_collection_id_2']

In [16]:
def create_milvus_collection(collection_name, vector_dim=None, nlist=None, drop_existing=True):
    """Create a Milvus collection for image embeddings and index its vector field.

    The schema has two fields: 'path' (VARCHAR primary key, max_length 500,
    supplied by the caller rather than auto-generated) and 'embedding'
    (FLOAT_VECTOR). An index using the module-level METRIC_TYPE and INDEX_TYPE
    is created on 'embedding'.

    Args:
        collection_name: Name of the collection to create.
        vector_dim: Dimension of the 'embedding' field. Defaults to the
            module-level `dim`.
        nlist: Value passed as the index's "nlist" build parameter. Defaults
            to `vector_dim`, which reproduces the value this notebook has
            always used; pass an explicit value to tune it independently of
            the embedding size.
        drop_existing: When True (the default, and the original behaviour), an
            existing collection with the same name is dropped — losing all of
            its data — and rebuilt. When False, an existing collection is
            returned untouched.

    Returns:
        The `Collection` handle for `collection_name`.
    """
    if vector_dim is None:
        vector_dim = dim
    if nlist is None:
        # Historical default: the notebook reused the embedding dimension here.
        nlist = vector_dim

    if utility.has_collection(collection_name):
        if not drop_existing:
            # Reuse the stored collection as-is; its schema and index are left alone.
            return Collection(name=collection_name)
        utility.drop_collection(collection_name)

    fields = [
        FieldSchema(name='path', dtype=DataType.VARCHAR, description='path to image', max_length=500,
                    is_primary=True, auto_id=False),
        FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR, description='image embedding vectors',
                    dim=vector_dim),
    ]
    schema = CollectionSchema(fields=fields, description='reverse image search')
    collection = Collection(name=collection_name, schema=schema)

    index_params = {
        'metric_type': METRIC_TYPE,
        'index_type': INDEX_TYPE,
        'params': {"nlist": nlist},
    }
    collection.create_index(field_name='embedding', index_params=index_params)
    return collection

In [17]:
# Users that each get their own collection, named 'user_collection_id_<id>'.
user_ids = [1, 2, 3]

In [20]:
# Build one collection per user and keep EVERY handle, keyed by user id.
# (Previously `collection` was overwritten on each pass, so only the last
# user's handle survived the loop.)
# Caution: create_milvus_collection drops a same-named collection before
# recreating it, so re-running this cell wipes those collections.
user_collections = {}
for userId in user_ids:
    collection_name = f'user_collection_id_{userId}'
    collection = create_milvus_collection(collection_name)
    user_collections[userId] = collection
# `collection_name` and `collection` stay bound to the last user's values, as before.
    


In [8]:
# NOTE(review): stale cell — its execution count (8) predates the loop cell above (20).
# On a top-to-bottom run `collection_name` still holds the last name assigned by that
# loop, so this call drops and recreates that one collection (create_milvus_collection
# drops any existing collection with the same name). Presumably safe to delete —
# confirm before removing.
collection = create_milvus_collection(collection_name)

In [22]:
# Display the names of all collections on the connected Milvus server.
utility.list_collections()

['user_collection_id_1',
 'user_collection_id_3',
 'transformers',
 'image_based_search',
 'image_based_search_transformers',
 'text_based_search',
 'user_collection_id_2']

In [23]:
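# Cleanup (intentionally disabled): uncomment to drop the collection currently named by `collection_name`.
# utility.drop_collection(collection_name)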