In [2]:
from PIL import Image
import pandas as pd
import torch
from transformers import CLIPProcessor, CLIPModel

# Load the dataset
dataset_path = 'reverse_image_search.csv'  # Replace with your dataset path
df = pd.read_csv(dataset_path)

# Load the CLIP model and processor
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")




In [None]:
# Initialize a list to store tensors
embeddings = []

for index, row in df.iterrows():
    image_path = row['path']  # Assuming the path is in a column named 'path'
    image = Image.open(image_path).convert('RGB')  # Ensure image is in RGB
    inputs = processor(images=image, return_tensors="pt")
    image_features = model.get_image_features(**inputs)
    # Ensure the tensor is detached from the computational graph before converting
    embeddings.append(image_features.squeeze(0).detach().numpy().tolist())

# Concatenate all feature vectors into a single tensor
#image_features_tensor = torch.stack(embeddings)

# image_features_tensor now contains the feature vectors for all images in your dataset>

In [3]:
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility

# Milvus parameters
HOST = '127.0.0.1'
PORT = '19530'
TOPK = 13

In [4]:
connections.connect(host=HOST, port=PORT)
collection_name = 'tranformers_clip_patch16'
dim = 512  # Dimension of the embeddings
METRIC_TYPE = 'L2'  # You can choose 'L2', 'IP', etc., based on your requirement
INDEX_TYPE = 'IVF_FLAT'  # Index type

In [14]:
utility.list_collections()

[]

In [13]:
utility.drop_collection("tranformers_clip")

In [121]:
def create_milvus_collection(collection_name, dim):
    if utility.has_collection(collection_name):
        utility.drop_collection(collection_name)
    
    fields = [
        FieldSchema(name='path', dtype=DataType.VARCHAR, description='path to image', max_length=500, 
                    is_primary=True, auto_id=False),
        FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR, description='image embedding vectors', dim=dim)
    ]
    schema = CollectionSchema(fields=fields, description='reverse image search')
    collection = Collection(name=collection_name, schema=schema)

    index_params = {
        'metric_type': METRIC_TYPE,
        'index_type': INDEX_TYPE,
        'params': {"nlist": 512}
    }
    collection.create_index(field_name='embedding', index_params=index_params)
    return collection

In [122]:
collection = create_milvus_collection(collection_name, dim)

In [123]:
paths = df['path'].tolist()


In [124]:
entities = [[path for path in paths],
            [embedding for embedding in embeddings]]
#preparing for insertion to milvus

In [125]:
mr = collection.insert(entities)


In [126]:
collection = Collection(collection_name)      # Get an existing collection.
collection.load()

In [127]:
search_params = {
    "metric_type": "L2", 
    "offset": 0, 
    "ignore_growing": False, 
    "params": {"nprobe": 10}
}

In [128]:

# search with image

query_image_path = 'aleren.jpeg'  
query_image = Image.open(query_image_path).convert('RGB')  
query_inputs = processor(images=query_image, return_tensors="pt")
query_image_features = model.get_image_features(**query_inputs)
embedding = query_image_features.squeeze(0).detach().numpy().tolist()


# Concatenate all feature vectors into a single tensor
#image_features_tensor = torch.stack(embeddings)

# image_features_tensor now contains the feature vectors for all images in your dataset

results = collection.search(
    data=[embedding], 
    anns_field="embedding", 
    # the sum of `offset` in `param` and `limit` 
    # should be less than 16384.
    param=search_params,
    limit=10,
    expr=None,
)



In [129]:
results[0].ids


['./train/basketball/n02802426_7656.JPEG',
 './train/basketball/n02802426_24958.JPEG',
 './train/horizontal_bar/n03535780_16077.JPEG',
 './train/horizontal_bar/n03535780_18270.JPEG',
 './train/basketball/n02802426_12782.JPEG',
 './train/basketball/n02802426_9952.JPEG',
 './train/basketball/n02802426_26718.JPEG',
 './train/basketball/n02802426_7726.JPEG',
 './train/basketball/n02802426_3881.JPEG',
 './train/ski_mask/n04229816_6821.JPEG']

In [130]:
results[0].distances

[93.42448425292969,
 93.95035552978516,
 96.57243347167969,
 102.10467529296875,
 102.97093963623047,
 104.1107177734375,
 105.00439453125,
 105.82638549804688,
 106.10682678222656,
 106.15518188476562]

In [146]:
# search with text
query_text = "airplane"  
text_inputs = processor(text=query_text, return_tensors="pt", padding=True, truncation=True, max_length=77)
query_text_features = model.get_text_features(**text_inputs)
text_embedding = query_text_features.squeeze(0).detach().numpy().tolist()

results = collection.search(
    data=[text_embedding], 
    anns_field="embedding", 
    # the sum of `offset` in `param` and `limit` 
    # should be less than 16384.
    param=search_params,
    limit=10,
    expr=None,
)



In [147]:
results[0].ids


['./train/comic_book/n06596364_19168.JPEG',
 './train/bottlecap/n02877765_1596.JPEG',
 './train/safety_pin/n04127249_5909.JPEG',
 './train/harmonica/n03494278_30921.JPEG',
 './train/warplane/n04552348_16150.JPEG',
 './train/warplane/n04552348_12780.JPEG',
 './train/dishwasher/n03207941_15169.JPEG',
 './train/warplane/n04552348_10736.JPEG',
 './train/warplane/n04552348_2027.JPEG',
 './train/magpie/n01582220_10712.JPEG']

In [148]:
for result_path in results[0].ids:
    result_image = Image.open(result_path).convert('RGB')
    result_image.show()



(eog:99022): EOG-CRITICAL **: 15:28:33.580: eog_image_get_file: assertion 'EOG_IS_IMAGE (img)' failed

(eog:99022): GLib-GIO-CRITICAL **: 15:28:33.580: g_file_equal: assertion 'G_IS_FILE (file1)' failed

(eog:99022): EOG-CRITICAL **: 15:28:33.580: eog_image_get_file: assertion 'EOG_IS_IMAGE (img)' failed

(eog:99022): GLib-GIO-CRITICAL **: 15:28:33.580: g_file_equal: assertion 'G_IS_FILE (file1)' failed

(eog:99022): EOG-CRITICAL **: 15:28:33.580: eog_image_get_file: assertion 'EOG_IS_IMAGE (img)' failed

(eog:99022): GLib-GIO-CRITICAL **: 15:28:33.580: g_file_equal: assertion 'G_IS_FILE (file1)' failed

(eog:99022): EOG-CRITICAL **: 15:28:33.651: eog_image_get_file: assertion 'EOG_IS_IMAGE (img)' failed

(eog:99022): GLib-GIO-CRITICAL **: 15:28:33.651: g_file_equal: assertion 'G_IS_FILE (file1)' failed

(eog:99022): EOG-CRITICAL **: 15:28:33.651: eog_image_get_file: assertion 'EOG_IS_IMAGE (img)' failed

(eog:99022): GLib-GIO-CRITICAL **: 15:28:33.651: g_file_equal: assertion 'G_IS_F

In [136]:
results[0].distances

[154.8622589111328,
 154.98194885253906,
 156.00918579101562,
 157.588134765625,
 158.6085968017578,
 158.73123168945312,
 159.7007293701172,
 159.90402221679688,
 160.07493591308594,
 160.310546875]