In [29]:
from PIL import Image
import pandas as pd
import torch
from transformers import CLIPProcessor, CLIPModel

# Load the dataset
dataset_path = 'reverse_image_search.csv'  # Replace with your dataset path
df = pd.read_csv(dataset_path)

# Load the CLIP model and processor
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")




In [30]:
# Initialize a list to store tensors
embeddings = []

for index, row in df.iterrows():
    image_path = row['path']  # Assuming the path is in a column named 'path'
    image = Image.open(image_path).convert('RGB')  # Ensure image is in RGB
    inputs = processor(images=image, return_tensors="pt")
    image_features = model.get_image_features(**inputs)
    # Ensure the tensor is detached from the computational graph before converting
    embeddings.append(image_features.squeeze(0).detach().numpy().tolist())

# Concatenate all feature vectors into a single tensor
#image_features_tensor = torch.stack(embeddings)

# image_features_tensor now contains the feature vectors for all images in your dataset>

In [34]:
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility

# Milvus parameters
HOST = '127.0.0.1'
PORT = '19530'
TOPK = 13

In [35]:
connections.connect(host=HOST, port=PORT)
collection_name = 'tranformers_clip_patch16'
dim = 512  # Dimension of the embeddings
METRIC_TYPE = 'L2'  # You can choose 'L2', 'IP', etc., based on your requirement
INDEX_TYPE = 'IVF_FLAT'  # Index type

In [36]:
utility.list_collections()

['text_image_search']

In [13]:
utility.drop_collection("tranformers_clip")

In [37]:
def create_milvus_collection(collection_name, dim):
    if utility.has_collection(collection_name):
        utility.drop_collection(collection_name)
    
    fields = [
        FieldSchema(name='path', dtype=DataType.VARCHAR, description='path to image', max_length=500, 
                    is_primary=True, auto_id=False),
        FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR, description='image embedding vectors', dim=dim)
    ]
    schema = CollectionSchema(fields=fields, description='reverse image search')
    collection = Collection(name=collection_name, schema=schema)

    index_params = {
        'metric_type': METRIC_TYPE,
        'index_type': INDEX_TYPE,
        'params': {"nlist": 512}
    }
    collection.create_index(field_name='embedding', index_params=index_params)
    return collection

In [38]:
collection = create_milvus_collection(collection_name, dim)

In [39]:
paths = df['path'].tolist()


In [40]:
entities = [[path for path in paths],
            [embedding for embedding in embeddings]]
#preparing for insertion to milvus

In [41]:
mr = collection.insert(entities)


In [42]:
collection = Collection(collection_name)      # Get an existing collection.
collection.load()

In [43]:
search_params = {
    "metric_type": "L2", 
    "offset": 0, 
    "ignore_growing": False, 
    "params": {"nprobe": 10}
}

In [44]:

# search with image

query_image_path = 'aleren.jpeg'  
query_image = Image.open(query_image_path).convert('RGB')  
query_inputs = processor(images=query_image, return_tensors="pt")
query_image_features = model.get_image_features(**query_inputs)
embedding = query_image_features.squeeze(0).detach().numpy().tolist()


# Concatenate all feature vectors into a single tensor
#image_features_tensor = torch.stack(embeddings)

# image_features_tensor now contains the feature vectors for all images in your dataset

results = collection.search(
    data=[embedding], 
    anns_field="embedding", 
    # the sum of `offset` in `param` and `limit` 
    # should be less than 16384.
    param=search_params,
    limit=10,
    expr=None,
)



In [45]:
results[0].ids


['./train/basketball/n02802426_24958.JPEG',
 './train/basketball/n02802426_7656.JPEG',
 './train/basketball/n02802426_3881.JPEG',
 './train/basketball/n02802426_12782.JPEG',
 './train/horizontal_bar/n03535780_16077.JPEG',
 './train/basketball/n02802426_7726.JPEG',
 './train/basketball/n02802426_26718.JPEG',
 './train/basketball/n02802426_8222.JPEG',
 './train/basketball/n02802426_10137.JPEG',
 './train/basketball/n02802426_12191.JPEG']

In [46]:
results[0].distances

[85.77123260498047,
 95.03716278076172,
 95.73870849609375,
 97.46134185791016,
 99.283447265625,
 101.21218872070312,
 101.49933624267578,
 101.66267395019531,
 102.91883087158203,
 103.99423217773438]

In [27]:
# search with text
query_text = "airplane"  
text_inputs = processor(text=query_text, return_tensors="pt", padding=True, truncation=True, max_length=77)
query_text_features = model.get_text_features(**text_inputs)
text_embedding = query_text_features.squeeze(0).detach().numpy().tolist()

results = collection.search(
    data=[text_embedding], 
    anns_field="embedding", 
    # the sum of `offset` in `param` and `limit` 
    # should be less than 16384.
    param=search_params,
    limit=10,
    expr=None,
)



NameError: name 'collection' is not defined

In [47]:
# search with text 
#patch 16
query_text = "cat"  
text_inputs = processor(text=query_text, return_tensors="pt")
query_text_features = model.get_text_features(**text_inputs)
text_embedding = query_text_features.squeeze(0).detach().numpy().tolist()

print(text_embedding)

[0.2051691710948944, -0.03279348835349083, -0.06166143715381622, -0.03970229625701904, -0.2433445155620575, 0.21964505314826965, -0.34200319647789, 0.12462783604860306, -0.3632577955722809, 0.28449299931526184, 0.005944356322288513, -0.48552653193473816, 0.2448098212480545, -0.07519945502281189, 0.34867072105407715, 0.4418478310108185, 0.17099052667617798, -0.21698689460754395, -0.07976849377155304, 0.07817978411912918, 0.410218209028244, 0.3069245517253876, 0.2493753582239151, 0.10165336728096008, -0.19615575671195984, 0.3654637336730957, 0.352477103471756, 0.6314743757247925, -0.0062239691615104675, 0.0031716041266918182, 0.11212191730737686, -0.012524619698524475, 0.17180348932743073, 0.17517916858196259, -0.09132827818393707, 0.163207545876503, 0.17541882395744324, 0.3215360641479492, 0.3618620038032532, 0.19236218929290771, -0.0018282458186149597, -0.018739476799964905, 0.2335728406906128, 0.27410566806793213, 0.1741076558828354, 0.2329310178756714, -0.06656691431999207, -0.233122

In [48]:
results = collection.search(
    data=[text_embedding], 
    anns_field="embedding", 
    # the sum of `offset` in `param` and `limit` 
    # should be less than 16384.
    param=search_params,
    limit=10,
    expr=None,
)



In [26]:
# search with text 
#patch 32
query_text = "cat"  
text_inputs = processor(text=query_text, return_tensors="pt", padding=True, truncation=True, max_length=77)
query_text_features = model.get_text_features(**text_inputs)
text_embedding = query_text_features.squeeze(0).detach().numpy().tolist()

print(text_embedding)

[0.2051691710948944, -0.03279348835349083, -0.06166143715381622, -0.03970229625701904, -0.2433445155620575, 0.21964505314826965, -0.34200319647789, 0.12462783604860306, -0.3632577955722809, 0.28449299931526184, 0.005944356322288513, -0.48552653193473816, 0.2448098212480545, -0.07519945502281189, 0.34867072105407715, 0.4418478310108185, 0.17099052667617798, -0.21698689460754395, -0.07976849377155304, 0.07817978411912918, 0.410218209028244, 0.3069245517253876, 0.2493753582239151, 0.10165336728096008, -0.19615575671195984, 0.3654637336730957, 0.352477103471756, 0.6314743757247925, -0.0062239691615104675, 0.0031716041266918182, 0.11212191730737686, -0.012524619698524475, 0.17180348932743073, 0.17517916858196259, -0.09132827818393707, 0.163207545876503, 0.17541882395744324, 0.3215360641479492, 0.3618620038032532, 0.19236218929290771, -0.0018282458186149597, -0.018739476799964905, 0.2335728406906128, 0.27410566806793213, 0.1741076558828354, 0.2329310178756714, -0.06656691431999207, -0.233122

In [49]:
results[0].ids


['./train/electric_locomotive/n03272562_265.JPEG',
 './train/steam_locomotive/n04310018_11226.JPEG',
 './train/lynx/n02127052_2768.JPEG',
 './train/harmonica/n03494278_30921.JPEG',
 './train/electric_locomotive/n03272562_8383.JPEG',
 './train/hermit_crab/n01986214_23891.JPEG',
 './train/bottlecap/n02877765_3444.JPEG',
 './train/electric_locomotive/n03272562_1737.JPEG',
 './train/lynx/n02127052_206.JPEG',
 './train/castle/n02980441_5253.JPEG']

In [50]:
for result_path in results[0].ids:
    result_image = Image.open(result_path).convert('RGB')
    result_image.show()



(eog:69611): EOG-CRITICAL **: 13:29:09.117: eog_image_get_file: assertion 'EOG_IS_IMAGE (img)' failed

(eog:69611): GLib-GIO-CRITICAL **: 13:29:09.117: g_file_equal: assertion 'G_IS_FILE (file1)' failed

(eog:69611): EOG-CRITICAL **: 13:29:09.117: eog_image_get_file: assertion 'EOG_IS_IMAGE (img)' failed

(eog:69611): GLib-GIO-CRITICAL **: 13:29:09.117: g_file_equal: assertion 'G_IS_FILE (file1)' failed

(eog:69611): EOG-CRITICAL **: 13:29:09.117: eog_image_get_file: assertion 'EOG_IS_IMAGE (img)' failed

(eog:69611): GLib-GIO-CRITICAL **: 13:29:09.117: g_file_equal: assertion 'G_IS_FILE (file1)' failed

(eog:69611): EOG-CRITICAL **: 13:29:09.117: eog_image_get_file: assertion 'EOG_IS_IMAGE (img)' failed

(eog:69611): GLib-GIO-CRITICAL **: 13:29:09.117: g_file_equal: assertion 'G_IS_FILE (file1)' failed

(eog:69611): EOG-CRITICAL **: 13:29:09.117: eog_image_get_file: assertion 'EOG_IS_IMAGE (img)' failed

(eog:69611): GLib-GIO-CRITICAL **: 13:29:09.117: g_file_equal: assertion 'G_IS_F

In [136]:
results[0].distances

[154.8622589111328,
 154.98194885253906,
 156.00918579101562,
 157.588134765625,
 158.6085968017578,
 158.73123168945312,
 159.7007293701172,
 159.90402221679688,
 160.07493591308594,
 160.310546875]