In [None]:
import pandas as pd
# Load the chart metadata table; later cells read its `imageid` and
# `full_caption` columns to build the similarity queries.
df = pd.read_csv("ChartswithCaptions.csv")

In [None]:
# Display the (rows, columns) shape of the loaded table as the cell output.
df.shape

In [None]:
from transformers import CLIPProcessor, CLIPModel
import torch

def generate_image_embedding(image_path):
    """Return the CLIP image embedding of the file at ``image_path`` as a list.

    The CLIP model and processor ("openai/clip-vit-base-patch32") are loaded
    on the first call only and then reused; reloading them per image made
    batch runs over many images needlessly slow.

    Relies on ``Image`` (``PIL.Image``) being available at module level by
    the time the function is first called.

    Args:
        image_path: Path of the image file to embed.

    Returns:
        The squeezed image-feature tensor converted with ``.tolist()``, ready
        to be passed to Milvus as a query vector.
    """
    # Cache the (model, processor) pair on the function object so the
    # expensive from_pretrained() calls happen once per kernel session.
    clip_pair = getattr(generate_image_embedding, "_clip_pair", None)
    if clip_pair is None:
        clip_pair = (
            CLIPModel.from_pretrained("openai/clip-vit-base-patch32"),
            CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32"),
        )
        generate_image_embedding._clip_pair = clip_pair
    clip_model, clip_processor = clip_pair

    # Preprocess inside the context manager so the file handle is released
    # as soon as the pixel data has been read.
    with Image.open(image_path) as image:
        inputs = clip_processor(images=image, return_tensors="pt")

    # Inference only: no gradients are needed for the embedding.
    with torch.no_grad():
        image_embedding = clip_model.get_image_features(inputs["pixel_values"])

    # Drop the batch dimension and convert the tensor to a list for Milvus.
    return image_embedding.squeeze().tolist()

In [61]:
from PIL import Image
from pymilvus import connections, Collection
from sentence_transformers import SentenceTransformer

# Bind to the Milvus collection that the query functions search against.
# The server is expected on localhost:19530 under the "default" alias.
collection_name = "hybrid_collection"
connections.connect(alias="default", host="localhost", port="19530")
collection = Collection(name=collection_name)


def run_combined_query(image_id, text_input, top_k=100):
    """Rank stored charts by the mean of their image and caption distances.

    Two L2 searches are run against the module-level ``collection``: one on
    ``image_embedding`` using the CLIP embedding of
    ``./assets/ImageList/<image_id>.png`` and one on ``caption_embedding``
    using the sentence embedding of ``text_input``. Only charts returned by
    BOTH searches are kept.

    Args:
        image_id: Id of the query image; used to build the image path and
            echoed back as ``source_imageid`` in every result.
        text_input: Caption text to embed for the caption search.
        top_k: Number of hits requested from each search (default 100).

    Returns:
        A list of dicts with keys ``source_imageid``, ``imageid``,
        ``distance`` and ``full_caption``, sorted by ascending ``distance``.
        ``distance`` is the plain arithmetic mean of the two raw L2
        distances; the two embedding spaces are not rescaled first.
    """
    # Generate image embedding
    query_image_path = "./assets/ImageList/{}.png".format(image_id)
    query_image_embedding = generate_image_embedding(query_image_path)

    # Generate text embedding. The sentence model is loaded once and cached
    # on the function object instead of being re-created for every query.
    text_model = getattr(run_combined_query, "_text_model", None)
    if text_model is None:
        text_model = SentenceTransformer('all-MiniLM-L6-v2')
        run_combined_query._text_model = text_model
    query_text_embedding = text_model.encode(text_input)

    image_search_results = collection.search(
        data=[query_image_embedding],
        anns_field="image_embedding",
        param={"metric_type": "L2", "params": {"nprobe": 15}},
        limit=top_k,
        output_fields=["imageid", "full_caption"],
    )
    text_search_results = collection.search(
        data=[query_text_embedding],
        anns_field="caption_embedding",
        param={"metric_type": "L2", "params": {"nprobe": 15}},
        limit=top_k,
        output_fields=["imageid", "full_caption"],
    )

    # Index each hit list on its own. Walking both lists with zip() would
    # stop at the shorter one and silently drop hits from the longer one.
    image_hits = {
        hit.entity.get('imageid'): (hit.distance, hit.entity.get('full_caption'))
        for hit in image_search_results[0]
    }
    text_distances = {
        hit.entity.get('imageid'): hit.distance
        for hit in text_search_results[0]
    }

    # Keep only charts found by both searches and average their distances.
    combined_results = [
        {
            "source_imageid": image_id,
            "imageid": key,
            "distance": (image_distance + text_distances[key]) / 2,
            "full_caption": caption,
        }
        for key, (image_distance, caption) in image_hits.items()
        if key in text_distances
    ]

    # Sort by combined distance (ascending); ties keep image-search order.
    combined_results.sort(key=lambda item: item["distance"])

    return combined_results

def run_caption_based_query(image_id, text_input, top_k=50):
    """Return the charts whose stored caption embedding is closest to a text.

    ``text_input`` is embedded with the 'all-MiniLM-L6-v2' sentence model and
    searched (L2 metric) against the ``caption_embedding`` field of the
    module-level ``collection``.

    Args:
        image_id: Id of the query chart. It is not used for the search; it is
            only echoed back as ``source_imageid`` in every result.
        text_input: Caption text to embed and search with.
        top_k: Number of hits requested from the search (default 50).

    Returns:
        A list of dicts with keys ``source_imageid``, ``imageid``,
        ``distance`` and ``full_caption``, in the order the search returned
        the hits.
    """
    # Load the sentence model once and cache it on the function object;
    # re-creating it for every query dominated the batch run time.
    text_model = getattr(run_caption_based_query, "_text_model", None)
    if text_model is None:
        text_model = SentenceTransformer('all-MiniLM-L6-v2')
        run_caption_based_query._text_model = text_model
    query_text_embedding = text_model.encode(text_input)

    text_search_results = collection.search(
        data=[query_text_embedding],
        anns_field="caption_embedding",
        param={"metric_type": "L2", "params": {"nprobe": 15}},
        limit=top_k,
        output_fields=["imageid", "full_caption"],
    )

    # One query vector was sent, so only the first hit list is relevant.
    return [
        {
            "source_imageid": image_id,
            "imageid": hit.entity.get('imageid'),
            "distance": hit.distance,
            "full_caption": hit.entity.get('full_caption'),
        }
        for hit in text_search_results[0]
    ]

In [None]:
# Export the combined (image + caption) ranking: one CSV row per
# (query chart, retrieved chart) pair.
max_queries = 970  # number of leading rows of `df` used as queries
with open("distances.csv", "w") as f:
    f.write("source_imageid,destination_imageid,distance\n")  # Write header
    # Clamp to the table length so a shorter table does not raise IndexError.
    for i in range(min(max_queries, len(df))):
        row = df.iloc[i]  # look the row up once instead of three times
        print(i, row.imageid)
        items = run_combined_query(row.imageid, row.full_caption)
        for item in items:
            # Columns: query image id, matched image id, distance (2 decimals)
            f.write(f"{item['source_imageid']},{item['imageid']},{round(item['distance'],2)}\n")

In [63]:
# Export the caption-only ranking: one CSV row per
# (query chart, retrieved chart) pair.
max_queries = 500  # number of leading rows of `df` used as queries
with open("caption_based_distances.csv", "w") as f:
    f.write("source_imageid,destination_imageid,distance\n")  # Write header
    # Clamp to the table length so a shorter table does not raise IndexError.
    for i in range(min(max_queries, len(df))):
        row = df.iloc[i]  # look the row up once instead of three times
        print(i, row.imageid)
        items = run_caption_based_query(row.imageid, row.full_caption)
        for item in items:
            # Columns: query image id, matched image id, distance (2 decimals)
            f.write(f"{item['source_imageid']},{item['imageid']},{round(item['distance'],2)}\n")

0 7
1 10
2 12
3 13
4 14
5 20
6 26
7 30
8 45
9 70
10 82
11 84
12 90
13 98
14 102
15 104
16 107
17 108
18 112
19 122
20 125
21 126
22 127
23 139
24 140
25 142
26 147
27 148
28 149
29 151
30 152
31 157
32 161
33 165
34 173
35 174
36 175
37 181
38 182
39 183
40 184
41 186
42 187
43 188
44 189
45 191
46 194
47 196
48 197
49 198
50 200
51 203
52 207
53 208
54 213
55 214
56 217
57 220
58 227
59 232
60 233
61 234
62 236
63 241
64 242
65 248
66 249
67 252
68 253
69 254
70 255
71 256
72 258
73 261
74 262
75 263
76 264
77 265
78 267
79 269
80 270
81 271
82 272
83 273
84 274
85 276
86 279
87 280
88 281
89 287
90 290
91 292
92 297
93 298
94 299
95 300
96 302
97 304
98 309
99 313
100 314
101 317
102 318
103 320
104 322
105 324
106 326
107 327
108 328
109 329
110 330
111 331
112 332
113 334
114 337
115 339
116 342
117 344
118 345
119 347
120 349
121 352
122 363
123 364
124 366
125 367
126 368
127 369
128 380
129 382
130 385
131 386
132 390
133 391
134 392
135 396
136 400
137 403
138 406
139 407
140 4