# Process the Embeddings

In [3]:
import numpy as np
import hnswlib
import os

# Build an HNSW nearest-neighbour index over the precomputed embeddings and
# persist it next to the image-ID array used to map search results back to
# images.

# Paths
embeddings_file = os.path.join("..", "data", "processed", "embeddings.npy")
image_ids_file = os.path.join("..", "data", "processed", "image_ids.npy")
hnsw_index_path = os.path.join("..", "data", "processed", "hnsw_index.bin")

# Load Embeddings and Image IDs
embeddings = np.load(embeddings_file)
image_ids = np.load(image_ids_file)

# The index needs one vector per row; fail early with a clear message rather
# than with an opaque error from inside hnswlib.
if embeddings.ndim != 2:
    raise ValueError(
        f"Expected a 2-D embeddings array, got shape {embeddings.shape}"
    )

# Define the Dimension of the Embeddings
# Taken from the data itself instead of being hard-coded, so the script keeps
# working if the embedding model changes (CLIP ViT-B/32 produces 512).
num_elements, d = embeddings.shape

# Index labels below are row positions, so image_ids must line up with the
# embedding rows one-to-one for lookups to be meaningful.
if len(image_ids) != num_elements:
    raise ValueError(
        f"Got {num_elements} embeddings but {len(image_ids)} image IDs"
    )

# Create HNSWlib Index
hnsw_index = hnswlib.Index(space='l2', dim=d)  # 'l2' is for Euclidean distance

# Initialize the index
hnsw_index.init_index(max_elements=num_elements, ef_construction=200, M=16)

# Add items to the index
# Labels are passed explicitly as row numbers so that a label returned by a
# query can be used directly as an index into image_ids.
hnsw_index.add_items(embeddings, np.arange(num_elements))

# Save the index
hnsw_index.save_index(hnsw_index_path)

# Save the Mapping of Image IDs to a File
# NOTE(review): this rewrites the same file that was loaded above with the
# same contents; kept to preserve the original side effect.
np.save(image_ids_file, image_ids)

