In [None]:
!pip3 install torch torchvision datasets

In [None]:
#Import the necessary libraries
import torch
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
import numpy as np
import os
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
import copy
from datasets import load_dataset
from PIL import Image

In [None]:
# Load a pretrained ResNet model and remove the final classification layer.
# `weights=` replaces the deprecated `pretrained = True` flag (torchvision >= 0.13);
# IMAGENET1K_V1 is the checkpoint that `pretrained = True` used to select.
resnet18 = models.resnet18(weights = models.ResNet18_Weights.IMAGENET1K_V1)

# Keep every child module except the last one (the fully connected classifier)
model = torch.nn.Sequential(*list(resnet18.children())[:-1])

# Inference mode: the network is only used as a fixed feature extractor
model.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# CIFAR-10

In [None]:
# Preprocessing applied to every CIFAR-10 image before it is fed to the ResNet:
# upscale to 224x224, convert to a tensor, then normalize each channel.
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize CIFAR-10 images to 224x224 for ResNet input
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])  #Normalize as ResNet expects
])

In [None]:
# Load the CIFAR-10 train and test splits (downloaded into ./data when missing),
# applying `transform` to every image.
trainset = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transform)
testset = torchvision.datasets.CIFAR10(root='./data', train = False, download = True, transform = transform)

# shuffle = False: batches are served in dataset order, so the embeddings extracted
# from these loaders line up with the dataset indices.
trainloader = DataLoader(trainset, batch_size = 64, shuffle = False, num_workers = 4, pin_memory = True)
testloader = DataLoader(testset, batch_size = 64, shuffle = False, num_workers = 4, pin_memory = True)

In [None]:
# Function to extract embeddings for a given dataset
def extract_embeddings(dataloader, model, device):
    """Run `model` over every batch of `dataloader` and collect the outputs.

    Args:
        dataloader: iterable yielding `(images, labels)` batches of tensors.
        model: callable mapping an image batch to a feature tensor whose first
            dimension is the batch dimension.
        device: device the image batches are moved to before the forward pass.

    Returns:
        `(embeddings, labels)`: CPU tensors concatenated over all batches along
        dim 0; `embeddings` has shape `(num_samples, feature_dim)`.
    """
    embeddings = []
    labels = []

    with torch.no_grad():
        for images, lbls in dataloader:
            images = images.to(device)
            # Flatten everything after the batch dimension. Unlike `.squeeze()`, this
            # keeps the batch dimension when a batch contains a single image, so the
            # final torch.cat always sees (batch, feature_dim) tensors.
            outputs = torch.flatten(model(images), start_dim = 1)
            embeddings.append(outputs.cpu())  # Store the embeddings as tensors on CPU
            labels.append(lbls)

    embeddings = torch.cat(embeddings, dim = 0)  # Concatenate all batch embeddings into one tensor
    labels = torch.cat(labels, dim = 0)  # Concatenate all batch labels
    return embeddings, labels

# Extract embeddings for train and test set
train_embeddings, train_labels = extract_embeddings(trainloader, model, device)
test_embeddings, test_labels = extract_embeddings(testloader, model, device)

# Save the embeddings for future use (optional).
# NOTE(review): only the embeddings are written, not the labels, and the CIFAR-100
# cell further down saves to these same two file names.
torch.save(train_embeddings, 'train_embeddings.pth')
torch.save(test_embeddings, 'test_embeddings.pth')

# CIFAR-100

In [None]:
# Preprocessing applied to every CIFAR-100 image before it is fed to the ResNet:
# upscale to 224x224, convert to a tensor, then normalize each channel.
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize CIFAR-100 images to 224x224 for ResNet input
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])  #Normalize as ResNet expects
])

In [None]:
# Load the CIFAR-100 train and test splits (downloaded into ./data when missing),
# applying `transform` to every image. This rebinds `trainset`/`testset` and the
# loaders that the CIFAR-10 section defined.
trainset = torchvision.datasets.CIFAR100(root = './data', train = True, download = True, transform = transform)
testset = torchvision.datasets.CIFAR100(root='./data', train = False, download = True, transform = transform)

# shuffle = False: batches are served in dataset order
trainloader = DataLoader(trainset, batch_size = 64, shuffle = False, num_workers = 4, pin_memory = True)
testloader = DataLoader(testset, batch_size = 64, shuffle = False, num_workers = 4, pin_memory = True)

In [None]:
# Function to extract embeddings for a given dataset
# (same definition as in the CIFAR-10 section, repeated so this section runs on its own)
def extract_embeddings(dataloader, model, device):
    """Run `model` over every batch of `dataloader` and collect the outputs.

    Args:
        dataloader: iterable yielding `(images, labels)` batches of tensors.
        model: callable mapping an image batch to a feature tensor whose first
            dimension is the batch dimension.
        device: device the image batches are moved to before the forward pass.

    Returns:
        `(embeddings, labels)`: CPU tensors concatenated over all batches along
        dim 0; `embeddings` has shape `(num_samples, feature_dim)`.
    """
    embeddings = []
    labels = []

    with torch.no_grad():
        for images, lbls in dataloader:
            images = images.to(device)
            # Flatten everything after the batch dimension. Unlike `.squeeze()`, this
            # keeps the batch dimension when a batch contains a single image.
            outputs = torch.flatten(model(images), start_dim = 1)
            embeddings.append(outputs.cpu())  # Store the embeddings as tensors on CPU
            labels.append(lbls)

    embeddings = torch.cat(embeddings, dim = 0)  # Concatenate all batch embeddings into one tensor
    labels = torch.cat(labels, dim = 0)  # Concatenate all batch labels
    return embeddings, labels

# Extract embeddings for train and test set.
# This rebinds train_embeddings/train_labels/test_embeddings/test_labels, so the
# later sections operate on whichever dataset section was executed last.
train_embeddings, train_labels = extract_embeddings(trainloader, model, device)
test_embeddings, test_labels = extract_embeddings(testloader, model, device)

# Save the embeddings for future use (optional).
# NOTE(review): these are the same file names the CIFAR-10 section writes, so
# running both sections leaves only the CIFAR-100 embeddings on disk.
torch.save(train_embeddings, 'train_embeddings.pth')
torch.save(test_embeddings, 'test_embeddings.pth')

# ImageNet-1K

In [None]:
# Define transformations to apply to images (resize to 256x256, convert to tensor, normalize)
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    # Same per-channel statistics as the CIFAR pipelines above: the pretrained ResNet
    # expects normalized inputs, and this step was missing here.
    transforms.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225]),
])

In [None]:
# Load the ImageNet 1K resized dataset from the Hugging Face hub.
# The cells below read its 'train' and 'val' splits.
dataset = load_dataset("evanarlian/imagenet_1k_resized_256")

In [None]:
# Custom Dataset class to apply transforms on the dataset
class CustomImageDataset(Dataset):
    """Adapter exposing a row-indexable dataset as `(image, label)` pairs.

    Args:
        hf_dataset: object supporting `len()` and integer indexing, where each row
            is a mapping with an 'image' and a 'label' entry.
        transform: optional callable applied to the image before it is returned.
    """

    def __init__(self, hf_dataset, transform=None):
        self.dataset = hf_dataset
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        # Read the row once and reuse it for both fields; indexing the dataset twice
        # would load (and presumably decode) the image a second time just for the label.
        row = self.dataset[idx]

        # The 'image' is already a PIL image, no need to use Image.open()
        image = row['image']

        # The ResNet consumes 3-channel input. Convert images that report a non-RGB
        # mode (e.g. grayscale or CMYK files) so every sample has the same channels.
        if getattr(image, 'mode', 'RGB') != 'RGB':
            image = image.convert('RGB')

        # Apply transformations
        if self.transform is not None:
            image = self.transform(image)

        return image, row['label']

In [None]:
# Create PyTorch datasets for train and validation sets
# (the 'val' split plays the role of the test set in the rest of the notebook)
train_dataset = CustomImageDataset(dataset['train'], transform = transform)
test_dataset = CustomImageDataset(dataset['val'], transform = transform)

# Create DataLoaders for batch processing; shuffle = False keeps dataset order
train_loader = DataLoader(train_dataset, batch_size = 32, shuffle = False, num_workers = 4)
test_loader = DataLoader(test_dataset, batch_size = 32, shuffle = False, num_workers = 4)



In [None]:
# Function to get embeddings
def get_embeddings_and_labels(dataloader, model, device):
    """Collect flattened model outputs and labels for every batch of `dataloader`.

    Each batch of images is moved to `device`, passed through `model` with gradients
    disabled, and reshaped to `(batch_size, -1)`. Returns `(embeddings, labels)` as
    CPU tensors concatenated along dim 0.
    """
    feature_chunks, label_chunks = [], []

    with torch.no_grad():
        for batch_images, batch_labels in dataloader:
            features = model(batch_images.to(device))
            # One row per sample, all remaining dimensions collapsed
            flat = features.view(features.size(0), -1)
            feature_chunks.append(flat.cpu())
            label_chunks.append(batch_labels.cpu())

    return torch.cat(feature_chunks, dim=0), torch.cat(label_chunks, dim=0)

In [None]:
# Get embeddings and labels for the training set
# (rebinds train_embeddings/train_labels used by the later sections)
train_embeddings, train_labels = get_embeddings_and_labels(train_loader, model, device)

# Get embeddings and labels for the validation (test) set
test_embeddings, test_labels = get_embeddings_and_labels(test_loader, model, device)

# Save training embeddings and labels together in one dict
torch.save({'embeddings': train_embeddings, 'labels': train_labels}, "imagenet_resnet18_train_embeddings_and_labels.pt")

# Save validation (test) embeddings and labels
torch.save({'embeddings': test_embeddings, 'labels': test_labels}, "imagenet_resnet18_test_embeddings_and_labels.pt")

# K-Means Clustering

In [None]:
# Convert the training embeddings (one concatenated tensor) to a 2D NumPy array
train_embed = np.array(train_embeddings)

# Specify the number of clusters
# NOTE(review): 10 presumably matches the CIFAR-10 class count; train_embeddings holds
# whichever dataset section ran last - confirm before reusing for CIFAR-100/ImageNet.
n_clusters = 10

# Create a KMeans instance and fit the model
kmeans = KMeans(n_clusters = n_clusters, random_state = 42)
kmeans.fit(train_embed)

# Get the labels (cluster assignments) for each training embedding
labels = kmeans.labels_

# Optionally, you can save the labels if needed
torch.save(labels, 'cluster_labels.pth')

In [None]:
# Test embeddings as a NumPy array, in the same feature space K-Means was fitted on
test_embed = np.array(test_embeddings)

predicted_labels = kmeans.predict(test_embed) #Predict the clusters for test embeddings

# Persist the per-test-sample cluster assignments
torch.save(predicted_labels, 'test_cluster_labels.pth')

In [None]:
#Function to compute Precision@k
def precision_at_k(query_label, output_docs, k):
    """Fraction of the first `k` entries of `output_docs` that equal `query_label`.

    `output_docs` must support slicing and an element-wise `==` whose result offers
    `.sum().item()` (e.g. a tensor or NumPy array). The denominator is always `k`,
    even when fewer than `k` entries are available.
    """
    matches = output_docs[:k] == query_label
    return matches.sum().item() / k

def mean_average_precision(retrieved_labels, true_label):
    """Average precision of one ranked label list with respect to `true_label`.

    Walks `retrieved_labels` in rank order; at every position whose label equals
    `true_label` the precision up to that position is accumulated. Returns the mean
    of those precisions, or 0.0 when nothing matches. Note the argument order:
    ranked labels first, query label second.
    """
    hits = 0
    running_precision = 0.0
    for rank, candidate in enumerate(retrieved_labels, start=1):
        if candidate == true_label:
            hits += 1
            running_precision += hits / rank
    if hits > 0:
        return running_precision / hits
    return 0.0

In [None]:
# Retrieval evaluation: every test embedding is ranked against the training
# embeddings that fall into the same K-Means cluster.
avg_precisions, p_10, p_50 = [], [], []

for i, test_tensor in enumerate(test_embed):
    cluster_label = predicted_labels[i]  # Get the cluster label for the current test tensor

    # Get train tensors that belong to the same cluster
    cluster_train_indices = np.where(labels == cluster_label)[0]
    cluster_train_data = train_embed[cluster_train_indices]

    # Compute cosine similarity between the test tensor and the train tensors in the same cluster
    similarities = cosine_similarity([test_tensor], cluster_train_data)[0]

    # Rank train tensors by similarity
    ranked_indices = np.argsort(similarities)[::-1]  # Sort indices in descending order
    ranked_train_indices = cluster_train_indices[ranked_indices]

    true_train_labels = train_labels[ranked_train_indices]
    true_test_label = test_labels[i]

    precision_at_10 = precision_at_k(true_test_label, true_train_labels, 10)
    precision_at_50 = precision_at_k(true_test_label, true_train_labels, 50)

    p_10.append(precision_at_10)
    p_50.append(precision_at_50)

    # mean_average_precision expects (ranked labels, query label) - the reverse of
    # precision_at_k's order. Plain Python values are passed because the function
    # iterates element by element, which is far slower on tensors.
    # The result is a float; it is deliberately not stored in a variable called `map`,
    # which would shadow the builtin used by later cells.
    average_precision = mean_average_precision(true_train_labels.tolist(), true_test_label.item())

    avg_precisions.append(average_precision)

In [None]:
# Average each per-query metric over all evaluated test samples
mean_p10 = sum(p_10) / len(p_10)
mean_p50 = sum(p_50) / len(p_50)
mean_ap = sum(avg_precisions) / len(avg_precisions)

In [None]:
# Report the averaged retrieval metrics for plain K-Means
print('Mean Precision@10:', mean_p10)
print('Mean Precision@50:', mean_p50)
print('Mean Average Precision', mean_ap)

0.7511599999999902
0.7077839999999819
0.6093555005947813


# K-Means Clustering + PCA

In [None]:
from sklearn.decomposition import PCA

In [None]:
# Create a PCA object
pca = PCA(n_components = 0.95)  # Retain 95% of variance

# Fit the projection on the training embeddings only, then apply that same
# projection to the test embeddings.
train_pca = pca.fit_transform(train_embeddings)
test_pca = pca.transform(test_embeddings)

In [None]:
# Wrap the NumPy results of the PCA step as torch tensors
train_pca = torch.from_numpy(train_pca)
test_pca = torch.from_numpy(test_pca)

In [None]:
# Convert the PCA-reduced training features to a 2D NumPy array
train_embed = np.array(train_pca)

# Specify the number of clusters
n_clusters = 10

# Create a KMeans instance and fit the model on the PCA-reduced features
kmeans = KMeans(n_clusters = n_clusters, random_state = 42)
kmeans.fit(train_embed)

# Get the labels (cluster assignments) for each training sample
labels = kmeans.labels_

# Optionally, you can save the labels if needed.
# NOTE(review): same file name as in the plain K-Means section, so that file is overwritten.
torch.save(labels, 'cluster_labels.pth')

In [None]:
# K-Means in this section was fitted on the PCA-reduced training features, so the test
# samples must come from the same PCA space (test_pca), not the raw embeddings -
# otherwise predict() receives a different number of features than it was fitted on.
test_embed = np.array(test_pca)

predicted_labels = kmeans.predict(test_embed) #Predict the clusters for test embeddings

torch.save(predicted_labels, 'test_cluster_labels.pth')

In [None]:
#Function to compute Precision@k
def precision_at_k(query_label, output_docs, k):
    """Share of the top-`k` entries of `output_docs` whose value is `query_label`.

    `output_docs` needs slicing plus an element-wise `==` returning something with
    `.sum().item()` (tensor or NumPy array). The count is divided by `k` regardless
    of how many entries were actually available.
    """
    top_k_hits = (output_docs[:k] == query_label).sum()
    return top_k_hits.item() / k

def mean_average_precision(retrieved_labels, true_label):
    """Average precision for a single ranked list of labels.

    For each rank at which the label equals `true_label`, the precision at that rank
    is added up; the sum is divided by the number of matches. Returns 0.0 if there is
    no match. Argument order: ranked labels first, query label second.
    """
    matches_so_far = 0
    precision_total = 0.0
    for position, label in enumerate(retrieved_labels, start=1):
        if not (label == true_label):
            continue
        matches_so_far += 1
        precision_total += matches_so_far / position
    return precision_total / matches_so_far if matches_so_far > 0 else 0.0

In [None]:
# Retrieval evaluation in the PCA-reduced space: every test sample is ranked against
# the training samples that fall into the same K-Means cluster.
avg_precisions, p_10, p_50 = [], [], []

for i, test_tensor in enumerate(test_embed):
    cluster_label = predicted_labels[i]  # Get the cluster label for the current test tensor

    # Get train tensors that belong to the same cluster
    cluster_train_indices = np.where(labels == cluster_label)[0]
    cluster_train_data = train_embed[cluster_train_indices]

    # Compute cosine similarity between the test tensor and the train tensors in the same cluster
    similarities = cosine_similarity([test_tensor], cluster_train_data)[0]

    # Rank train tensors by similarity
    ranked_indices = np.argsort(similarities)[::-1]  # Sort indices in descending order
    ranked_train_indices = cluster_train_indices[ranked_indices]

    true_train_labels = train_labels[ranked_train_indices]
    true_test_label = test_labels[i]

    precision_at_10 = precision_at_k(true_test_label, true_train_labels, 10)
    precision_at_50 = precision_at_k(true_test_label, true_train_labels, 50)

    p_10.append(precision_at_10)
    p_50.append(precision_at_50)

    # mean_average_precision expects (ranked labels, query label) - the reverse of
    # precision_at_k's order. Plain Python values are passed because the function
    # iterates element by element, which is far slower on tensors.
    # The float result is not stored under the name `map`, which would shadow the
    # builtin that the hashing cells rely on.
    average_precision = mean_average_precision(true_train_labels.tolist(), true_test_label.item())

    avg_precisions.append(average_precision)

In [None]:
# Average each per-query metric over all evaluated test samples
mean_p10 = sum(p_10) / len(p_10)
mean_p50 = sum(p_50) / len(p_50)
mean_ap = sum(avg_precisions) / len(avg_precisions)

In [None]:
# Report the averaged retrieval metrics for K-Means on PCA-reduced features
print('Mean Precision@10:', mean_p10)
print('Mean Precision@50:', mean_p50)
print('Mean Average Precision', mean_ap)

# Random Hyperplanes

In [None]:
# Back to the raw (non-PCA) embeddings as NumPy arrays; this rebinds the
# train_embed/test_embed names that the PCA section pointed at reduced features.
train_embed = np.array(train_embeddings)
test_embed = np.array(test_embeddings)

In [None]:
torch.manual_seed(42)  # reproducible hyperplanes

hyperplanes = 10 #This value is changed with [3, 4, 7, 8, 10, 11]
hashtables = 12 #This value is changed with [3, 6, 12]

# One set of `hyperplanes` random normals per hash table, with components drawn
# uniformly from [-0.5, 0.5). The width follows the embeddings instead of a
# hard-coded 512 so the cell also works for other feature extractors.
embedding_dim = train_embeddings.shape[1]
plane_norms = torch.rand(hashtables, hyperplanes, embedding_dim) - 0.5

# product[t, n, h] is 1 when training sample n lies on the positive side of
# hyperplane h of table t, else 0.
product = torch.matmul(train_embeddings, plane_norms.transpose(-1, -2))
product = product > 0
product = product.long()

In [None]:
# bucket_list[t] maps a bit-string code to the list of training indices hashed to it in table t
bucket_list = []

# Template holding every possible code, so that query lookups never hit a missing key
binary_dict = {}

for a in range(2 ** hyperplanes):
    binary_number = format(a, '0' + str(hyperplanes) + 'b')
    binary_dict[binary_number] = []

for i in range(hashtables):
  # Fresh, independent bucket lists for this table
  buckets = {code: [] for code in binary_dict}

  # Convert the whole table to nested Python lists once instead of indexing the tensor
  # row by row. A generator expression is used rather than the builtin `map`, because
  # the evaluation cells of this notebook rebind the name `map`.
  for j, bits in enumerate(product[i].tolist()):
    hash_str = ''.join(str(bit) for bit in bits)

    buckets[hash_str].append(j)

  bucket_list.append(buckets)

In [None]:
# Hash the test embeddings with the same hyperplanes as the training set:
# a bit is 1 where the projection onto a hyperplane normal is positive.
test_product = torch.matmul(test_embeddings, plane_norms.transpose(-1, -2))
test_product = test_product > 0
test_product = test_product.long()

In [None]:
# Compute cosine similarity and rank train tensors for each test tensor
avg_precisions, p_10, p_50 = [], [], []

for i, test_tensor in enumerate(test_embed):
  # Union of the training indices sharing a bucket with the query in any hash table
  candidate_set = set()

  for j in range(hashtables):
    # Generator expression instead of the builtin `map`, which other cells rebind
    bucket_code = ''.join(str(bit) for bit in test_product[j][i].tolist())

    candidate_set.update(bucket_list[j].get(bucket_code, []))

  if not candidate_set:
    # Nothing was retrieved for this query: count it as a miss on every metric
    # instead of failing on an empty index array.
    p_10.append(0.0)
    p_50.append(0.0)
    avg_precisions.append(0.0)
    continue

  cluster_train_indices = np.array(sorted(candidate_set), dtype = np.int64)

  cluster_train_data = train_embed[cluster_train_indices]

  # Compute cosine similarity between the test tensor and the retrieved train tensors
  similarities = cosine_similarity([test_tensor], cluster_train_data)[0]

  # Rank train tensors by similarity
  ranked_indices = np.argsort(similarities)[::-1]  # Sort indices in descending order
  ranked_train_indices = cluster_train_indices[ranked_indices]

  true_train_labels = train_labels[ranked_train_indices]
  true_test_label = test_labels[i]

  precision_at_10 = precision_at_k(true_test_label, true_train_labels, 10)
  precision_at_50 = precision_at_k(true_test_label, true_train_labels, 50)

  p_10.append(precision_at_10)
  p_50.append(precision_at_50)

  # mean_average_precision expects (ranked labels, query label) - the reverse of
  # precision_at_k's order. Plain Python values are passed because the function
  # iterates element by element, which is far slower on tensors.
  average_precision = mean_average_precision(true_train_labels.tolist(), true_test_label.item())

  avg_precisions.append(average_precision)

In [None]:
# Average each per-query metric over all evaluated test samples
mean_p10 = sum(p_10) / len(p_10)
mean_p50 = sum(p_50) / len(p_50)
mean_ap = sum(avg_precisions) / len(avg_precisions)

In [None]:
# Report the averaged retrieval metrics for random-hyperplane hashing
print('Mean Precision@10: ', mean_p10)
print('Mean Precision@50: ', mean_p50)
print('Mean Average Precision: ', mean_ap)

0.7624499999999887
0.7081439999999855
0.4274341529613439


# Neural LSH

In [None]:
import pickle

# Persist the current embeddings, then read them back from disk.
with open('cifar10_train_feature.pkl', 'wb') as f:
    pickle.dump(train_embeddings, f)

with open('cifar10_test_feature.pkl', 'wb') as f:
    pickle.dump(test_embeddings, f)

# NOTE: pickle.load can execute arbitrary code - only load files this notebook wrote itself.
with open('cifar10_train_feature.pkl',mode='rb') as f:
    train_embeddings = pickle.load(f)

with open('cifar10_test_feature.pkl',mode='rb') as f:
    test_embeddings = pickle.load(f)

# Append the labels as one extra trailing column. reshape(-1, 1) derives the row count
# from the data instead of hard-coding the CIFAR-10 sizes (50000 / 10000).
train_labels1_set = train_labels.reshape(-1, 1)
train_set = torch.cat((train_embeddings, train_labels1_set), dim=1)

test_labels1_set = test_labels.reshape(-1, 1)
test_set = torch.cat((test_embeddings, test_labels1_set), dim=1)

In [None]:
from tqdm import tqdm
import torch

# The last column of `train_set` holds the class labels (appended by the torch.cat above)
labels = train_set[:, -1]

# masking[i, j] == 1 exactly when samples i and j carry different labels
masking = (labels[:, None] != labels).to(torch.int)
# Keep the upper triangle so each unordered pair is counted once.
# (This line previously referenced an undefined name `mask`.)
# NOTE: this is a dense N x N matrix; for N = 50,000 that is 2.5e9 int32 entries (~10 GB).
masking = torch.triu(masking)


In [None]:
import torch.optim as optim
import numpy as np

# Define the compression network C_psi (same as before)
class CompNetwork(torch.nn.Module):
    """Single linear layer followed by tanh.

    Maps `(batch, input_dim)` inputs to `(batch, output_dim)` outputs in [-1, 1].
    `torch.nn` is referenced by its full path because the notebook never imports
    it under the alias `nn`.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.linear = torch.nn.Linear(input_dim, output_dim)

    def forward(self, x):
        # The output needs to be in [-1, 1], so we use tanh activation
        return torch.tanh(self.linear(x))


In [None]:
# Objective function terms (as before)
def bit_balance_loss(p):
    """Mean over rows of |sum of the row's entries|.

    For a 2-D `p`, the entries are summed along dim 1, the absolute value of each
    sum is taken, and those values are averaged into a scalar tensor.
    """
    row_sums = p.sum(dim=1)
    return row_sums.abs().mean()

def sitting_on_the_fence_loss(p):
    """Mean distance of |p| from 1, i.e. mean(| |p| - 1 |) over all entries.

    The value is 0 when every entry is exactly -1 or +1 and grows as entries move
    away from those two values.
    """
    distance_from_unit = (p.abs() - 1).abs()
    return distance_from_unit.mean()

def weak_supervision_loss(p, masking, num_pairs=None):
    """Mean absolute inner product between the codes of masked (negative) pairs.

    Args:
        p: `(N, H)` tensor of relaxed hash codes.
        masking: `(N, N)` tensor that is non-zero for the pairs to penalize.
        num_pairs: optional normalizer. Defaults to the number of selected pairs,
            `masking.sum()` (at least 1). Pass a precomputed value to avoid
            re-summing a large mask on every call.

    Returns:
        Scalar tensor: sum of |p_i . p_j| over the masked pairs divided by `num_pairs`.

    The previous hard-coded divisor 1125000000 equals C(50000, 2) - 10 * C(5000, 2),
    the number of different-label pairs among 50,000 samples in 10 equally sized
    classes; deriving it from the mask gives the same value in that setting and
    stays correct for any other data size.
    """
    # Weak supervision loss with negative sampling
    # Encourage dissimilar hash codes for negative pairs (image pairs)
    cr = torch.mm(p, p.T)
    abs_result_matrix = torch.abs(cr * masking)
    if num_pairs is None:
        num_pairs = masking.sum().clamp(min=1)
    return torch.sum(abs_result_matrix) / num_pairs

In [None]:
# Hashing process based on the additional details
def create_hash_buckets(p_u, L, J, rng=None, return_indices=False):
    """Build `L` hash tables by sampling `J` distinct code bits for each table.

    Args:
        p_u: 2-D array of shape `(N, H)` holding one H-bit code per row.
        L: number of hash tables.
        J: number of bit positions sampled (without replacement) per table.
        rng: optional object with a NumPy-style `choice` method (e.g.
            `np.random.default_rng(seed)`) for reproducible sampling. Defaults to
            the global `np.random` state.
        return_indices: when True, also return the sampled bit positions. They are
            required to hash query codes with the same bits as the training codes.

    Returns:
        A list of `L` arrays of shape `(N, J)`; with `return_indices=True` a tuple
        `(buckets, bit_indices)` where `bit_indices[l]` holds the columns of table `l`.
    """
    _, H = p_u.shape
    chooser = np.random if rng is None else rng
    buckets = []
    bit_indices = []

    for _ in range(L):
        i = chooser.choice(H, J, replace=False)
        bit_indices.append(i)
        buckets.append(p_u[:, i])

    if return_indices:
        return buckets, bit_indices
    return buckets

In [None]:
# Hyperparameters
alpha = 0.3   # weight of the bit-balance term
beta = 0.3    # weight of the sitting-on-the-fence term
gamma = 0.4   # weight of the weak-supervision term
L = 10        # number of hash tables
J = 16        # bits sampled per hash table

# Training features: every column of train_set except the trailing label column
X = train_set[:, :-1]

# Size the network from the data. The previous hard-coded 2048 does not match the
# 512-dimensional ResNet-18 embeddings produced earlier in this notebook.
input_dimension = X.shape[1]
output_dimension = 500

# Instantiate the network.
# NOTE: this rebinds `model`, which so far referred to the ResNet feature extractor.
model = CompNetwork(input_dimension, output_dimension)

D = input_dimension

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Training loop: full-batch optimization of the compression network
for epoch in range(20):
    optimizer.zero_grad()

    # Forward pass through the network (relaxed codes in [-1, 1])
    p_u = model(X)

    # Compute the losses
    balance = bit_balance_loss(p_u)
    fence = sitting_on_the_fence_loss(p_u)
    weak_supervision = weak_supervision_loss(p_u, masking)

    # Total loss as per the objective function.
    # (The third term previously referenced an undefined name `supervision`.)
    loss = (alpha * balance) + (beta * fence) + (gamma * weak_supervision)

    # Backpropagation
    loss.backward()
    optimizer.step()

    # print(f'Epoch {epoch}, Loss: {loss.item()}')

In [None]:
# Generate the hash buckets for the training set
model.eval()
p_u = model(X)  # relaxed codes produced by the compression network
binary_hash = torch.sign(p_u)  # entries become -1 or +1 (0 only where an output is exactly 0)
# create_hash_buckets indexes a NumPy array, hence detach() + numpy()
hash_buckets = create_hash_buckets(binary_hash.detach().numpy(), L, J)

In [None]:
import copy
# hash_buckets is a Python list of arrays, so the -1 -> 0 replacement has to be applied
# to each table. The previous `k[k == -1] = 0` compared the list itself to -1 (False),
# and `k[False] = 0` then overwrote the first table with the integer 0.
# np.where builds new arrays, so hash_buckets itself is left untouched.
k = [np.where(table == -1, 0, table) for table in hash_buckets]

In [None]:
# Create a dictionary with the hash code as key and the image indices as value
binary_dict = {}

# Loop through the hash tables (one array per table, one row per training sample)
# NOTE(review): all tables share this single dictionary, so identical bit patterns
# coming from different tables end up in the same bucket - confirm this is intended.
for i, table_codes in enumerate(k):

    # Loop through the code of every training sample in the current table
    for j, bits in enumerate(table_codes.tolist()):

        # Tuples are hashable, so the bit vector can serve as a dictionary key;
        # setdefault creates the bucket on first use.
        binary_dict.setdefault(tuple(bits), []).append(j)

# Now `binary_dict` maps each observed bit pattern (tuple) to the indices of the
# corresponding samples. Report the number of distinct patterns instead of dumping
# the whole dictionary into the cell output.
print(len(binary_dict))  # Number of unique binary codes

In [None]:
#loading the trained model
model_path = "CIFAR10_500.pth"
# `models` is the torchvision.models module and has no load_state_dict; the network to
# restore is `model`, the CompNetwork instance created above.
# NOTE(review): assumes the checkpoint holds a CompNetwork state_dict with matching
# layer sizes - confirm. Buckets built before this load came from the previous weights.
model.load_state_dict(torch.load(model_path))
model.eval()

In [None]:
# Cosine similarity between test image and retrieved images
def compute_cosine_similarity(test_embedding, retrieved_embeddings):
    """Cosine similarity of one query embedding against each retrieved embedding.

    The query is reshaped to a single row before the pairwise computation; the
    result is squeezed so that size-1 dimensions are removed.
    """
    query_row = test_embedding.reshape(1, -1)
    return cosine_similarity(query_row, retrieved_embeddings).squeeze()

# Precision@K
def precision_at_k(retrieved_labels, true_label, k):
    """Fraction of the first `k` retrieved labels equal to `true_label`.

    Note the argument order (ranked labels first), which differs from the
    `precision_at_k` defined in the earlier sections. The denominator is always `k`.
    """
    relevant = 0
    for label in retrieved_labels[:k]:
        if label == true_label:
            relevant += 1
    return relevant / k

# Mean Average Precision (MAP)
def mean_average_precision(retrieved_labels, true_label):
    """Average precision of one ranked label list with respect to `true_label`.

    Accumulates the precision at every rank where the label matches and divides by
    the number of matches; returns 0.0 when there is no match.
    """
    found = 0
    accumulated = 0.0
    for rank, label in enumerate(retrieved_labels, start=1):
        if label == true_label:
            found += 1
            accumulated += found / rank
    if found == 0:
        return 0.0
    return accumulated / found

In [None]:
# Find images in the same bucket for test image and evaluate
def evaluate_lsh(test_embeddings, test_labels, train_embeddings, train_labels,num_tables, binary_dict, bit_indices=None):
    """Evaluate bucket-based retrieval with Precision@10, Precision@50 and MAP.

    Args:
        test_embeddings: `(M, D)` tensor of query features.
        test_labels: labels of the queries, indexable by query position.
        train_embeddings: `(N, D)` training features, indexable by a list of indices.
        train_labels: labels of the training samples, indexable the same way.
        num_tables: number of hash tables to probe per query.
        binary_dict: either one dict mapping a code tuple to a list of training
            indices (shared by all tables), or a list/tuple with one such dict per table.
        bit_indices: optional sequence; `bit_indices[l]` holds the code positions that
            table `l` was built from. When omitted, the full code is used as the key.

    Returns:
        `(avg_precision_10, avg_precision_50, avg_map)` averaged over the queries that
        retrieved at least one candidate (0.0 each when no query retrieved anything).

    Uses the notebook-level `model` to compute the query codes.
    """
    precisions_10 = []
    precisions_50 = []
    mean_avg_precisions = []

    Xt = test_embeddings
    # `model` is the hashing network; `models` (torchvision.models) is a module and
    # cannot be called.
    with torch.no_grad():
        b_ut = model(Xt)

    # Same binarization as for the training buckets: sign, then -1 -> 0
    binary_hash_test = torch.sign(b_ut)
    binary_hash_test[binary_hash_test == -1] = 0
    binary_hash_test = binary_hash_test.cpu().numpy()

    per_table_dicts = isinstance(binary_dict, (list, tuple))

    for i, test_hash in enumerate(binary_hash_test):
        test_label = test_labels[i]

        # Retrieve images from the corresponding bucket in each hash table
        retrieved_indices = set()
        for l in range(num_tables):
            code = test_hash if bit_indices is None else test_hash[bit_indices[l]]
            table = binary_dict[l] if per_table_dicts else binary_dict
            # A tuple of plain numbers is hashable and compares equal to the numeric
            # tuples used as keys; codes that were never seen yield an empty bucket.
            bucket = table.get(tuple(code.tolist()), [])
            retrieved_indices.update(bucket)

        retrieved_indices = sorted(retrieved_indices)
        if len(retrieved_indices) == 0:
            # NOTE: queries without candidates are excluded from the averages
            continue

        retrieved_embeddings = train_embeddings[retrieved_indices]
        retrieved_labels = train_labels[retrieved_indices]

        # Compute cosine similarity between THIS query (not the whole test set) and
        # the retrieved candidates
        similarities = np.atleast_1d(compute_cosine_similarity(Xt[i], retrieved_embeddings))

        # Rank the retrieved images by similarity
        ranked_indices = np.argsort(-similarities)  # Sort in descending order
        ranked_labels = [retrieved_labels[idx] for idx in ranked_indices]

        # Compute Precision@10, Precision@50, and MAP
        precision_10 = precision_at_k(ranked_labels, test_label, 10)
        precision_50 = precision_at_k(ranked_labels, test_label, 50)
        map_score = mean_average_precision(ranked_labels, test_label)

        precisions_10.append(precision_10)
        precisions_50.append(precision_50)
        mean_avg_precisions.append(map_score)

    # Average precision across all evaluated test images
    avg_precision_10 = np.mean(precisions_10) if precisions_10 else 0.0
    avg_precision_50 = np.mean(precisions_50) if precisions_50 else 0.0
    avg_map = np.mean(mean_avg_precisions) if mean_avg_precisions else 0.0

    print(f"\nOverall Results - Precision@10: {avg_precision_10:.4f}, Precision@50: {avg_precision_50:.4f}, MAP: {avg_map:.4f}")

    return avg_precision_10, avg_precision_50, avg_map

In [None]:
# Evaluate LSH on the test set
avg_precision_10, avg_precision_50, avg_map = evaluate_lsh(
    test_embeddings=test_set[:,:-1],
    test_labels=test_labels1_set,
    # All columns except the last are features; `train_set[:, -1]` is the label column.
    train_embeddings = train_set[:, :-1],
    train_labels = train_labels1_set,
    num_tables=L,
    binary_dict=binary_dict
)