In [None]:
from collections import Counter

import numpy as np
from nearpy import Engine
from nearpy.hashes import RandomBinaryProjections  # other hash functions can be chosen
from sklearn.metrics import accuracy_score
from sklearn.metrics import average_precision_score

# Initialize LSH engine and load embeddings

In [None]:
num_dimensions = 2048  # vector dimensionality passed to the NearPy Engine
num_tables = 10  # number of hash objects (one LSH table each)
num_projections = 10  # random projections (bits) per hash

# Each hash gets a unique name. NearPy stores buckets under the hash name, so
# giving every table the same name ('rbp') would make all tables write to and
# read from a single shared bucket namespace, mixing candidates across tables.
# NOTE(review): the projections are random; pass a distinct `rand_seed` per
# hash if reproducible results are required — confirm against the installed
# NearPy version.
hashes = [
    RandomBinaryProjections('rbp_{}'.format(table_idx), num_projections)
    for table_idx in range(num_tables)
]  # Adjust number of hash functions
engine = Engine(num_dimensions, lshashes=hashes)


def load_split(path):
    """Load one dataset split from an ``.npz`` archive.

    Parameters
    ----------
    path : str
        Location of an archive containing the keys ``'embeddings'`` and
        ``'labels'``.

    Returns
    -------
    tuple
        ``(embeddings, labels)`` as read from the archive.
    """
    # Open the archive once and close it deterministically; the arrays are
    # materialised on access, so they remain valid after the file is closed.
    with np.load(path) as archive:
        return archive['embeddings'], archive['labels']


embeddings_train, labels_train = load_split('pathmnist/train.npz')
embeddings_val, labels_val = load_split('pathmnist/val.npz')
embeddings_test, labels_test = load_split('pathmnist/test.npz')

# Add embeddings to LSH engine

In [None]:
# Index every training embedding, attaching its label as the stored payload
# so it comes back alongside the vector at query time.
for row in range(len(embeddings_train)):
    engine.store_vector(embeddings_train[row], labels_train[row])

# Query LSH engine for nearest neighbors

In [None]:
NO_PREDICTION = -1  # sentinel stored in predicted_labels when a query gets no neighbours


def _as_class_id(value):
    """Coerce a stored payload or a ground-truth entry to a plain ``int``.

    Strings are read as ``'<prefix>_<label>'`` (second ``'_'``-separated
    token), falling back to the whole string when there is no underscore.
    Anything else is flattened and its first element is used.
    """
    if isinstance(value, str):
        parts = value.split('_')
        return int(parts[1] if len(parts) > 1 else parts[0])
    # assumes exactly one class id per sample (scalar or length-1 array) — TODO confirm
    return int(np.ravel(value)[0])


def _average_precision(relevance):
    """Average precision of one ranked result list.

    ``relevance`` is a sequence of booleans in rank order. The result is the
    mean of precision@k over the ranks k that hold a relevant item, or 0.0
    when nothing relevant was retrieved.
    """
    hits = 0
    precision_sum = 0.0
    for rank, is_relevant in enumerate(relevance, start=1):
        if is_relevant:
            hits += 1
            precision_sum += hits / rank
    return precision_sum / hits if hits else 0.0


query_embeddings = embeddings_val
true_labels = labels_val
predicted_labels = []

# Evaluate accuracy and mAP
correct_count = 0
precisions = []
recalls = []
average_precisions = []

if len(query_embeddings) == 0:
    raise ValueError("query_embeddings is empty; nothing to evaluate")

for i, query_embedding in enumerate(query_embeddings):
    true_label = _as_class_id(true_labels[i])

    # NearPy returns (vector, data, distance) tuples; `data` is whatever was
    # passed to store_vector, i.e. the training label.
    # assumes results are ordered nearest-first — TODO confirm for the
    # configured vector filters
    neighbors = engine.neighbours(query_embedding)

    # Retrieve labels of neighbors
    neighbor_labels = [_as_class_id(neighbor[1]) for neighbor in neighbors]

    # LSH may return no candidates at all; score such a query as a miss
    # instead of crashing on max()/division by zero.
    if not neighbor_labels:
        predicted_labels.append(NO_PREDICTION)
        precisions.append(0.0)
        recalls.append(0.0)
        average_precisions.append(0.0)
        continue

    # Predict label based on majority voting among neighbors; ties go to the
    # label that appears first in the neighbour list.
    predicted_label = Counter(neighbor_labels).most_common(1)[0][0]
    predicted_labels.append(predicted_label)

    # Calculate accuracy
    if predicted_label == true_label:
        correct_count += 1

    # Precision: fraction of retrieved neighbours that carry the true label.
    relevance = [label == true_label for label in neighbor_labels]
    num_relevant = sum(relevance)
    precisions.append(num_relevant / len(neighbor_labels))

    # "Recall" here is a hit indicator: 1 if at least one neighbour has the
    # true label, else 0. It is not normalised by the number of relevant
    # items in the training set.
    recalls.append(1.0 if num_relevant else 0.0)

    # Calculate average precision for this query over the ranked neighbours
    average_precisions.append(_average_precision(relevance))

# Calculate accuracy
accuracy = correct_count / len(query_embeddings)

# Calculate mean precision, recall, and mAP
mean_precision = np.mean(precisions)
mean_recall = np.mean(recalls)
mAP = np.mean(average_precisions)

print("Accuracy:", accuracy)
print("Mean Precision:", mean_precision)
print("Mean Recall:", mean_recall)
print("mAP:", mAP)