<a href="https://colab.research.google.com/github/raisulislam0/Practice/blob/master/LSH_Implementation_2023_2_96_020.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
# Define the LSH class

import numpy as np
from collections import defaultdict

class LSH:
    """Locality-sensitive hash index for dense vectors using random projections.

    Every hash table owns ``hash_size`` Gaussian projection vectors and one
    uniform offset per projection. A vector's key in a table is the tuple of
    ``floor((projection + offset) / bucket_size)`` values. Items that share a
    key with the query in at least one table become candidates and are then
    ranked by exact Euclidean distance.

    The index keeps its own mapping from inserted value to vector, so
    ``query`` does not rely on any variable defined outside the class.
    """

    def __init__(self, num_hash_tables, hash_size, input_dim, bucket_size=4, num_candidates=10):
        """Create an empty index.

        Args:
            num_hash_tables: Number of independent hash tables.
            hash_size: Number of random projections (key length) per table.
            input_dim: Dimensionality of the vectors to be indexed.
            bucket_size: Width of a quantization bucket along each projection.
            num_candidates: Stored on the instance; no method of this class
                reads it.
        """
        self.num_hash_tables = num_hash_tables
        self.hash_size = hash_size
        self.input_dim = input_dim
        self.bucket_size = bucket_size
        self.num_candidates = num_candidates
        self.hash_tables = [defaultdict(list) for _ in range(num_hash_tables)]
        self.hash_functions = [self._create_hash_function() for _ in range(num_hash_tables)]
        # value -> vector, filled by insert() and used by query() for ranking.
        self._vectors = {}

    def _create_hash_function(self):
        """Draw the random projections and offsets for one hash table.

        Returns:
            A ``(random_vectors, random_offsets)`` pair with shapes
            ``(hash_size, input_dim)`` and ``(hash_size,)``.
        """
        random_vectors = np.random.randn(self.hash_size, self.input_dim)
        random_offsets = np.random.uniform(0, self.bucket_size, self.hash_size)
        return random_vectors, random_offsets

    def _hash(self, hash_function, vector):
        """Return the bucket key (a tuple of ints) of ``vector`` for one table."""
        random_vectors, random_offsets = hash_function
        projections = np.dot(random_vectors, vector)
        return tuple(((projections + random_offsets) // self.bucket_size).astype(int))

    def insert(self, vector, value):
        """Add ``vector`` to every hash table under the identifier ``value``.

        Args:
            vector: Array-like of length ``input_dim``.
            value: Hashable identifier returned by ``query``. Inserting the
                same value again replaces the vector used for ranking but
                does not remove the earlier bucket entries.
        """
        vector = np.asarray(vector)
        # Store first: an unhashable value fails here, before any table is touched.
        self._vectors[value] = vector
        for table, hash_function in zip(self.hash_tables, self.hash_functions):
            hash_value = self._hash(hash_function, vector)
            table[hash_value].append(value)

    def query(self, vector, num_neighbors=1):
        """Return up to ``num_neighbors`` inserted values closest to ``vector``.

        Only items sharing a bucket with ``vector`` in at least one table are
        considered, so the result can hold fewer than ``num_neighbors``
        entries, or be empty.

        Args:
            vector: Array-like of length ``input_dim``.
            num_neighbors: Maximum number of values to return; non-positive
                values yield an empty list.

        Returns:
            List of values ordered by increasing Euclidean distance.
        """
        if num_neighbors <= 0:
            return []

        vector = np.asarray(vector)
        candidates = set()
        for table, hash_function in zip(self.hash_tables, self.hash_functions):
            hash_value = self._hash(hash_function, vector)
            # .get() rather than indexing: indexing a defaultdict would create
            # an empty bucket for every query that misses.
            candidates.update(table.get(hash_value, ()))

        if not candidates:
            return []

        candidates = list(candidates)
        stored = np.array([self._vectors[c] for c in candidates])
        distances = np.linalg.norm(stored - vector, axis=1)
        nearest_neighbors = sorted(zip(distances, candidates))[:num_neighbors]
        return [neighbor for _, neighbor in nearest_neighbors]


In [8]:
# Demo configuration
num_hash_tables = 5   # independent hash tables
hash_size = 10        # random projections per table
input_dim = 128       # dimensionality of every vector
bucket_size = 4       # quantization width along each projection
num_candidates = 10

# Build an empty index from the configuration above
lsh = LSH(
    num_hash_tables=num_hash_tables,
    hash_size=hash_size,
    input_dim=input_dim,
    bucket_size=bucket_size,
    num_candidates=num_candidates,
)

# 100 standard-normal vectors serve as the data set
data_points = [np.random.randn(input_dim) for _ in range(100)]

# Index each vector under its position in data_points
for i, point in enumerate(data_points):
    lsh.insert(vector=point, value=i)

# Look up the five nearest indexed vectors to a fresh random point
query_point = np.random.randn(input_dim)
neighbors = lsh.query(vector=query_point, num_neighbors=5)

print("Query Point:", query_point)
print("Nearest Neighbors:", neighbors)
print("Nearest Neighbors Data Points:", [data_points[idx] for idx in neighbors])


Query Point: [ 0.50503622 -0.68786517 -0.13297794 -0.4654563   0.5749987  -0.1662583
  1.67263525  0.5717302   2.09563576 -1.44077891 -0.29325832  0.86833582
 -1.98830844 -0.75766831 -0.34332651 -2.74197117  1.34417364 -1.02521426
  0.88794761 -1.36912366  1.66490102 -0.15835791 -1.92326506  0.47052152
  0.60961255 -0.08719648 -0.27857298  1.01790723  0.32334695 -1.48718359
 -0.02545961 -0.94650123  0.97028655 -0.78736249  0.70393384  1.94204863
  1.59847819  0.26068782  0.74431832 -1.3929055  -0.58759719  0.45673157
 -1.16275556 -1.08604691 -0.04634322  0.8250251   1.48118178  0.23711648
 -0.08395563 -0.25141256  1.69341009  1.4980886  -0.7652553   0.17152349
  0.42064581 -1.50936939  2.34031384  1.0435222   0.14904263 -0.74609373
  0.0896909   0.05881379 -0.04717356 -0.32208981 -0.32769672 -1.08301451
  0.66833304 -0.15822636  0.04777483 -1.76421533 -0.67037035 -1.81781462
 -1.97864191 -1.69793069 -0.2387826  -1.23446789  0.75796086  1.24243073
 -1.4538087  -1.77509175  1.33646138 -0