In [2]:
import numpy as np

class VerboseRandomProjectionLSH:
    """Random-projection locality-sensitive hash that prints every step.

    A point is projected onto ``num_hyperplanes`` random hyperplanes drawn
    from a standard normal distribution. The sign of projection ``i`` becomes
    bit ``i`` of an integer hash, and the point is stored in bucket
    ``hash % num_buckets``.

    Note:
        Because the bucket index is ``hash % num_buckets``, a power-of-two
        bucket count only looks at the lowest hash bits. With
        ``num_buckets=2`` the bucket is decided by hyperplane 0 alone.

    Args:
        dimension: Number of components every point must have.
        num_hyperplanes: Number of random hyperplanes (hash bits).
        num_buckets: Number of buckets; must be at least 1.
        seed: Optional seed for the hyperplanes. When ``None`` (the default)
            the hyperplanes are drawn from NumPy's global random state, as
            before; otherwise a dedicated generator seeded with ``seed`` is
            used so the instance is reproducible.

    Raises:
        ValueError: If ``num_buckets`` is smaller than 1.
    """

    def __init__(self, dimension, num_hyperplanes, num_buckets, seed=None):
        # A zero bucket count would only fail later with ZeroDivisionError on
        # the first add/query, so reject it up front.
        if num_buckets < 1:
            raise ValueError("num_buckets must be at least 1")

        self.dimension = dimension
        self.num_hyperplanes = num_hyperplanes
        self.num_buckets = num_buckets
        if seed is None:
            # Keep the historical behaviour: honour the global NumPy state.
            self.hyperplanes = np.random.randn(self.num_hyperplanes, self.dimension)
        else:
            rng = np.random.default_rng(seed)
            self.hyperplanes = rng.standard_normal(
                (self.num_hyperplanes, self.dimension)
            )
        self.buckets = [[] for _ in range(self.num_buckets)]

        print(f"Initialized LSH with {dimension}-D space, {num_hyperplanes} hyperplanes, {num_buckets} buckets.")
        print("Hyperplanes:\n", self.hyperplanes)

    def _as_vector(self, point):
        """Return ``point`` as a 1-D array, checking it has ``dimension`` entries."""
        vector = np.asarray(point)
        if vector.shape != (self.dimension,):
            raise ValueError(
                f"expected a point of shape ({self.dimension},), got {vector.shape}"
            )
        return vector

    def compute_hash(self, point):
        """Compute the integer sign-bit hash of ``point``, printing each step.

        Args:
            point: Sequence or array with ``dimension`` components.

        Returns:
            Integer whose bit ``i`` is 1 when the projection onto hyperplane
            ``i`` is non-negative, else 0.

        Raises:
            ValueError: If ``point`` does not have shape ``(dimension,)``.
        """
        vector = self._as_vector(point)
        projections = np.dot(self.hyperplanes, vector)
        hash_value = 0
        print(f"\nComputing hash for point: {point}")
        print(f"Projections on hyperplanes: {projections}")

        for i, projection in enumerate(projections):
            bit = 1 if projection >= 0 else 0
            hash_value |= bit << i
            print(f"Hyperplane {i}: Projection {projection:.2f}, Bit: {bit}")

        print(f"Computed hash value: {hash_value}")
        return hash_value

    def add_point(self, point, label):
        """Hash ``point`` and store ``(label, point)`` in its bucket.

        Args:
            point: Sequence or array with ``dimension`` components.
            label: Identifier stored next to the point and returned by
                :meth:`query`.

        Raises:
            ValueError: If ``point`` does not have shape ``(dimension,)``.
        """
        hash_value = self.compute_hash(point)
        bucket_index = hash_value % self.num_buckets
        # Store the array form so later distance computations also work when
        # the caller passed a plain list.
        self.buckets[bucket_index].append((label, self._as_vector(point)))
        print(f"Added {label} to bucket {bucket_index}, Hash: {hash_value}")

    def query(self, point, k=1):
        """Return up to ``k`` nearest stored points from ``point``'s bucket.

        Only the bucket that ``point`` hashes to is searched; candidates are
        ranked by Euclidean distance.

        Args:
            point: Sequence or array with ``dimension`` components.
            k: Maximum number of neighbours to return; must be non-negative.

        Returns:
            List of ``(label, distance)`` tuples sorted by ascending distance,
            with at most ``k`` entries (possibly empty).

        Raises:
            ValueError: If ``k`` is negative or ``point`` does not have shape
                ``(dimension,)``.
        """
        if k < 0:
            # A negative k would silently slice off the *farthest* entries.
            raise ValueError("k must be non-negative")

        vector = self._as_vector(point)
        hash_value = self.compute_hash(point)
        bucket_index = hash_value % self.num_buckets
        bucket = self.buckets[bucket_index]

        print(f"\nQuery for point {point} goes to bucket {bucket_index}, Hash: {hash_value}")
        distances = [(label, np.linalg.norm(vector - p)) for label, p in bucket]
        distances.sort(key=lambda x: x[1])
        print("Distances:", distances)

        return distances[:k]

# Seed NumPy's global generator first so the random hyperplanes and the sample
# points are identical on every Restart & Run All.
SEED = 42
np.random.seed(SEED)

# Use a 10-dimensional space for simplicity
dimension = 10
num_hyperplanes = 5
# NOTE: the bucket index is hash % num_buckets, so with 2 buckets only the bit
# produced by hyperplane 0 decides where a point lands.
num_buckets = 2

# Initialize the LSH system
lsh = VerboseRandomProjectionLSH(dimension, num_hyperplanes, num_buckets)

# Add a small number of points to the LSH
points = [np.random.randn(dimension) for _ in range(3)]
for i, point in enumerate(points):
    lsh.add_point(point, f"Point {i}")

# Query with the first stored point: it is guaranteed to hash to a bucket
# that contains at least itself, so the result is never empty.
query_point = points[0]
print("\nQuerying...")
result = lsh.query(query_point, k=2)
print("\nNearest Neighbors:", result)

Initialized LSH with 10-D space, 5 hyperplanes, 2 buckets.
Hyperplanes:
 [[-0.83743347 -0.55633249 -0.00472366  0.98555516  2.2355919   0.63213923
   0.41061279  0.68767773 -0.7422256   0.50369554]
 [-0.37300675 -0.1981152   1.37911377  0.30618014 -0.85705889 -0.96069224
   0.32476332  0.6817586  -0.40051521 -0.47487427]
 [-1.35863289  1.70761537 -3.33028166 -0.56780651 -0.07723327  0.62986481
   0.87754034  0.15146138  0.53954569 -0.04874371]
 [-0.16202119  1.03045185  0.72918501 -0.97383656 -0.60162195 -1.0464052
  -1.17435603 -0.61808796 -0.42677007 -1.54070825]
 [-1.92628697 -0.52845042 -0.33329592 -0.10214683 -0.44621198 -0.61120566
  -1.86812733 -1.49668058 -1.07236137  1.33889883]]

Computing hash for point: [-1.09203085 -0.44518967  0.10231113  0.55394694  1.01166295  0.88513787
  0.93224087  1.0591065  -0.64457112  1.92452295]
Projections on hyperplanes: [ 7.08773929 -0.54209315  1.08448863 -6.72096432  1.19699018]
Hyperplane 0: Projection 7.09, Bit: 1
Hyperplane 1: Projection

In [5]:
# 3x3 integer matrix holding 1..9 in row-major order.
array = np.arange(1, 10).reshape(3, 3)

In [9]:
# Number of axes of the array, i.e. the length of its shape tuple.
len(array.shape)

2

In [10]:
# Read the shape once and unpack it into row and column counts; the unpacking
# raises if the array is not exactly 2-D.
rows, columns = shape = array.shape

# Report both dimensions.
print(f"Number of rows: {rows}, Number of columns: {columns}")

Number of rows: 3, Number of columns: 3
