In [4]:
import numpy as np

class RPNode:
    """A single node of a random-projection (RP) tree.

    The same class represents both kinds of node:

    * a split node keeps the ``projection_vector`` and ``threshold`` that
      define the split, plus its ``left`` and ``right`` children;
    * a leaf node keeps the data points (pixel blocks) that ended up in it
      in ``points``.

    Every attribute defaults to ``None``; the constructor stores the values
    as given and performs no validation.
    """

    def __init__(self, projection_vector=None, threshold=None, left=None, right=None, points=None):
        # Split rule: direction the data is projected onto, and the cut-off value.
        self.projection_vector, self.threshold = projection_vector, threshold
        # Subtrees on either side of the split.
        self.left, self.right = left, right
        # Payload of a leaf node: the actual data points (pixel blocks).
        self.points = points


In [5]:
def random_projection(dimensions):
    """Return a random direction of unit length.

    Parameters
    ----------
    dimensions : int
        Number of components of the vector, i.e. the dimensionality of the data.

    Returns
    -------
    numpy.ndarray
        1-D array with ``dimensions`` entries drawn from NumPy's global random
        stream via ``np.random.randn`` and scaled to Euclidean norm 1.
    """
    # Draw the raw direction from a standard normal distribution.
    direction = np.random.randn(dimensions)
    # Scale by the Euclidean length so the result has unit norm.
    length = np.linalg.norm(direction)
    return direction / length


In [6]:
def build_rp_tree(data, max_leaf_size=10):
    """Recursively build a random-projection tree over ``data``.

    Parameters
    ----------
    data : array-like
        Points to index, one point per row (e.g. 27-D or 36-D pixel blocks).
        Any array-like is accepted; it is converted with ``np.asarray``.
    max_leaf_size : int, optional
        A group of at most this many points is stored as a leaf instead of
        being split further.

    Returns
    -------
    RPNode
        Either a leaf (``points`` set) or a split node (``projection_vector``,
        ``threshold``, ``left`` and ``right`` set).

    Notes
    -----
    A leaf can hold more than ``max_leaf_size`` points when the median split
    cannot separate them (see the degenerate-split guard below).
    """
    # Accept lists of rows as well as arrays; this is a no-op for ndarray input.
    data = np.asarray(data)

    # Small enough: store the points directly in a leaf.
    if len(data) <= max_leaf_size:
        return RPNode(points=data)

    # Number of features per point (columns of the data matrix).
    dimensions = data.shape[1]

    # Project every point onto one random direction.
    projection_vector = random_projection(dimensions)
    projected_data = np.dot(data, projection_vector)

    # Split at the median of the projected values.
    threshold = np.median(projected_data)
    goes_left = projected_data <= threshold
    left_data = data[goes_left]
    # Using the complement guarantees that left + right always cover all points.
    right_data = data[~goes_left]

    # Degenerate split: every point fell on the same side. This happens when
    # the points cannot be separated by the median (e.g. more than
    # max_leaf_size identical rows), when the projections contain NaN, or when
    # max_leaf_size < 1. Recursing would hand the same data to the next call
    # forever, so keep all the points together in a leaf instead.
    if len(left_data) == 0 or len(right_data) == 0:
        return RPNode(points=data)

    # Both sides are strictly smaller than ``data``, so the recursion terminates.
    left_child = build_rp_tree(left_data, max_leaf_size)
    right_child = build_rp_tree(right_data, max_leaf_size)

    return RPNode(projection_vector=projection_vector, threshold=threshold, left=left_child, right=right_child)


In [7]:
from heapq import heappush, heappop

class _Neighbor(tuple):
    """``(-distance, point)`` heap entry that is ordered by ``-distance`` only.

    A plain tuple falls back to comparing the ``point`` arrays whenever two
    distances are equal, which raises ``ValueError`` for NumPy arrays. This
    subclass still unpacks and indexes like a 2-tuple, but ``<`` and ``>``
    look at the first item alone.
    """

    __slots__ = ()

    def __lt__(self, other):
        return self[0] < other[0]

    def __gt__(self, other):
        return self[0] > other[0]


def knn_search(node, query_point, k, neighbors):
    """Collect the ``k`` nearest points to ``query_point`` from the subtree at ``node``.

    Parameters
    ----------
    node : RPNode
        Root of the subtree to search. A node whose ``points`` is not ``None``
        is treated as a leaf.
    query_point : numpy.ndarray
        The point whose neighbors are wanted.
    k : int
        Maximum number of neighbors to keep. For ``k <= 0`` nothing is
        collected and ``neighbors`` is left untouched.
    neighbors : list
        Heap (as used by ``heapq``) that is updated in place. Entries are
        ``(-distance, point)`` pairs; negating the distance makes the smallest
        heap item the *farthest* neighbor kept so far, so it is the one that
        ``heappop`` discards.

    Returns
    -------
    None
        Results are delivered through ``neighbors``.
    """
    # Nothing can be kept; also avoids indexing an empty heap below.
    if k <= 0:
        return

    # Leaf: measure the distance to every stored point.
    if node.points is not None:
        for point in node.points:
            distance = np.linalg.norm(query_point - point)
            heappush(neighbors, _Neighbor((-distance, point)))
            # Over capacity: drop the farthest candidate.
            if len(neighbors) > k:
                heappop(neighbors)
        return

    # Split node: find the side of the threshold the query falls on.
    projected_value = np.dot(query_point, node.projection_vector)
    if projected_value <= node.threshold:
        near_side, far_side = node.left, node.right
    else:
        near_side, far_side = node.right, node.left

    # Search the query's own side first.
    knn_search(near_side, query_point, k, neighbors)

    # Distance from the query to the split along the projection direction.
    # With a unit-length projection vector (as random_projection produces) no
    # point on the far side can be closer than this, so the far side is only
    # visited when the heap is not full yet or its worst entry is farther away.
    margin = abs(projected_value - node.threshold)
    if len(neighbors) < k or -neighbors[0][0] > margin:
        knn_search(far_side, query_point, k, neighbors)

def get_knn(tree, query_point, k):
    """Return up to ``k`` nearest neighbors of ``query_point``, nearest first.

    Parameters
    ----------
    tree : RPNode
        Root of the tree to search.
    query_point : numpy.ndarray
        The point whose neighbors are wanted.
    k : int
        Number of neighbors requested. For ``k <= 0`` an empty list is returned.

    Returns
    -------
    list
        The neighbor points, ordered from closest to farthest.
    """
    # Nothing to look for; skip the search entirely.
    if k <= 0:
        return []

    neighbors = []
    knn_search(tree, query_point, k, neighbors)

    # Entries are (-distance, point). Sort on the negated distance alone:
    # comparing whole tuples would compare the point arrays whenever two
    # distances tie, which raises ValueError for NumPy arrays. Descending
    # order of -distance means ascending distance, i.e. nearest first.
    neighbors.sort(key=lambda entry: entry[0], reverse=True)
    return [point for _, point in neighbors]


In [8]:
# Demo on synthetic "pixel blocks": 100 random points with 36 dimensions each.
# The seed is set first, and the statements below must keep their order,
# because the data, the tree's random projections and the query all draw from
# NumPy's global random stream.
np.random.seed(42)
data = np.random.random_sample((100, 36))

# Index the blocks with an RP-tree (max_leaf_size=10).
rp_tree = build_rp_tree(data, max_leaf_size=10)

# One more random 36-D block serves as the query.
query_point = np.random.random_sample(36)

# Ask the tree for the k closest blocks to the query.
k = 5
neighbors = get_knn(rp_tree, query_point, k)

# Print the neighbors in the order get_knn returned them.
print("Nearest neighbors:")
for neighbor in neighbors:
    print(neighbor)


Nearest neighbors:
[0.5225766  0.29956568 0.07686208 0.50062427 0.79451554 0.70708648
 0.05022601 0.07290184 0.40287328 0.29529049 0.23238432 0.2810045
 0.80348274 0.92922805 0.40510269 0.906111   0.3214957  0.47643699
 0.226029   0.64047604 0.97898112 0.6034931  0.35781408 0.64781745
 0.12292068 0.88865908 0.50308395 0.44934974 0.58586479 0.62478386
 0.07177581 0.68261722 0.24193168 0.71395263 0.82253479 0.80395851]
[0.92937599 0.55676289 0.57161269 0.27997909 0.76949293 0.18704375
 0.32367924 0.42543644 0.50761038 0.24240973 0.11483682 0.61062004
 0.28863055 0.58123822 0.15436272 0.4811401  0.53258943 0.05182354
 0.33660428 0.13441468 0.06337497 0.98996023 0.32235384 0.80987445
 0.25464065 0.68150272 0.76022786 0.59563874 0.47157619 0.41184091
 0.34886827 0.92952914 0.83061941 0.96502691 0.12429722 0.73086748]
[0.80784594 0.76228513 0.79781365 0.43558331 0.81783422 0.12020906
 0.5444891  0.00575866 0.32458583 0.36646153 0.39617269 0.69546721
 0.3885581  0.44869362 0.23754413 0.373251