In [2]:
import numpy as np

In [146]:
class LSHknn:
    """
    Approximate k-nearest-neighbour classifier using random-hyperplane
    locality-sensitive hashing (LSH).

    Every training point is hashed to the tuple of signs of its dot product
    with each random hyperplane. Points sharing a sign tuple share a bucket.
    A query is compared only against the training points in its own bucket.

    k: number of nearest neighbours that vote (must be >= 1)

    no_of_planes: number of random hyperplanes (more planes -> more, smaller buckets)

    random_state: optional integer seed for drawing the hyperplanes. When None
                  (default) the global NumPy RNG is used, so np.random.seed()
                  is honoured.
    """

    # Sentinel returned by predict / predict_prob when the query hashes to a
    # bucket that contains no training point.
    UNKNOWN = 'dont know'

    def __init__(self, k=5, no_of_planes=5, random_state=None):
        if k < 1:
            raise ValueError('k must be at least 1, got {}'.format(k))
        self.k = k                            # k nearest neighbours
        self.no_of_planes = no_of_planes      # number of planes
        self.random_state = random_state      # seed for the hyperplanes (None -> global RNG)
        self.x = None                         # data
        self.y = None                         # labels
        self.no_classes = None                # sorted array of the distinct class labels
        self.dim_of_data = None               # number of dimensions
        self.planes = None                    # list of hyperplane normal vectors
        self.bucket = None                    # sign tuple -> list of points
        self.bucket_labels = None             # sign tuple -> list of labels (same order)

    def __random_hyperpplanes(self):
        """Draw `no_of_planes` normal vectors uniformly from [-1, 1)^dim_of_data."""
        if self.random_state is None:
            rng = np.random                   # global RNG, same draws as np.random.uniform
        else:
            rng = np.random.RandomState(self.random_state)
        self.planes = [rng.uniform(-1, 1, self.dim_of_data)
                       for _ in range(self.no_of_planes)]

    def __signature(self, point):
        """Return the bucket key of `point`: the sign of its dot product with each plane."""
        return tuple(np.sign(np.dot(plane, point)) for plane in self.planes)

    def __assigning_each_point_to_a_bucket(self):
        """Hash every training point and group points / labels by bucket key."""
        bucket = {}                           # bucket dict
        bucket_labels = {}                    # corresponding labels
        for point, label in zip(self.x, self.y):
            key = self.__signature(point)
            bucket.setdefault(key, []).append(point)
            bucket_labels.setdefault(key, []).append(label)
        self.bucket = bucket
        self.bucket_labels = bucket_labels

    def fit(self, x, y):
        """
        x: data matrix with (rows, columns) or (points, features)

        y: class labels, one per row of x, e.g. [1,2,0,1,2,0,2,1]

        Draws the random hyperplanes, assigns every point to its bucket and
        prints a short summary.

        Raises ValueError if x is not 2-D or if x and y differ in length.
        """
        x = np.asarray(x)
        y = np.asarray(y)
        if x.ndim != 2:
            raise ValueError('x must be 2-D (points, features), got shape {}'.format(x.shape))
        if len(x) != len(y):
            # zip() would otherwise silently drop the unmatched rows
            raise ValueError('x and y have different lengths: {} vs {}'.format(len(x), len(y)))
        self.x = x
        self.y = y
        self.dim_of_data = x.shape[1]
        self.no_classes = np.unique(y)
        self.__random_hyperpplanes()
        self.__assigning_each_point_to_a_bucket()
        print('k_nearest_neighbours={}'.format(self.k))
        print('number of class : {} '.format(len(self.no_classes)))
        print('number of hyper planes {}'.format(self.no_of_planes))

    def __k_neighbors(self, xq):
        """
        Count, per class, how many of the (at most k) nearest points in the
        query's bucket carry that class.

        Returns a dict {class: count} ordered like self.no_classes, or None
        when the query's bucket holds no training point.
        """
        if self.bucket is None:
            raise RuntimeError('LSHknn is not fitted yet; call fit(x, y) first')
        xq = np.asarray(xq)
        key = self.__signature(xq)                          # bucket key of the query point
        if key not in self.bucket:
            return None

        points = np.asarray(self.bucket[key])
        labels = np.asarray(self.bucket_labels[key])
        dist = np.linalg.norm(points - xq, axis=1)          # euclidean distance to each bucket point
        nearest = np.argsort(dist)[:self.k]                 # fewer than k if the bucket is small

        votes = dict(zip(self.no_classes, np.zeros(len(self.no_classes))))
        for label in labels[nearest]:
            votes[label] += 1
        return votes

    def predict(self, xq):
        """
        xq : query point

        predicts one point at a time

        Returns the majority class among the retrieved neighbours, or the
        string 'dont know' when the query's bucket holds no training point.
        """
        votes = self.__k_neighbors(xq)
        if votes is None:
            return self.UNKNOWN
        classes = list(votes.keys())
        counts = list(votes.values())
        return classes[np.argmax(counts)]                   # majority vote; ties go to the first class

    def predict_prob(self, xq):
        """
        xq : query point

        predicts one point at a time

        Returns an array with the fraction of retrieved neighbours per class
        (ordered like the sorted distinct labels), or the string 'dont know'
        when the query's bucket holds no training point.
        """
        votes = self.__k_neighbors(xq)
        if votes is None:
            return self.UNKNOWN
        counts = np.array(list(votes.values()))
        return counts / np.sum(counts)

    def score(self, x, y):
        """
        x: data

        y: labels

        returns accuracy; points answered with 'dont know' count as wrong.

        Raises ValueError if x and y differ in length or are empty.
        """
        if len(x) != len(y):
            raise ValueError('x and y have different lengths: {} vs {}'.format(len(x), len(y)))
        if len(y) == 0:
            raise ValueError('cannot compute accuracy on empty data')
        correct = 0
        for point, label in zip(x, y):
            predicted = self.predict(point)
            if predicted is self.UNKNOWN:
                continue                                    # no bucket -> no credit
            if predicted == label:
                correct += 1
        return correct / len(y)

In [148]:
from sklearn.datasets import make_classification

In [149]:
# Fixed seed so the synthetic dataset is identical on every Restart & Run All.
x, y = make_classification(n_samples=100, random_state=42)

In [150]:
# Inspect the dataset dimensions as (points, features).
x.shape

(100, 20)

In [151]:
# Default configuration: k=5 neighbours and 5 random hyperplanes.
m=LSHknn()

In [152]:
# Draws the random hyperplanes, hashes every row of x into its bucket and prints a three-line summary.
m.fit(x,y)

k_nearest_neighbours=5
number of class : 2 
number of hyper planes 5


In [154]:
# Per-class fraction of the retrieved neighbours (at most k, all taken from x[4]'s own bucket).
m.predict_prob(x[4])

array([0.33333333, 0.66666667])

In [153]:
# Accuracy measured on the same data the model was fitted on, not on a held-out set.
m.score(x,y)

0.85