In [5]:
import numpy as np

class KNearestNeighbors():
    """Brute-force k-nearest-neighbours classifier.

    Every query point is compared against all training points; the labels of
    the ``n_neighbors`` closest ones are combined by an (optionally
    distance-weighted) vote. Labels must be non-negative integers because the
    votes are tallied with ``np.bincount``.

    Parameters
    ----------
    X_train : array-like, shape (n_train, n_features)
        Training points.
    y_train : array-like of int, shape (n_train,)
        Class label of every training point.
    X_test : array-like, shape (n_queries, n_features), optional
        Default query points, used whenever ``kneighbors``, ``predict`` or
        ``score`` is called without explicit data.
    y_test : array-like of int, shape (n_queries,), optional
        Default true labels used by ``score``.
    n_neighbors : int, default 5
        Number of neighbours that vote. Clamped to the training-set size.
    weights : {'uniform', 'distance'}, default 'uniform'
        'uniform' gives every neighbour one vote; 'distance' weights each
        vote by the inverse of the neighbour's distance.
    p : float, default 2
        Power of the Minkowski metric (1 = Manhattan, 2 = Euclidean,
        ``np.inf`` = Chebyshev). Must be >= 1.
    """

    def __init__(self, X_train, y_train, X_test=None, y_test=None, n_neighbors=5, weights='uniform', p=2):
        self.X_train = np.asarray(X_train)
        self.y_train = np.asarray(y_train)
        self.X_test = None if X_test is None else np.asarray(X_test)
        self.y_test = None if y_test is None else np.asarray(y_test)
        self.n_neighbors = n_neighbors
        self.weights = weights
        self.p = p
        # Number of classes is inferred from the labels instead of being
        # fixed, so data sets with any number of classes are handled.
        self.class_num = int(self.y_train.max()) + 1 if self.y_train.size else 0

    def euclidian_distance(self, a, b):
        """Return the Euclidean distance between point ``a`` and every row of ``b``."""
        return np.sqrt(np.sum((a - b) ** 2, axis=1))

    def _distance(self, a, b):
        """Return the Minkowski distance of order ``self.p`` from ``a`` to every row of ``b``."""
        if self.p == 2:
            # Keep the dedicated Euclidean routine for the default metric.
            return self.euclidian_distance(a, b)
        if self.p < 1:
            raise ValueError("p must be >= 1, got %r" % (self.p,))
        diff = np.abs(a - b)
        if np.isinf(self.p):
            return np.max(diff, axis=1)
        return np.sum(diff ** self.p, axis=1) ** (1.0 / self.p)

    def _query_points(self, X):
        """Return the query points as a 2-D array, defaulting to ``self.X_test``."""
        if X is None:
            X = self.X_test
        if X is None:
            raise ValueError("no query points: pass X or supply X_test to the constructor")
        return np.atleast_2d(np.asarray(X))

    @staticmethod
    def _inverse_distance_weights(dist):
        """Turn neighbour distances into per-row normalised inverse-distance weights.

        A query that coincides with one or more training points (distance 0)
        gives those points all of the weight, which avoids the inf/nan values
        a plain ``1 / dist`` would produce.
        """
        exact = dist == 0
        rows_with_exact = exact.any(axis=1)
        with np.errstate(divide='ignore'):
            inv = 1.0 / dist
        inv[rows_with_exact] = exact[rows_with_exact].astype(float)
        return inv / inv.sum(axis=1, keepdims=True)

    def kneighbors(self, return_distance=False, X=None):
        """Find the nearest training points of every query point.

        Parameters
        ----------
        return_distance : bool, default False
            Also return the distances to the neighbours.
        X : array-like, shape (n_queries, n_features), optional
            Query points; defaults to the ``X_test`` given at construction.

        Returns
        -------
        neigh_ind : ndarray of int, shape (n_queries, k)
            Indices into the training set, nearest first (ties keep the
            lower index first). ``k = min(n_neighbors, n_train)``.
        (dist, neigh_ind) when ``return_distance`` is true, with ``dist``
        holding the matching distances.

        Raises
        ------
        ValueError
            If there are no query points, the training set is empty, or
            ``n_neighbors`` is smaller than 1.
        """
        queries = self._query_points(X)
        train = np.asarray(self.X_train)

        if self.n_neighbors < 1:
            raise ValueError("n_neighbors must be at least 1, got %r" % (self.n_neighbors,))
        if train.shape[0] == 0:
            raise ValueError("cannot search for neighbours in an empty training set")

        k = min(int(self.n_neighbors), train.shape[0])
        dist = np.empty((queries.shape[0], k), dtype=float)
        neigh_ind = np.empty((queries.shape[0], k), dtype=int)

        for i, query in enumerate(queries):
            row = self._distance(query, train)
            # A stable sort keeps equal distances in index order.
            nearest = np.argsort(row, kind='stable')[:k]
            neigh_ind[i] = nearest
            dist[i] = row[nearest]

        if return_distance:
            return dist, neigh_ind
        return neigh_ind

    def predict(self, X=None):
        """Predict a class label for every query point.

        Parameters
        ----------
        X : array-like, shape (n_queries, n_features), optional
            Query points; defaults to the ``X_test`` given at construction.

        Returns
        -------
        ndarray of int, shape (n_queries,)
            Winning class per query. Ties go to the lowest class label.

        Raises
        ------
        ValueError
            If ``weights`` is neither 'uniform' nor 'distance'.
        """
        if self.weights not in ('uniform', 'distance'):
            raise ValueError(
                "weights must be 'uniform' or 'distance', got %r" % (self.weights,)
            )

        dist, neigh_ind = self.kneighbors(return_distance=True, X=X)
        labels = np.asarray(self.y_train)

        if self.weights == 'uniform':
            vote_weights = np.ones_like(dist)
        else:
            vote_weights = self._inverse_distance_weights(dist)

        # Guard against a stale class_num if labels were replaced after construction.
        n_classes = max(self.class_num, int(labels.max()) + 1)
        votes = np.zeros((neigh_ind.shape[0], n_classes))
        for i, (idx, w) in enumerate(zip(neigh_ind, vote_weights)):
            votes[i] = np.bincount(labels[idx], weights=w, minlength=n_classes)

        return np.argmax(votes, axis=1)

    def score(self, X_test=None, y_test=None):
        """Return the accuracy (fraction of correct predictions).

        Parameters
        ----------
        X_test : array-like, optional
            Query points; defaults to the ``X_test`` given at construction.
        y_test : array-like, optional
            True labels; defaults to the ``y_test`` given at construction.

        Raises
        ------
        ValueError
            If no labels are available, there is nothing to score, or the
            number of labels does not match the number of predictions.
        """
        if y_test is None:
            y_test = self.y_test
        if y_test is None:
            raise ValueError("no labels: pass y_test or supply it to the constructor")

        y_true = np.asarray(y_test).ravel()
        y_pred = self.predict(X_test)

        if y_true.shape[0] != y_pred.shape[0]:
            raise ValueError(
                "got %d labels for %d predictions" % (y_true.shape[0], y_pred.shape[0])
            )
        if y_true.shape[0] == 0:
            raise ValueError("cannot score an empty test set")

        return float(np.mean(y_pred == y_true))


In [6]:
from sklearn.datasets import make_classification
# Synthetic 2-feature, 3-class data set; random_state pins the generated sample.
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    n_classes=3,
    random_state=21,
)

# Standardize each feature column to zero mean and unit standard deviation.
mu = X.mean(axis=0)
sigma = X.std(axis=0)
X = (X - mu) / sigma

In [8]:
# No train/test split exists at this point in the notebook, so the full
# standardized data set is used both for fitting and as the evaluation set.
knn = KNearestNeighbors(X, y, X, y, weights='distance')

In [None]:
# Three ad-hoc 2-feature query points for trying out the neighbour search.
t = np.array([[1,2], [33, 88], [23, 2]])

In [None]:
# Query points are supplied through the constructor's X_test argument; a
# neighbour search needs no test labels, hence None for y_test.
KNearestNeighbors(X, y, t, None, weights='distance').kneighbors()

In [None]:
# The points to classify are supplied as the constructor's X_test argument;
# predicting needs no test labels, hence None for y_test.
n = KNearestNeighbors(
    X,
    y,
    np.array([[1, 2], [2, 3], [-0.7, -1.1], [444, 91], [14, -5], [-2, -3]]),
    None,
    weights='distance',
).predict()

In [None]:
# Shape of `n`, the result of the predict call in the previous cell.
n.shape

In [None]:
# Display the contents of `n`.
n

In [None]:
# NOTE(review): assuming `n` comes from `predict`, it already holds one class
# label per query point, so `np.argmax` is applied to scalars here and yields 0
# for every element. Looks like a leftover from a version where `n` held
# per-class scores — confirm and remove.
[np.argmax(i) for i in n]

In [None]:
# Query points are supplied through the constructor's X_test argument; a
# neighbour search needs no test labels, hence None for y_test.
neighbor = KNearestNeighbors(
    X,
    y,
    np.array([[1, 2], [1, 5], [-0.7, -1.1]]),
    None,
    weights='distance',
).kneighbors()

In [None]:
# Display the neighbour indices computed in the previous cell.
neighbor

In [9]:
# Shuffle features and labels together, then split into train and test sets.
SPLIT_SEED = 21   # fixed seed so the split, and the accuracy below, is reproducible
split_rate = 0.7  # fraction of the samples used for training

# Labels become the last column; hstack upcasts them to float alongside X.
data = np.hstack((X, y[:, np.newaxis]))

# A local generator shuffles the rows in place without touching global RNG state.
np.random.RandomState(SPLIT_SEED).shuffle(data)

train, test = np.split(data, [int(split_rate * data.shape[0])])

train_x = train[:, :-1]
test_x = test[:, :-1]

# Labels must be integers again: the classifier counts them with np.bincount.
train_y = train[:, -1].astype(int)
test_y = test[:, -1].astype(int)

assert len(train_x) + len(test_x) == len(X)

In [None]:
# Distance-weighted k-NN (default n_neighbors=5) bound to the train/test split.
knn2 = KNearestNeighbors(train_x, train_y, test_x, test_y, weights='distance')

In [None]:
# Indices (into train_x) of the nearest training samples for every row of test_x.
knn2.kneighbors()

In [None]:
# Predicted class label for every row of test_x.
predicted_y = knn2.predict()

In [None]:
# Accuracy: share of test samples whose predicted label equals the true label.
np.count_nonzero(predicted_y == test_y) / float(len(test_y))

In [None]:
# Accuracy: share of test samples whose predicted label equals the true label.
accuracy = np.count_nonzero(predicted_y == test_y) / float(len(test_y))

In [None]:
# Display the accuracy computed in the previous cell.
accuracy

In [None]:
# Same value again, written to stdout as plain text.
print(accuracy)