In [1]:
"""
    KNN from scratch
"""
class KNNClassifier:
    """k-nearest-neighbours classifier written in plain Python.

    ``fit`` stores the training samples as-is; ``predict`` measures the
    Euclidean distance from each query to every stored sample and returns the
    most common label among the ``n_neighbors`` closest ones.
    """

    def __init__(self, n_neighbors=5):
        """Store the number of neighbours that vote on each prediction.

        Raises:
            ValueError: if ``n_neighbors`` is smaller than 1.
        """
        if n_neighbors < 1:
            raise ValueError('n_neighbors must be at least 1')
        self.n_neighbors = n_neighbors

    def fit(self, X, y):
        """Remember the training samples ``X`` and their labels ``y``.

        Labels must be hashable; they only need to be orderable when a
        distance tie or a vote tie has to be broken.

        Raises:
            ValueError: if ``X`` and ``y`` differ in length or are empty.
        """
        if len(X) != len(y):
            raise ValueError('X and y must contain the same number of samples')
        if len(X) == 0:
            raise ValueError('Cannot fit on an empty training set')
        self.X = X
        self.y = y

    def euclideanDistance(self, l1, l2):
        """Return the Euclidean distance between two equal-length vectors.

        Raises:
            ValueError: if the vectors differ in length.
        """
        if len(l1) != len(l2):
            raise ValueError('Different shape')
        return sum((a - b) ** 2 for a, b in zip(l1, l2)) ** 0.5

    @staticmethod
    def _majority_label(labels):
        """Return the most frequent label; the smallest label wins a tie."""
        votes = {}
        for label in labels:
            votes[label] = votes.get(label, 0) + 1
        top = max(votes.values())
        # Picking the minimum among tied labels keeps the result independent
        # of set/dict iteration order (and therefore of the hash seed).
        return min(label for label, count in votes.items() if count == top)

    def predict(self, X):
        """Predict a label for every sample in ``X``.

        If the training set holds fewer than ``n_neighbors`` samples, all of
        them vote.

        Returns:
            list: one predicted label per row of ``X``, in order.
        """
        y_predict = []
        for x_unknown in X:
            distances = [self.euclideanDistance(x_unknown, x_known)
                         for x_known in self.X]
            # Pairs sort by distance first; equal distances fall back to
            # comparing the labels.
            nearest = sorted(zip(distances, self.y))[:self.n_neighbors]
            y_predict.append(self._majority_label([label for _, label in nearest]))
        return y_predict

    def score(self, X, y):
        """Return the accuracy of ``predict(X)`` against the true labels ``y``.

        Returns:
            float: fraction of samples whose predicted label equals ``y``.

        Raises:
            ValueError: if ``X`` and ``y`` differ in length or are empty.
        """
        if len(X) != len(y):
            raise ValueError('X and y must contain the same number of samples')
        if len(y) == 0:
            raise ValueError('Cannot score an empty test set')

        y_predict = self.predict(X)
        hits = sum(1 for predicted, actual in zip(y_predict, y) if predicted == actual)
        return hits / len(y)

In [2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split

In [3]:
# Load the iris dataset; the split below reads features from `.data` and labels from `.target`.
data = datasets.load_iris()

In [4]:
# Hold out 20% of the samples for testing; random_state is fixed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=123)

In [5]:
# Fit the from-scratch classifier (default n_neighbors=5) and show its accuracy on the held-out set.
knn_clf = KNNClassifier()
knn_clf.fit(X_train, y_train)
knn_clf.score(X_test, y_test)

0.9666666666666667

In [6]:
from sklearn.neighbors import KNeighborsClassifier

In [7]:
# Run scikit-learn's KNeighborsClassifier (default settings) on the same split,
# for comparison with the from-scratch score above.
knn_clf_sk = KNeighborsClassifier()
knn_clf_sk.fit(X_train, y_train)
print(knn_clf_sk.score(X_test, y_test))

0.966666666667


In [8]:
# REGRESSOR

In [9]:
from sklearn.metrics import mean_squared_error

"""
    KNN from scratch
"""
class KNNRegressor:
    """k-nearest-neighbours regressor written in plain Python.

    ``fit`` stores the training samples as-is; ``predict`` measures the
    Euclidean distance from each query to every stored sample and returns the
    mean target of the ``n_neighbors`` closest ones.
    """

    def __init__(self, n_neighbors=5):
        """Store the number of neighbours averaged for each prediction.

        Raises:
            ValueError: if ``n_neighbors`` is smaller than 1.
        """
        if n_neighbors < 1:
            raise ValueError('n_neighbors must be at least 1')
        self.n_neighbors = n_neighbors

    def fit(self, X, y):
        """Remember the training samples ``X`` and their numeric targets ``y``.

        Raises:
            ValueError: if ``X`` and ``y`` differ in length or are empty.
        """
        if len(X) != len(y):
            raise ValueError('X and y must contain the same number of samples')
        if len(X) == 0:
            raise ValueError('Cannot fit on an empty training set')
        self.X = X
        self.y = y

    def euclideanDistance(self, l1, l2):
        """Return the Euclidean distance between two equal-length vectors.

        Raises:
            ValueError: if the vectors differ in length.
        """
        if len(l1) != len(l2):
            raise ValueError('Different shape')
        return sum((a - b) ** 2 for a, b in zip(l1, l2)) ** 0.5

    def predict(self, X):
        """Predict a target for every sample in ``X``.

        Each prediction is the unrounded arithmetic mean of the targets of the
        nearest neighbours. If the training set holds fewer than
        ``n_neighbors`` samples, the mean is taken over all of them.

        Returns:
            list: one predicted value per row of ``X``, in order.
        """
        y_predict = []
        for x_unknown in X:
            distances = [self.euclideanDistance(x_unknown, x_known)
                         for x_known in self.X]
            # Pairs sort by distance first; equal distances fall back to
            # comparing the targets.
            nearest = sorted(zip(distances, self.y))[:self.n_neighbors]
            targets = [target for _, target in nearest]
            # Divide by the neighbours actually found, not by n_neighbors:
            # the slice above may return fewer than requested.
            y_predict.append(sum(targets) / len(targets))
        return y_predict

    def score(self, X, y):
        """Return the mean squared error of ``predict(X)`` against ``y``.

        Note that this is an error, so lower is better.
        """
        y_predict = self.predict(X)

        # mean_squared_error takes (y_true, y_pred) in that order.
        return mean_squared_error(y, y_predict)

In [10]:
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so this
# cell needs an older scikit-learn or a replacement dataset; confirm against the pinned version.
# This cell rebinds data, X_train, X_test, y_train and y_test, replacing the iris split made earlier.
data = datasets.load_boston()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=123)

In [11]:
# Fit the from-scratch regressor (default n_neighbors=5) and print its test-set score,
# which KNNRegressor.score computes as mean squared error.
knn_reg = KNNRegressor()
knn_reg.fit(X_train, y_train)
# print(knn_reg.predict(X_test))
print(knn_reg.score(X_test, y_test))

37.378627451


In [12]:
from sklearn.neighbors import KNeighborsRegressor

In [13]:
# Run scikit-learn's KNeighborsRegressor (default settings) on the same split and
# evaluate it with the same mean-squared-error metric, for comparison.
knn_reg_sk = KNeighborsRegressor()
knn_reg_sk.fit(X_train, y_train)
y_predict = knn_reg_sk.predict(X_test)
mean_squared_error(y_predict, y_test)

37.378627450980389