In [557]:
import numpy as np
from collections import Counter

In [558]:
def knn_classifier_and_regressor(train_input, train_output, new_point, k, classifier=True):
    '''
    Predict the output of one new point with k-nearest neighbors.

    Neighbors are ranked by Euclidean distance between each training row and
    new_point. A classifier returns the most common output among the k nearest
    neighbors; a regressor returns the mean of their outputs.

    Input:
        train_input: ndarray, a n*p matrix, which is the features of training data set.
        train_output: ndarray with the n outputs (either a class or a value) of the
            training data set. A flat length-n array and a n*1 column are both accepted.
        new_point: ndarray with the p features of the new data (shape (p,) or 1*p).
        k: int, number of neighbors. Must be at least 1. If k is larger than n,
            all n training points are used.
        classifier: bool, True for classification, False for regression. Default is True.

    Output:
        knn_result: the winning class (classifier) or the mean neighbor output (regressor).

    Raises:
        ValueError: if k is smaller than 1 or the training set has no rows.
    '''

    train_input = np.asarray(train_input)
    train_output = np.asarray(train_output)
    new_point = np.asarray(new_point).reshape(-1)

    # A n*1 column would make Counter iterate over length-1 arrays, which are
    # unhashable; flatten it so each neighbor contributes a scalar.
    if train_output.ndim == 2 and train_output.shape[1] == 1:
        train_output = train_output.reshape(-1)

    if train_input.shape[0] == 0:
        raise ValueError("train_input must contain at least one sample")
    if k < 1:
        # k == 0 leaves no voters, and a negative k would silently slice
        # "all but the last |k|" points instead of the nearest ones.
        raise ValueError("k must be a positive integer")

    # Euclidean distance from every training row to the new point, in one call.
    distance = np.linalg.norm(train_input - new_point, axis=1)

    # A stable sort keeps equidistant points in training order, so ties are
    # always resolved the same way.
    top_k_neighbors = np.argsort(distance, kind="stable")[:k]
    k_neighbor_output = train_output[top_k_neighbors]

    # for classifier: majority vote (among tied classes, the one seen first,
    # i.e. belonging to the nearest neighbor, wins)
    if classifier:
        voters = Counter(k_neighbor_output)
        return voters.most_common(1)[0][0]

    # for regressor: average of the neighbors' outputs
    return np.mean(k_neighbor_output)

In [553]:
# A classification task on the iris data set: predict the species of each
# held-out flower with 5-nearest-neighbors and report the accuracy.

import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()
X = iris.data
y = iris.target

# Fixed random_state so the train/test split, and therefore the accuracy
# below, is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

predictions = [knn_classifier_and_regressor(X_train, 
                                          y_train, 
                                          new_point, 
                                          k=5,
                                          classifier=True)
              for new_point in X_test]

# Fraction of test points whose predicted class equals the true class.
# The list is converted explicitly so the comparison is element-wise.
accuracy_rate = np.mean(np.asarray(predictions) == y_test)
accuracy_rate

0.98

In [556]:
# A regression task on the Boston housing data set: predict each held-out
# target with 5-nearest-neighbors and report the mean squared error.

# NOTE(review): datasets.load_boston was deprecated in scikit-learn 1.0 and
# removed in 1.2, so this cell needs an older scikit-learn or another data
# source (e.g. datasets.fetch_california_housing) - confirm which is intended.
bo_housing = datasets.load_boston()
X = bo_housing.data
y = bo_housing.target

# Fixed random_state so the train/test split, and therefore the MSE below,
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

predictions = [knn_classifier_and_regressor(X_train, 
                                          y_train, 
                                          new_point, 
                                          k=5,
                                          classifier=False)
              for new_point in X_test]

# Mean squared error between predictions and true targets.
MSE = np.mean((np.asarray(predictions) - y_test) ** 2)
MSE

30.67194901960782