In [1]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

__Step-by-step process for selecting k in the k-nearest neighbors algorithm (pseudocode):__

<pre>- For k in range(n):
    - find the k nearest neighbors
    - calculate the mean of the k nearest neighbors
    - derive the k-NN regression
    - calculate the loss function for the training and test sets
- Choose the k of the model with the best fit
</pre>

In [12]:
# N-dimensional
import math
from functools import reduce

def l1_norm(a,b):
    """Return the l1 (Manhattan) distance between points a and b.

    a, b -- iterables of numbers, one value per coordinate. Coordinates are
            paired positionally; if the lengths differ, the extra
            coordinates of the longer input are ignored.

    Returns the sum of |a(i) - b(i)|, which is 0 for empty inputs.
    """
    # sum() starts from 0, so empty inputs yield 0 rather than the TypeError
    # that reduce() raises when it has no initial value.
    return sum(abs(x - y) for x, y in zip(a, b))
    
    
def l2_norm(a,b):
    """Return the l2 (Euclidean) distance between points a and b.

    a, b -- iterables of numbers, one value per coordinate. Coordinates are
            paired positionally; if the lengths differ, the extra
            coordinates of the longer input are ignored.

    Returns sqrt(sum of (a(i) - b(i))^2) as a float, 0.0 for empty inputs.
    """
    # sum() starts from 0, so empty inputs yield 0.0 rather than the
    # TypeError that reduce() raises when it has no initial value.
    squared_diffs = ((x - y) ** 2 for x, y in zip(a, b))
    return math.sqrt(sum(squared_diffs))
    
    
def k_neighbor_nd(input_data, k, p, metric='l1', mode='mean'):
    """Return the k-nearest-neighbor estimate for the point p.

    The k points of input_data closest to p under the chosen metric are
    selected and combined coordinate-wise. All neighbors are weighted
    equally (uniform weights).

    Keyword arguments:
    input_data -- numpy array (or other iterable) of points, one point per row
    k -- number of nearest neighbors to use (must be >= 1); if k exceeds
         the number of points, all points are used
    p -- query point
    metric -- 'l1' or 'l2': l1 norm or l2 norm
              https://en.wikipedia.org/wiki/Norm_(mathematics)
    mode -- estimator applied to the neighbors: 'mean', 'median' or 'max'
            (np.mean, np.median and np.max along axis 0)

    Raises ValueError if k < 1, if metric or mode is not one of the
    supported values, or if input_data contains no points.

    If neighbors k and k+1 are at the same distance from p, the one that
    appears first in input_data is kept (the sort is stable), so the result
    depends on the ordering of the data set.
    """
    estimators = {'mean': np.mean, 'median': np.median, 'max': np.max}

    # Validate up front: an unknown mode used to return None silently, and an
    # unknown metric or k < 1 used to produce an empty neighbor set (nan).
    if mode not in estimators:
        raise ValueError("mode must be 'mean', 'median' or 'max', got %r" % (mode,))
    if k < 1:
        raise ValueError("k must be at least 1, got %r" % (k,))

    if metric == 'l1':
        norm = l1_norm
    elif metric == 'l2':
        norm = l2_norm
    else:
        raise ValueError("metric must be 'l1' or 'l2', got %r" % (metric,))

    # Pair every point with its distance to p.
    scored = [(point, norm(point, p)) for point in input_data]
    if not scored:
        raise ValueError("input_data must contain at least one point")

    # Order pairs by increasing distance; only the distance is compared.
    scored.sort(key=lambda pair: pair[1])

    # Keep the coordinates of the k nearest neighbors of p.
    neighbors = np.array([point for point, _ in scored[:k]])

    return estimators[mode](neighbors, axis=0)
    
   

In [13]:
# Sample data set: 14 points in 4 dimensions, one point per row.
data_4d = np.array([
    [4, 1, 2, 1],
    [1, 4, 2, 0],
    [3, 3, 1, 1],
    [4, 0, 0, 0],
    [1, 2, 0, 0],
    [3, 4, 2, 3],
    [2, 4, 4, 2],
    [2, 1, 4, 1],
    [3, 3, 2, 4],
    [4, 3, 0, 4],
    [2, 2, 4, 0],
    [4, 3, 0, 2],
    [4, 3, 0, 2],  # same point as the previous row
    [0, 3, 4, 2],
])

In [14]:
# Evaluate the estimator on the same query cases under both metrics.
# Each case is (k, p, mode); all l1 results are printed first, then all l2.
eval_cases = [
    (3, [2, 1, 4, 3], 'mean'),
    (2, [4, 4, 0, 0], 'mean'),
    (3, [2, 2, 2, 4], 'max'),
    (1, [2, 3, 3, 4], 'mean'),
    (3, [2, 3, 3, 4], 'median'),
]

for norm_name in ('l1', 'l2'):
    for n_neighbors, query, estimator in eval_cases:
        print(k_neighbor_nd(input_data=data_4d, k=n_neighbors, p=query,
                            metric=norm_name, mode=estimator))

[ 2.          2.33333333  4.          1.        ]
[ 4.  3.  0.  2.]
[4 4 2 4]
[ 3.  3.  2.  4.]
[ 3.  4.  2.  3.]
[ 1.33333333  2.66666667  4.          1.66666667]
[ 3.5  3.   0.5  1.5]
[4 4 2 4]
[ 3.  3.  2.  4.]
[ 3.  4.  2.  3.]


Different metrics can select different neighbors for the same query point (p), and different modes produce different estimates from those neighbors.
So tuning the hyperparameters (k, metric, and mode) is really important when optimizing a machine learning model.

---