In [4]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [5]:
class KNeighborsRegression:
    '''
    k-nearest-neighbors regressor based on the Minkowski distance.

    The model is lazy: nothing is learned ahead of time. For every test
    point, `predict` ranks the training rows by distance to that point and
    averages the targets of the `k` closest rows.
    '''

    def __init__(self, k: int = 3, p: int = 1) -> None:
        '''
        Initialize KNeighborsRegression

        :param k: Number of neighbors to use for prediction
        :param p: Minkowski distance power parameter
        :returns: None
        '''

        self.k = k
        self.p = p


    def minkowski_distance(self, x, y, p = 1):
        '''
        Calculates the Minkowski distance between two vectors x and y.

        :param x: First vector (any sized sequence of numbers)
        :param y: Second vector, same length as x
        :param p: p is the Minkowski power parameter. (Default = 1)
        :returns: the Minkowski distance between x and y
        :raises: ValueError if x and y do not have the same length
        '''

        # Exception handling
        if len(x) != len(y):
            raise ValueError('x and y must be of same length')

        # Pair the coordinates by position (not by label), so the result does
        # not depend on how the inputs happen to be indexed.
        distance = sum(abs(a - b) ** p for a, b in zip(x, y))

        return distance ** (1 / p)


    def fit(self, X: np.ndarray, y: np.ndarray) -> pd.DataFrame:
        '''
        Find the k training rows closest to a single query point.

        Despite its name, this method does not train anything; it is the
        neighbor search that `predict` runs once per test point.

        :param X: Training data, one row per sample
        :param y: A single query (test) point - NOT the training labels
        :returns: DataFrame with one column 'dist' holding the (at most) k
                  smallest distances in ascending order; its index holds the
                  row positions of those neighbors in X
        '''

        # Coerce to float arrays so that DataFrames are iterated row by row
        # (iterating a DataFrame yields column names) and so that unsigned
        # integer inputs cannot wrap around in the subtraction.
        X = np.asarray(X, dtype = float)
        y = np.asarray(y, dtype = float)

        # Calculate distances
        distances = [self.minkowski_distance(x, y, self.p) for x in X]

        # Return neighbors. A stable sort keeps equidistant rows in training
        # order, which makes tie-breaking deterministic.
        return pd.DataFrame(
            data = distances,
            columns = ['dist']
        ).sort_values(
            by = 'dist',
            axis = 0,
            kind = 'mergesort'
        )[:self.k]


    def predict(self, X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray) -> list:
        '''
        Predict the targets for the test data

        :param X_train: Training data, one row per sample
        :param X_test: Testing data, one row per sample
        :param y_train: Training targets, one per row of X_train
        :returns y_pred: List with one predicted value per row of X_test
        :raises: ValueError if X_train and y_train do not have the same length
        '''

        X_train = np.asarray(X_train, dtype = float)
        X_test = np.asarray(X_test, dtype = float)

        # The neighbor index returned by `fit` is positional. Converting to an
        # ndarray guarantees positional lookup even when y_train is a pandas
        # Series carrying a shuffled / non-default index.
        y_train = np.asarray(y_train)

        if len(X_train) != len(y_train):
            raise ValueError('X_train and y_train must be of same length')

        y_pred = []
        for point in X_test:

            # Find the neighbors of the specific test point
            neighbors = self.fit(X_train, point)

            # Calculate the mean target of the neighbors and append it to y_pred
            y_pred.append(y_train[neighbors.index.to_numpy()].mean())

        return y_pred

In [6]:
if __name__ == '__main__':

    # Fixed seed so the train/test split, and therefore the printed
    # predictions, are identical on every run of the notebook.
    RANDOM_STATE = 42

    # Load the dataset
    df = pd.read_csv('boston.csv')

    # Preparing the data: every column except 'price' is a feature,
    # 'price' is the regression target (kept as a plain ndarray).
    X = df.drop(columns = 'price')
    y = df['price'].values
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size = 0.3,
        random_state = RANDOM_STATE
    )

    # Scale the data. The scaler is fitted on the training split only and
    # then applied to the test split, so no test statistics leak into it.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Define model
    knn = KNeighborsRegression(k = 5)

    # Make predictions
    y_pred = knn.predict(X_train, X_test, y_train)
    print(y_pred)

[17.16, 20.66, 23.160000000000004, 22.36, 21.18, 21.94, 20.04, 19.92, 33.879999999999995, 18.64, 16.18, 19.16, 31.380000000000003, 39.440000000000005, 21.48, 27.9, 13.74, 20.339999999999996, 21.9, 21.8, 17.12, 21.259999999999998, 27.46, 24.220000000000002, 15.819999999999999, 23.259999999999998, 14.919999999999998, 23.459999999999997, 22.96, 39.94, 23.36, 35.8, 21.2, 16.24, 11.66, 14.66, 11.28, 17.52, 23.46, 27.46, 9.72, 18.78, 18.48, 18.04, 37.56, 29.080000000000002, 21.9, 22.8, 28.74, 25.74, 29.0, 22.080000000000002, 31.4, 18.46, 38.88, 20.04, 23.3, 15.360000000000003, 17.939999999999998, 21.059999999999995, 27.46, 14.919999999999998, 25.78, 33.6, 12.579999999999998, 38.48, 18.5, 25.759999999999998, 27.1, 14.059999999999999, 23.080000000000002, 9.559999999999999, 30.4, 39.94, 19.860000000000003, 13.5, 22.0, 15.559999999999999, 20.619999999999997, 23.04, 21.639999999999997, 19.0, 25.419999999999998, 18.86, 31.22, 11.36, 39.54, 24.02, 23.18, 32.12, 10.04, 16.599999999999998, 15.1800000