# KNeighbors Regressor Example Notebook 🏠
This notebook shows that my implementation of KNeighborsRegressor produces results similar to the one from sklearn. The notebook uses the California housing dataset for this comparison.

In [1]:
# Importing KNeighborsRegressor from sklearn and my own implementation
%cd .. 
from algorithms.neighbors.KNeighborsRegressor import KNeighborsRegressor
from sklearn.neighbors import KNeighborsRegressor as KNeighborsRegressor_sklearn
import numpy as np

f:\KTorch


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
# Getting the dataset
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [3]:
# Getting our data: return_X_y=True yields a (features, target) pair instead of a Bunch object
X, y = fetch_california_housing(return_X_y=True)
# Sanity check: display both shapes plus the first sample and its target value
X.shape, y.shape, X[0], y[0]

((20640, 8),
 (20640,),
 array([   8.3252    ,   41.        ,    6.98412698,    1.02380952,
         322.        ,    2.55555556,   37.88      , -122.23      ]),
 4.526)

In [4]:
# Hold out 20% of the samples as a test set; random_state pins the shuffle so the split is reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=7
)
# Show the number of samples in each of the four resulting arrays
tuple(len(split) for split in (X_train, X_test, y_train, y_test))

(16512, 4128, 16512, 4128)

In [5]:
# Standardise the features: the scaler learns its statistics from the training split only
# and that same transformation is then applied to both splits (no test-set leakage).
# NOTE: X_train / X_test are overwritten here, so re-running this cell on its own
# would standardise data that has already been scaled.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
# Preview the first scaled sample of each split
X_train[0], X_test[0]

(array([ 0.32228035,  1.14255294, -0.02983161,  0.0702982 , -0.53274173,
        -0.14066543,  0.84560328, -1.29903555]),
 array([-0.9607672 ,  0.18852846, -0.99377706, -0.01531633,  1.46991487,
         0.01596965, -0.73915152,  0.63040587]))

In [6]:
# Build both regressors from a single set of hyperparameters so they are configured identically
# (in scikit-learn, the minkowski metric with p=2 is the Euclidean distance)
shared_params = {'n_neighbors': 5, 'metric': 'minkowski', 'p': 2}
my_knn = KNeighborsRegressor(**shared_params)
sklearn_knn = KNeighborsRegressor_sklearn(**shared_params)

In [7]:
# Fitting the models: both regressors are trained on the same scaled training split
my_knn.fit(X_train, y_train)
sklearn_knn.fit(X_train, y_train)

K Nearest Neighbors Regressor model fitted successfully


In [9]:
# Predicting with the models on the held-out test split
my_y_pred = my_knn.predict(X_test)
sklearn_y_pred = sklearn_knn.predict(X_test)
# Display the first five predictions of each model side by side for a quick visual check
my_y_pred[:5], sklearn_y_pred[:5]

(array([1.7966  , 2.3018  , 4.017002, 2.2244  , 5.00001 ]),
 array([1.7966  , 2.3018  , 4.017002, 2.2244  , 5.00001 ]))

The predictions are similar

In [10]:
# Comparing the results element-wise over the whole test set.
# decimal=2 means every pair of predictions must differ by less than 1.5e-2;
# an AssertionError is raised otherwise, so reaching the print means the check passed.
np.testing.assert_array_almost_equal(my_y_pred, sklearn_y_pred, decimal=2)
print('The results are the same')

The results are the same


In [11]:
# Testing the MSE: both sets of predictions are scored against the same ground truth
from sklearn.metrics import mean_squared_error

my_mse, sklearn_mse = (
    mean_squared_error(y_test, predictions)
    for predictions in (my_y_pred, sklearn_y_pred)
)
# decimal=2 tolerates an absolute difference below 1.5e-2 between the two errors
np.testing.assert_almost_equal(my_mse, sklearn_mse, decimal=2)
print('The MSEs are the same')

The MSEs are the same


In [15]:
# Testing the coefficient of determination on the held-out test split
sklearn_score = sklearn_knn.score(X_test, y_test)
my_score = my_knn.score(X_test, y_test)
# Only three significant digits are compared (first argument is the actual value, second the desired one)
np.testing.assert_approx_equal(my_score, sklearn_score, significant=3)
print('The coefficient of determination are the same')
# Show both scores for a visual comparison
sklearn_score, my_score

The coefficient of determination are the same


(0.6894729606145737, 0.69)

#### Using weighted KNeighborsRegressor

In [16]:
# Build both weighted regressors from a single set of hyperparameters so they are configured identically
# (in scikit-learn, weights='distance' weights each neighbour by the inverse of its distance)
weighted_params = {'n_neighbors': 5, 'metric': 'minkowski', 'p': 2, 'weights': 'distance'}
my_knn_weighted = KNeighborsRegressor(**weighted_params)
sklearn_knn_weighted = KNeighborsRegressor_sklearn(**weighted_params)

In [17]:
# Fitting the weighted models: both are trained on the same scaled training split
my_knn_weighted.fit(X_train, y_train)
sklearn_knn_weighted.fit(X_train, y_train)

K Nearest Neighbors Regressor model fitted successfully


In [22]:
# Predicting with the weighted models on the held-out test split
my_y_pred_weighted = my_knn_weighted.predict(X_test)
sklearn_y_pred_weighted = sklearn_knn_weighted.predict(X_test)
# Display the first five predictions of each model side by side for a quick visual check
my_y_pred_weighted[:5], sklearn_y_pred_weighted[:5]

(array([1.81531148, 2.30833068, 3.7118399 , 2.22385677, 5.00001   ]),
 array([1.81531148, 2.30833068, 3.7118399 , 2.22385677, 5.00001   ]))

The predictions are similar

In [19]:
# Comparing the results of the weighted models element-wise over the whole test set.
# decimal=2 means every pair of predictions must differ by less than 1.5e-2;
# an AssertionError is raised otherwise, so reaching the print means the check passed.
np.testing.assert_almost_equal(my_y_pred_weighted, sklearn_y_pred_weighted, decimal=2)
print('The results are the same')

The results are the same


In [20]:
# Testing the MSE of the weighted models against the ground-truth targets.
# NOTE: mean_squared_error was already imported in the unweighted MSE cell, so this import is redundant.
from sklearn.metrics import mean_squared_error
# NOTE: my_mse / sklearn_mse overwrite the values computed for the unweighted models
my_mse = mean_squared_error(y_test, my_y_pred_weighted)
sklearn_mse = mean_squared_error(y_test, sklearn_y_pred_weighted)
# decimal=2 tolerates an absolute difference below 1.5e-2 between the two errors
np.testing.assert_almost_equal(my_mse, sklearn_mse, decimal=2)
print('The MSEs are the same')

The MSEs are the same


In [21]:
# Testing the coefficient of determination of the weighted models.
# score(X, y) predicts on X internally and compares the result with y, so y has to be
# the ground-truth targets. Passing a model's own predictions as y compares the model
# with itself and trivially yields a perfect score, which makes the assertion vacuous
# (this assumes my implementation follows scikit-learn's score(X, y) contract).
sklearn_score = sklearn_knn_weighted.score(X_test, y_test)
my_score = my_knn_weighted.score(X_test, y_test)
# decimal=2 tolerates an absolute difference below 1.5e-2 between the two scores
np.testing.assert_almost_equal(my_score, sklearn_score, decimal=2)
print('The coefficient of determination are the same')
# Show both scores for a visual comparison, as in the unweighted section
sklearn_score, my_score

The coefficient of determination are the same
