#  k-nearest Neighbors Regressor - Implementation on synthetic dataset

In [26]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import numpy as np

In [23]:
# Creating the synthetic dataset
X, y = make_regression(n_samples=1000, n_features=3, n_informative=3, random_state=42)

In [24]:
# Train Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Sample Training

In [25]:
sample_model = KNeighborsRegressor(n_jobs=-1)
sample_model.fit(X_train, y_train)
y_pred = sample_model.predict(X_test)

### Sample model scores

In [28]:
print("R2 Score:",r2_score(y_test, y_pred))
print("Mean Absolute Error:",mean_absolute_error(y_test, y_pred))
print("Root Mean Squared Error:", np.sqrt(mean_squared_error(y_test, y_pred)))

R2 Score: 0.975382121239828
Mean Absolute Error: 12.998096712498993
Root Mean Squared Error: 20.711806607812935


## Hyperparameter Tuning

In [29]:
# Creating the Parameter Grid for GridSearchCV
n_neighbors = list(range(1,11)) # number of neighbors from 1 to 10
weights = ['uniform', 'distance']
algorithm = ['auto', 'ball_tree', 'kd_tree']
leaf_size = list(range(5, 31, 5))
p = [1, 2]
n_jobs = [-1]

param_grid = dict(n_neighbors = n_neighbors,
                  weights = weights,
                  algorithm = algorithm,
                  leaf_size = leaf_size,
                  p = p,
                  n_jobs = n_jobs)

In [30]:
grid = GridSearchCV(estimator=KNeighborsRegressor(), param_grid=param_grid, n_jobs=-1)

In [31]:
grid.fit(X_train, y_train)

In [32]:
# Best Parameters
grid.best_params_

{'algorithm': 'auto',
 'leaf_size': 5,
 'n_jobs': -1,
 'n_neighbors': 4,
 'p': 2,
 'weights': 'distance'}

In [33]:
# Best model
best_model = grid.best_estimator_

In [34]:
y_pred = best_model.predict(X_test)

### Model Scores

In [35]:
print("R2 Score:",r2_score(y_test, y_pred))
print("Mean Absolute Error:",mean_absolute_error(y_test, y_pred))
print("Root Mean Squared Error:", np.sqrt(mean_squared_error(y_test, y_pred)))

R2 Score: 0.9807785245401399
Mean Absolute Error: 11.726453413340865
Root Mean Squared Error: 18.30147229764003
