In [16]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor

from sklearn.metrics import mean_squared_error
from math import sqrt

## KNN Regression Model

In [2]:
# Download location of the abalone data file: the archive's base directory
# followed by the dataset-specific path.
url = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases"
    + "/abalone/abalone.data"
)

Information about the dataset: https://archive.ics.uci.edu/ml/datasets/abalone

In [5]:
# Load the file at `url`. header=None means no row is treated as a header,
# so the columns carry default labels until they are named explicitly.
abalone = pd.read_csv(filepath_or_buffer=url, header=None)

In [7]:
# Assign the nine column labels positionally (first label -> first column).
abalone.columns = [
    "Sex", "Length", "Diameter", "Height",
    "Whole weight", "Shucked weight", "Viscera weight", "Shell weight",
    "Rings",
]

In [38]:
# Preview the first rows of the frame.
# NOTE(review): this cell's execution count (38) is higher than that of the
# cells below it, and the saved output has no "Sex" column — it appears to have
# been run after the drop further down. Re-run top to bottom to refresh it.
abalone.head()

Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


In [8]:
# Remove the "Sex" column before building the feature matrix.
# errors="ignore" keeps this cell re-runnable: on a second execution the
# column is already gone and the frame is returned unchanged instead of
# raising KeyError.
abalone = abalone.drop(columns="Sex", errors="ignore")

In [9]:
# The target is the "Rings" column; every remaining column is a feature.
y = abalone["Rings"]
X = abalone.drop(columns=["Rings"])

In [11]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [13]:
# Baseline model: a k-nearest-neighbours regressor using 3 neighbours.
knn_model = KNeighborsRegressor(n_neighbors=3)

In [14]:
# Fit on the training split only; the test split is not used here.
knn_model.fit(X_train, y_train)

KNeighborsRegressor(n_neighbors=3)

In [17]:
# In-sample predictions: predict on the same rows the model was fitted on.
train_preds = knn_model.predict(X_train)

In [18]:
# Mean squared error of the in-sample predictions against the training targets.
mse = mean_squared_error(y_true=y_train, y_pred=train_preds)

In [19]:
# Root of the MSE, which puts the error back on the scale of the target.
rmse = sqrt(mse)

In [20]:
# Display the training RMSE.
rmse

1.6748477251353828

In [21]:
# Predictions for the held-out test rows.
test_preds = knn_model.predict(X_test)

In [22]:
# Mean squared error on the test split.
# Note: this rebinds `mse`, so the training value computed earlier is no
# longer available under that name.
mse = mean_squared_error(y_true=y_test, y_pred=test_preds)

In [23]:
# Root of the test MSE. Rebinds `rmse`, replacing the training value.
rmse = sqrt(mse)

In [24]:
# Display the test RMSE (the value `rmse` holds after the reassignment above).
rmse

2.349822122737403

## Grid Search

In [25]:
from sklearn.model_selection import GridSearchCV

In [26]:
# Search space: every integer neighbour count from 1 through 49.
parameters = dict(n_neighbors=range(1, 50))

In [27]:
# Grid search over `parameters` using a default-constructed KNN regressor;
# all other GridSearchCV options are left at their defaults.
gridsearch = GridSearchCV(
    estimator=KNeighborsRegressor(),
    param_grid=parameters,
)

In [28]:
# Run the search on the training split only; the test split is not passed in.
gridsearch.fit(X_train, y_train)

GridSearchCV(estimator=KNeighborsRegressor(),
             param_grid={'n_neighbors': range(1, 50)})

In [29]:
# Show the parameter setting the search selected.
gridsearch.best_params_

{'n_neighbors': 12}

In [30]:
# Predictions from the fitted search object on the training rows.
train_preds_grid = gridsearch.predict(X_train)

In [31]:
# Mean squared error of the grid-search predictions on the training split.
train_mse = mean_squared_error(y_true=y_train, y_pred=train_preds_grid)

In [32]:
# Root of the training MSE, kept under its own name so it can be compared
# with the test value.
train_rmse = sqrt(train_mse)

In [33]:
# Predictions from the fitted search object on the held-out test rows.
test_preds_grid = gridsearch.predict(X_test)

In [34]:
# Mean squared error of the grid-search predictions on the test split.
test_mse = mean_squared_error(y_true=y_test, y_pred=test_preds_grid)

In [35]:
# Root of the test MSE.
test_rmse = sqrt(test_mse)

In [36]:
# Display the training RMSE of the grid-searched model.
train_rmse

1.9863477177083948

In [37]:
# Display the test RMSE of the grid-searched model.
test_rmse

2.1468739250794564