## Hyperparameter tuning of regression model
They are model configuration property and does not change during training  
- model inputs (training data)
- model parameters (they are the regression coeff in case of regression)
- model hyperparameters (they are configuration property)

### Hyperparameter tuning with grid search

In [43]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR

from sklearn.model_selection import GridSearchCV


In [44]:
automobile_df = pd.read_csv("./datasets/auto-mpg-processed.csv")
automobile_df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,age
0,25.5,4,122.0,96,2300,15.5,43
1,27.2,4,141.0,71,3190,24.8,41
2,23.8,4,151.0,85,2855,17.6,42
3,34.0,4,112.0,88,2395,18.0,38
4,16.0,8,302.0,140,4141,14.0,46


In [45]:
X = automobile_df.drop(['mpg','age'],axis=1)
Y = automobile_df['mpg']
x_train,x_test,y_train,y_test = train_test_split(X,Y,test_size=0.2)

### Grid search on Lasso Regressor

In [46]:
parameters = {'alpha':[0.2,0.3,0.6,0.8,0.9,1.0]}

grid_search = GridSearchCV(Lasso(),parameters,cv=3,return_train_score=True)
grid_search.fit(x_train,y_train)

grid_search.best_params_

{'alpha': 0.9}

In [47]:
for i in range(6):
    print("parameters ", grid_search.cv_results_['params'][i])
    print("Mean Test score: ", grid_search.cv_results_['mean_test_score'][i])
    print("Rank: ",grid_search.cv_results_['rank_test_score'][i])

parameters  {'alpha': 0.2}
Mean Test score:  0.6991682941225713
Rank:  6
parameters  {'alpha': 0.3}
Mean Test score:  0.6995488043840277
Rank:  5
parameters  {'alpha': 0.6}
Mean Test score:  0.7001168418620658
Rank:  4
parameters  {'alpha': 0.8}
Mean Test score:  0.7001201840612147
Rank:  3
parameters  {'alpha': 0.9}
Mean Test score:  0.7001210080071267
Rank:  1
parameters  {'alpha': 1.0}
Mean Test score:  0.7001205314878473
Rank:  2


In [48]:
lasso_model = Lasso(alpha=grid_search.best_params_['alpha']).fit(x_train,y_train)

In [49]:
y_pred = lasso_model.predict(x_test)

In [50]:
print("training score: ", lasso_model.score(x_train,y_train))
print("Test score: ", r2_score(y_test,y_pred))

training score:  0.7078280403788706
Test score:  0.6976014146001029


### Grid search on KNeighborsRegressor

In [51]:
parameters = {'n_neighbors':[10,12,14,18,20,25,35,50]}

grid_search = GridSearchCV(KNeighborsRegressor(),parameters,cv=3,return_train_score=True)
grid_search.fit(x_train,y_train)

grid_search.best_params_

{'n_neighbors': 20}

In [52]:
for i in range(8):
    print("parameters ", grid_search.cv_results_['params'][i])
    print("Mean Test score: ", grid_search.cv_results_['mean_test_score'][i])
    print("Rank: ",grid_search.cv_results_['rank_test_score'][i])

parameters  {'n_neighbors': 10}
Mean Test score:  0.6950719769889242
Rank:  8
parameters  {'n_neighbors': 12}
Mean Test score:  0.6990716542661675
Rank:  7
parameters  {'n_neighbors': 14}
Mean Test score:  0.7095779027206064
Rank:  5
parameters  {'n_neighbors': 18}
Mean Test score:  0.7152294974003176
Rank:  4
parameters  {'n_neighbors': 20}
Mean Test score:  0.7196283126459915
Rank:  1
parameters  {'n_neighbors': 25}
Mean Test score:  0.7184521579402842
Rank:  2
parameters  {'n_neighbors': 35}
Mean Test score:  0.7171525881114326
Rank:  3
parameters  {'n_neighbors': 50}
Mean Test score:  0.7072573042235719
Rank:  6


In [53]:
kneighbors_model = KNeighborsRegressor(n_neighbors=grid_search.best_params_['n_neighbors']).fit(x_train,y_train)
y_pred = lasso_model.predict(x_test)
print("training score: ", kneighbors_model.score(x_train,y_train))
print("Test score: ", r2_score(y_test,y_pred))

training score:  0.7462087219339162
Test score:  0.6976014146001029


### Hyper parameter tuning for Decision Tree regressor

In [54]:
parameters = {'max_depth':[1,2,3,4,5,6,7]}

grid_search = GridSearchCV(DecisionTreeRegressor(),parameters,cv=3,return_train_score=True)
grid_search.fit(x_train,y_train)

grid_search.best_params_

{'max_depth': 4}

In [56]:
decision_tree = DecisionTreeRegressor(max_depth=grid_search.best_params_['max_depth']).fit(x_train,y_train)
y_pred = decision_tree.predict(x_test)
print("training score: ", kneighbors_model.score(x_train,y_train))
print("Test score: ", r2_score(y_test,y_pred))

training score:  0.7462087219339162
Test score:  0.4754444017949485
