In [1]:
%reset -f
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR

from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings('ignore')

In [2]:
## import data
automobile_df = pd.read_csv('datasets/auto-mpg-processed2.csv')
automobile_df.head().drop('Unnamed: 0',axis=1)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,age
0,31.5,4,98.0,68,2045,18.5,43
1,16.0,8,400.0,230,4278,9.5,47
2,23.0,4,120.0,88,2957,17.0,45
3,25.0,4,110.0,87,2672,17.5,50
4,22.0,6,250.0,105,3353,14.5,44


In [3]:
## split data into training and testing set
X = automobile_df.drop(['mpg','age'],axis=1)
Y = automobile_df['mpg']
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)


In [4]:
## use grid_search to evaluate the best alpha parameter,
## cv =3 means 3 fold progression, splits test data into three parts and train in chunks
parameters ={'alpha':[.2, .4, .6,.7,.8,.9,1.0]}
grid_search = GridSearchCV(Lasso(), parameters, cv=3, return_train_score=True)
grid_search.fit(x_train, y_train)
grid_search.best_params_

{'alpha': 1.0}

In [5]:
## compare results
for i in range(len(parameters['alpha'])):
    print('Parameters: ',grid_search.cv_results_['params'][i])
    print('Mean Test Score: ', grid_search.cv_results_['mean_test_score'][i])
    print('Rank: ',grid_search.cv_results_['rank_test_score'][i])

Parameters:  {'alpha': 0.2}
Mean Test Score:  0.7000481835152299
Rank:  7
Parameters:  {'alpha': 0.4}
Mean Test Score:  0.7007670410763729
Rank:  6
Parameters:  {'alpha': 0.6}
Mean Test Score:  0.7009927700302261
Rank:  5
Parameters:  {'alpha': 0.7}
Mean Test Score:  0.7011079491141337
Rank:  4
Parameters:  {'alpha': 0.8}
Mean Test Score:  0.7012183054464596
Rank:  3
Parameters:  {'alpha': 0.9}
Mean Test Score:  0.7013242919681165
Rank:  2
Parameters:  {'alpha': 1.0}
Mean Test Score:  0.7014274307760663
Rank:  1


In [6]:
lasso_model = Lasso(alpha=grid_search.best_params_['alpha']).fit(x_train, y_train)
y_pred = lasso_model.predict(x_test)
print('Training Score: ',lasso_model.score(x_train, y_train))
print('Test score: ', r2_score(y_test, y_pred))

Training Score:  0.7147474965419156
Test score:  0.6687866538415967


In [7]:
parameters = {'n_neighbors':[10,12,14,18,20,25,30,35,50]}
grid_search = GridSearchCV(KNeighborsRegressor(), parameters, cv=3, return_train_score=True)
grid_search.fit(x_train, y_train)
grid_search.best_params_

{'n_neighbors': 18}

In [8]:
for i in range(len(parameters['n_neighbors'])):
    print('Parameters: ',grid_search.cv_results_['params'][i])
    print('Mean Test Score: ', grid_search.cv_results_['mean_test_score'][i])
    print('Rank: ',grid_search.cv_results_['rank_test_score'][i])

Parameters:  {'n_neighbors': 10}
Mean Test Score:  0.6991105060665317
Rank:  9
Parameters:  {'n_neighbors': 12}
Mean Test Score:  0.7050768664520228
Rank:  8
Parameters:  {'n_neighbors': 14}
Mean Test Score:  0.7135829426069221
Rank:  6
Parameters:  {'n_neighbors': 18}
Mean Test Score:  0.7229515769228616
Rank:  1
Parameters:  {'n_neighbors': 20}
Mean Test Score:  0.7191343059770035
Rank:  3
Parameters:  {'n_neighbors': 25}
Mean Test Score:  0.7194534237877378
Rank:  2
Parameters:  {'n_neighbors': 30}
Mean Test Score:  0.7162570990416441
Rank:  5
Parameters:  {'n_neighbors': 35}
Mean Test Score:  0.7163068530357837
Rank:  4
Parameters:  {'n_neighbors': 50}
Mean Test Score:  0.71021141268202
Rank:  7


In [9]:
kneighbors_mode = KNeighborsRegressor(n_neighbors=grid_search.best_params_['n_neighbors']).fit(x_train, y_train)
y_pred = kneighbors_mode.predict(x_test)
print('Training Score: ',kneighbors_mode.score(x_train, y_train))
print('Test score: ', r2_score(y_test, y_pred))

Training Score:  0.7414146167822651
Test score:  0.6577055073081235


In [10]:
parameters = {'max_depth':[1,2,3,4,5,6,7,8]}
grid_search = GridSearchCV(DecisionTreeRegressor(), parameters, cv=3, return_train_score=True)
grid_search.fit(x_train, y_train)
print(grid_search.best_params_)
print()
for i in range(len(parameters['max_depth'])):
    print('Parameters: ',grid_search.cv_results_['params'][i])
    print('Mean Test Score: ', grid_search.cv_results_['mean_test_score'][i])
    print('Rank: ',grid_search.cv_results_['rank_test_score'][i])
decisiontree_model = DecisionTreeRegressor(max_depth=grid_search.best_params_['max_depth']).fit(x_train, y_train)
y_pred = decisiontree_model.predict(x_test)
print()
print('Training Score: ',kneighbors_mode.score(x_train, y_train))
print('Test score: ', r2_score(y_test, y_pred))

{'max_depth': 3}

Parameters:  {'max_depth': 1}
Mean Test Score:  0.5414404628948626
Rank:  8
Parameters:  {'max_depth': 2}
Mean Test Score:  0.6835368160432157
Rank:  2
Parameters:  {'max_depth': 3}
Mean Test Score:  0.7061510903493284
Rank:  1
Parameters:  {'max_depth': 4}
Mean Test Score:  0.6629506835355637
Rank:  3
Parameters:  {'max_depth': 5}
Mean Test Score:  0.6509829541435367
Rank:  4
Parameters:  {'max_depth': 6}
Mean Test Score:  0.6264782139831313
Rank:  5
Parameters:  {'max_depth': 7}
Mean Test Score:  0.6256292228526943
Rank:  6
Parameters:  {'max_depth': 8}
Mean Test Score:  0.6053689472492119
Rank:  7

Training Score:  0.7414146167822651
Test score:  0.6577055073081235


In [11]:
## multiple hyper parameters
parameters = {'epsilon':[.05,.1,.2,.3],
             'C':[.2,.3]}
grid_search = GridSearchCV(SVR(kernel='linear'),parameters, cv=3,return_train_score=True)
grid_search.fit(x_train, y_train)
grid_search.best_params_

{'C': 0.3, 'epsilon': 0.05}

In [14]:
svr_model = SVR(kernel='linear',epsilon=grid_search.best_params_['epsilon'],C=grid_search.best_params_['C']).fit(x_train,y_train)
y_pred = svr_model.predict(x_test)
print()
print('Training Score: ',svr_model.score(x_train, y_train))
print('Test score: ', r2_score(y_test, y_pred))


Training Score:  0.7112242850530315
Test score:  0.6635248547412111
