In [1]:
import sklearn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

In [2]:
from sklearn.linear_model import Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR

from sklearn.model_selection import GridSearchCV

import warnings
warnings.filterwarnings('ignore')

In [3]:
automobile_df = pd.read_csv('auto-mpg-processed.csv')

automobile_df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,age
0,18.0,6,250.0,78,3574,21.0,44
1,19.0,6,232.0,100,2634,13.0,49
2,39.0,4,86.0,64,1875,16.4,39
3,29.0,4,68.0,49,1867,19.5,47
4,34.7,4,105.0,63,2215,14.9,39


In [4]:
X= automobile_df.drop(['mpg', 'age'], axis=1)

y= automobile_df['mpg']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [5]:
parameters = {'alpha': [0.2, 0.4, 0.6, 0.7, 0.8, 0.9, 1.0]}

grid_search = GridSearchCV(Lasso(), param_grid=parameters, cv=3, return_train_score=True)
grid_search.fit(X_train, y_train)

grid_search.best_params_

{'alpha': 1.0}

In [6]:
for i in range(len(parameters['alpha'])):
    print 'Parameters: ', grid_search.cv_results_['params'][i]
    print 'Mean Test Score: ', grid_search.cv_results_['mean_test_score'][i]
    print 'Rank: ', grid_search.cv_results_['rank_test_score'][i]

Parameters:  {'alpha': 0.2}
Mean Test Score:  0.7108691216839828
Rank:  7
Parameters:  {'alpha': 0.4}
Mean Test Score:  0.7119922384822591
Rank:  6
Parameters:  {'alpha': 0.6}
Mean Test Score:  0.7122773587785729
Rank:  5
Parameters:  {'alpha': 0.7}
Mean Test Score:  0.7123196013195684
Rank:  4
Parameters:  {'alpha': 0.8}
Mean Test Score:  0.7123615476148722
Rank:  3
Parameters:  {'alpha': 0.9}
Mean Test Score:  0.7124018006963451
Rank:  2
Parameters:  {'alpha': 1.0}
Mean Test Score:  0.7124409211912549
Rank:  1


In [7]:
lasso_model = Lasso(alpha=grid_search.best_params_['alpha']).fit(X_train, y_train)

In [8]:
y_pred = lasso_model.predict(X_test)

In [9]:
print 'Training score: ', lasso_model.score(X_train, y_train)
print 'Testing score: ', r2_score(y_test, y_pred)

Training score:  0.7209633240239071
Testing score:  0.651502731384682


In [10]:
parameters = {'n_neighbors': [10, 12, 14, 18, 20, 25, 30, 35, 50 ]}

grid_search = GridSearchCV(KNeighborsRegressor(), param_grid=parameters, cv=3, return_train_score=True)
grid_search.fit(X_train, y_train)

grid_search.best_params_

{'n_neighbors': 25}

In [11]:
for i in range(len(parameters['n_neighbors'])):
    print 'Parameters: ', grid_search.cv_results_['params'][i]
    print 'Mean Test Score: ', grid_search.cv_results_['mean_test_score'][i]
    print 'Rank: ', grid_search.cv_results_['rank_test_score'][i]

Parameters:  {'n_neighbors': 10}
Mean Test Score:  0.7210378137872826
Rank:  7
Parameters:  {'n_neighbors': 12}
Mean Test Score:  0.7187727349563451
Rank:  9
Parameters:  {'n_neighbors': 14}
Mean Test Score:  0.7194246151063237
Rank:  8
Parameters:  {'n_neighbors': 18}
Mean Test Score:  0.722070467521665
Rank:  6
Parameters:  {'n_neighbors': 20}
Mean Test Score:  0.7289955230909436
Rank:  4
Parameters:  {'n_neighbors': 25}
Mean Test Score:  0.7381336114364118
Rank:  1
Parameters:  {'n_neighbors': 30}
Mean Test Score:  0.7363030609356042
Rank:  2
Parameters:  {'n_neighbors': 35}
Mean Test Score:  0.7328513302763892
Rank:  3
Parameters:  {'n_neighbors': 50}
Mean Test Score:  0.7254223075041454
Rank:  5


In [12]:
kneighbors_model = KNeighborsRegressor(n_neighbors=grid_search.best_params_['n_neighbors']).fit(X_train, y_train)

In [13]:
y_pred = kneighbors_model.predict(X_test)

print 'Training score: ', kneighbors_model.score(X_train, y_train)
print 'Testing score: ', r2_score(y_test, y_pred)

Training score:  0.7441213016817443
Testing score:  0.6449341612045287


In [14]:
parameters = {'max_depth': [1, 2, 3, 4, 5, 7, 8]}

grid_search = GridSearchCV(DecisionTreeRegressor(), param_grid=parameters, cv=3, return_train_score=True)
grid_search.fit(X_train, y_train)

grid_search.best_params_

{'max_depth': 4}

In [15]:
parameters = {'epsilon': [0.05, 0.1, 0.2, 0.3], 'C': [0.2, 0.3]}

grid_search = GridSearchCV(SVR(kernel='linear'), param_grid=parameters, cv=3, return_train_score=True)
grid_search.fit(X_train, y_train)

grid_search.best_params_

{'C': 0.2, 'epsilon': 0.2}

In [16]:
svr_model = SVR(kernel='linear', epsilon=grid_search.best_params_['epsilon'],
              C=grid_search.best_params_['C'] ).fit(X_train, y_train)

In [17]:
y_pred = svr_model.predict(X_test)

print 'Training score: ', svr_model.score(X_train, y_train)
print 'Testing score: ', r2_score(y_test, y_pred)

Training score:  0.714352934435531
Testing score:  0.6527742311322797
