In [6]:
# importing python libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# importing model selection, metrics and preprocessing libs
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

# importing the regression models used for training
from sklearn.linear_model import Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR

# importing GridSearchCV
from sklearn.model_selection import GridSearchCV

# importing warnings and silencing every warning for the rest of the notebook
import warnings
warnings.filterwarnings('ignore')

In [8]:
# read the pre-processed automobile dataset and preview its first rows
automobile_csv = 'datasets/auto-mgp-processed.csv'
automobile_df = pd.read_csv(automobile_csv)
automobile_df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,age
0,29.0,4,98.0,83,2219,16.5,45
1,30.0,4,88.0,76,2065,14.5,48
2,12.0,8,455.0,225,4951,11.0,46
3,20.0,4,130.0,102,3150,15.7,43
4,19.0,6,232.0,100,2634,13.0,48


In [10]:
# Build the feature matrix X and the target Y, then hold out 20% of the rows for testing.
# 'mpg' is the target; 'age' is also excluded from the features.
# NOTE(review): the preview of automobile_df lists an 'Unnamed: 0' column. If that is a
# row index saved into the CSV (rather than a display artifact) it is being used as a
# feature here -- confirm and drop it if so.
X = automobile_df.drop(['mpg', 'age'], axis=1)
Y = automobile_df['mpg']

# A fixed random_state pins the shuffle, so the split -- and every score computed from
# it further down -- is reproducible across "Restart Kernel -> Run All".
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [11]:
# candidate regularisation strengths (alpha) to try for the Lasso model
parameters = {'alpha': [0.2, 0.4, 0.6, 0.8, 1.0]}

# 3-fold cross-validated search over the grid, fitted on the training split
# (fit returns the search object itself, so it can be chained onto the constructor)
grid_search = GridSearchCV(
    estimator=Lasso(),
    param_grid=parameters,
    cv=3,
    return_train_score=True,
).fit(x_train, y_train)

grid_search.best_params_

{'alpha': 1.0}

In [12]:
# report the mean cross-validation score and its rank for every alpha candidate
cv_results = grid_search.cv_results_
for params, mean_score, rank in zip(cv_results['params'],
                                    cv_results['mean_test_score'],
                                    cv_results['rank_test_score']):
    print('Parameters :', params)
    print('Mean Test Score :', mean_score)
    print('Rank Test Score:', rank)

Parameters : {'alpha': 0.2}
Mean Test Score : 0.6732741810871253
Rank Test Score: 5
Parameters : {'alpha': 0.4}
Mean Test Score : 0.6746188276918939
Rank Test Score: 4
Parameters : {'alpha': 0.6}
Mean Test Score : 0.6753424519029674
Rank Test Score: 3
Parameters : {'alpha': 0.8}
Mean Test Score : 0.6758361451742232
Rank Test Score: 2
Parameters : {'alpha': 1.0}
Mean Test Score : 0.6759183525863396
Rank Test Score: 1


In [13]:
# fit a Lasso on the whole training split using the alpha picked by the grid search
best_alpha = grid_search.best_params_['alpha']
lasso_model = Lasso(alpha=best_alpha)
lasso_model.fit(x_train, y_train)

Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [14]:
# score the Lasso model on the training split and on the held-out test split
y_pred = lasso_model.predict(x_test)
train_score = lasso_model.score(x_train, y_train)
test_score = r2_score(y_test, y_pred)
print('Training Score :', train_score)
print('Test Score: ', test_score)

Training Score : 0.6890114894290853
Test Score:  0.772307344733106


In [19]:
# hyperparameter tuning for KNeighborsRegressor: candidate neighbourhood sizes
# NOTE(review): in the cells visible here x_train is passed in unscaled although
# StandardScaler is imported; nearest-neighbour distances depend on feature scale --
# consider scaling before this search.
parameters = {'n_neighbors': [10, 12, 14, 18, 20, 25, 30, 35, 50]}

# 3-fold cross-validated search over the grid, fitted on the training split
grid_search = GridSearchCV(
    estimator=KNeighborsRegressor(),
    param_grid=parameters,
    cv=3,
    return_train_score=True,
).fit(x_train, y_train)

grid_search.best_params_

{'n_neighbors': 25}

In [20]:
# report the mean cross-validation score and its rank for every n_neighbors candidate
cv_results = grid_search.cv_results_
for params, mean_score, rank in zip(cv_results['params'],
                                    cv_results['mean_test_score'],
                                    cv_results['rank_test_score']):
    print('Parameters :', params)
    print('Mean Test Score :', mean_score)
    print('Rank Test Score:', rank)

Parameters : {'n_neighbors': 10}
Mean Test Score : 0.6842393573776687
Rank Test Score: 5
Parameters : {'n_neighbors': 12}
Mean Test Score : 0.6807448647865456
Rank Test Score: 8
Parameters : {'n_neighbors': 14}
Mean Test Score : 0.671961361052906
Rank Test Score: 9
Parameters : {'n_neighbors': 18}
Mean Test Score : 0.681241922718569
Rank Test Score: 7
Parameters : {'n_neighbors': 20}
Mean Test Score : 0.6851741635670614
Rank Test Score: 4
Parameters : {'n_neighbors': 25}
Mean Test Score : 0.6923198378783147
Rank Test Score: 1
Parameters : {'n_neighbors': 30}
Mean Test Score : 0.6902476215584789
Rank Test Score: 2
Parameters : {'n_neighbors': 35}
Mean Test Score : 0.6883173657885497
Rank Test Score: 3
Parameters : {'n_neighbors': 50}
Mean Test Score : 0.6812556024982587
Rank Test Score: 6


In [23]:
# fit a KNeighborsRegressor on the training split with the tuned neighbourhood size
best_n_neighbors = grid_search.best_params_['n_neighbors']
kneighbors_model = KNeighborsRegressor(n_neighbors=best_n_neighbors)
kneighbors_model.fit(x_train, y_train)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=None, n_neighbors=25, p=2,
          weights='uniform')

In [24]:
# score the KNeighbors model on the training split and on the held-out test split
y_pred = kneighbors_model.predict(x_test)
train_score = kneighbors_model.score(x_train, y_train)
test_score = r2_score(y_test, y_pred)
print('Training Score :', train_score)
print('Test Score: ', test_score)

Training Score : 0.715467493979026
Test Score:  0.7566864822386881


In [25]:
# hyperparameter tuning for DecisionTreeRegressor: candidate tree depths 1..8
parameters = {'max_depth': [1, 2, 3, 4, 5, 6, 7, 8]}

# 3-fold cross-validated search over the grid, fitted on the training split
grid_search = GridSearchCV(
    estimator=DecisionTreeRegressor(),
    param_grid=parameters,
    cv=3,
    return_train_score=True,
).fit(x_train, y_train)

grid_search.best_params_

{'max_depth': 2}

In [26]:
# report the mean cross-validation score and its rank for every max_depth candidate
cv_results = grid_search.cv_results_
for params, mean_score, rank in zip(cv_results['params'],
                                    cv_results['mean_test_score'],
                                    cv_results['rank_test_score']):
    print('Parameters :', params)
    print('Mean Test Score :', mean_score)
    print('Rank Test Score:', rank)

Parameters : {'max_depth': 1}
Mean Test Score : 0.523863303455837
Rank Test Score: 8
Parameters : {'max_depth': 2}
Mean Test Score : 0.674120549832129
Rank Test Score: 1
Parameters : {'max_depth': 3}
Mean Test Score : 0.6736212910253409
Rank Test Score: 2
Parameters : {'max_depth': 4}
Mean Test Score : 0.6525172921683343
Rank Test Score: 3
Parameters : {'max_depth': 5}
Mean Test Score : 0.6301241051580287
Rank Test Score: 4
Parameters : {'max_depth': 6}
Mean Test Score : 0.583614697834583
Rank Test Score: 5
Parameters : {'max_depth': 7}
Mean Test Score : 0.5609137907931278
Rank Test Score: 7
Parameters : {'max_depth': 8}
Mean Test Score : 0.5705077786215786
Rank Test Score: 6


In [27]:
# fit a DecisionTreeRegressor on the training split with the tuned maximum depth
best_max_depth = grid_search.best_params_['max_depth']
decisiontree_model = DecisionTreeRegressor(max_depth=best_max_depth)
decisiontree_model.fit(x_train, y_train)

DecisionTreeRegressor(criterion='mse', max_depth=2, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best')

In [28]:
# score the decision tree on the training split and on the held-out test split
y_pred = decisiontree_model.predict(x_test)
train_score = decisiontree_model.score(x_train, y_train)
test_score = r2_score(y_test, y_pred)
print('Training Score :', train_score)
print('Test Score: ', test_score)

Training Score : 0.7242240415033101
Test Score:  0.7482514861279168


In [29]:
# hyperparameter tuning for a linear-kernel SVR over two parameters at once:
# every combination of epsilon and C in the grid is evaluated
parameters = {'epsilon': [0.05, 0.1, 0.2, 0.3], 'C': [0.2, 0.3]}

# 3-fold cross-validated search over the grid, fitted on the training split
grid_search = GridSearchCV(
    estimator=SVR(kernel='linear'),
    param_grid=parameters,
    cv=3,
    return_train_score=True,
).fit(x_train, y_train)

grid_search.best_params_

{'C': 0.3, 'epsilon': 0.3}

In [30]:
# fit a linear-kernel SVR on the training split with the tuned C and epsilon
best_svr_params = grid_search.best_params_
svr_model = SVR(
    kernel='linear',
    C=best_svr_params['C'],
    epsilon=best_svr_params['epsilon'],
)
svr_model.fit(x_train, y_train)

SVR(C=0.3, cache_size=200, coef0=0.0, degree=3, epsilon=0.3,
  gamma='auto_deprecated', kernel='linear', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False)

In [31]:
# score the SVR model on the training split and on the held-out test split
y_pred = svr_model.predict(x_test)
train_score = svr_model.score(x_train, y_train)
test_score = r2_score(y_test, y_pred)
print('Training Score :', train_score)
print('Test Score: ', test_score)

Training Score : 0.6738472921689758
Test Score:  0.7618596878455268
