In [107]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [108]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [109]:
from sklearn.linear_model import Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

In [110]:
from sklearn.model_selection import GridSearchCV

In [111]:
import warnings
warnings.filterwarnings("ignore")

In [112]:
automobile_df = pd.read_csv('automobile_df_processed.csv')
automobile_df.sample(5)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,age
272,30.0,4,135.0,84.0,2385,12.9,41
143,44.0,4,97.0,52.0,2130,24.6,40
313,27.0,4,151.0,90.0,2950,17.3,40
244,18.0,6,258.0,110.0,2962,13.5,51
32,25.4,6,168.0,116.0,2900,12.6,41


In [113]:
X = automobile_df.drop(['mpg', 'age'], axis=1)
y = automobile_df['mpg']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [114]:
parameters = {
    'alpha': [.2, .4, .6, .7, .8, .9, 1]
}

In [115]:
grid_search = GridSearchCV(Lasso(), parameters, cv=3, return_train_score=True).fit(X_train, y_train)
grid_search.best_params_

{'alpha': 0.8}

In [116]:
for i in range(len(parameters['alpha'])):
    print(f'alpha: {parameters["alpha"][i]}, score: {grid_search.cv_results_["mean_test_score"][i]}')

alpha: 0.2, score: 0.6829000069154191
alpha: 0.4, score: 0.6834662937462133
alpha: 0.6, score: 0.6838165435402589
alpha: 0.7, score: 0.6838497799406204
alpha: 0.8, score: 0.6838720858428512
alpha: 0.9, score: 0.6838548592900872
alpha: 1, score: 0.6838386465386757


In [117]:
lasso_model = Lasso(alpha=grid_search.best_params_['alpha']).fit(X_train, y_train)

In [118]:
y_pred = lasso_model.predict(X_test)
print('Training R2 score:', r2_score(y_train, lasso_model.predict(X_train)))
print('Testing R2 score:', r2_score(y_test, y_pred))

Training R2 score: 0.6905437793421874
Testing R2 score: 0.7660768817339573


In [119]:
parameters = {'n_neighbors': [10, 12, 14, 18, 20, 25, 30, 35, 50]}

In [121]:
grid_search = GridSearchCV(KNeighborsRegressor(), parameters, cv=3, return_train_score=True).fit(X_train, y_train)
grid_search.best_params_

{'n_neighbors': 30}

In [123]:
for i in range(len(parameters['n_neighbors'])):
    print('Parameters: ', grid_search.cv_results_['params'][i])
    print('Mean test score: ', grid_search.cv_results_['mean_test_score'][i])
    print('Rank: ', grid_search.cv_results_['rank_test_score'][i])

Parameters:  {'n_neighbors': 10}
Mean test score:  0.6652505648081778
Rank:  9
Parameters:  {'n_neighbors': 12}
Mean test score:  0.6709897072639865
Rank:  8
Parameters:  {'n_neighbors': 14}
Mean test score:  0.6726067458073227
Rank:  6
Parameters:  {'n_neighbors': 18}
Mean test score:  0.6713845069125935
Rank:  7
Parameters:  {'n_neighbors': 20}
Mean test score:  0.6765679421887864
Rank:  5
Parameters:  {'n_neighbors': 25}
Mean test score:  0.6848705213372028
Rank:  2
Parameters:  {'n_neighbors': 30}
Mean test score:  0.6869995027175769
Rank:  1
Parameters:  {'n_neighbors': 35}
Mean test score:  0.682831490532713
Rank:  4
Parameters:  {'n_neighbors': 50}
Mean test score:  0.6842947155589099
Rank:  3


In [124]:
knn_model = KNeighborsRegressor(n_neighbors=grid_search.best_params_['n_neighbors']).fit(X_train, y_train)

In [125]:
y_pred = knn_model.predict(X_test)
print('Training R2 score:', r2_score(y_train, knn_model.predict(X_train)))
print('Testing R2 score:', r2_score(y_test, y_pred))

Training R2 score: 0.7162648690947989
Testing R2 score: 0.7530339998236419


In [126]:
parameters = { 'max_depth': [1,2,3,4,5,7,8]}

In [127]:
grid_search = GridSearchCV(DecisionTreeRegressor(), parameters, cv=3, return_train_score=True).fit(X_train, y_train)
grid_search.best_params_

{'max_depth': 3}

In [128]:
for i in range(len(parameters['max_depth'])):
    print('Parameters: ', grid_search.cv_results_['params'][i])
    print('Mean test score: ', grid_search.cv_results_['mean_test_score'][i])
    print('Rank: ', grid_search.cv_results_['rank_test_score'][i])

Parameters:  {'max_depth': 1}
Mean test score:  0.5269243336839583
Rank:  5
Parameters:  {'max_depth': 2}
Mean test score:  0.6224277039596774
Rank:  2
Parameters:  {'max_depth': 3}
Mean test score:  0.6558189884948061
Rank:  1
Parameters:  {'max_depth': 4}
Mean test score:  0.6079748325658512
Rank:  3
Parameters:  {'max_depth': 5}
Mean test score:  0.5505079970295627
Rank:  4
Parameters:  {'max_depth': 7}
Mean test score:  0.4921936066313095
Rank:  7
Parameters:  {'max_depth': 8}
Mean test score:  0.5016115524970909
Rank:  6


In [129]:
tree_model = DecisionTreeRegressor(max_depth=grid_search.best_params_['max_depth']).fit(X_train, y_train)

In [130]:
y_pred = tree_model.predict(X_test)
print('Training R2 score:', r2_score(y_train, tree_model.predict(X_train)))
print('Testing R2 score:', r2_score(y_test, y_pred))

Training R2 score: 0.7620599227703615
Testing R2 score: 0.6987932524162004


In [131]:
parameters = {'epsilon': [.05, .1, .2, .3], 'C': [.2,.3]}

In [132]:
grid_search = GridSearchCV(SVR(kernel='linear'), parameters, cv=3, return_train_score=True).fit(X_train, y_train)
grid_search.best_params_

{'C': 0.3, 'epsilon': 0.2}

In [133]:
svr_model = SVR(epsilon=grid_search.best_params_['epsilon'], C=grid_search.best_params_['C']).fit(X_train, y_train)

In [134]:
y_pred = svr_model.predict(X_test)
print('Training R2 score:', r2_score(y_train, svr_model.predict(X_train)))
print('Testing R2 score:', r2_score(y_test, y_pred))

Training R2 score: 0.6070896059705699
Testing R2 score: 0.6747628025135349
