In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor

from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the preprocessed auto-mpg data from the working directory into a DataFrame.
automobile_df = pd.read_csv(filepath_or_buffer='mpg-preprocessed.csv')

In [3]:
# Peek at five randomly chosen rows to sanity-check the loaded columns.
automobile_df.sample(n=5)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,age
287,17.0,8,304.0,150,3672,11.5,48
177,44.3,4,90.0,48,2085,21.7,40
175,29.0,4,98.0,83,2219,16.5,46
236,23.9,4,119.0,97,2405,14.9,42
7,29.0,4,90.0,70,1937,14.0,45


In [4]:
# Features: every column except the target ('mpg') and 'age'.
X = automobile_df.drop(columns=['mpg', 'age'])
# Target: fuel efficiency in the 'mpg' column.
Y = automobile_df['mpg']

# Hold out 20% of the rows for testing. The seed is fixed so the split is the
# same after a kernel restart; without it every downstream score changes on
# each run and the models cannot be compared.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [6]:
# Candidate regularisation strengths for the Lasso model.
parameters = {'alpha': [0.2, 0.4, 0.6, 0.8, 1.0]}

# 3-fold cross-validated search over the grid; train scores are retained in
# cv_results_ alongside the test scores.
grid_search = GridSearchCV(
    estimator=Lasso(),
    param_grid=parameters,
    cv=3,
    return_train_score=True,
)
grid_search.fit(X=x_train, y=y_train)

# Displayed as the cell output: the best-ranked parameter setting.
grid_search.best_params_


{'alpha': 0.4}

In [9]:
# Print the mean CV score, the parameter setting and the rank of every
# candidate evaluated by the most recent grid search.
for mean_score, candidate, rank in zip(
    grid_search.cv_results_['mean_test_score'],
    grid_search.cv_results_['params'],
    grid_search.cv_results_['rank_test_score'],
):
    print("Mean test scores", mean_score)
    print("Parameters", candidate)
    print("Ranks", rank)
    

Mean test scores 0.7129084153654058
Parameters {'alpha': 0.2}
Ranks 5
Mean test scores 0.7131189527552246
Parameters {'alpha': 0.4}
Ranks 1
Mean test scores 0.7130887203184711
Parameters {'alpha': 0.6}
Ranks 2
Mean test scores 0.7130469055524523
Parameters {'alpha': 0.8}
Ranks 3
Mean test scores 0.7130037469818973
Parameters {'alpha': 1.0}
Ranks 4


In [10]:
# Refit a Lasso on the full training split using the alpha chosen by the grid
# search. Relies on `grid_search` still holding the Lasso search results.
lasso_model = Lasso(
    alpha=grid_search.best_params_['alpha'],
).fit(X=x_train, y=y_train)

In [11]:
# Compare R^2 on the training split against R^2 on the held-out test split.
y_pred = lasso_model.predict(X=x_test)
print("Training Score", lasso_model.score(X=x_train, y=y_train))
print("testing score", r2_score(y_true=y_test, y_pred=y_pred))

Training Score 0.7266946228706452
testing score 0.6144232544825474


In [12]:
# Candidate neighbourhood sizes for the k-nearest-neighbours regressor.
parameters = {'n_neighbors': [10, 12, 14, 16, 18, 20, 25, 30, 35, 50]}

# NOTE(review): the features are passed in unscaled although StandardScaler is
# imported; large-magnitude columns such as weight will presumably dominate
# the neighbour distances -- consider scaling before the search.
grid_search = GridSearchCV(
    estimator=KNeighborsRegressor(),
    param_grid=parameters,
    cv=3,
    return_train_score=True,
)
grid_search.fit(X=x_train, y=y_train)
# Displayed as the cell output: the best-ranked parameter setting.
grid_search.best_params_

{'n_neighbors': 25}

In [15]:
# Print the mean CV score, the parameter setting and the rank of every
# candidate evaluated by the most recent grid search.
for mean_score, candidate, rank in zip(
    grid_search.cv_results_['mean_test_score'],
    grid_search.cv_results_['params'],
    grid_search.cv_results_['rank_test_score'],
):
    print("Mean test scores", mean_score)
    print("Parameters", candidate)
    print("Ranks", rank)
    

Mean test scores 0.738073195228544
Parameters {'n_neighbors': 10}
Ranks 9
Mean test scores 0.739892413726845
Parameters {'n_neighbors': 12}
Ranks 7
Mean test scores 0.7398474807245892
Parameters {'n_neighbors': 14}
Ranks 8
Mean test scores 0.7400513297806673
Parameters {'n_neighbors': 16}
Ranks 6
Mean test scores 0.7449503417840564
Parameters {'n_neighbors': 18}
Ranks 2
Mean test scores 0.7432794972253793
Parameters {'n_neighbors': 20}
Ranks 4
Mean test scores 0.7465772354170696
Parameters {'n_neighbors': 25}
Ranks 1
Mean test scores 0.7444639870737993
Parameters {'n_neighbors': 30}
Ranks 3
Mean test scores 0.7412205232134076
Parameters {'n_neighbors': 35}
Ranks 5
Mean test scores 0.7287004510345815
Parameters {'n_neighbors': 50}
Ranks 10


In [16]:
# Refit a KNN regressor on the full training split using the neighbour count
# chosen by the grid search. Relies on `grid_search` still holding the KNN
# search results.
kneighbos_model = KNeighborsRegressor(
    n_neighbors=grid_search.best_params_['n_neighbors'],
).fit(X=x_train, y=y_train)

In [17]:
# Compare R^2 on the training split against R^2 on the held-out test split.
y_pred = kneighbos_model.predict(X=x_test)

print("Training score", kneighbos_model.score(X=x_train, y=y_train))
print("testing score", r2_score(y_true=y_test, y_pred=y_pred))

Training score 0.761808555932363
testing score 0.590540894809267


In [18]:
# Candidate maximum depths for the decision-tree regressor.
parameters = {'max_depth': [1, 2, 3, 4, 5, 6, 7, 8]}

# The tree is seeded because scikit-learn breaks ties between equally good
# splits at random; without a fixed random_state the fitted trees (and hence
# the selected depth) can differ from run to run on the same data.
grid_search = GridSearchCV(
    DecisionTreeRegressor(random_state=42),
    parameters,
    cv=3,
    return_train_score=True,
)
grid_search.fit(x_train, y_train)

# Displayed as the cell output: the best-ranked parameter setting.
grid_search.best_params_

{'max_depth': 2}