In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [3]:
df = sns.load_dataset('mpg')

In [4]:
df

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320
2,18.0,8,318.0,150.0,3436,11.0,70,usa,plymouth satellite
3,16.0,8,304.0,150.0,3433,12.0,70,usa,amc rebel sst
4,17.0,8,302.0,140.0,3449,10.5,70,usa,ford torino
...,...,...,...,...,...,...,...,...,...
393,27.0,4,140.0,86.0,2790,15.6,82,usa,ford mustang gl
394,44.0,4,97.0,52.0,2130,24.6,82,europe,vw pickup
395,32.0,4,135.0,84.0,2295,11.6,82,usa,dodge rampage
396,28.0,4,120.0,79.0,2625,18.6,82,usa,ford ranger


In [5]:
df.drop('name', axis=1, inplace=True)

In [6]:
df.isna().sum()

Unnamed: 0,0
mpg,0
cylinders,0
displacement,0
horsepower,6
weight,0
acceleration,0
model_year,0
origin,0


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           398 non-null    float64
 1   cylinders     398 non-null    int64  
 2   displacement  398 non-null    float64
 3   horsepower    392 non-null    float64
 4   weight        398 non-null    int64  
 5   acceleration  398 non-null    float64
 6   model_year    398 non-null    int64  
 7   origin        398 non-null    object 
dtypes: float64(4), int64(3), object(1)
memory usage: 25.0+ KB


In [8]:
df.dtypes

Unnamed: 0,0
mpg,float64
cylinders,int64
displacement,float64
horsepower,float64
weight,int64
acceleration,float64
model_year,int64
origin,object


In [9]:
df.shape

(398, 8)

In [10]:
df['horsepower'].median()

93.5

In [11]:
df['horsepower'] = df['horsepower'].fillna(df['horsepower'].median())

In [12]:
df.isna().sum()

Unnamed: 0,0
mpg,0
cylinders,0
displacement,0
horsepower,0
weight,0
acceleration,0
model_year,0
origin,0


In [13]:
df.origin.unique()

array(['usa', 'japan', 'europe'], dtype=object)

In [14]:
df['origin'].value_counts()

Unnamed: 0_level_0,count
origin,Unnamed: 1_level_1
usa,249
japan,79
europe,70


In [15]:
df['origin'] = df['origin'].map({'usa':1, 'japan':2, 'europe':3})

In [16]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           398 non-null    float64
 1   cylinders     398 non-null    int64  
 2   displacement  398 non-null    float64
 3   horsepower    398 non-null    float64
 4   weight        398 non-null    int64  
 5   acceleration  398 non-null    float64
 6   model_year    398 non-null    int64  
 7   origin        398 non-null    int64  
dtypes: float64(4), int64(4)
memory usage: 25.0 KB


In [17]:
X = df.drop('mpg', axis=1)
y = df['mpg']

In [18]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

In [19]:
X_train.shape

(278, 7)

In [20]:
X_test.shape

(120, 7)

In [21]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X_train, y_train)

In [22]:
model.coef_

array([-0.31761423,  0.02623748, -0.01827076, -0.00748775,  0.05040673,
        0.84709514,  1.51909584])

In [23]:
for i, col_name in enumerate(X_train.columns):
  print(f' {col_name}: {model.coef_[i]}')

 cylinders: -0.31761423027992997
 displacement: 0.02623748259907894
 horsepower: -0.018270764913124644
 weight: -0.007487750398361904
 acceleration: 0.050406734619713886
 model_year: 0.8470951427061371
 origin: 1.5190958387975042


In [24]:
y_pred = model.predict(X_test)

In [25]:
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

0.8348001123742284

In [26]:
from sklearn.linear_model import Ridge
ridge_model = Ridge(alpha=0.1)
ridge_model.fit(X_train, y_train)

In [27]:
for i, col_name in enumerate(X_train.columns):
  print(f' {col_name}: {ridge_model.coef_[i]}')

 cylinders: -0.31700321010067906
 displacement: 0.02621324975798342
 horsepower: -0.018263252481449534
 weight: -0.00748732605021309
 acceleration: 0.050368969474425776
 model_year: 0.8470062938903167
 origin: 1.5174528285653937


In [28]:
y_ridge_pred = ridge_model.predict(X_test)
r2_score(y_test, y_ridge_pred)

0.8348084889168355

In [29]:
from sklearn.linear_model import Lasso
lasso_model = Lasso(alpha=0.5)
lasso_model.fit(X_train, y_train)

In [30]:
for i, col_name in enumerate(X_train.columns):
  print(f' {col_name}: {lasso_model.coef_[i]}')

 cylinders: -0.0
 displacement: 0.006208198888300358
 horsepower: -0.011058382987169565
 weight: -0.0069826731680230885
 acceleration: 0.0
 model_year: 0.744654952003819
 origin: 0.0


In [31]:
y_lasso_pred = lasso_model.predict(X_test)
r2_score(y_test, y_lasso_pred)

0.8277934716635555

In [32]:
from sklearn.linear_model import ElasticNet
elastic_model = ElasticNet(alpha=1, l1_ratio=0.5)
elastic_model.fit(X_train, y_train)

In [33]:
for i, col_name in enumerate(X_train.columns):
  print(f' {col_name}: {elastic_model.coef_[i]}')

 cylinders: -0.0
 displacement: 0.005888869953667563
 horsepower: -0.012403874933570126
 weight: -0.006934550516257631
 acceleration: 0.0
 model_year: 0.7133150744603874
 origin: 0.0


In [34]:
y_elastic_pred = elastic_model.predict(X_test)
r2_score(y_test, y_elastic_pred)

0.8284840073256804

In [35]:
from sklearn.linear_model import LassoCV
lassocv_model = LassoCV(cv=5, verbose=2)
lassocv_model.fit(X_train, y_train)

Path: 000 out of 100
Path: 001 out of 100
Path: 002 out of 100
Path: 003 out of 100
Path: 004 out of 100
Path: 005 out of 100
Path: 006 out of 100
Path: 007 out of 100
Path: 008 out of 100
Path: 009 out of 100
Path: 010 out of 100
Path: 011 out of 100
Path: 012 out of 100
Path: 013 out of 100
Path: 014 out of 100
Path: 015 out of 100
Path: 016 out of 100
Path: 017 out of 100
Path: 018 out of 100
Path: 019 out of 100
Path: 020 out of 100
Path: 021 out of 100
Path: 022 out of 100
Path: 023 out of 100
Path: 024 out of 100
Path: 025 out of 100
Path: 026 out of 100
Path: 027 out of 100
Path: 028 out of 100
Path: 029 out of 100
Path: 030 out of 100
Path: 031 out of 100
Path: 032 out of 100
Path: 033 out of 100
Path: 034 out of 100
Path: 035 out of 100
Path: 036 out of 100
Path: 037 out of 100
Path: 038 out of 100
Path: 039 out of 100
Path: 040 out of 100
Path: 041 out of 100
Path: 042 out of 100
Path: 043 out of 100
Path: 044 out of 100
Path: 045 out of 100
Path: 046 out of 100
Path: 047 out

[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.3s finished


In [36]:
y_lassocv_pred = lassocv_model.predict(X_test)
r2_score(y_test, y_lassocv_pred)

0.8082805983844751

In [37]:
from sklearn.linear_model import RidgeCV
ridgecv_model = RidgeCV(cv=5)
ridgecv_model.fit(X_train, y_train)

In [38]:
y_ridgecv_pred = ridgecv_model.predict(X_test)
r2_score(y_test, y_ridgecv_pred)

0.8354145247502054

In [39]:
from sklearn.linear_model import ElasticNetCV
elasticcv_model = ElasticNetCV(cv=5)
elasticcv_model.fit(X_train, y_train)

In [40]:
y_elasticnetcv_pred = elasticcv_model.predict(X_test)
r2_score(y_test, y_elasticnetcv_pred)

0.792863401804916

In [1]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import Lasso, Ridge

In [41]:
lasso = Lasso()

In [42]:
param_grid = {'alpha' : [0.001, 0.01, 0.1, 1, 10, 100]}
param_grid

{'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}

In [43]:
grid_search = GridSearchCV(estimator=lasso, param_grid=param_grid, cv=5, scoring='r2', verbose=2)
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END ..........................................alpha=0.1; total time=   0.0s
[CV] END ........................................

In [44]:
grid_search.best_params_

{'alpha': 0.1}

In [45]:
grid_search.best_score_

np.float64(0.7964209726696481)

In [46]:
grid_search.best_estimator_

In [47]:
y_pred = grid_search.best_estimator_.predict(X_test)
r2_score(y_test, y_pred)

0.8345318641232303

In [50]:
random_search_cv = RandomizedSearchCV(estimator=lasso, param_distributions=param_grid, n_iter=3, cv=5, scoring='r2', verbose=2)
random_search_cv.fit(X_train, y_train)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ...........................................alpha=10; total time=   0.0s
[CV] END ...........................................alpha=10; total time=   0.0s
[CV] END ...........................................alpha=10; total time=   0.0s
[CV] END ...........................................alpha=10; total time=   0.0s
[CV] END ...........................................alpha=10; total time=   0.0s
[CV] END ............................................alpha=1; total time=   0.0s
[CV] END ........................................

In [51]:
random_search_cv.best_params_

{'alpha': 0.001}

In [52]:
random_search_cv.best_score_

np.float64(0.7940351886356238)

In [53]:
random_search_cv.best_estimator_

In [54]:
y_pred = random_search_cv.best_estimator_.predict(X_test)
r2_score(y_test, y_pred)

0.8348101865598601

In [56]:
ridge = Ridge()
param_grid = {'alpha' : [0.001, 0.01, 0.1, 1, 10, 100]}
ridge = GridSearchCV(estimator=ridge, param_grid=param_grid, cv=5, scoring='r2', verbose=2)
ridge.fit(X_train, y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END ..........................................alpha=0.1; total time=   0.0s
[CV] END ........................................

In [57]:
ridge.best_params_

{'alpha': 10}

In [58]:
ridge.best_score_

np.float64(0.794358984633479)

In [59]:
ridge.best_estimator_

In [60]:
y_pred = ridge.best_estimator_.predict(X_test)
r2_score(y_test, y_pred)

0.8354145247502054

In [61]:
ridge = Ridge()
param_grid = {'alpha' : [0.001, 0.01, 0.1, 1, 10, 100]}
random_cv = RandomizedSearchCV(estimator=ridge, param_distributions=param_grid, n_iter=3, cv=5, scoring='r2', verbose=2)
random_cv.fit(X_train, y_train)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ...........................................alpha=10; total time=   0.0s
[CV] END ...........................................alpha=10; total time=   0.0s
[CV] END ...........................................alpha=10; total time=   0.0s
[CV] END ...........................................alpha=10; total time=   0.0s
[CV] END ...........................................alpha=10; total time=   0.0s
[CV] END ............................................alpha=1; total time=   0.0s
[CV] END ........................................

In [62]:
random_cv.best_params_
random_cv.best_score_
random_cv.best_estimator_

In [63]:
y_pred = random_cv.best_estimator_.predict(X_test)
r2_score(y_test, y_pred)

0.8354145247502054

In [64]:
model = ElasticNet()
param_grid = {'alpha' : [0.001, 0.01, 0.1, 1, 10, 100],
             'l1_ratio': [0.1,0.4, 0.9]}
model = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='r2', verbose=2)
model.fit(X_train, y_train)

Fitting 5 folds for each of 18 candidates, totalling 90 fits
[CV] END ..........................alpha=0.001, l1_ratio=0.1; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.1; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.1; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.1; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.1; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.4; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.4; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.4; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.4; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.4; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.9; total time=   0.0s
[CV] END ..........................alpha=0.001, 

In [65]:
model.best_params_
model.best_score_
model.best_estimator_

In [66]:
y_pred = model.best_estimator_.predict(X_test)
r2_score(y_test, y_pred)

0.8345726087080976

In [67]:
model = ElasticNet()
param_grid = {'alpha' : [0.001, 0.01, 0.1, 1, 10, 100],
             'l1_ratio': [0.1,0.4, 0.9]}
random_model = RandomizedSearchCV(estimator=model, param_distributions=param_grid, n_iter=3, cv=5, scoring='r2', verbose=2)
random_model.fit(X_train, y_train)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
[CV] END ............................alpha=0.1, l1_ratio=0.1; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.1; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.1; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.1; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.1; total time=   0.0s
[CV] END .............................alpha=10, l1_ratio=0.4; total time=   0.0s
[CV] END .............................alpha=10, l1_ratio=0.4; total time=   0.0s
[CV] END .............................alpha=10, l1_ratio=0.4; total time=   0.0s
[CV] END .............................alpha=10, l1_ratio=0.4; total time=   0.0s
[CV] END .............................alpha=10, l1_ratio=0.4; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.4; total time=   0.0s
[CV] END ..........................alpha=0.001, l

In [68]:
random_model.best_params_
random_model.best_score_
random_model.best_estimator_

In [69]:
y_pred = random_model.best_estimator_.predict(X_test)
r2_score(y_test, y_pred)

0.8356976438132142