In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [6]:
# Load seaborn's "mpg" example dataset into the working frame used by every later cell.
df = sns.load_dataset(name='mpg')

In [4]:
# Preview the first five rows to sanity-check the columns that were loaded.
df.head(n=5)


Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320
2,18.0,8,318.0,150.0,3436,11.0,70,usa,plymouth satellite
3,16.0,8,304.0,150.0,3433,12.0,70,usa,amc rebel sst
4,17.0,8,302.0,140.0,3449,10.5,70,usa,ford torino


In [5]:
# Count missing values per column (isnull is the pandas alias of isna).
df.isnull().sum()

mpg             0
cylinders       0
displacement    0
horsepower      6
weight          0
acceleration    0
model_year      0
origin          0
name            0
dtype: int64

In [8]:
# Median of the observed horsepower values; used below to fill the missing entries.
median_hp = df['horsepower'].median()

In [9]:
# Fill the missing horsepower values with the median computed above.
# The result is assigned back to the column rather than calling
# `df.horsepower.fillna(..., inplace=True)`: that form is chained assignment
# through an intermediate Series, which pandas warns about and which does not
# update `df` when Copy-on-Write is active.
df['horsepower'] = df['horsepower'].fillna(median_hp)

In [10]:
# Re-inspect dtypes and non-null counts after the horsepower fill above.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           398 non-null    float64
 1   cylinders     398 non-null    int64  
 2   displacement  398 non-null    float64
 3   horsepower    398 non-null    float64
 4   weight        398 non-null    int64  
 5   acceleration  398 non-null    float64
 6   model_year    398 non-null    int64  
 7   origin        398 non-null    object 
 8   name          398 non-null    object 
dtypes: float64(4), int64(3), object(2)
memory usage: 28.1+ KB


In [12]:
# Frequency of each origin label, to see which categories need encoding.
df['origin'].value_counts()

origin
usa       249
japan      79
europe     70
Name: count, dtype: int64

In [13]:
# Encode the origin labels as integers so the regressors below can consume them.
# Values that are not keys of the mapping (including the integer codes left by a
# previous run of this cell) pass through unchanged, so re-executing the cell is
# safe; a plain `.map(dict)` would turn every already-encoded value into NaN.
# NOTE(review): integer codes impose an ordering (usa < japan < europe) on a
# nominal feature; consider one-hot encoding if that ordering is not intended.
origin_codes = {'usa': 1, 'japan': 2, 'europe': 3}
df['origin'] = df['origin'].map(lambda label: origin_codes.get(label, label))

In [23]:
# Target is fuel efficiency; features are every remaining column except the
# free-text car name.
y = df['mpg']
x = df.drop(columns=['mpg', 'name'])

In [24]:
# Display the feature matrix (the rendered output elides the middle rows).
x

Unnamed: 0,cylinders,displacement,horsepower,weight,acceleration,model_year,origin
0,8,307.0,130.0,3504,12.0,70,1
1,8,350.0,165.0,3693,11.5,70,1
2,8,318.0,150.0,3436,11.0,70,1
3,8,304.0,150.0,3433,12.0,70,1
4,8,302.0,140.0,3449,10.5,70,1
...,...,...,...,...,...,...,...
393,4,140.0,86.0,2790,15.6,82,1
394,4,97.0,52.0,2130,24.6,82,3
395,4,135.0,84.0,2295,11.6,82,1
396,4,120.0,79.0,2625,18.6,82,1


In [25]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [26]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares baseline: fit on the training split, then predict the
# held-out rows. `fit` returns the estimator itself, so the calls can be chained.
model = LinearRegression().fit(x_train, y_train)
y_predict = model.predict(x_test)

In [28]:
# Report the fitted weight of each feature, pairing every training column with
# the coefficient at the same position (spelling of "coefficient" corrected).
for col, coef in zip(x_train.columns, model.coef_):
    print(f"The coefficient for {col}: {coef}")

The coeffiecient for cylinders: -0.06625946740554023
The coeffiecient for displacement: 0.01611963874123594
The coeffiecient for horsepower: -0.007760521644907005
The coeffiecient for weight: -0.007295332409965163
The coeffiecient for acceleration: 0.07304816108258906
The coeffiecient for model_year: 0.8405100228809241
The coeffiecient for origin: 1.4770135843720182


In [29]:
from sklearn.metrics import r2_score

# Coefficient of determination of the current predictions on the held-out split.
r2_score(y_true=y_test, y_pred=y_predict)

0.8347876703359955

In [36]:
from sklearn.linear_model import Ridge

# Ridge regression with the estimator's default settings.
# NOTE(review): the features are not standardised before fitting, so the
# penalty does not weigh them evenly; confirm this is intended.
model = Ridge().fit(x_train, y_train)
y_predict = model.predict(x_test)

# One line per feature, then the R^2 score on the held-out split.
for feature, weight in zip(x_train.columns, model.coef_):
    print(f'The coefficient for {feature} : {weight}')
print(f' R square : {r2_score(y_test, y_predict)}')

The coefficient for cylinders : -0.0643567415488252
The coefficient for displacement : 0.015973625123631287
The coefficient for horsepower : -0.007683067651063425
The coefficient for weight : -0.0072923664111001025
The coefficient for acceleration : 0.07301027135248557
The coefficient for model_year : 0.8398988786491295
The coefficient for origin : 1.4636454389986078
 R square : 0.8348383125969713


In [37]:
from sklearn.linear_model import Lasso

# Lasso regression with the estimator's default settings.
# NOTE(review): the features are not standardised before fitting, so which
# coefficients get zeroed depends on each column's scale; confirm this is intended.
model = Lasso().fit(x_train, y_train)
y_predict = model.predict(x_test)

# One line per feature, then the R^2 score on the held-out split.
for feature, weight in zip(x_train.columns, model.coef_):
    print(f'The coefficient for {feature} : {weight}')
print(f' R square : {r2_score(y_test, y_predict)}')

The coefficient for cylinders : 0.0
The coefficient for displacement : -0.0
The coefficient for horsepower : -0.005323587044268719
The coefficient for weight : -0.006658241104185039
The coefficient for acceleration : 0.0
The coefficient for model_year : 0.7041710018190526
The coefficient for origin : 0.0
 R square : 0.8304848927901434


In [38]:
from sklearn.linear_model import LassoCV

# Lasso whose regularisation strength is chosen by cross-validation on the
# training split, using the estimator's default settings.
# NOTE(review): the features are not standardised before fitting; confirm this is intended.
model = LassoCV().fit(x_train, y_train)
y_predict = model.predict(x_test)

# One line per feature, then the R^2 score on the held-out split.
for feature, weight in zip(x_train.columns, model.coef_):
    print(f'The coefficient for {feature} : {weight}')
print(f' R square : {r2_score(y_test, y_predict)}')

The coefficient for cylinders : -0.0
The coefficient for displacement : -0.004878452289380888
The coefficient for horsepower : -0.012537453254288638
The coefficient for weight : -0.006378748739974589
The coefficient for acceleration : 0.0
The coefficient for model_year : 0.2916786882824399
The coefficient for origin : 0.0
 R square : 0.8022673656257144


In [39]:
from sklearn.linear_model import ElasticNet

# Elastic-net regression with the estimator's default settings.
# NOTE(review): the features are not standardised before fitting, so the
# penalty does not weigh them evenly; confirm this is intended.
model = ElasticNet().fit(x_train, y_train)
y_predict = model.predict(x_test)

# One line per feature, then the R^2 score on the held-out split.
for feature, weight in zip(x_train.columns, model.coef_):
    print(f'The coefficient for {feature} : {weight}')
print(f' R square : {r2_score(y_test, y_predict)}')

The coefficient for cylinders : 0.0
The coefficient for displacement : 0.0
The coefficient for horsepower : -0.005885183413553732
The coefficient for weight : -0.006622815285527577
The coefficient for acceleration : 0.0
The coefficient for model_year : 0.7152879783832007
The coefficient for origin : 0.0
 R square : 0.829796830531699


In [40]:
from sklearn.linear_model import ElasticNetCV

# Elastic net whose regularisation strength is chosen by cross-validation on
# the training split, using the estimator's default settings.
# NOTE(review): the features are not standardised before fitting; confirm this is intended.
model = ElasticNetCV().fit(x_train, y_train)
y_predict = model.predict(x_test)

# One line per feature, then the R^2 score on the held-out split.
for feature, weight in zip(x_train.columns, model.coef_):
    print(f'The coefficient for {feature} : {weight}')
print(f' R square : {r2_score(y_test, y_predict)}')

The coefficient for cylinders : -0.0
The coefficient for displacement : -0.0062300911859389865
The coefficient for horsepower : -0.01682584981290217
The coefficient for weight : -0.006199722159947016
The coefficient for acceleration : 0.0
The coefficient for model_year : 0.19022397684091
The coefficient for origin : 0.0
 R square : 0.7813405447325633
