In [17]:
# %matplotlib inline
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.metrics import r2_score
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [18]:
# Load the data set. Missing entries are written as '?' in the file, so let
# read_csv turn them into NaN while parsing. Otherwise any column containing
# '?' is read as strings, and taking its median (as the original per-column
# lambda did) raises a TypeError on newer pandas releases.
mpg_df = pd.read_csv('mpg.csv', na_values='?')
# The car name is a free-text label and is not used as a feature.
mpg_df = mpg_df.drop('name', axis=1)
# Fill each numeric column's gaps with that column's median. This runs before
# the one-hot encoding so only genuinely numeric columns are involved.
mpg_df = mpg_df.fillna(mpg_df.median(numeric_only=True))
# Map the numeric origin codes to labels, then one-hot encode them.
mpg_df['origin'] = mpg_df['origin'].replace({1: 'america', 2: 'europe', 3: 'asia'})
mpg_df = pd.get_dummies(mpg_df, columns=['origin'])

In [19]:
# Separate the target from the predictors.
# Double brackets keep the target as a one-column DataFrame rather than a Series.
y = mpg_df[['mpg']]
x = mpg_df.drop(columns=['mpg'])

In [21]:
from sklearn import preprocessing

# Standardise predictors and target with preprocessing.scale, wrapping the
# returned arrays back into DataFrames so the column labels are retained.
# NOTE(review): scaling is computed over the full data set, ahead of the
# train/test split performed later — confirm this is intended.
x_scaled = pd.DataFrame(preprocessing.scale(x), columns=x.columns)
y_scaled = pd.DataFrame(preprocessing.scale(y), columns=y.columns)

In [23]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed random_state keeps the
# partition repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x_scaled,
    y_scaled,
    test_size=0.3,
    random_state=1,
)

In [26]:
# Fit ordinary least squares on the training split.
regression_model = LinearRegression()
regression_model.fit(x_train, y_train)

# y_train is a one-column DataFrame, so coef_ is 2-D; row 0 holds the
# coefficients for that single target. Print them next to their column names.
for col_name, coef in zip(x_train.columns, regression_model.coef_[0]):
    print(col_name, coef)

cylinders -0.08592264254448635
displacement 0.38615017668954277
horsepower -0.10637514644618914
weight -0.7965737428612103
acceleration 0.021846813318919522
model_year 0.39594105310149513
origin_america -0.0939989664489348
origin_asia 0.04491789013805135
origin_europe 0.07243059852959359


In [27]:
# intercept_ carries one entry per target column; with a single target it
# unpacks to exactly one value.
(intercept,) = regression_model.intercept_
print(intercept)

0.015510225561902374


In [30]:
# Ridge regression with penalty strength 0.3 on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
ridge = Ridge(alpha=0.3).fit(x_train, y_train)
print(ridge.coef_)

[[-0.0800581   0.36661042 -0.10890119 -0.78324655  0.01917898  0.39442138
  -0.0930884   0.04466769  0.07153523]]


In [31]:
# Lasso regression with penalty strength 0.1 on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
lasso = Lasso(alpha=0.1).fit(x_train, y_train)
print(lasso.coef_)

[-0.         -0.         -0.01464723 -0.60711757  0.          0.29460087
 -0.04017427  0.          0.        ]


In [32]:
# Report the linear model's score on the training split, then on the test split.
for features, target in ((x_train, y_train), (x_test, y_test)):
    print(regression_model.score(features, target))

0.8141025501610559
0.8433135132808832


In [33]:
# Report the ridge model's score on the training split, then on the test split.
ridge_train_score = ridge.score(x_train, y_train)
ridge_test_score = ridge.score(x_test, y_test)
print(ridge_train_score, ridge_test_score, sep='\n')

0.8140828080856514
0.8437999817350272


In [34]:
# Report the lasso model's score on the training split, then on the test split.
lasso_train_score = lasso.score(x_train, y_train)
lasso_test_score = lasso.score(x_test, y_test)
print(lasso_train_score, lasso_test_score, sep='\n')

0.7878910251573478
0.8315130533007058


In [35]:
from sklearn.preprocessing import PolynomialFeatures

In [36]:
# Degree-2 feature expansion restricted to interaction terms.
poly = PolynomialFeatures(interaction_only=True, degree=2)


In [46]:
# Expand the scaled predictors into interaction features, then re-split.
# The split names (x_train, x_test, y_train, y_test) are rebound here, so
# later cells operate on the expanded features.
# NOTE(review): the target passed here is the unscaled `y`, whereas the
# earlier split used `y_scaled` — confirm this is intended.
x_poly = poly.fit_transform(x_scaled)
x_train, x_test, y_train, y_test = train_test_split(
    x_poly,
    y,
    test_size=0.3,
    random_state=1,
)
x_train.shape

(278, 46)

In [47]:
# Refit the existing LinearRegression instance on the interaction features
# (this overwrites the earlier fit) and show the coefficients for the single
# target. fit() returns the estimator, so the call is chained.
print(regression_model.fit(x_train, y_train).coef_[0])

[-3.68062164e-13 -3.38019048e-01 -2.92838331e-01 -1.99370468e+00
 -5.23089321e+00 -6.29375520e-01  3.06573890e+00 -2.57399194e+11
 -9.99763270e+12  9.84532305e+12 -1.81912981e+00  1.23627615e-01
  2.47387724e+00  1.53906214e+00 -1.71266427e+00  1.25282436e+13
  1.03253853e+13  9.85560577e+12 -9.15902281e-02  1.47734652e+00
 -1.43820708e+00  3.10218238e+00  2.56068679e+12  2.11043772e+12
  2.01441801e+12 -4.04533009e-01 -1.09179064e-01 -1.69184608e+00
  8.46778616e+11  6.97888370e+11  6.66136170e+11 -7.53385421e-02
  2.11284158e-01 -4.65660364e+12 -3.83782664e+12 -3.66321499e+12
  6.44392467e-01 -1.21902966e+12 -1.00468601e+12 -9.58975270e+11
 -4.09040298e+11 -3.37118182e+11 -3.21780136e+11  2.81853460e+11
  1.56536429e+13 -8.81941246e+12]


In [48]:
# Ridge regression (penalty strength 0.3) on the interaction features.
# fit() returns the estimator itself, so construction and fitting are chained.
ridge = Ridge(alpha=0.3).fit(x_train, y_train)
print(ridge.coef_)

[[ 0.          0.06740642 -0.61900803 -1.97236759 -5.15141317 -0.62282102
   3.04381568  0.1723188   0.15891088 -0.38553368 -1.4895438   0.02925116
   1.72762625  1.4201127  -1.38679985 -0.05712906  1.13569653 -1.11720961
   0.30089657  1.53987731 -0.84218996  2.38658282  0.21457492  0.50684735
  -0.80377008 -0.47592772 -0.30069342 -1.50318104 -0.61710306  0.43440955
   0.32933252 -0.14480549  0.25597746  0.47585604 -0.93880283  0.37865359
   0.4784103  -0.67039722  0.22915648  0.61211567 -0.49887694  0.4033774
   0.21155723 -0.29118156  0.41878367  0.1184465 ]]


In [50]:
# Report the ridge model's score on the training split, then on the test split.
for features, target in ((x_train, y_train), (x_test, y_test)):
    print(ridge.score(features, target))

0.9025975935207238
0.8673792928418453


In [53]:
# Lasso regression (penalty strength 0.01) on the interaction features.
# fit() returns the estimator itself, so construction and fitting are chained.
lasso = Lasso(alpha=0.01).fit(x_train, y_train)
print(lasso.coef_)

[ 0.         -0.         -0.08692269 -1.94971176 -5.29180738 -0.47273225
  2.98385949 -0.          0.         -0.         -0.79527606 -0.05882027
  1.1561088   1.08164446 -0.9493974  -0.          1.23352833 -0.87530628
 -0.          1.38994992 -0.4329787   1.86995071 -0.          0.
 -0.         -0.         -0.24115467 -1.24997101 -0.6285489   0.
  0.         -0.16019703  0.          0.26564551 -0.61312578 -0.
  0.47635543 -0.89394     0.          0.3945189  -0.68253648  0.12935612
  0.         -0.23102258  0.38217499  0.        ]


In [54]:
# Report the lasso model's score on the training split, then on the test split.
for features, target in ((x_train, y_train), (x_test, y_test)):
    print(lasso.score(features, target))

0.9013410674767774
0.8704180363604552
