# Ridge and Lasso in Modelling

In [1]:
import pandas as pd
from sklearn.linear_model import Lasso, Ridge, LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures, StandardScaler

# Source: the "mpg" CSV hosted in the seaborn-data GitHub repository.
# Loading it requires network access.
url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/mpg.csv"
data = pd.read_csv(filepath_or_buffer=url)

In [2]:
# Keep only complete rows: any row with at least one missing value is removed.
# `data` itself is left untouched; the cleaned frame is bound to `df`.
df = data.dropna(axis=0, how="any")

# Plain-text preview of the first five rows of the cleaned frame
print(df.head(n=5))

    mpg  cylinders  displacement  horsepower  weight  acceleration  \
0  18.0          8         307.0       130.0    3504          12.0   
1  15.0          8         350.0       165.0    3693          11.5   
2  18.0          8         318.0       150.0    3436          11.0   
3  16.0          8         304.0       150.0    3433          12.0   
4  17.0          8         302.0       140.0    3449          10.5   

   model_year origin                       name  
0          70    usa  chevrolet chevelle malibu  
1          70    usa          buick skylark 320  
2          70    usa         plymouth satellite  
3          70    usa              amc rebel sst  
4          70    usa                ford torino  


In [3]:
# Rich (table) preview of the first five rows of the cleaned frame
df.head(n=5)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320
2,18.0,8,318.0,150.0,3436,11.0,70,usa,plymouth satellite
3,16.0,8,304.0,150.0,3433,12.0,70,usa,amc rebel sst
4,17.0,8,302.0,140.0,3449,10.5,70,usa,ford torino


In [5]:
# Quick look at just the first two rows
df.head(n=2)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320


In [None]:
# Separate the target (mpg) from the predictors. The text columns
# `origin` and `name` are dropped along with the target, so only the
# numeric columns remain as features.
y = df["mpg"]
X = df.drop(columns=["mpg", "origin", "name"])

# Hold out 30% of the rows for testing; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=12
)

# Show the first few target values
y.head()

0    18.0
1    15.0
2    18.0
3    16.0
4    17.0
Name: mpg, dtype: float64

In [12]:
# Standardize the features.
# The scaler is fitted on the training split only, and the statistics it
# learned there are then applied to both splits, so no information from the
# test set leaks into the preprocessing.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

### Further reading: [scikit-learn `Ridge` documentation](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html)

In [15]:
# Fit ridge, lasso and ordinary (unregularized) linear regression models
# on the standardized training data.
# In scikit-learn the regularization strength is the `alpha` parameter;
# both penalized models use the same value here.
ridge = Ridge(alpha=0.5).fit(X_train_scaled, y_train)
lasso = Lasso(alpha=0.5).fit(X_train_scaled, y_train)

# Kept as a bare final expression so the cell displays the fitted estimator.
lin = LinearRegression()
lin.fit(X_train_scaled, y_train)

LinearRegression()

In [16]:
# Generate predictions on the training and test sets for each fitted model.
# Each pair is (train predictions, test predictions), computed from the
# standardized feature matrices.
ridge_train, ridge_test = ridge.predict(X_train_scaled), ridge.predict(X_test_scaled)
lasso_train, lasso_test = lasso.predict(X_train_scaled), lasso.predict(X_test_scaled)
lin_train, lin_test = lin.predict(X_train_scaled), lin.predict(X_test_scaled)


In [27]:
# Report the mean squared error on the train and test sets for each model.
# `end='\n\n\n'` terminates the line and leaves two blank lines between
# model groups.
print('Train error ridge:', mean_squared_error(y_train, ridge_train))
print('Test error ridge:', mean_squared_error(y_test, ridge_test), end='\n\n\n')

print('Train error lasso:', mean_squared_error(y_train, lasso_train))
print('Test error lasso:', mean_squared_error(y_test, lasso_test), end='\n\n\n')

print('Train error lin:', mean_squared_error(y_train, lin_train))
print('Test error lin:', mean_squared_error(y_test, lin_test))

Train error ridge: 9.701656986420407
Test error ridge: 16.809526746774726


Train error lasso: 10.099243250316597
Test error lasso: 18.08917571422418


Train error lin: 9.700888480581273
Test error lin: 16.748025313964703


In [33]:
# Compare how ridge and lasso regularization changed the parameter estimates
# relative to plain linear regression. A bare print() inserts the blank line
# between entries.
print('ridge parameter coeff:', ridge.coef_)
print()
print('lasso parameter coeff:', lasso.coef_)
print()
print('lin parameter coeff:', lin.coef_)

ridge parameter coeff: [-0.46908945 -0.33378111 -0.05577065 -4.67661903 -0.08134899  2.57841336]

lasso parameter coeff: [-0.24684784 -0.42491716 -0.         -4.41934871  0.          2.19722681]

lin parameter coeff: [-0.45679158 -0.28124899 -0.01817741 -4.76568911 -0.06107906  2.59068562]
