# Regularized linear models

- Ridge regression
- LASSO
- ElasticNet

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the advertising dataset; the first CSV column is used as the row index.
df = pd.read_csv(
    filepath_or_buffer="../Data/Advertising.csv",
    index_col=0,
)
# Preview the first rows as the cell's displayed result.
df.head()

Unnamed: 0,TV,Radio,Newspaper,Sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4
3,17.2,45.9,69.3,9.3
4,151.5,41.3,58.5,18.5
5,180.8,10.8,58.4,12.9


In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

# Target is the "Sales" column; every remaining column is a predictor.
y = df["Sales"]
X = df.drop(columns="Sales")

# Feature engineering: expand the predictors into all polynomial and
# interaction terms up to degree 3, without the constant (bias) column.
model_polynomial = PolynomialFeatures(degree=3, include_bias=False)
polynomial_features = model_polynomial.fit_transform(X)

# Hold out 33% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    polynomial_features,
    y,
    test_size=0.33,
    random_state=42,
)

# (rows, features) of the training split: the 3 predictors became 19 features.
X_train.shape

(134, 19)

## Feature standardization

In [8]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so the test data never influences the fit.
scaler = StandardScaler().fit(X_train)
scaled_X_train = scaler.transform(X_train)
scaled_X_test = scaler.transform(X_test)

# Overall mean/std of each scaled split. The test split is not expected to be
# exactly 0 / 1 because it is scaled with the training statistics.
(
    scaled_X_train.mean(),
    scaled_X_train.std(),
    scaled_X_test.mean(),
    scaled_X_test.std(),
)

(-3.34898382919136e-17, 1.0, -0.11982457640326809, 1.1245966534380971)

## Regularizations

### Ridge regression (Tikhonov regularization) / L2-regularization

In [11]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error

def ridge_regression(X_test, penalty = 0, train_features = None, train_target = None):
    """Fit a ridge regression model and return its predictions for ``X_test``.

    Parameters
    ----------
    X_test : array-like
        Feature matrix to predict on. It must have been preprocessed the same
        way as the training features (e.g. scaled with the same scaler).
    penalty : float, default 0
        Regularization strength, passed to ``Ridge`` as ``alpha``
        (the lambda of the theory).
    train_features : array-like, optional
        Training feature matrix. When omitted, the notebook-level
        ``scaled_X_train`` is used, which is the original behaviour.
    train_target : array-like, optional
        Training target values. When omitted, the notebook-level ``y_train``
        is used, which is the original behaviour.

    Returns
    -------
    The fitted model's predictions for ``X_test``.
    """
    # Fall back to the notebook globals so existing calls keep working unchanged,
    # while allowing the training data to be passed explicitly for reuse.
    if train_features is None:
        train_features = scaled_X_train
    if train_target is None:
        train_target = y_train

    # alpha in Ridge is same as lambda in theory
    model_ridge = Ridge(alpha = penalty)
    model_ridge.fit(train_features, train_target)
    return model_ridge.predict(X_test)

# Ridge with a zero penalty has no regularization term, so it should behave
# like an ordinary LinearRegression on the same polynomial features.
y_pred = ridge_regression(scaled_X_test, penalty=0)

# Error metrics on the held-out test split.
MAE = mean_absolute_error(y_test, y_pred)
MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)

MAE, MSE, RMSE

(0.37485164412180333, 0.2650465950553843, 0.5148267621786812)

In [12]:
from sklearn.linear_model import LinearRegression

# Plain (unregularized) linear regression on the scaled polynomial features,
# i.e. polynomial regression, used as the baseline for the ridge result.
model_linear = LinearRegression().fit(scaled_X_train, y_train)
y_pred = model_linear.predict(scaled_X_test)

# Error metrics on the held-out test split.
MAE = mean_absolute_error(y_test, y_pred)
MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)

MAE, MSE, RMSE

(0.3748516441217811, 0.26504659505536016, 0.5148267621786576)

In [14]:
# Ridge with a hand-picked penalty of 0.5, for comparison with the
# unregularized fit.
y_pred = ridge_regression(scaled_X_test, penalty=0.5)

# Error metrics on the held-out test split.
MAE = mean_absolute_error(y_test, y_pred)
MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)

MAE, MSE, RMSE

(0.5392524917636479, 0.4487478689277259, 0.6698864597286065)

### LASSO - L1 regularization

In [22]:
from sklearn.linear_model import LassoCV

# cv=5         -> k of the k-fold cross-validation used to pick the penalty
# n_alphas=200 -> number of candidate penalties (alphas) along the regularization path
# max_iter=2000 -> upper bound on solver iterations
model_lassoCV = LassoCV(cv=5, max_iter=2000, n_alphas=200)
model_lassoCV.fit(scaled_X_train, y_train)

In [23]:
# Penalty (alpha) selected by the 5-fold cross-validation; the 200 passed as
# n_alphas is the number of candidate alphas tried, not a number of repetitions.
model_lassoCV.alpha_

0.004968802520343366

In [28]:
# Fitted coefficients, one per polynomial feature. Many are exactly zero
# (roughly half in the recorded output), i.e. those features are removed from the model.
model_lassoCV.coef_

array([ 5.19612354,  0.43037087,  0.29876351, -4.80417579,  3.46665205,
       -0.40507212,  0.        ,  0.        ,  0.        ,  1.35260206,
       -0.        ,  0.        ,  0.14879719, -0.        ,  0.        ,
        0.        ,  0.09649665,  0.        ,  0.04353956])

In [29]:
# Evaluate the cross-validated LASSO model on the held-out test split.
y_pred = model_lassoCV.predict(scaled_X_test)

MAE = mean_absolute_error(y_test, y_pred)
MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)

MAE, MSE, RMSE

(0.46291883026932984, 0.33467924600222104, 0.5785146895301977)

### Elastic Net


In [30]:
from sklearn.linear_model import ElasticNetCV

# l1_ratio: candidate mixes between the L2 and L1 penalties, sorted for
#           readability; 1 corresponds to a pure L1 (LASSO) penalty.
# cv=5:     same 5-fold cross-validation as the LassoCV model, stated explicitly.
# max_iter: raised above the library default because the original fit emitted a
#           coordinate-descent convergence warning; increase further if it persists.
model_elastic = ElasticNetCV(
    l1_ratio=[.1, .5, .7, .8, .9, .95, 1],
    cv=5,
    max_iter=10000,
)
model_elastic.fit(scaled_X_train, y_train)

  model = cd_fast.enet_coordinate_descent(


In [31]:
# l1_ratio chosen by cross-validation. A value of 1.0 means the search
# preferred a pure L1 (LASSO) penalty over any L1/L2 mix in the candidate list.
model_elastic.l1_ratio_

1.0

In [32]:
# Penalty (alpha) chosen by cross-validation. In the recorded run it is exactly
# the alpha that LassoCV selected, consistent with l1_ratio_ being 1.0.
# NOTE(review): both searches may be landing on the smallest alpha of their
# automatic grids — TODO confirm by comparing against model_elastic.alphas_.
model_elastic.alpha_

0.004968802520343366