# Regularization Review

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np


from sklearn.datasets import load_breast_cancer, load_boston
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import make_pipeline


In [2]:
boston = load_boston()
X = boston.data
y = boston.target

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [4]:
lr = LinearRegression()
lr.fit(X_train, y_train)
print("Train RMSE: ", np.sqrt(mean_squared_error(lr.predict(X_train), y_train)))
print("Test RMSE:", np.sqrt(mean_squared_error(lr.predict(X_test), y_test)))

Train RMSE:  4.254954668230921
Test RMSE: 6.023416146614772




In [5]:
def model_test(model, X_train, X_test, y_train, y_test):
    lr = model
    X_train = StandardScaler().fit_transform(X_train)
    X_test = StandardScaler().fit_transform(X_test)
    lr.fit(X_train, y_train)
    print("Train r2", lr.score(X_train, y_train))
    print("Test r2", lr.score(X_test, y_test))
    print("Train RMSE: ", np.sqrt(mean_squared_error(lr.predict(X_train), y_train)))
    print("Test RMSE:", np.sqrt(mean_squared_error(lr.predict(X_test), y_test)))

In [6]:
model_test(LinearRegression(), X_train, X_test)

TypeError: model_test() missing 2 required positional arguments: 'y_train' and 'y_test'

In [7]:
model_test(Ridge(), X_train, X_test,   y_train, y_test)

Train r2 0.7818224053670757
Test r2 0.6062839111363423
Train RMSE:  4.255160545827858
Test RMSE: 5.909093700780388


In [8]:
model_test(Ridge(alpha=0.1), X_train, X_test, y_train, y_test)

Train r2 0.7818432926158021
Test r2 0.6063972763740455
Train RMSE:  4.2549568568923775
Test RMSE: 5.908242917615769


In [9]:
model_test(Ridge(alpha = 10), X_train, X_test, y_train, y_test)

Train r2 0.7805456511517963
Test r2 0.6041937050970777
Train RMSE:  4.267592778607969
Test RMSE: 5.924758381694348


In [10]:
from ipywidgets import interact

In [13]:
def slidin(alph):
    model_test(Ridge(alpha = alph), X_train, X_test, y_train, y_test)

In [14]:
interact(slidin, alph = (0.001, 100, .01))

Train r2 0.7705493363358535
Test r2 0.5933280825665161
Train RMSE:  4.363706523702593
Test RMSE: 6.005530642650671


<function __main__.slidin>

In [None]:
ridge_default = Ridge().fit(X_train, y_train).coef_
ridge_small = Ridge(alpha = 0.1).fit(X_train, y_train).coef_
ridge_large = Ridge(alpha = 100).fit(X_train, y_train).coef_

In [None]:
plt.figure(figsize = (12, 8))
plt.plot(ridge_default, 's', label = 'alpha = 1.0')
plt.plot(ridge_small, 'o', label = 'alpha = 0.1')
plt.plot(ridge_large, 'v', label = 'alpha = 100')
plt.axhline(color = 'black')
plt.legend()

In [None]:
model_test(Lasso())

In [None]:
model_test(ElasticNet())

### Problem

Examine the plots of coefficients for three different values of alpha.  Compare and contrast what we see with the four models (`LinearRegression`, `Ridge`, `Lasso`, `ElasticNet`) in terms of the values of coefficients across alpha values, and the values from model to model.

If you have time, find the ideal value for our regularization parameter using the `GridSearchCV`.

### Classification and Regularization

Here, we are focused on `LogisticRegression`, and the use of different regularization parameters and values for these parameters.  The aim is to draw connections between what happens with Regularization across our least squares strategies.

In [None]:
clf = LogisticRegression()

In [None]:
clf

In [None]:
cancer = load_breast_cancer()

In [None]:
X = cancer.data
y = cancer.target
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [None]:
clf.fit(X_train, y_train)

In [None]:
plt.plot(clf.coef_.T, 'o', label = 'Logistic Default')
plt.axhline(color = 'black')

In [None]:
clf_small = LogisticRegression(C = 0.1)
clf_small.fit(X_train, y_train)

In [None]:
plt.plot(clf.coef_.T, 'o', label = 'Logistic Default')
plt.plot(clf_small.coef_.T, '^', label = 'C = 0.1')
plt.axhline(color = 'black')
plt.legend()

In [None]:
clf_big = LogisticRegression(C = 100)
clf_big.fit(X_train, y_train)
plt.plot(clf.coef_.T, 'o', label = 'Logistic Default')
plt.plot(clf_small.coef_.T, '^', label = 'C = 0.1')
plt.plot(clf_big.coef_.T, 'x', label = 'C = 100')
plt.xticks(range(cancer.data.shape[1]), cancer.feature_names, rotation = 90)
plt.axhline(color = 'black')
plt.legend()

### Applying L1 Regularization

In [None]:
clf_l1 = LogisticRegression(C = 1.0, penalty='l1')
clf_l1_small = LogisticRegression(C = 0.1, penalty='l1')
clf_l1_large = LogisticRegression(C = 100, penalty='l1')

In [None]:
clf_l1.fit(X_train, y_train)
clf_l1_small.fit(X_train, y_train)
clf_l1_large.fit(X_train, y_train)

plt.plot(clf_l1.coef_.T, 'o', alpha = 0.8, label = 'Default')
plt.plot(clf_l1_small.coef_.T, '^', alpha = 0.6, label = 'C = 0.1')
plt.plot(clf_l1_large.coef_.T, 'v', alpha = 0.3, label = 'C = 100')
plt.axhline(color = 'black')
plt.xticks(range(cancer.data.shape[1]), cancer.feature_names, rotation = 90)
plt.legend()

### Takeaways

- We use regularization in both classification and regression
- Depending on the task, we want either `L1` or `L2` penalties
- We can grid search to find ideal `C`

In [None]:
from sklearn.linear_model import LogisticRegressionCV

In [None]:
clf = LogisticRegressionCV()

In [None]:
clf.fit(X_train, y_train)

In [None]:
clf.C_