# Regularization

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.linear_model import LinearRegression, Ridge, Lasso


In [2]:
n = 100
c = 0.0001
x1 = np.random.normal(size=n)
x2 = x1 + c*np.random.normal(size=n)
x3 = x1 - x2 + c*np.random.normal(size=n)
y = x1 + x2 + x3 + np.random.normal(size=n)
X = np.stack((x1, x2, x3), axis=1)
X[:10]

array([[ 1.47597487e+00,  1.47583292e+00,  3.08605289e-04],
       [ 5.13371654e-01,  5.13417694e-01, -1.05769767e-04],
       [ 2.03359027e+00,  2.03365616e+00,  4.78443143e-05],
       [-1.63762773e+00, -1.63768379e+00,  1.05458976e-04],
       [-3.28285749e-02, -3.28609426e-02, -6.56605996e-05],
       [ 9.06992771e-01,  9.06907433e-01,  1.43731856e-04],
       [ 2.47325267e-02,  2.48189857e-02, -5.35487572e-05],
       [-3.87827695e-01, -3.87800718e-01, -6.81087220e-05],
       [ 1.67236005e+00,  1.67221978e+00,  3.40812437e-04],
       [-1.51837291e+00, -1.51854514e+00,  3.44673980e-04]])

In [3]:
m = LinearRegression().fit(X, y)
print("lm coefficients:", m.coef_)

mr = Ridge(alpha=0.1).fit(X, y)
print("ridge coefficients:", mr.coef_)

ml = Lasso(alpha=0.1).fit(X, y)
print("lasso coefficients", ml.coef_)

lm coefficients: [-1051.36786948  1053.31410508   714.68363118]
ridge coefficients: [0.96588883 0.96997506 0.00391336]
lasso coefficients [1.81342087e+00 6.28481510e-06 0.00000000e+00]


## Create testing data

In [4]:
nt = 1000
c = 0.0005
x1t = np.random.normal(size=nt)
x2t = x1t + c*np.random.normal(size=nt)
x3t = x1t - x2t + c*np.random.normal(size=nt)
yt = x1t + x2t + x3t + np.random.normal(size=nt)
Xt = np.stack((x1t, x2t, x3t), axis=1)


## ... and test

In [5]:
yhat = m.predict(Xt)
rmse = np.sqrt(np.mean(yt - yhat)**2)
print("lm:", rmse)
yhatr = mr.predict(Xt)
rmser = np.sqrt(np.mean(yt - yhatr)**2)
print("ridge:", rmser)
yhatl = ml.predict(Xt)
rmsel = np.sqrt(np.mean(yt - yhatl)**2)
print("lasso:", rmsel)


lm: 0.10634276651737389
ridge: 0.11786047327421235
lasso: 0.14160468264857531


## Exercise:

1. compute the rmse-s at different lambda values.  Use a wide range of lambdas, say b/w 1e-6 to 1e6.

2. make a plot where you show how rmse depends on $\lambda$.  Note: use log scale for lambda

3. make another plot where you show how coefficients depend on $\lambda$

4. currently our test data remains very similar to training data.
Extend the test data a little bit by increasing the factor 0.0001 to,
say, 0.001.  See what happens with RMSE-s and all that.