# Regularization

In [313]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.linear_model import LinearRegression, Ridge, Lasso


In [337]:
n = 100
c = 0.0001
x1 = np.random.normal(size=n)
x2 = x1 + c*np.random.normal(size=n)
x3 = x1 - x2 + c*np.random.normal(size=n)
y = x1 + x2 + x3 + np.random.normal(size=n)
X = np.stack((x1, x2, x3), axis=1)
X[:10]

array([[-4.16286224e-01, -4.16340719e-01, -6.50011153e-05],
       [-5.30187661e-01, -5.30153196e-01, -1.16857626e-04],
       [ 6.13004963e-02,  6.12477520e-02,  6.88680500e-05],
       [-5.28504002e-01, -5.28394888e-01, -2.99538653e-04],
       [ 1.91947236e-01,  1.91835080e-01,  3.64553110e-05],
       [ 5.00806048e-01,  5.00807912e-01, -5.40924540e-05],
       [ 1.58872600e-01,  1.58917397e-01, -1.36869545e-04],
       [-2.90783886e-01, -2.90674753e-01, -4.68458466e-05],
       [ 3.03648712e-01,  3.03656885e-01,  8.58845801e-05],
       [ 4.41145970e-01,  4.41225220e-01, -6.66298571e-05]])

In [338]:
m = LinearRegression().fit(X, y)
print("lm coefficients:", m.coef_)

mr = Ridge(alpha=0.1).fit(X, y)
print("ridge coefficients:", mr.coef_)

ml = Lasso(alpha=0.1).fit(X, y)
print("lasso coefficients", ml.coef_)

lm coefficients: [ 1902.48897469 -1900.41108156 -1710.66134047]
ridge coefficients: [ 1.0517812   1.05063402 -0.01141951]
lasso coefficients [ 1.97760879  0.         -0.        ]


## Create testing data

In [351]:
nt = 1000
c = 0.0005
x1t = np.random.normal(size=nt)
x2t = x1t + c*np.random.normal(size=nt)
x3t = x1t - x2t + c*np.random.normal(size=nt)
yt = x1t + x2t + x3t + np.random.normal(size=nt)
Xt = np.stack((x1t, x2t, x3t), axis=1)


## ... and test

In [353]:
yhat = m.predict(Xt)
rmse = np.sqrt(np.mean(yt - yhat)**2)
print("lm:", rmse)
yhatr = mr.predict(Xt)
rmser = np.sqrt(np.mean(yt - yhatr)**2)
print("ridge:", rmser)
yhatl = ml.predict(Xt)
rmsel = np.sqrt(np.mean(yt - yhatl)**2)
print("lasso:", rmsel)


lm: 0.030556055797348613
ridge: 0.03572384785412423
lasso: 0.042416706692368494


## Exercise:

1. compute the rmse-s at different lambda values.  Use a wide range of lambdas, say b/w 1e-6 to 1e6.

2. make a plot where you show how rmse depends on $\lambda$.  Note: use log scale for lambda

3. make another plot where you show how coefficients depend on $\lambda$

4. currently our test data remains very similar to training data.
Extend the test data a little bit by increasing the factor 0.0001 to,
say, 0.001.  See what happens with RMSE-s and all that.