$\dfrac{\sum_N (y - \hat{y})^2}{N}$

$\dfrac{\sum_N (y - \hat{y})^2}{N} + \alpha \sum_M |w|$

$\dfrac{\sum_N (y - \hat{y})^2}{N} + \alpha \sum_M w^2$



In [59]:
from sklearn.datasets import make_regression

N = 1_000

X, y = make_regression(**{
    'n_samples': N,
    'n_features': 3,
    'n_informative': 2,
    'n_targets': 1,
    'bias': 5.3,
    'random_state': 37
})

print(f'X shape = {X.shape}, y shape {y.shape}')

X shape = (1000, 3), y shape (1000,)


In [60]:
import pandas as pd
from sklearn.linear_model import LinearRegression, Lasso, Ridge

linear = LinearRegression()
lasso = Lasso(alpha=1.0, max_iter=5_000, random_state=37)
ridge = Ridge(alpha=1.0, max_iter=5_000, random_state=37)

linear.fit(X, y)
lasso.fit(X, y)
ridge.fit(X, y)

pd.DataFrame({
    'linear': [linear.intercept_] + list(linear.coef_),
    'lasso': [lasso.intercept_] + list(lasso.coef_),
    'ridge': [ridge.intercept_] + list(ridge.coef_),
})

Unnamed: 0,linear,lasso,ridge
0,5.3,5.272388,5.299127
1,1.076976e-14,-0.0,-0.003796
2,14.50597,13.508805,14.493159
3,47.45879,46.448957,47.409024


In [61]:
import autograd.numpy as np

X = np.hstack([np.ones(N).reshape(N, 1), X])

In [62]:
from autograd import grad

def get_norm(w, norm=1):
    x = [np.power(np.abs(v), norm) for v in w]
    x = np.sum(x)
    x = np.power(x, 1 / norm)
    return x
    
def cost(w, X, y):
    y_pred = np.dot(X, w)
    loss = ((y_pred - y) ** 2.0)
    return loss.mean()

loss = grad(cost)

In [65]:
def do_learn(X, y, loss_f, r=0.1, alpha=1.0, n_iters=5_000, penalty=None):
    w = np.zeros(X.shape[1])
    
    for it in range(n_iters):
        if penalty == 'l2':
            p = get_norm(w, 2)
        elif penalty == 'l1':
            p = get_norm(w, 1)
        else:
            p = 0.0
        
        loss1 = loss_f(w, X, y)
        loss2 = loss1 + (alpha * p)
        loss3 = r * loss2
        
        w = w - loss3
        
        if it % 500 == 0:
            print(loss1)
            print(loss2)
            print(loss3)
            print(w)
            print('-' * 15)
            
    return w

# do_learn(X, y, loss)
do_learn(X, y, loss, penalty='l1')

[ -8.77870161   7.07695875 -31.81814282 -91.06360547]
[ -8.77870161   7.07695875 -31.81814282 -91.06360547]
[-0.87787016  0.70769587 -3.18181428 -9.10636055]
[ 0.87787016 -0.70769587  3.18181428  9.10636055]
---------------
[-7.60189361e+40 -7.44868567e+40 -7.68733318e+40 -7.22599973e+40]
[8.02665553e+40 8.17986347e+40 7.94121596e+40 8.40254941e+40]
[8.02665553e+39 8.17986347e+39 7.94121596e+39 8.40254941e+39]
[-4.65654833e+40 -4.74542971e+40 -4.60698180e+40 -4.87461774e+40]
---------------
[-9.19875097e+81 -9.01336010e+81 -9.30213802e+81 -8.74389665e+81]
[9.71273858e+81 9.89812946e+81 9.60935153e+81 1.01675929e+82]
[9.71273858e+80 9.89812946e+80 9.60935153e+80 1.01675929e+81]
[-5.63470507e+81 -5.74225691e+81 -5.57472657e+81 -5.89858225e+81]
---------------
[-1.11310450e+123 -1.09067108e+123 -1.12561495e+123 -1.05806437e+123]
[1.17530010e+123 1.19773352e+123 1.16278965e+123 1.23034023e+123]
[1.17530010e+122 1.19773352e+122 1.16278965e+122 1.23034023e+122]
[-6.81833389e+122 -6.94847812e

  ret = umr_sum(arr, axis, dtype, out, keepdims)
  return f_raw(*args, **kwargs)


[-1.62985919e+205 -1.59701114e+205 -1.64817758e+205 -1.54926689e+205]
[1.72092888e+205 1.75377693e+205 1.70261049e+205 1.80152119e+205]
[1.72092888e+204 1.75377693e+204 1.70261049e+204 1.80152119e+204]
[-9.98372049e+204 -1.01742837e+205 -9.87744898e+204 -1.04512651e+205]
---------------
[-1.97222818e+246 -1.93248006e+246 -1.99439455e+246 -1.87470663e+246]
[2.08242802e+246 2.12217614e+246 2.06026166e+246 2.17994958e+246]
[2.08242802e+245 2.12217614e+245 2.06026166e+245 2.17994958e+245]
[-1.20809055e+246 -1.23114985e+246 -1.19523106e+246 -1.26466628e+246]
---------------
[-2.38651537e+287 -2.33841773e+287 -2.41333801e+287 -2.26850839e+287]
[2.51986383e+287 2.56796147e+287 2.49304119e+287 2.63787082e+287]
[2.51986383e+286 2.56796147e+286 2.49304119e+286 2.63787082e+286]
[-1.46186263e+287 -1.48976577e+287 -1.44630186e+287 -1.53032267e+287]
---------------
[nan nan nan nan]
[nan nan nan nan]
[nan nan nan nan]
[nan nan nan nan]
---------------
[nan nan nan nan]
[nan nan nan nan]
[nan nan nan

array([nan, nan, nan, nan])