## Lasso Regression

### lasso.py

In [3]:
import numpy as np

In [4]:
def soft_thresholding(x, y):
    """Shrink ``x`` towards zero by ``y``, clamping at zero.

    Computes ``sign(x) * max(|x| - y, 0)``.

    Args:
        x: Value(s) to shrink; a scalar or a NumPy array.
        y: Threshold; a scalar or anything broadcastable against ``x``.

    Returns:
        The shrunken value(s), with the same shape as the broadcast of
        ``x`` and ``y``.
    """
    # np.maximum (element-wise) instead of the builtin max, so that array
    # inputs work too; for scalars the result is the same.
    return np.sign(x) * np.maximum(np.abs(x) - y, 0)

In [12]:
class Lasso:
    """Linear model with an intercept, fitted by cyclic coordinate updates
    that soft-threshold each feature weight.

    Attributes are set in ``__init__``:
        lambda_: threshold passed to ``soft_thresholding`` for every
            feature weight.
        tol: the fit stops once the mean absolute weight changes by at most
            this much between two sweeps.
        max_iter: upper bound on the number of sweeps.
        w_: ``None`` until ``fit`` is called; afterwards an array of length
            ``d + 1`` holding the intercept at index 0 followed by the
            ``d`` feature weights.
    """

    def __init__(self, lambda_, tol=0.0001, max_iter=1000):
        self.lambda_ = lambda_
        self.tol = tol
        self.max_iter = max_iter
        self.w_ = None

    def fit(self, X, t):
        """Fit the weights to inputs ``X`` and targets ``t``.

        Args:
            X: array of shape ``(n, d)``. A 1-D array of length ``n`` is
                treated as ``n`` samples of a single feature, matching
                what ``predict`` does.
            t: targets, one per row of ``X``.

        The result is stored in ``self.w_``; nothing is returned.
        """
        X = np.asarray(X)
        t = np.asarray(t)
        if X.ndim == 1:
            # Same convention as predict(): one feature, n samples.
            X = X.reshape(-1, 1)
        n, d = X.shape
        self.w_ = np.zeros(d + 1)
        avgl1 = 0.
        for _ in range(self.max_iter):
            avgl1_prev = avgl1
            self._update(n, d, X, t)
            # Convergence measure: mean absolute value of all weights
            # (intercept included).
            avgl1 = np.abs(self.w_).sum() / self.w_.shape[0]
            if abs(avgl1 - avgl1_prev) <= self.tol:
                break

    def _update(self, n, d, X, t):
        """Run one sweep: refresh the intercept, then each feature weight."""
        # Intercept: mean residual of the current feature weights.
        self.w_[0] = (t - np.dot(X, self.w_[1:])).sum() / n
        # Targets with the intercept removed; constant for the whole sweep.
        centered = t - self.w_[0]
        for k in range(d):
            column = X[:, k]
            r = np.dot(column, column)
            if r == 0:
                # An all-zero column cannot influence the fit, and q / r
                # would be NaN and contaminate every other weight.
                self.w_[k + 1] = 0.0
                continue
            # Exclude feature k from the prediction while computing its
            # partial residual; the weight is overwritten just below.
            self.w_[k + 1] = 0.0
            q = np.dot(centered - np.dot(X, self.w_[1:]), column)
            self.w_[k + 1] = soft_thresholding(q / r, self.lambda_)

    def predict(self, X):
        """Return predictions for ``X`` using the fitted weights.

        Args:
            X: array of shape ``(n, d)``, or a 1-D array of length ``n``
                treated as ``n`` samples of a single feature.

        Returns:
            Array of length ``n`` equal to ``w_[0] + X @ w_[1:]``.
        """
        X = np.asarray(X)
        if X.ndim == 1:
            X = X.reshape(X.shape[0], 1)
        # Prepend a column of ones so the intercept is part of the product.
        Xtil = np.c_[np.ones(X.shape[0]), X]
        return np.dot(Xtil, self.w_)

### lasso_winequality1.py

In [13]:
import csv

In [21]:
# Read the semicolon-separated wine-quality CSV into a list of string rows.
Xy = []
# newline="" is what the csv module documents for files handed to
# csv.reader, so that newlines inside quoted fields are parsed correctly.
with open("./data/winequality-red.csv", newline="") as fp:
    for row in csv.reader(fp, delimiter=';'):
        Xy.append(row)
# Skip the first row (presumably the column header) and convert to floats.
Xy = np.array(Xy[1:], dtype=np.float64)

# Fixed seed so the shuffle, and therefore the split, is reproducible.
np.random.seed(0)
np.random.shuffle(Xy)
# The last 1000 shuffled rows are the test set, the rest the training set;
# the last column is the target and the other columns are the features.
train_X = Xy[:-1000, :-1]
train_y = Xy[:-1000, -1]
test_X = Xy[-1000:, :-1]
test_y = Xy[-1000:, -1]

# Train with a range of hyperparameter values and print the results.
for lambda_ in [100, 1., 0.1, 0.01, 0.001, 0.0001, 0.00001, 0]:
    model = Lasso(lambda_)
    model.fit(train_X, train_y)
    y = model.predict(test_X)
    print('--- lambda = {} ---:'.format(lambda_))
    print('coefficients:')
    print(model.w_)
    # Mean squared error on the held-out test rows.
    mse = ((y - test_y)**2).mean()
    print("MSE: {:.3f}".format(mse))

--- lambda = 100 ---:
coefficients:
[ 5.58430718  0.         -0.          0.          0.         -0.
  0.         -0.         -0.         -0.          0.          0.        ]
MSE: 0.691
--- lambda = 1.0 ---:
coefficients:
[ 5.58430718  0.         -0.          0.          0.         -0.
  0.         -0.         -0.         -0.          0.          0.        ]
MSE: 0.691
--- lambda = 0.1 ---:
coefficients:
[ 5.73493612  0.         -0.1407455   0.34369322 -0.         -2.00071813
  0.         -0.         -0.          0.          0.          0.        ]
MSE: 0.636
--- lambda = 0.01 ---:
coefficients:
[ 5.71424724  0.         -1.01439751  0.00742223  0.         -3.34228417
  0.         -0.          0.          0.          1.04248618  0.        ]
MSE: 0.539
--- lambda = 0.001 ---:
coefficients:
[ 4.47222921e+00  0.00000000e+00 -1.09737085e+00 -5.53370439e-02
  0.00000000e+00 -3.02055910e+00  0.00000000e+00 -1.72208060e-05
  7.72156072e-01  0.00000000e+00  1.08440304e+00  4.71376678e-02]
MSE: 