<a href="https://colab.research.google.com/github/komazawa-deep-learning/komazawa-deep-learning.github.io/blob/master/2024notebooks/2024_0624L1_and_L2_regularization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# L1, L2 正則化

* Source: [Understanding L1 and L2 regularization with analytical and probabilistic views](https://medium.com/intuition/understanding-l1-and-l2-regularization-with-analytical-and-probabilistic-views-8386285210fc)
<!--
```wget https://gist.githubusercontent.com/tanukon/5bf850ab27b012835da2d1e7a5dd0e19/raw/e3c1365fa2dd88508416ea32652aac03d7502c50/L1%20and%20L2%20regularization.ipynb``` -->

<img src="https://miro.medium.com/v2/resize:fit:1204/format:webp/0*IgQLm-qC3bBLMHc5.png">


In [None]:
import IPython
isColab = 'google.colab' in str(IPython.get_ipython())

import matplotlib.pyplot as plt
try:
    import japanize_matplotlib
except ImportError:
    !pip install japanize_matplotlib
    import japanize_matplotlib

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches

from scipy.stats import laplace, norm

from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression, Ridge, Lasso

try:
    from celluloid import Camera
except ImportError:
    !pip install celluloid
    from celluloid import Camera

# データの作成

In [None]:
# Number of noisy observations to draw (50 is an alternative setting worth trying).
n_samples = 30

# Seeded generator local to this cell so that "Restart & Run All" reproduces
# exactly the same data set (and therefore the same fitted curves below).
rng_data = np.random.default_rng(42)

# Evenly spaced candidate inputs on [-2*pi, 2*pi]
# (an alternative is np.linspace(0, 2 * np.pi, 100)).
x_points = np.linspace(- 2 * np.pi, 2 * np.pi, 300)

# Draw n_samples inputs from the candidates (with replacement, as before) and sort them
# so that line plots of the fitted curve are drawn left to right.
x_sampled = rng_data.choice(x_points, n_samples)
x = np.sort(x_sampled)

# Noise-free target values
x_true = np.cos(x)

# Noise: uniform on [0, 1), i.e. NOT zero-mean; the divisor is a manual scale knob.
noise = rng_data.random(n_samples) / 1.

# Observed targets = true values + noise
y = x_true + noise
print((x, y))
plt.scatter(x, y)

# 多項式回帰モデル

In [None]:
# Expand the sampled inputs into polynomial terms up to degree 15
# (degree=10 is an alternative setting worth trying).
poly = PolynomialFeatures(degree=15)
poly_features = poly.fit_transform(x.reshape(-1, 1))

# Ordinary least squares on the polynomial features: no regularization.
poly_regression_model = LinearRegression().fit(poly_features, y)
y_hat_wo_regularization = poly_regression_model.predict(poly_features)

# Same features with an L2 (ridge) penalty; alpha=0.001 is an alternative setting.
ridge_model = Ridge(alpha=0.01).fit(poly_features, y)
y_hat_w_regularization = ridge_model.predict(poly_features)

In [None]:
# Side-by-side comparison of the fitted curves: unregularized fit on the left,
# ridge fit on the right (figsize=(10, 5) is an alternative layout).
fig, ax = plt.subplots(1, 2, figsize=(13, 4))

panels = (
    (ax[0], y_hat_wo_regularization, '正則化なし'),
    (ax[1], y_hat_w_regularization, '正則化あり'),
)

for panel, fitted, heading in panels:
    # Fitted curve first, then the sampled data, so draw order is unchanged.
    panel.plot(x, fitted, label='当てはめた曲線', color='orange')
    panel.scatter(x, y, label='サンプリングしたデータ')
    panel.set_xlabel('x')
    panel.set_ylabel('y')
    panel.legend()
    panel.set_title(heading)

plt.show()

In [None]:
# Polynomial regression without regularization: display the fitted coefficients.
poly_regression_model.coef_

# LASSO regression

In [None]:
# the regularization term visualization

def func1(x, c = 1):
    """Return ``x - c``.

    Evaluated on ``0 <= x <= c`` this is the line ``w1 = w0 - c``, i.e. the
    edge of the L1 diamond ``|w0| + |w1| = c`` with ``w0 >= 0`` and ``w1 <= 0``.
    """
    shift = -c
    return x + shift

def func2(x, c = 1):
    """Return ``x + c``.

    Evaluated on ``-c <= x <= 0`` this is the line ``w1 = w0 + c``, i.e. the
    edge of the L1 diamond ``|w0| + |w1| = c`` with ``w0 <= 0`` and ``w1 >= 0``.
    """
    raised = x + c
    return raised

def func3(x, c = 1):
    """Return ``-x + c``.

    Evaluated on ``0 <= x <= c`` this is the line ``w1 = -w0 + c``, i.e. the
    edge of the L1 diamond ``|w0| + |w1| = c`` with ``w0 >= 0`` and ``w1 >= 0``.
    """
    mirrored = -x
    return mirrored + c

def func4(x, c = 1):
    """Return ``-x - c``.

    Evaluated on ``-c <= x <= 0`` this is the line ``w1 = -w0 - c``, i.e. the
    edge of the L1 diamond ``|w0| + |w1| = c`` with ``w0 <= 0`` and ``w1 <= 0``.
    """
    mirrored = -x
    return mirrored - c


# Draw the boundary of the L1 constraint region |w0| + |w1| = l1_radius (a diamond).
#
# Cell-specific names are used on purpose: rebinding the notebook-level `x`
# here would overwrite the sampled regression inputs, and re-running the
# earlier fit/plot cells afterwards would fail on a shape mismatch.
l1_radius = 1
w0_grid = np.linspace(-l1_radius, l1_radius, 100)

# Each half of the w0 axis carries two edges of the diamond.
w0_right = w0_grid[(w0_grid >= 0) & (w0_grid <= l1_radius)]
w0_left = w0_grid[(w0_grid >= -l1_radius) & (w0_grid <= 0)]

# The radius is passed explicitly so the edges stay consistent with the masks
# if l1_radius is changed (the helpers' default c=1 would otherwise be used).
plt.plot(w0_right, func1(w0_right, l1_radius), color='blue')
plt.plot(w0_left, func2(w0_left, l1_radius), color='blue')
plt.plot(w0_right, func3(w0_right, l1_radius), color='blue')
plt.plot(w0_left, func4(w0_left, l1_radius), color='blue')

plt.axis('equal')
plt.xlabel('w0')
plt.ylabel('w1')
plt.grid()
plt.show()

In [None]:
class LassoRegression:
    """Two-coefficient lasso regression fitted by cyclic coordinate descent.

    The coordinate updates minimise ``0.5 * ||y - X b||^2 + penalty * ||b||_1``.
    The coefficients after every sweep are recorded in ``parameter_list`` so the
    optimisation path can be animated on top of the cost contours.
    """

    def __init__(self, penalty: float = 1e-6, iterations: int = 30, step: float = 0.1, intercept: bool = False):
        """Store the hyper-parameters.

        Args:
            penalty: L1 regularisation strength (the soft-threshold level).
            iterations: number of full coordinate-descent sweeps run by ``fit``.
            step: grid spacing used by ``contour_cost_function`` (it is NOT a
                learning rate; coordinate descent has none).
            intercept: if True, column 0 of ``X`` is treated as the intercept
                column and its coefficient is updated without the L1 penalty.
        """
        self.penalty = penalty
        self.iterations = iterations
        self.step = step
        self.intercept = intercept

        # [b0, b1] before the first sweep and after every sweep; filled by fit().
        self.parameter_list = []

    def soft_threshold(self, a, z):
        """Return ``sign(a) * max(|a| - penalty, 0) / z`` (the lasso coordinate update)."""
        return np.sign(a) * np.maximum(np.abs(a) - self.penalty, 0.) / z

    def get_parameters(self, beta):
        """Return the two coefficients of a ``(1, 2)`` array as plain floats ``(b0, b1)``."""
        b0 = float(beta[0, 0])
        b1 = float(beta[0, 1])

        return b0, b1

    def fit(self, X, y):
        """Run ``iterations`` sweeps of coordinate descent starting from ``(-5, 5)``.

        Args:
            X: design matrix of shape ``(n, 2)``.
            y: targets with ``n`` entries (a flat vector or an ``(n, 1)`` column).

        Returns:
            self, with ``beta`` of shape ``(1, 2)`` and ``parameter_list`` populated.

        Raises:
            ValueError: if ``X`` does not have exactly two columns or ``y`` does
                not have one entry per row of ``X``.
        """
        # dataset (stored as float arrays; y is always kept as a column vector)
        self.X = np.asarray(X, dtype=float)
        self.y = np.asarray(y, dtype=float).reshape(-1, 1)

        # parameters
        self.n, self.p = self.X.shape
        if self.p != 2:
            raise ValueError(f"LassoRegression supports exactly 2 features, got {self.p}")
        if self.y.shape[0] != self.n:
            raise ValueError(f"X has {self.n} rows but y has {self.y.shape[0]} entries")

        # Must be a FLOAT array: with an integer start vector every in-place
        # coordinate update would be silently truncated to an integer.
        self.beta = np.array([[-5.0, 5.0]])

        # Start a fresh trajectory so that calling fit() again does not append
        # to the path recorded by a previous fit.
        self.parameter_list = [list(self.get_parameters(self.beta))]

        # coordinate descent
        for _ in range(self.iterations):
            self.update_parameters()

        return self

    def update_parameters(self):
        """Perform one full sweep over the coordinates and record the result."""
        # Squared column norms: the curvature of the quadratic term per coordinate.
        z = (self.X ** 2).sum(axis=0)

        for i in range(self.p):
            X_i = self.X[:, i]

            # Prediction from every coordinate except i, shape (n, 1).
            Y_pred_wo_i = np.delete(self.X, i, axis=1) @ np.delete(self.beta, i, axis=1).T

            # Correlation of column i with the partial residual (a scalar).
            a = float(X_i @ (self.y - Y_pred_wo_i)[:, 0])

            if self.intercept and i == 0:
                # Unpenalised least-squares update for the intercept column.
                self.beta[0, i] = a / z[i]
            else:
                self.beta[0, i] = self.soft_threshold(a, z[i])

        self.parameter_list.append(list(self.get_parameters(self.beta)))

    def contour_cost_function(self):
        """Evaluate the lasso objective on a grid over ``[-5, 5) x [-5, 5)``.

        Sets ``B1``/``B2`` (meshgrid of b0/b1 values spaced by ``step``) and ``Z``,
        the objective ``(0.5 * ||y - X b||^2 + penalty * ||b||_1) / n`` at each grid
        point, computed from the data stored by ``fit``. The ``1/n`` factor only
        rescales the contour levels.

        Raises:
            RuntimeError: if called before ``fit``.
        """
        if not hasattr(self, "X"):
            raise RuntimeError("call fit() before contour_cost_function()")

        b1 = np.arange(-5, 5, self.step)
        b2 = np.arange(-5, 5, self.step)

        B1, B2 = np.meshgrid(b1, b2)
        self.B1 = B1
        self.B2 = B2

        # One column per grid point: row 0 holds b0, row 1 holds b1.
        coefs = np.vstack([np.ravel(B1), np.ravel(B2)])
        residuals = self.y - self.X @ coefs
        squared_error = (residuals ** 2).sum(axis=0)
        l1_norm = np.abs(coefs).sum(axis=0)

        self.Z = ((0.5 * squared_error + self.penalty * l1_norm) / self.n).reshape(B1.shape)

    def visualize(self, filename: str = 'output.mp4'):
        """Animate the recorded coefficient path over the cost contours.

        One frame is drawn per entry of ``parameter_list`` and the animation is
        written to ``filename``.

        Args:
            filename: output path passed to the animation's ``save`` method.
        """
        self.contour_cost_function()

        # create an animation
        fig, ax = plt.subplots()
        camera = Camera(fig)

        for i, current_b in enumerate(self.parameter_list):
            # The contours are redrawn for every frame because each snap()
            # captures only what was drawn since the previous one.
            cs = ax.contour(self.B1, self.B2, self.Z, 30, cmap='jet')
            ax.clabel(cs, inline=True, fontsize=10)

            ax.scatter(x=current_b[0], y=current_b[1], color='red')
            ax.annotate(f'iteration {i}', (0.05, 0.9), xycoords='axes fraction')
            ax.annotate(f'b0={current_b[0]:.2f}, b1={current_b[1]:.2f}', (0.05, 0.8), xycoords='axes fraction')
            ax.set_xlabel('b0')
            ax.set_ylabel('b1')
            ax.set_aspect('equal', adjustable='box')

            camera.snap()

        animation = camera.animate()
        animation.save(filename)


In [None]:
# sample data: 100 observations of y = 2*x0 + 3*x1 + noise
# Seeded generator local to this cell so the synthetic data set is reproducible.
rng_lasso = np.random.default_rng(0)
X = rng_lasso.standard_normal((100, 2))
beta = np.array([2, 3]).reshape(1, 2)
# Independent N(0, 1) noise for every observation. The previous call,
# np.random.normal(beta.shape[0]), meant normal(loc=1) and returned a single
# scalar, i.e. the same constant offset was added to every row.
Y = X @ beta.T + rng_lasso.normal(size=(X.shape[0], 1))

In [None]:
# Fit the coordinate-descent lasso to the synthetic data with an L1 penalty of 1.
lasso_reg = LassoRegression(penalty=1)
lasso_reg.fit(X, Y)

In [None]:
# Animate the recorded coefficient path over the cost contours; the animation is saved to output.mp4.
lasso_reg.visualize()