In [71]:
# Source: Alexandru Tifrea and Fanny Yang, 2021.
# Based on an earlier version by Sebastian Curi and Andreas Krause.

# Python Notebook Commands
%reload_ext autoreload
%load_ext autoreload
%autoreload 2
%matplotlib inline

from IPython.core.display import display, HTML

display(HTML("<style>.container { width:100% !important; }</style>"))

# General math and plotting modules.
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

import plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Widget and formatting modules
import ipywidgets
from ipywidgets import interact, interactive, interact_manual, fixed, widgets
from matplotlib import rcParams

rcParams['figure.figsize'] = (10, 6)
rcParams['font.size'] = 16

# Machine Learning library.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import Ridge, Lasso, LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn import svm
from sklearn import datasets
from sklearn.metrics import mean_squared_error

import warnings

rcParams['figure.figsize'] = (15, 6)
rcParams['font.size'] = 20

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload



Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display



# Regularized Polynomial Regression

### Regression with polynomial features

In the following we show how the estimator depends on hyperparameters like the regularization coefficient (for LASSO and ridge penalties) or the degree of the polynomial used for the features.


Let's consider 1-dimensional data $\{(x_i, y_i)\}_{i=0}^n \subset \mathbb{R} \times \mathbb{R}$. We use a polynomial kernel of the form $k(x, z)=1 + xz + (xz)^2+...+(xz)^d$ for the regression task. This kernel induces a feature representation of the data in the space of monomials of degree at most $d$, i.e. $\varphi: \mathbb{R} \rightarrow span(\{1, X, X^2, ..., X^d\})$. Minimizing the kernel regression objective is equivalent to performing linear regression in this feature space. The maximum degree controls the complexity of the kernel function.

The kernel ridge regression objective that is minimized below can be written as: $L(w; \lambda) := \sum_{i=0}^n (y_i - w^T\varphi(x_i))^2 + \lambda ||w||_2^2 $.

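Below we show the mean squared error (MSE) computed on the training points, as well as the L2 error of the estimator compared to the ground truth function $f^*$, i.e. $||\hat{f}-f^*||_{L_2}$. In the code, this L2 error is estimated as the mean squared difference between $\hat{f}$ and $f^*$ over a set of test points covering $[0, 1]$ (an evenly spaced grid together with the training inputs), so the reported value approximates the squared norm $||\hat{f}-f^*||_{L_2}^2$.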

In [80]:
def true_regression_fun(X):
    """Evaluate the ground-truth function f*(x) = cos(3 * pi * x) elementwise on X."""
    angular_frequency = 3 * np.pi
    return np.cos(angular_frequency * X)


def _make_regressor(reg_type, reg_coef):
    """Return the unfitted linear model selected by the regularization settings.

    Ridge or LASSO is used only when ``reg_coef`` is strictly positive and
    ``reg_type`` names it; every other combination yields plain least squares.
    """
    if reg_coef > 0:
        if reg_type == "ridge":
            return Ridge(alpha=reg_coef, fit_intercept=False, solver="svd")
        if reg_type == "lasso":
            return Lasso(alpha=reg_coef, fit_intercept=False, tol=1e-2, max_iter=10000)
    return LinearRegression(fit_intercept=False)


def poly_kernel_regression(n_samples, degree, reg_type, reg_coef, noise):
    """Fit a (regularized) polynomial regression to noisy samples and plot it.

    Draws ``n_samples`` inputs uniformly from [0, 1), generates noisy targets
    from ``true_regression_fun``, fits a linear model on polynomial features
    and shows a two-row plotly figure: the fit against the true function and
    the samples (top), and the absolute value of each coefficient (bottom).
    The figure title reports the training MSE and the MSE between the model
    and the true function on the test points.

    Parameters
    ----------
    n_samples : int
        Number of training points.
    degree : int
        Maximum degree of the polynomial features.
    reg_type : str
        ``"ridge"`` or ``"lasso"``; only used when ``reg_coef > 0``.
    reg_coef : float
        Regularization strength (``alpha``); a non-positive value falls back
        to unregularized least squares.
    noise : float
        Multiplier applied to the standard Gaussian noise added to the targets.

    Returns
    -------
    None
        The figure is displayed via ``fig.show()``.
    """
    # A local generator with the fixed seed reproduces the same sample stream
    # as seeding the global NumPy RNG, without clobbering global RNG state on
    # every widget callback.
    rng = np.random.RandomState(101)

    X = np.sort(rng.rand(n_samples))
    y = true_regression_fun(X) + rng.randn(n_samples) * noise

    clf = make_pipeline(PolynomialFeatures(degree), _make_regressor(reg_type, reg_coef))
    clf.fit(X[:, np.newaxis], y)

    # Test points: an even grid slightly overshooting [0, 1], merged with the
    # training inputs so that the plotted curve passes through their abscissae.
    X_test = np.sort(np.concatenate((np.linspace(0 - 1e-4, 1 + 1e-4, 100), X)))
    y_test_true = true_regression_fun(X_test)
    # Predict once on the test points and reuse for both the error and the plot.
    y_test_pred = clf.predict(X_test[:, np.newaxis])

    train_mse = mean_squared_error(
        y_true=y,
        y_pred=clf.predict(X[:, np.newaxis])
    )
    test_mse = mean_squared_error(
        y_true=y_test_true,
        y_pred=y_test_pred
    )

    # The last pipeline step is the fitted linear model, whichever type it is.
    coefficients = clf[-1].coef_

    fig = make_subplots(rows=2, cols=1, row_width=[0.15, 0.35])
    fig.add_trace(go.Scatter(x=X_test,
                             y=y_test_pred,
                             line_width=3,
                             name="Model"),
                  row=1,
                  col=1)
    fig.add_trace(go.Scatter(x=X_test,
                             y=y_test_true,
                             line_dash="dash",
                             line_width=3,
                             name="True function"),
                  row=1,
                  col=1)
    fig.add_trace(go.Scatter(x=X,
                             y=y,
                             mode="markers",
                             marker_size=7,
                             marker_symbol="x",
                             marker_color="black",
                             name="Samples"),
                  row=1,
                  col=1)
    fig.add_trace(go.Scatter(x=np.arange(coefficients.shape[0]),
                             y=np.fabs(coefficients),
                             line_width=3,
                             showlegend=False),
                  row=2,
                  col=1)

    fig.update_layout(
        title=f"Training MSE = {train_mse:.6}<br>L2 error = {test_mse:.6}",
        margin=go.layout.Margin(
            l=0,  #left margin
            r=0,  #right margin
            b=0,  #bottom margin
            t=60,  #top margin
        ),
        xaxis1_range=[0, 1],
        xaxis1_title="x",
        yaxis1_range=[-2, 2],
        yaxis1_title="y",
        xaxis2_title="Degree",
        yaxis2_title="Abs. value of coefficient",
    )
    fig.show()


# Interactive controls for `poly_kernel_regression`. Sliders use
# `continuous_update=False` so the callback runs when a slider is released
# rather than on every intermediate value while dragging.
_ = interact(
    poly_kernel_regression,
    n_samples=ipywidgets.IntSlider(value=20,
                                   min=5,
                                   max=100,
                                   step=5,
                                   description='Number of samples:',
                                   style={'description_width': 'initial'},
                                   continuous_update=False),
    degree=ipywidgets.IntSlider(value=10,
                                min=1,
                                max=30,
                                step=1,
                                description='Polynomial Degree:',
                                style={'description_width': 'initial'},
                                continuous_update=False),
    # `Dropdown` has no `continuous_update` trait (it is a slider/text option),
    # so it is not passed here; passing it would hand an unrecognized argument
    # to the widget constructor.
    reg_type=ipywidgets.Dropdown(options=["lasso", "ridge"],
                                 value="ridge",
                                 description='Regularization type:',
                                 disabled=False,
                                 style={'description_width': 'initial'}),
    reg_coef=ipywidgets.FloatSlider(value=0.,
                                    min=0,
                                    max=0.001,
                                    step=0.0001,
                                    readout_format='.4f',
                                    description='Regularization coefficient:',
                                    style={'description_width': 'initial'},
                                    continuous_update=False),
    noise=ipywidgets.FloatSlider(value=0.5,
                                 min=0,
                                 max=1,
                                 step=0.1,
                                 readout_format='.2f',
                                 description='Noise level:',
                                 style={'description_width': 'initial'},
                                 continuous_update=False),
)

interactive(children=(IntSlider(value=20, continuous_update=False, description='Number of samples:', min=5, st…

### Impact of noise on the estimator norm

In [82]:
def norm_increase_with_noise(n_samples, degree):
    """Plot how the norm of the least-squares coefficients grows with label noise.

    One set of inputs and one Gaussian noise vector are drawn with a fixed
    seed; the noise vector is then scaled by each noise level 0, 0.1, ..., 1
    and added to ``true_regression_fun(X)``. For every level an unregularized
    polynomial regression of the given degree is fitted, and the l1 and l2
    norms of its coefficients (each divided by ``sqrt(degree)``) are plotted
    against the noise level in two side-by-side panels.

    Parameters
    ----------
    n_samples : int
        Number of training points.
    degree : int
        Maximum degree of the polynomial features.

    Returns
    -------
    None
        The figure is displayed via ``fig.show()``.
    """
    # A local generator with the fixed seed reproduces the same sample stream
    # as seeding the global NumPy RNG, without clobbering global RNG state on
    # every widget callback.
    rng = np.random.RandomState(101)

    # linspace avoids the endpoint ambiguity of a float-step arange while
    # producing the same 11 noise levels 0, 0.1, ..., 1.
    noise_values = np.linspace(0, 1, 11)

    X = np.sort(rng.rand(n_samples))
    gauss_noise = rng.randn(n_samples)

    # The inputs do not change across noise levels, so the noiseless targets
    # and the polynomial design matrix are computed once outside the loop.
    clean_targets = true_regression_fun(X)
    features = PolynomialFeatures(degree).fit_transform(X[:, np.newaxis])

    l1_norms, l2_norms = [], []
    for noise in noise_values:
        model = LinearRegression(fit_intercept=False)
        model.fit(features, clean_targets + gauss_noise * noise)
        l1_norms.append(np.linalg.norm(model.coef_, ord=1))
        l2_norms.append(np.linalg.norm(model.coef_, ord=2))

    # NOTE(review): both norms are scaled by sqrt(degree) although the
    # coefficient vector presumably has degree + 1 entries (bias included);
    # kept as in the original -- confirm the intended normalization.
    l1_norms = np.array(l1_norms) / np.sqrt(degree)
    l2_norms = np.array(l2_norms) / np.sqrt(degree)

    fig = make_subplots(rows=1, cols=2)
    fig.add_trace(go.Scatter(x=noise_values,
                             y=l1_norms,
                             line_width=3,
                             name="l1 norm",
                             showlegend=False),
                  row=1,
                  col=1)
    fig.add_trace(go.Scatter(x=noise_values,
                             y=l2_norms,
                             line_width=3,
                             name="l2 norm",
                             showlegend=False),
                  row=1,
                  col=2)

    fig.update_layout(
        margin=go.layout.Margin(
            l=0,  #left margin
            r=0,  #right margin
            b=0,  #bottom margin
            t=10,  #top margin
        ),
        xaxis1_title="Noise level",
        # Raw strings: `\e` and `\ ` are not valid Python escape sequences, so
        # the non-raw form triggers an invalid-escape warning. The resulting
        # text is unchanged.
        yaxis1_title=r"$\ell_1\ norm$",
        xaxis2_title="Noise level",
        yaxis2_title=r"$\ell_2\ norm$",
    )
    fig.show()
    
# Interactive controls for `norm_increase_with_noise`: one slider per argument.
# `continuous_update=False` makes the callback run on slider release only.
_ = interact(
    norm_increase_with_noise,
    n_samples=ipywidgets.IntSlider(
        description='Number of samples:',
        value=20,
        min=10,
        max=100,
        step=5,
        continuous_update=False,
        style={'description_width': 'initial'},
    ),
    degree=ipywidgets.IntSlider(
        description='Polynomial Degree:',
        value=10,
        min=10,
        max=30,
        step=1,
        continuous_update=False,
        style={'description_width': 'initial'},
    ),
)

interactive(children=(IntSlider(value=20, continuous_update=False, description='Number of samples:', min=10, s…