In [1]:
# Source: Alexandru Tifrea and Fanny Yang, 2021.
# Based on an earlier version by Sebastian Curi and Andreas Krause.

# Python Notebook Commands
%reload_ext autoreload
%load_ext autoreload
%autoreload 2
%matplotlib inline

# General math and plotting modules.
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

# Widget and formatting modules
import ipywidgets
from ipywidgets import interact, interactive, interact_manual, fixed
from matplotlib import rcParams
rcParams['figure.figsize'] = (10, 6)
rcParams['font.size'] = 16

# Machine Learning library. 
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score
from sklearn import svm
from sklearn import datasets

rcParams['figure.figsize'] = (15, 6)
rcParams['font.size'] = 20

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Kernel Ridge Regression

### Regression with polynomial kernels

In the following we show how the estimator depends on hyperparameters such as the ridge coefficient or the degree of the polynomial used to define the kernel.

In [2]:
def true_regression_fun(X):
    """Ground-truth regression function ``cos(3 * pi * x)``, applied elementwise.

    A slower-varying alternative that was previously tried here is
    ``cos(1.5 * pi * x)``.
    """
    angular_frequency = 3 * np.pi
    return np.cos(angular_frequency * X)

def poly_kernel_regression(n_samples, degree, l2_coef, noise):
    """Fit ridge regression on polynomial features of noisy samples and plot it.

    Parameters
    ----------
    n_samples : int
        Number of training inputs; they are drawn with ``np.random.rand`` and sorted.
    degree : int
        Degree handed to ``PolynomialFeatures``.
    l2_coef : float
        Ridge penalty, used as ``alpha`` of ``Ridge``.
    noise : float
        Scale of the ``np.random.randn`` noise added to the targets.

    The top panel shows the fitted model, the true function and the samples;
    the bottom panel shows the absolute value of every fitted coefficient.
    """
    # Re-seeding on every call restarts the same random stream each time.
    np.random.seed(111)

    x_train = np.sort(np.random.rand(n_samples))
    y_train = true_regression_fun(x_train) + np.random.randn(n_samples) * noise

    # No separate intercept: fit_intercept is off for the ridge step.
    model = make_pipeline(
        PolynomialFeatures(degree),
        Ridge(alpha=l2_coef, fit_intercept=False, solver="svd"),
    )
    model.fit(x_train.reshape(-1, 1), y_train)

    x_grid = np.linspace(-1, 2, 100)
    y_model = model.predict(x_grid.reshape(-1, 1))
    coef_magnitudes = np.abs(model.named_steps["ridge"].coef_)

    fig, (ax_fit, ax_coef) = plt.subplots(2, 1, figsize=(17, 10))

    # Top panel: prediction vs. ground truth, with the training samples.
    ax_fit.plot(x_grid, y_model, linestyle="-", linewidth=3, label="Model")
    ax_fit.plot(x_grid, true_regression_fun(x_grid), linestyle="--", linewidth=3,
                label="True function")
    ax_fit.scatter(x_train, y_train, edgecolor='b', s=20, label="Samples")
    ax_fit.set_xlabel("x")
    ax_fit.set_ylabel("y")
    ax_fit.set_xlim((0., 1.))
    ax_fit.set_ylim((-2, 2))
    ax_fit.legend(loc="lower left")

    # Bottom panel: coefficient magnitude against coefficient index.
    ax_coef.plot(np.arange(coef_magnitudes.shape[0]), coef_magnitudes, linewidth=3)
    ax_coef.set_xlabel("Degree")
    ax_coef.set_ylabel("Abs. value of coefficient")
    plt.show()
    
# Slider controls for the polynomial-kernel demo, keyed by the argument of
# poly_kernel_regression that each one drives.
poly_controls = dict(
    n_samples=ipywidgets.IntSlider(
        value=20, min=5, max=100, step=5,
        description='Number of samples:',
        style={'description_width': 'initial'},
        continuous_update=False,
    ),
    degree=ipywidgets.IntSlider(
        value=10, min=1, max=30, step=1,
        description='Polynomial Degree:',
        style={'description_width': 'initial'},
        continuous_update=False,
    ),
    l2_coef=ipywidgets.FloatSlider(
        value=0., min=0, max=0.01, step=0.001,
        readout_format='.3f',
        description='Ridge coefficient:',
        style={'description_width': 'initial'},
        continuous_update=False,
    ),
    noise=ipywidgets.FloatSlider(
        value=0.5, min=0, max=1, step=0.1,
        readout_format='.2f',
        description='Noise level:',
        style={'description_width': 'initial'},
        continuous_update=False,
    ),
)
interact(poly_kernel_regression, **poly_controls);

interactive(children=(IntSlider(value=20, continuous_update=False, description='Number of samples:', min=5, st…

### Regression with RBF kernels

In the following we show how the estimator depends on hyperparameters like the ridge coefficient or the bandwidth for two commonly used radial basis function kernels: the Gaussian and the Laplacian kernels.

In [23]:
def rbf_kernel_regression(n_samples, kernel, bandwidth, l2_coef, noise):
    """Fit kernel ridge regression on noisy samples and plot the fit.

    Parameters
    ----------
    n_samples : int
        Number of training inputs; they are drawn with ``np.random.rand`` and sorted.
    kernel : str
        'Gaussian' or 'Laplacian'; any other value is handed to
        ``KernelRidge`` unchanged.
    bandwidth : float
        Exponent of the kernel scale: ``gamma = 10 ** (-bandwidth)``.
    l2_coef : float
        Ridge penalty, used as ``alpha`` of ``KernelRidge``.
    noise : float
        Scale of the ``np.random.randn`` noise added to the targets.
    """
    # Re-seeding on every call restarts the same random stream each time.
    np.random.seed(10)

    x_train = np.sort(np.random.rand(n_samples))
    y_train = true_regression_fun(x_train) + np.random.randn(n_samples) * noise

    # Translate the dropdown label into the kernel name KernelRidge receives.
    sklearn_kernel = {'Gaussian': "rbf", 'Laplacian': "laplacian"}.get(kernel, kernel)
    model = KernelRidge(alpha=l2_coef, kernel=sklearn_kernel,
                        gamma=np.power(10., -bandwidth))
    model.fit(x_train.reshape(-1, 1), y_train)

    x_grid = np.linspace(-1, 2, 100)
    y_model = model.predict(x_grid.reshape(-1, 1))

    ax = plt.gca()
    ax.plot(x_grid, y_model, linestyle="-", linewidth=3, label="Model")
    ax.plot(x_grid, true_regression_fun(x_grid), linestyle="--", linewidth=3,
            label="True function")
    ax.scatter(x_train, y_train, edgecolor='b', s=20, label="Samples")
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_xlim((0., 1.))
    ax.set_ylim((-2, 2))
    ax.legend(loc="lower left")
    plt.show()
    
# Controls for the RBF-kernel regression demo, keyed by the argument of
# rbf_kernel_regression that each one drives.
rbf_controls = dict(
    kernel=['Gaussian', 'Laplacian'],
    bandwidth=ipywidgets.FloatSlider(
        value=-1, min=-10, max=10, step=0.1,
        readout_format='.1f',
        description='Bandwidth 10^:',
        style={'description_width': 'initial'},
        continuous_update=False,
    ),
    n_samples=ipywidgets.IntSlider(
        value=30, min=10, max=100, step=10,
        description='Number of samples:',
        style={'description_width': 'initial'},
        continuous_update=False,
    ),
    l2_coef=ipywidgets.FloatSlider(
        value=0., min=0, max=1, step=0.1,
        readout_format='.2f',
        description='Ridge coefficient:',
        style={'description_width': 'initial'},
        continuous_update=False,
    ),
    noise=ipywidgets.FloatSlider(
        value=0.1, min=0, max=1, step=0.01,
        readout_format='.2f',
        description='Noise level:',
        style={'description_width': 'initial'},
        continuous_update=False,
    ),
)
interact(rbf_kernel_regression, **rbf_controls);

interactive(children=(IntSlider(value=30, continuous_update=False, description='Number of samples:', min=10, s…

# RBF kernel classification with SVMs

In [21]:
# Shared settings read as globals by kernelized_svm below.
n_samples = 20  # Total number of points in a dataset (the xor/periodic datasets use n_samples/4 per cluster)
tol = 1e-1  # Forwarded to svm.SVC as its `tol` argument

def laplacian_kernel(X, Y, gamma):
    """Gram matrix of the Laplacian kernel ``exp(-gamma * ||x - y||_1)``.

    Parameters
    ----------
    X : ndarray, indexed as (n_rows, n_features)
    Y : ndarray, indexed as (n_cols, n_features)
    gamma : float
        Multiplier applied to the L1 distance before exponentiation.

    Returns
    -------
    ndarray of shape (X.shape[0], Y.shape[0]) whose entry (i, j) is the
    kernel value between row i of X and row j of Y.
    """
    gram = np.empty((X.shape[0], Y.shape[0]))
    # One column per row of Y: L1 distance from every row of X to that row.
    for j, y_row in enumerate(Y):
        l1_dist = np.abs(X - y_row).sum(axis=1)
        gram[:, j] = np.exp(-(gamma * l1_dist))
    return gram

def _four_cluster_dataset(centers, labels, n_points, noise):
    """Draw one equally sized Gaussian cluster around each entry of ``centers``.

    The global numpy RNG is re-seeded with 42, then ``n_points // len(centers)``
    two-dimensional points are drawn per centre, in order. Only fully populated
    clusters are returned, so no unlabelled filler rows are produced when
    ``n_points`` is not a multiple of the number of centres.

    Returns the stacked points and a float label vector in which ``labels[i]``
    is repeated for every point of cluster ``i``.
    """
    np.random.seed(42)
    step = n_points // len(centers)
    X = np.concatenate([np.asarray(center, dtype=float) + noise * np.random.randn(step, 2)
                        for center in centers])
    Y = np.repeat(np.asarray(labels, dtype=float), step)
    return X, Y


def kernelized_svm(dataset, kernel, reg, bw, noise):
    """Train a kernel SVM on a 2-D toy dataset and plot its decision regions.

    Reads the module-level ``n_samples`` (dataset size) and ``tol`` (SVC
    tolerance).

    Parameters
    ----------
    dataset : str
        One of 'blobs', 'circles', 'moons', 'xor', 'periodic'.
    kernel : str
        'Gaussian' (sklearn's "rbf") or 'Laplacian' (the custom
        ``laplacian_kernel``); any other value is handed to ``svm.SVC`` as is.
    reg : float
        Regularisation exponent: the SVC is built with ``C = 10 ** (-reg)``.
    bw : float
        Bandwidth exponent: the kernel uses ``gamma = 10 ** (-bw)``.
    noise : float
        Noise level of the generated data (scaled by 10 for 'blobs').

    Raises
    ------
    ValueError
        If ``dataset`` is not one of the supported names.
    """
    if dataset == 'blobs':
        X, Y = datasets.make_blobs(n_samples=n_samples, centers=2, random_state=3,
                                   cluster_std=10 * noise)
    elif dataset == 'circles':
        X, Y = datasets.make_circles(n_samples=n_samples, factor=.5, noise=noise,
                                     random_state=42)
    elif dataset == 'moons':
        X, Y = datasets.make_moons(n_samples=n_samples, noise=noise, random_state=42)
    elif dataset == 'xor':
        # Same-label clusters sit on opposite corners of the unit square.
        X, Y = _four_cluster_dataset([(0, 0), (1, 1), (0, 1), (1, 0)],
                                     [1, 1, -1, -1], n_samples, noise)
    elif dataset == 'periodic':
        # Labels alternate along the second coordinate: 0, 1, 2, 3.
        X, Y = _four_cluster_dataset([(0, 0), (0, 2), (0, 1), (0, 3)],
                                     [1, 1, -1, -1], n_samples, noise)
    else:
        raise ValueError(
            "Unknown dataset %r; expected one of 'blobs', 'circles', 'moons', "
            "'xor', 'periodic'." % (dataset,))

    # Keep only samples labelled <= 1, then map label 0 to -1 so that every
    # dataset ends up with targets in {-1, +1}.
    keep = Y <= 1
    X = X[keep, :]
    Y = Y[keep]
    Y[Y == 0] = -1

    # Add the 1 feature (constant third column); the test grid below is built
    # with the same three columns.
    X = np.concatenate((X, np.ones((X.shape[0], 1))), axis=1)

    # Support-vector highlighting is currently switched off for both kernels.
    plot_support = False
    gamma = np.power(10., -bw)
    coef0 = 0
    if kernel == 'Gaussian':
        kernel = "rbf"
    elif kernel == 'Laplacian':
        # A callable kernel receives two sample matrices and returns their Gram matrix.
        kernel = lambda X, Y: laplacian_kernel(X, Y, gamma)

    classifier = svm.SVC(kernel=kernel, C=np.power(10., -reg), gamma=gamma, coef0=coef0, tol=tol)
    classifier.fit(X, Y)

    # plot the line, the points, and the nearest vectors to the plane
    plt.figure()
    ax = plt.axes()
    positive = Y == 1
    negative = Y == -1
    plot_data(X[positive, 0], X[positive, 1], fig=ax,
              options={'marker': 'r*', 'label': '+'})
    plot_data(X[negative, 0], X[negative, 1], fig=ax,
              options={'marker': 'bo', 'label': '-'})

    if plot_support:
        plt.scatter(classifier.support_vectors_[:, 0], classifier.support_vectors_[:, 1], s=80,
                    facecolors='none', edgecolors='k')

    # Plot window: bounding box of the data padded by 1 on every side.
    mins = np.min(X, 0)
    maxs = np.max(X, 0)
    x_min, x_max = mins[0] - 1, maxs[0] + 1
    y_min, y_max = mins[1] - 1, maxs[1] + 1

    # Evaluate the decision function on a 200 x 200 grid (plus the constant column).
    XX, YY = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
    Xtest = np.c_[XX.ravel(), YY.ravel(), np.ones_like(XX.ravel())]
    Z = classifier.decision_function(Xtest).reshape(XX.shape)

    # Put the result into a color plot: shaded predicted class plus the
    # zero level set of the decision function as the boundary.
    plt.contourf(XX, YY, Z > 0, cmap=plt.cm.jet, alpha=0.3)
    plt.contour(XX, YY, Z, colors=['k', 'k', 'k'], linestyles=['-'], levels=[0])

    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    # Render explicitly, like the other interact callbacks in this notebook.
    plt.show()
    
    
def plot_data(X, Y, fig=None, options=None):
    """Plot paired coordinates as markers on a matplotlib Axes.

    Parameters
    ----------
    X, Y : array-like
        Horizontal and vertical coordinates, forwarded to ``plot``.
    fig : object with a ``plot`` method, optional
        The axes to draw on (the caller in this notebook passes the result of
        ``plt.axes()``). Defaults to the current axes, ``plt.gca()``.
    options : dict, optional
        Styling overrides. Recognised keys and their defaults:
        'marker' (format string, 'b*'), 'label' ('Raw data'),
        'fillstyle' ('full') and 'size' (marker size, 10).
    """
    # None instead of a shared mutable default dict.
    if options is None:
        options = {}
    # Without an explicit target, draw on whatever axes is current.
    if fig is None:
        fig = plt.gca()
    fig.plot(X, Y, options.get('marker', 'b*'),
             label=options.get('label', 'Raw data'),
             fillstyle=options.get('fillstyle', 'full'),
             ms=options.get('size', 10))


# Controls for the kernel-SVM demo, keyed by the argument of kernelized_svm
# that each one drives.
svm_controls = dict(
    dataset=['blobs', 'circles', 'moons', 'xor', 'periodic'],
    kernel=['Gaussian', 'Laplacian'],
    reg=ipywidgets.FloatSlider(
        value=-3, min=-3, max=3, step=0.5,
        readout_format='.1f',
        description='Regularization 10^:',
        style={'description_width': 'initial'},
        continuous_update=False,
    ),
    bw=ipywidgets.FloatSlider(
        value=-1, min=-3, max=3, step=0.1,
        readout_format='.1f',
        description='Bandwidth 10^:',
        style={'description_width': 'initial'},
        continuous_update=False,
    ),
    noise=ipywidgets.FloatSlider(
        value=0.05, min=0.01, max=0.3, step=0.01,
        readout_format='.2f',
        description='Noise level:',
        style={'description_width': 'initial'},
        continuous_update=False,
    ),
)
interact(kernelized_svm, **svm_controls);

interactive(children=(Dropdown(description='dataset', options=('blobs', 'circles', 'moons', 'xor', 'periodic')…