In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib notebook

In [2]:
import numpy as np
import torch
import falkon
import os, sys
sys.path.append("../benchmark")
import time
import matplotlib.pyplot as plt
import math
from libsvmdata import fetch_libsvm
import libsvmdata
from collections import defaultdict
from optimizing_centers_fns import *

In [3]:
import falkon
from falkon.hypergrad.complexity_reg import GPComplexityReg, SimpleFalkonComplexityReg, TrainableSGPR
from falkon.hypergrad.common import test_train_predict
from summary import get_writer
writer = get_writer("test")

## What to do here

 - Download SVMLight datasets
 - Run the SGPR with RBF kernel optimizing centers
 - Compare with the PCA centers
 
 
We use LessIsMore notation to express the different projected kernel-ridge problems.


We have that $Z_m$ is the projected feature-map. In the case of Nystrom uniform sampling it is the kernel feature-map $\phi$ applied to the chosen samples $\tilde{x}_i, \dots, \tilde{x}_m$.

The solution to the learning problem is
$$
\tilde{f}_m(x) = \sum_{i=1}^m \tilde{\alpha}_i z_i(x)
$$
where the coefficient vector is
$$
\tilde{\alpha} = (Z_m Z_n^\top Z_n Z_m^\top + \lambda n Z_m Z_m^\top)^\dagger Z_m Z_n^\top Y
$$

Let $K = Z_n Z_n^\top$ be the kernel matrix with eigendecomposition $K = \sum_{i=1}^n \sigma_i u_i u_i^\top$. By choosing $Z_m = U_m^\top Z_n$ we 

In [4]:
falkon_opt = falkon.FalkonOptions(keops_active="no", use_cpu=True)

### Short analysis of any given dataset to look for best sigma

In [11]:
dset_name = "space_ga"
X, y = fetch_libsvm(dset_name)
X = torch.from_numpy(np.asarray(X.todense())).float()
Y = torch.from_numpy(y.reshape(-1, 1)).float()
print(X.shape, Y.shape)
Xtr, Ytr, Xts, Yts = preprocess_dataset(X, Y, n_train=1000)

Dataset: space_ga
Downloading data from https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/space_ga (552 kB)

file_sizes: 100%|█████████████████████████████| 565k/565k [00:01<00:00, 379kB/s]
Successfully downloaded file to /home/giacomo/data/libsvm/regression/space_ga
Loading svmlight file...
torch.Size([3107, 6]) torch.Size([3107, 1])


In [12]:
penalty_init = torch.tensor(8., dtype=Xtr.dtype)
sigma_init = torch.tensor([dset_sigmas_15la[dset_name]], dtype=Xtr.dtype)
kernel = falkon.kernels.GaussianKernel(sigma_init, opt=falkon_opt)

In [13]:
print(krr_train_error(Xtr, Xts, Ytr, Yts, kernel, torch.exp(-penalty_init)))
print(krr_test_error(Xtr, Xts, Ytr, Yts, kernel, torch.exp(-penalty_init)))

0.0135438675
0.013704878


## Error vs. M plots.

We solve the learning problem in closed-form for different values of `M`. We compare the error obtained for different versions of the problem:

 1. Full kernel-ridge regression is the baseline, does not change with M.
 2. Nystrom KRR with uniformly chosen centers
 3. ~Nystrom KRR with random Gaussian centers
 4. ~Nystrom KRR with SVD centers

In [5]:
def get_svd_errs(m_list, la_list, err_type, Xtr, Xts, Ytr, Yts, kernel):
    if err_type == "train":
        get_err_fn = get_train_error
    else:
        get_err_fn = get_test_error
    
    svd_errs = defaultdict(list)
    for m in m_list:
        for la in la_list:
            Bnm, Gmm, Bnm_test = get_svd_bg(Xtr, m, kernel, Xts)
            svd_errs[la].append(get_err_fn(Bnm, Bnm_test, Gmm, Ytr, Yts, la))
        print(m, end=".")
    return svd_errs
def error_evolution(m_list, num_reps, err_type, Xtr, Xts, Ytr, Yts, kernel, la):
    if err_type == "train":
        krr_err_fn = krr_train_error
        get_err_fn = get_train_error
    else:
        krr_err_fn = krr_test_error
        get_err_fn = get_test_error
    
    Kfull = kernel(Xtr, Xtr)
    err_krr = krr_err_fn(Xtr, Xts, Ytr, Yts, kernel, la)

    err_random, err_nystrom, err_svd = [], [], []
    err_random_std, err_nystrom_std = [], []
    fro_random, fro_nystrom, fro_svd = [], [], []
    for m in m_list:
        errs = []
        for i in range(num_reps):
            Bnm, Gmm, Bnm_test = get_random_bg(Xtr, m, kernel, Xts)
            errs.append(get_err_fn(Bnm, Bnm_test, Gmm, Ytr, Yts, la))
        err_random.append(np.mean(errs))
        err_random_std.append(np.std(errs))

        errs = []
        for i in range(num_reps):
            Bnm, Gmm, Bnm_test = get_nystrom_bg(Xtr, m, kernel, Xts)
            errs.append(get_err_fn(Bnm, Bnm_test, Gmm, Ytr, Yts, la))
        err_nystrom.append(np.mean(errs))
        err_nystrom_std.append(np.std(errs))

        Bnm, Gmm, Bnm_test = get_svd_bg(Xtr, m, kernel, Xts)
        err_svd.append(get_err_fn(Bnm, Bnm_test, Gmm, Ytr, Yts, la))

        # Frobenius error to full kernel
        Bnm, Gmm, Bnm_test = get_random_bg(Xtr, m, kernel, Xts)
        Ktilde = Bnm @ torch.pinverse(Gmm) @ Bnm.T
        fro_random.append(torch.sum((Kfull - Ktilde)**2))

        Bnm, Gmm, Bnm_test = get_nystrom_bg(Xtr, m, kernel, Xts)
        Ktilde = Bnm @ torch.pinverse(Gmm) @ Bnm.T
        fro_nystrom.append(torch.sum((Kfull - Ktilde)**2))

        Bnm, Gmm, Bnm_test = get_svd_bg(Xtr, m, kernel, Xts)
        Ktilde = Bnm @ torch.pinverse(Gmm) @ Bnm.T
        fro_svd.append(torch.sum((Kfull - Ktilde)**2))
        print(m, end=".")
    return {
        "err_random": np.asarray(err_random),
        "err_nystrom": np.asarray(err_nystrom),
        "err_svd": np.asarray(err_svd),
        "err_random_std": np.asarray(err_random_std),
        "err_nystrom_std": np.asarray(err_nystrom_std),
        "fro_random": np.asarray(fro_random),
        "fro_nystrom": np.asarray(fro_nystrom),
        "fro_svd": np.asarray(fro_svd),
        "err_krr": err_krr
    }

In [30]:
def plot_error_evolution(m_list, data, err_type, figax=None, title=""):
    if figax is None:
        fig, ax = plt.subplots()
    else:
        fig, ax = figax
    ax1 = ax.twinx()
    # ax.plot(m_list, err_data["err_random"], label="random", color="#381642")
    # ax.fill_between(m_list, 
    #                 err_data["err_random"] - err_data["err_random_std"], 
    #                 err_data["err_random"] + err_data["err_random_std"],
    #                 color="#7f528c", alpha=0.2)
    # ax1.plot(m_list, err_data["fro_random"], color="#381642", linestyle="--", label="random")

    ax.plot(m_list, data["err_nystrom"], label="nystrom", color="#1a3f93")
    ax.fill_between(m_list, 
                    data["err_nystrom"] - data["err_nystrom_std"], 
                    data["err_nystrom"] + data["err_nystrom_std"],
                    color="#738bc4", alpha=0.2)
    if "fro_nystrom" in data:
        ax1.plot(m_list, data["fro_nystrom"], color="#1a3f93", linestyle="--", label="nystrom")

    if "err_svd" in data:
        ax.plot(m_list, data["err_svd"], label="SVD", color="#366d0e")
    if "err_svd0" in data:
        ax.plot(m_list, data["err_svd0"], label="SVD($\lambda=0$)", color="k")
    if "err_svdhl" in data:
        ax.plot(m_list, data["err_svdhl"], label="SVD($\lambda = 10^{-3}$)", color="b")
    if "fro_svd" in data:
        ax1.plot(m_list, data["fro_svd"], color="#366d0e", linestyle="--", label="SVD")

    ax.plot(m_list, [data["err_krr"].item()] * len(m_list), color="red", label="Full KRR")

    ax1.set_yscale("log")

    ax.legend(loc="best")
    ax.grid()

    ax.set_xlabel("M")
    ax.set_ylabel("%s MSE" % (err_type))
    ax1.set_ylabel("Frobenius norm to full kernel")
    ax.set_title(title)
    return fig, ax

### All-Dataset Plot

In [1680]:
for dset_name in dset_sigmas_15la.keys():
    Xtr, Ytr, Xts, Yts, penalty_init, sigma_init, kernel = load_dset(dset_name, penalty=20)
    m_list = list(range(1, 60, 3))
    train_err_data = error_evolution(m_list, 15, "train", Xtr, Xts, Ytr, Yts, kernel, torch.exp(-penalty_init))
    test_err_data = error_evolution(m_list, 15, "test", Xtr, Xts, Ytr, Yts, kernel, torch.exp(-penalty_init))
    fig, ax = plt.subplots(ncols=2, figsize=(10, 4))
    title = "Train data, %s. $\sigma=%.1f$, $\lambda=%.1e$" % (
        dset_name, sigma_init.item(), torch.exp(-penalty_init).item())
    plot_error_evolution(m_list, train_err_data, "train", (fig, ax[0]), title)
    title = "Test data, %s. $\sigma=%.1f$, $\lambda=%.1e$" % (
        dset_name, sigma_init.item(), torch.exp(-penalty_init).item())
    plot_error_evolution(m_list, test_err_data, "test", (fig, ax[1]), title)
    fig.tight_layout()
    fig.savefig("/home/giacomo/Dropbox/unige/hyperopt/figures/%s_err_with_m_280420_v4.png" % (dset_name), dpi=400)

Dataset: cpusmall
torch.Size([8192, 12]) torch.Size([8192, 1])
1.4.7.10.13.16.19.22.25.28.31.34.37.40.43.46.49.52.55.58.1.4.7.10.13.16.19.22.25.28.31.34.37.40.43.46.49.52.55.58.

  


<IPython.core.display.Javascript object>

Dataset: abalone
torch.Size([4177, 8]) torch.Size([4177, 1])
1.4.7.10.13.16.19.22.25.28.31.34.37.40.43.46.49.52.55.58.1.4.7.10.13.16.19.22.25.28.31.34.37.40.43.46.49.52.55.58.

  


<IPython.core.display.Javascript object>

Dataset: space_ga
torch.Size([3107, 6]) torch.Size([3107, 1])
1.4.7.10.13.16.19.22.25.28.31.34.37.40.43.46.49.52.55.58.1.4.7.10.13.16.19.22.25.28.31.34.37.40.43.46.49.52.55.58.

  


<IPython.core.display.Javascript object>

Dataset: svmguide1
torch.Size([3089, 4]) torch.Size([3089, 1])
1.4.7.10.13.16.19.22.25.28.31.34.37.40.43.46.49.52.55.58.1.4.7.10.13.16.19.22.25.28.31.34.37.40.43.46.49.52.55.58.

  


<IPython.core.display.Javascript object>

Dataset: cadata
torch.Size([20640, 8]) torch.Size([20640, 1])
1.4.7.10.13.16.19.22.25.28.31.34.37.40.43.46.49.52.55.58.1.4.7.10.13.16.19.22.25.28.31.34.37.40.43.46.49.52.55.58.

  


<IPython.core.display.Javascript object>

### SVD Solution with Different Lambdas

In [89]:
dset_name = "space_ga"
Xtr, Ytr, Xts, Yts, penalty_init, sigma_init, kernel = load_dset(dset_name, penalty=15, dtype=torch.float64)

Dataset: space_ga
torch.Size([3107, 6]) torch.Size([3107, 1])


In [94]:
m_list = np.arange(2, 100, 4)
la_list = np.logspace(-10, -2, 6)

train_err_data = error_evolution(m_list, 5, "train", Xtr, Xts, Ytr, Yts, kernel, torch.exp(-penalty_init))
train_svd_data = get_svd_errs(m_list, la_list, "train", Xtr, Xts, Ytr, Yts, kernel)
test_err_data = error_evolution(m_list, 5, "test", Xtr, Xts, Ytr, Yts, kernel, torch.exp(-penalty_init))
test_svd_data = get_svd_errs(m_list, la_list, "test", Xtr, Xts, Ytr, Yts, kernel)

2.6.10.14.18.22.26.30.34.38.42.46.50.54.58.62.66.70.74.78.82.86.90.94.98.2.6.10.14.18.22.26.30.34.38.42.46.50.54.58.62.66.70.74.78.82.86.90.94.98.2.6.10.14.18.22.26.30.34.38.42.46.50.54.58.62.66.70.74.78.82.86.90.94.98.2.6.10.14.18.22.26.30.34.38.42.46.50.54.58.62.66.70.74.78.82.86.90.94.98.

In [95]:
del train_err_data['err_random']
del train_err_data['fro_random']
del train_err_data['fro_svd']
del train_err_data['fro_nystrom']

In [96]:
fig, ax = plt.subplots(figsize=(9, 5))
title = "Train data, %s. $\sigma=%.1f$, $\lambda=%.1e$" % (
    dset_name, sigma_init.item(), torch.exp(-penalty_init).item())
fig, ax = plot_error_evolution(m_list, train_err_data, "train", (fig, ax), title)
for l in train_svd_data.keys():
    ax.plot(m_list, train_svd_data[l], label="SVD($\lambda=%.1e$)" % (l), lw=2, linestyle=":")
ax.get_legend().remove()
ax.legend(bbox_to_anchor=(1.1, 0.95))
fig.tight_layout()

<IPython.core.display.Javascript object>

In [367]:
fig, ax = plt.subplots(figsize=(9, 5))
title = "Test data, %s. $\sigma=%.1f$, $\lambda=%.1e$" % (
    dset_name, sigma_init.item(), torch.exp(-penalty_init).item())
fig, ax = plot_error_evolution(m_list, test_err_data, "test", (fig, ax), title)
for l in test_svd_data.keys():
    ax.plot(m_list, test_svd_data[l], label="SVD($\lambda=%.1e$)" % (l), lw=2, linestyle=":")
ax.get_legend().remove()
ax.legend(bbox_to_anchor=(1.1, 0.95))
fig.tight_layout()

<IPython.core.display.Javascript object>

### Individual Dataset plots

In [1608]:
title = "Train data, %s. $\sigma=%.1f$, $\lambda=%.1e$" % (
    dset_name, sigma_init.item(), torch.exp(-penalty_init).item())
plot_error_evolution(m_list, train_err_data, "train", None, title)

<IPython.core.display.Javascript object>

(<Figure size 640x480 with 2 Axes>,
 <matplotlib.axes._subplots.AxesSubplot at 0x7fe0ef32f910>)

In [1603]:
title = "Train data, %s. $\sigma=%.1f$, $\lambda=%.1e$" % (
    dset_name, sigma_init.item(), torch.exp(-penalty_init).item())
plot_error_evolution(m_list, train_err_data, "train", None, title)

<IPython.core.display.Javascript object>

(<Figure size 640x480 with 2 Axes>,
 <matplotlib.axes._subplots.AxesSubplot at 0x7fe0ef57c250>)

In [1600]:
title = "Train data, %s. $\sigma=%.1f$, $\lambda=%.1e$" % (
    dset_name, sigma_init.item(), torch.exp(-penalty_init).item())
plot_error_evolution(m_list, train_err_data, "train", None, title)

<IPython.core.display.Javascript object>

(<Figure size 640x480 with 2 Axes>,
 <matplotlib.axes._subplots.AxesSubplot at 0x7fe0ef5b3790>)

In [1597]:
title = "Train data, %s. $\sigma=%.1f$, $\lambda=%.1e$" % (
    dset_name, sigma_init.item(), torch.exp(-penalty_init).item())
plot_error_evolution(m_list, train_err_data, "train", None, title)

<IPython.core.display.Javascript object>

(<Figure size 640x480 with 2 Axes>,
 <matplotlib.axes._subplots.AxesSubplot at 0x7fe0ef898750>)

In [1469]:
test_err_data = error_evolution(m_list, 15, "test", Xtr, Xts, Ytr, Yts, kernel, torch.exp(-penalty_init))

1.4.7.10.13.16.19.22.25.28.31.34.37.40.43.46.49.52.55.58.61.64.67.70.73.76.79.82.85.88.91.94.97.

In [1476]:
fig, ax = plt.subplots(ncols=2, figsize=(10, 4))
title = "Train data, %s. $\sigma=%.1f$, $\lambda=%.1e$" % (
    dset_name, sigma_init.item(), torch.exp(-penalty_init).item())
plot_error_evolution(m_list, train_err_data, "train", (fig, ax[0]), title)
title = "Test data, %s. $\sigma=%.1f$, $\lambda=%.1e$" % (
    dset_name, sigma_init.item(), torch.exp(-penalty_init).item())
plot_error_evolution(m_list, test_err_data, "test", (fig, ax[1]), title)
fig.tight_layout()
fig.savefig("%s_err_with_m.png" % (dset_name), dpi=600)

<IPython.core.display.Javascript object>

## SGPR-like training plots

In [7]:
def train_gpflow_sgpr(penalty_init, sigma_init, centers_init, only_trace, 
                      lr, epochs, Xtr, Ytr, Xts, Yts, kernel):
    model = TrainableSGPR(sigma_init=sigma_init,
                          penalty_init=penalty_init,
                          centers_init=centers_init,
                          opt_centers=True,
                          opt_sigma=True,
                          opt_penalty=True,
                          num_epochs=epochs,
                          learning_rate=lr,
                          err_fn=mse)
    model.train(Xtr, Ytr, Xts, Yts)

In [8]:
def opt_plot(err_data, M, title, figax=None, legend=True):
    if figax is None:
        fig, ax = plt.subplots()
    else:
        fig, ax = figax
    num_epochs = len(err_data["tr_errs"])
    ax.plot(range(num_epochs), err_data["tr_errs"], label="Train MSE", color="#2f2f8c", lw=2)
    ax.plot(range(num_epochs), err_data["ts_errs"], label="Test MSE", color="#c67fb7", lw=2)

    ax1 = ax.twinx()
    ax1.plot(range(num_epochs), err_data["fro_errs"], color="#3b602a", 
             linestyle="--", label="Frobenius error", lw=2)


    ax.scatter(num_epochs + 2, err_data["tr_err_krr"], label="KRR(full) train MSE", 
               s=100, marker="*", color="#2f2f8c")
    ax.scatter(num_epochs + 2, err_data["ts_err_krr"], label="KRR(full) test MSE", 
               s=100, marker="*", color="#c67fb7")

    ax.scatter(num_epochs + 7, err_data["tr_err_svd"], label="SVD(%d) train MSE" % (M), 
               s=100, marker="o", color="#2f2f8c")
    ax.scatter(num_epochs + 7, err_data["ts_err_svd"], label="SVD(%d) test MSE" % (M), 
               s=100, marker="o", color="#c67fb7")


    if legend:
        lines, labels = ax.get_legend_handles_labels()
        lines2, labels2 = ax1.get_legend_handles_labels()
        ax.legend(lines + lines2, labels + labels2, loc="best")

    ax.grid()
    ax.set_xlabel("Epoch")
    ax.set_ylabel("MSE")
    ax1.set_ylabel("Frobenius Error")
    ax1.set_yscale('log')
    ax.set_title(title)
    fig.tight_layout()
    return fig, ax
def get_title(model_name, M, train_only_trace, dset_name, sigma_init=None, penalty_init=None):
    title = "%s - %s - M=%d" % (dset_name, model_name, M)
    if train_only_trace:
        title += " - TRACE optimized"
    if sigma_init is not None:
        title += " - $\sigma=%.1f$" % (sigma_init.item())
    if penalty_init is not None:
        title += " - $\lambda=%.2e$" % (penalty_init.item())
    return title

### ONLY-TRACE optimization

In [59]:
centers_init.shape[0]

100

In [None]:
M = 20
num_epochs = 200
train_only_trace = True
learning_rate = 0.2
only_trace_100 = {}
for dset_name in ["cpusmall", "space_ga"]:
    Xtr, Ytr, Xts, Yts, penalty_init, sigma_init, kernel = load_dset(dset_name, penalty=1e-3)
    centers_init = torch.randn(M, Xtr.shape[1], dtype=Xtr.dtype)
    res_flk = train_sgpr_like(opt_model="SGPR", penalty_init=penalty_init, sigma_init=sigma_init, 
                              centers_init=centers_init, only_trace=train_only_trace, lr=learning_rate, 
                              epochs=num_epochs, Xtr=Xtr, Ytr=Ytr, Xts=Xts, Yts=Yts, kernel=kernel,
                              opt_centers=True, opt_sigma=False, opt_penalty=False)
    only_trace_100[dset_name] = res_flk

Dataset: cpusmall
torch.Size([8192, 12]) torch.Size([8192, 1])
LOSSES: log-det: -0.000e+00 - data-fit: -0.000e+00 - trace: 1.917e+04 - const: 9.189e+02 - tot: 2.009e+04
Epoch 0 ( 0.61s) - Sigma 8.000 - Penalty 1.00e-03 - Tr  nrmse = 0.1308 - Ts  nrmse = 0.1247
Fro err: 7.829
LOSSES: log-det: -0.000e+00 - data-fit: -0.000e+00 - trace: 1.572e+04 - const: 9.189e+02 - tot: 1.664e+04
Epoch 1 ( 1.22s) - Sigma 8.000 - Penalty 1.00e-03 - Tr  nrmse = 0.1046 - Ts  nrmse = 0.0981
Fro err: 7.067
LOSSES: log-det: -0.000e+00 - data-fit: -0.000e+00 - trace: 1.431e+04 - const: 9.189e+02 - tot: 1.523e+04
Epoch 2 ( 1.82s) - Sigma 8.000 - Penalty 1.00e-03 - Tr  nrmse = 0.1046 - Ts  nrmse = 0.0975
Fro err: 6.529
LOSSES: log-det: -0.000e+00 - data-fit: -0.000e+00 - trace: 1.334e+04 - const: 9.189e+02 - tot: 1.426e+04
Epoch 3 ( 2.43s) - Sigma 8.000 - Penalty 1.00e-03 - Tr  nrmse = 0.1064 - Ts  nrmse = 0.0990
Fro err: 6.086
LOSSES: log-det: -0.000e+00 - data-fit: -0.000e+00 - trace: 1.260e+04 - const: 9.189e

Epoch 38 (23.55s) - Sigma 8.000 - Penalty 1.00e-03 - Tr  nrmse = 0.1003 - Ts  nrmse = 0.0901
Fro err: 1.576
LOSSES: log-det: -0.000e+00 - data-fit: -0.000e+00 - trace: 4.750e+03 - const: 9.189e+02 - tot: 5.669e+03
Epoch 39 (24.16s) - Sigma 8.000 - Penalty 1.00e-03 - Tr  nrmse = 0.1004 - Ts  nrmse = 0.0902
Fro err: 1.547
LOSSES: log-det: -0.000e+00 - data-fit: -0.000e+00 - trace: 4.690e+03 - const: 9.189e+02 - tot: 5.609e+03
Epoch 40 (24.76s) - Sigma 8.000 - Penalty 1.00e-03 - Tr  nrmse = 0.1004 - Ts  nrmse = 0.0903
Fro err: 1.520
LOSSES: log-det: -0.000e+00 - data-fit: -0.000e+00 - trace: 4.636e+03 - const: 9.189e+02 - tot: 5.555e+03
Epoch 41 (25.37s) - Sigma 8.000 - Penalty 1.00e-03 - Tr  nrmse = 0.1004 - Ts  nrmse = 0.0903
Fro err: 1.493
LOSSES: log-det: -0.000e+00 - data-fit: -0.000e+00 - trace: 4.589e+03 - const: 9.189e+02 - tot: 5.508e+03
Epoch 42 (25.96s) - Sigma 8.000 - Penalty 1.00e-03 - Tr  nrmse = 0.1003 - Ts  nrmse = 0.0903
Fro err: 1.469
LOSSES: log-det: -0.000e+00 - data-f

In [105]:
fig, ax = plt.subplots(ncols=2, figsize=(12, 5))
for i, (dset_name, data) in enumerate(only_trace_100.items()):
    title = get_title("", 100, True, dset_name)
    opt_plot(data, 100, title, figax=(fig, ax[i]))

<IPython.core.display.Javascript object>

In [107]:
fig, ax = plt.subplots(ncols=2, figsize=(12, 5))
for i, (dset_name, data) in enumerate(only_trace_100.items()):
    title = get_title("", 100, True, dset_name)
    opt_plot(data, 100, title, figax=(fig, ax[i]))

<IPython.core.display.Javascript object>

In [109]:
fig, ax = plt.subplots(ncols=2, figsize=(12, 5))
for i, (dset_name, data) in enumerate(only_trace_100.items()):
    title = get_title("", 100, True, dset_name)
    opt_plot(data, 100, title, figax=(fig, ax[i]))

<IPython.core.display.Javascript object>

In [42]:
fig, ax = plt.subplots(ncols=2, figsize=(12, 5))
for i, (dset_name, data) in enumerate(only_trace_100.items()):
    title = get_title("", 100, True, dset_name)
    opt_plot(data, 100, title, figax=(fig, ax[i]))

<IPython.core.display.Javascript object>

In [None]:
M = 100
train_only_trace = False
learning_rate = 0.1
num_epochs = 20
results_100_flk = {}
results_100_sgpr = {}
for dset_name in ["cpusmall", "space_ga"]:
    Xtr, Ytr, Xts, Yts, penalty_init, sigma_init, kernel = load_dset(dset_name, penalty=15)
    centers_init = torch.randn(M, Xtr.shape[1], dtype=Xtr.dtype)
    res_flk = train_sgpr_like(opt_model="Falkon", penalty_init=penalty_init, sigma_init=sigma_init,
                              centers_init=centers_init, only_trace=train_only_trace, lr=learning_rate,
                              epochs=num_epochs, Xtr=Xtr, Ytr=Ytr, Xts=Xts, Yts=Yts, kernel=kernel)
    res_sgpr = train_sgpr_like(opt_model="SGPR", penalty_init=penalty_init, sigma_init=sigma_init,
                               centers_init=centers_init, only_trace=train_only_trace, lr=learning_rate,
                               epochs=num_epochs, Xtr=Xtr, Ytr=Ytr, Xts=Xts, Yts=Yts, kernel=kernel)
    results_100_flk[dset_name] = res_flk
    results_100_sgpr[dset_name]= res_sgpr

Dataset: cpusmall
torch.Size([8192, 12]) torch.Size([8192, 1])
VALUE        d_eff -3.01089e+05 - loss -3.96653e+07 - trace -2.07880e+04
Epoch 0 ( 1.56s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 213.8287 - Ts  mse = 501.5016
Fro err: 16.146
VALUE        d_eff -3.01827e+05 - loss -3.58626e+07 - trace -2.07464e+04
Epoch 1 ( 3.24s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 116.4996 - Ts  mse = 296.7578
Fro err: 16.272
VALUE        d_eff -3.01965e+05 - loss -3.42083e+07 - trace -2.09134e+04
Epoch 2 ( 4.56s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 80.9815 - Ts  mse = 255.2180
Fro err: 15.951
VALUE        d_eff -3.02031e+05 - loss -3.28677e+07 - trace -2.08092e+04
Epoch 3 ( 5.64s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 59.4460 - Ts  mse = 215.4174
Fro err: 15.713
VALUE        d_eff -3.02230e+05 - loss -3.12838e+07 - trace -2.07726e+04
Epoch 4 ( 6.81s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 45.9063 - Ts  mse = 179.1606
Fro err: 15.606
VALUE        d_eff -3.02588e+05 - l

Epoch 45 (60.64s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 7.8574 - Ts  mse = 26.0746
Fro err: 11.573
VALUE        d_eff -3.04616e+05 - loss -2.37450e+07 - trace -1.89789e+04
Epoch 46 (61.78s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 7.8698 - Ts  mse = 26.0093
Fro err: 11.500
VALUE        d_eff -3.04706e+05 - loss -2.37022e+07 - trace -1.89201e+04
Epoch 47 (63.08s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 7.7559 - Ts  mse = 25.6326
Fro err: 11.452
VALUE        d_eff -3.04838e+05 - loss -2.35617e+07 - trace -1.88697e+04
Epoch 48 (64.61s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 7.6723 - Ts  mse = 25.2841
Fro err: 11.411
VALUE        d_eff -3.04943e+05 - loss -2.35977e+07 - trace -1.88242e+04
Epoch 49 (66.12s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 7.6059 - Ts  mse = 25.1409
Fro err: 11.353
VALUE        d_eff -3.05073e+05 - loss -2.35150e+07 - trace -1.87686e+04
Epoch 50 (67.78s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 7.5746 - Ts  mse = 25.0093
Fro err: 11.291

Epoch 91 (127.36s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 6.6321 - Ts  mse = 22.6281
Fro err: 9.704
VALUE        d_eff -3.07439e+05 - loss -2.31354e+07 - trace -1.71191e+04
Epoch 92 (128.72s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 6.6818 - Ts  mse = 22.9891
Fro err: 9.694
VALUE        d_eff -3.07410e+05 - loss -2.31623e+07 - trace -1.70860e+04
Epoch 93 (129.95s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 6.6266 - Ts  mse = 22.2976
Fro err: 9.616
VALUE        d_eff -3.07520e+05 - loss -2.30987e+07 - trace -1.70295e+04
Epoch 94 (131.27s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 6.6143 - Ts  mse = 22.7475
Fro err: 9.588
VALUE        d_eff -3.07591e+05 - loss -2.30441e+07 - trace -1.69944e+04
Epoch 95 (133.37s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 6.6020 - Ts  mse = 22.5569
Fro err: 9.573
VALUE        d_eff -3.07550e+05 - loss -2.30950e+07 - trace -1.69670e+04
Epoch 96 (136.15s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 6.5936 - Ts  mse = 22.6591
Fro err: 9.534

Epoch 37 (49.91s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 8.4988 - Ts  mse = 28.6569
Fro err: 11.981
VALUE        d_eff 3.80652e+03 - loss -1.19234e+07 - trace -1.93677e+04
Epoch 38 (50.99s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 8.3535 - Ts  mse = 28.4853
Fro err: 11.952
VALUE        d_eff 3.80652e+03 - loss -1.19067e+07 - trace -1.93477e+04
Epoch 39 (52.02s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 8.2681 - Ts  mse = 27.7571
Fro err: 11.920
VALUE        d_eff 3.80650e+03 - loss -1.18896e+07 - trace -1.93219e+04
Epoch 40 (53.06s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 8.1675 - Ts  mse = 27.4143
Fro err: 11.884
VALUE        d_eff 3.80648e+03 - loss -1.18729e+07 - trace -1.92932e+04
Epoch 41 (54.09s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 8.1183 - Ts  mse = 27.2007
Fro err: 11.852
VALUE        d_eff 3.80647e+03 - loss -1.18599e+07 - trace -1.92683e+04
Epoch 42 (55.30s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 8.0487 - Ts  mse = 26.6539
Fro err: 11.818
VALU

Epoch 83 (119.87s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 6.6626 - Ts  mse = 23.0653
Fro err: 9.976
VALUE        d_eff 3.80318e+03 - loss -1.15541e+07 - trace -1.74371e+04
Epoch 84 (121.04s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 6.6776 - Ts  mse = 22.7186
Fro err: 9.940
VALUE        d_eff 3.80310e+03 - loss -1.15773e+07 - trace -1.73975e+04
Epoch 85 (122.13s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 6.7971 - Ts  mse = 23.2726
Fro err: 9.897
VALUE        d_eff 3.80304e+03 - loss -1.16271e+07 - trace -1.73555e+04
Epoch 86 (123.32s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 6.7982 - Ts  mse = 22.6608
Fro err: 9.862
VALUE        d_eff 3.80288e+03 - loss -1.16465e+07 - trace -1.73106e+04
Epoch 87 (124.55s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 6.6940 - Ts  mse = 22.6664
Fro err: 9.823
VALUE        d_eff 3.80279e+03 - loss -1.15367e+07 - trace -1.72750e+04
Epoch 88 (125.73s) - Sigma 8.000 - Penalty 3.06e-07 - Tr  mse = 6.7867 - Ts  mse = 23.2557
Fro err: 9.802
VALU

In [50]:
for dset_name in dset_sigmas_15la.keys():
    fig, ax = plt.subplots(ncols=2, figsize=(16, 5))
    title = get_title("Falkon", 20, False, dset_name, sigma_init=sigma_init, penalty_init=torch.exp(-penalty_init))
    opt_plot(results_20_flk[dset_name], 20, title, figax=(fig, ax[0]))
    title = get_title("SGPR", 20, False, dset_name, sigma_init=sigma_init, penalty_init=torch.exp(-penalty_init))
    opt_plot(results_20_sgpr[dset_name], 20, title, figax=(fig, ax[1]))
    fig.tight_layout()
    fig.savefig("figures/opt_290420_M20_%s_v1.png" % (dset_name))

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
fig, ax = plt.subplots(ncols=2, figsize=(10, 5))
title = get_title("Falkon", M, train_only_trace, dset_name)
opt_plot(res_flk, M, title, figax=(fig, ax[0]))
title = get_title("SGPR", M, train_only_trace, dset_name)
opt_plot(res_sgpr, M, title, figax=(fig, ax[1]))
fig.tight_layout();

In [22]:
dset_name = "svmguide1"
Xtr, Ytr, Xts, Yts, penalty_init, sigma_init, kernel = load_dset(dset_name, penalty=15, dtype=torch.float64)
print(Xtr.dtype)
M = 10
torch.manual_seed(82)
centers_init = torch.randn(M, Xtr.shape[1], dtype=Xtr.dtype)
learning_rate = 0.1
num_epochs = 30
train_only_trace = False

Dataset: svmguide1
torch.Size([3089, 4]) torch.Size([3089, 1])
torch.float64


In [24]:
res_sgpr = train_gpflow_sgpr(
    penalty_init=torch.tensor(1.), sigma_init=sigma_init, 
    centers_init=centers_init, only_trace=train_only_trace, lr=learning_rate*3, 
    epochs=num_epochs, Xtr=Xtr, Ytr=Ytr, Xts=Xts, Yts=Yts, kernel=kernel)

╒══════════════════════════╤═══════════╤══════════════════╤═════════╤═════════════╤═════════╤═════════╤════════════════════════════════════════════╕
│ name                     │ class     │ transform        │ prior   │ trainable   │ shape   │ dtype   │ value                                      │
╞══════════════════════════╪═══════════╪══════════════════╪═════════╪═════════════╪═════════╪═════════╪════════════════════════════════════════════╡
│ SGPR.kernel.variance     │ Parameter │ Softplus         │         │ False       │ ()      │ float64 │ 1.0                                        │
├──────────────────────────┼───────────┼──────────────────┼─────────┼─────────────┼─────────┼─────────┼────────────────────────────────────────────┤
│ SGPR.kernel.lengthscales │ Parameter │ Identity         │         │ True        │ ()      │ float64 │ 3.0                                        │
├──────────────────────────┼───────────┼──────────────────┼─────────┼─────────────┼─────────┼─────────┼───

In [25]:
res_sgpr = train_sgpr_like(opt_model="SGPR", penalty_init=torch.tensor(10.),
                           sigma_init=sigma_init,
                           centers_init=centers_init, only_trace=train_only_trace, lr=learning_rate*1, 
                           epochs=num_epochs, Xtr=Xtr, Ytr=Ytr, Xts=Xts, Yts=Yts, kernel=kernel)

LOSSES: log-det: 1.157e+03 - data-fit: 5.092e+00 - trace: 3.130e+00 - const: 9.189e+02 - tot: 2.085e+03
Epoch 0 ( 0.68s) - Sigma 3.100 - Penalty 9.05e+00 - Tr  nrmse = 0.4444 - Ts  nrmse = 0.4308
Fro err: 16.303
LOSSES: log-det: 1.108e+03 - data-fit: 5.473e+00 - trace: 2.556e+00 - const: 9.189e+02 - tot: 2.035e+03
Epoch 1 ( 1.27s) - Sigma 3.199 - Penalty 8.19e+00 - Tr  nrmse = 0.4426 - Ts  nrmse = 0.4282
Fro err: 12.955
LOSSES: log-det: 1.058e+03 - data-fit: 5.950e+00 - trace: 2.297e+00 - const: 9.189e+02 - tot: 1.985e+03
Epoch 2 ( 1.79s) - Sigma 3.297 - Penalty 7.41e+00 - Tr  nrmse = 0.4402 - Ts  nrmse = 0.4253
Fro err: 11.002
LOSSES: log-det: 1.008e+03 - data-fit: 6.473e+00 - trace: 2.134e+00 - const: 9.189e+02 - tot: 1.936e+03
Epoch 3 ( 2.32s) - Sigma 3.394 - Penalty 6.70e+00 - Tr  nrmse = 0.4374 - Ts  nrmse = 0.4224
Fro err: 9.628
LOSSES: log-det: 9.584e+02 - data-fit: 7.039e+00 - trace: 2.022e+00 - const: 9.189e+02 - tot: 1.886e+03
Epoch 4 ( 2.84s) - Sigma 3.490 - Penalty 6.07e+00

KeyboardInterrupt: 

In [109]:
res_sgpr = train_sgpr_like(opt_model="SGPR", penalty_init=torch.tensor(1e-1),
                           sigma_init=sigma_init,
                           centers_init=centers_init, only_trace=train_only_trace, lr=learning_rate*2, 
                           epochs=num_epochs, Xtr=Xtr, Ytr=Ytr, Xts=Xts, Yts=Yts, kernel=kernel)

LOSSES: log-det: -1.104e+03 - data-fit: 2.153e+05 - trace: 6.316e+01 - const: 9.189e+02 - tot: 2.152e+05
Epoch 0 ( 0.69s) - Sigma 7.800 - Penalty 1.22e-01 - Tr  nrmse = 0.0799 - Ts  nrmse = 0.0866
Fro err: 3.804
LOSSES: log-det: -1.007e+03 - data-fit: 1.824e+05 - trace: 5.528e+01 - const: 9.189e+02 - tot: 1.824e+05
Epoch 1 ( 1.32s) - Sigma 7.601 - Penalty 1.49e-01 - Tr  nrmse = 0.0648 - Ts  nrmse = 0.0732
Fro err: 4.030
LOSSES: log-det: -9.102e+02 - data-fit: 1.558e+05 - trace: 4.886e+01 - const: 9.189e+02 - tot: 1.559e+05
Epoch 2 ( 1.98s) - Sigma 7.405 - Penalty 1.81e-01 - Tr  nrmse = 0.0653 - Ts  nrmse = 0.0740
Fro err: 4.143
LOSSES: log-det: -8.148e+02 - data-fit: 1.348e+05 - trace: 4.255e+01 - const: 9.189e+02 - tot: 1.350e+05
Epoch 3 ( 2.61s) - Sigma 7.212 - Penalty 2.19e-01 - Tr  nrmse = 0.0663 - Ts  nrmse = 0.0754
Fro err: 4.299
LOSSES: log-det: -7.209e+02 - data-fit: 1.170e+05 - trace: 3.755e+01 - const: 9.189e+02 - tot: 1.173e+05
Epoch 4 ( 3.26s) - Sigma 7.024 - Penalty 2.65e-

KeyboardInterrupt: 

In [97]:
trans = PositiveTransform(1e-8)

In [98]:
trans._inverse(torch.tensor(0.11e-7))

tensor(-20.7233)

In [94]:
trans(torch.tensor(1e-8))

tensor(0.6931)

In [26]:
# Final good (log(deff / variance)*n - datafit/ variance - trace)
# l=1, m=10 => good results
# l=15, m=10 => ok, 0.0133
# l=15, m=50 => 0.0121, deff not maxed
res_flk = train_sgpr_like(opt_model="Falkon", penalty_init=torch.tensor(1e-1), sigma_init=sigma_init,
                          centers_init=centers_init, only_trace=train_only_trace, lr=learning_rate*1,
                          epochs=num_epochs, Xtr=Xtr, Ytr=Ytr, Xts=Xts, Yts=Yts, kernel=kernel)

LOSSES: d-eff: -4.573e+03 - data-fit: 7.384e+02 - trace: 3.130e+02 - tot: -3.521e+03
Epoch 0 ( 0.54s) - Sigma 3.100 - Penalty 1.11e-01 - Tr  nrmse = 0.4185 - Ts  nrmse = 0.4138
Fro err: 16.540
LOSSES: d-eff: -4.473e+03 - data-fit: 6.468e+02 - trace: 2.124e+02 - tot: -3.613e+03
Epoch 1 ( 1.24s) - Sigma 3.197 - Penalty 1.03e-01 - Tr  nrmse = 0.4118 - Ts  nrmse = 0.4085
Fro err: 12.642
LOSSES: d-eff: -4.544e+03 - data-fit: 6.707e+02 - trace: 1.806e+02 - tot: -3.693e+03
Epoch 2 ( 2.01s) - Sigma 3.291 - Penalty 9.51e-02 - Tr  nrmse = 0.4073 - Ts  nrmse = 0.4054
Fro err: 10.328
LOSSES: d-eff: -4.630e+03 - data-fit: 7.128e+02 - trace: 1.605e+02 - tot: -3.757e+03
Epoch 3 ( 2.71s) - Sigma 3.383 - Penalty 8.71e-02 - Tr  nrmse = 0.4045 - Ts  nrmse = 0.4037
Fro err: 8.865
LOSSES: d-eff: -4.721e+03 - data-fit: 7.676e+02 - trace: 1.477e+02 - tot: -3.806e+03
Epoch 4 ( 3.39s) - Sigma 3.473 - Penalty 7.97e-02 - Tr  nrmse = 0.4023 - Ts  nrmse = 0.4025
Fro err: 7.813
LOSSES: d-eff: -4.812e+03 - data-fit:

In [277]:
fig, ax = plt.subplots(ncols=2, figsize=(10, 5))
title = get_title("Falkon", M, train_only_trace, dset_name)
opt_plot(res_flk, M, title, figax=(fig, ax[0]))
title = get_title("SGPR", M, train_only_trace, dset_name)
opt_plot(res_sgpr, M, title, figax=(fig, ax[1]))
fig.tight_layout();

<IPython.core.display.Javascript object>

In [24]:
def sgpr_like_multidset(M, only_trace, init_penalty=10, lr=0.1, epochs=30, dsets=("cpusmall", "space_ga", "cadata", "svmguide1"), dtype=torch.float64):
    results_flk, results_sgpr = {}, {}
    for dset_name in dsets:
        Xtr, Ytr, Xts, Yts, penalty_init, sigma_init, kernel = load_dset(
            dset_name, penalty=init_penalty, dtype=dtype)
        centers_init = torch.randn(M, Xtr.shape[1], dtype=Xtr.dtype)
        res_flk = train_sgpr_like(opt_model="Falkon", penalty_init=penalty_init, sigma_init=sigma_init,
                                  centers_init=centers_init, only_trace=only_trace, lr=lr,
                                  epochs=epochs, Xtr=Xtr, Ytr=Ytr, Xts=Xts, Yts=Yts, kernel=kernel)
        res_sgpr = train_sgpr_like(opt_model="SGPR", penalty_init=penalty_init, sigma_init=sigma_init,
                                   centers_init=centers_init, only_trace=only_trace, lr=lr,
                                   epochs=epochs, Xtr=Xtr, Ytr=Ytr, Xts=Xts, Yts=Yts, kernel=kernel)
        results_flk[dset_name] = res_flk
        results_sgpr[dset_name]= res_sgpr
    return results_flk, results_sgpr
def plot_multidset(res_dict, M, only_trace):
    ncols = len(res_dict)
    key = list(res_dict.keys())[0]
    nrows = len(res_dict[key])
    
    fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=(nrows * 2.5, ncols * 6))

    for i, (res_name, res) in enumerate(res_dict.items()):
        for j, key in enumerate(res.keys()):
            title = get_title(res_name, M, only_trace, key)
            opt_plot(res[key], M, title, figax=(fig, ax[j, i]), legend=(j==0 and i == 0))
    fig.tight_layout();
    return fig, ax

In [38]:
flk_100_l10, sgpr_100_l1 = sgpr_like_multidset(
    M=100,
    only_trace=False,
    init_penalty=1,
)

Dataset: cpusmall
torch.Size([8192, 12]) torch.Size([8192, 1])
VALUE        d_eff -5.897e+03 - loss -6.451e+03 - trace -1.930e-02. tot -1.235e+04
Epoch 0 ( 0.30s) - Sigma 8.100 - Penalty 4.07e-01 - Tr  nrmse = 0.3738 - Ts  nrmse = 0.3722
Fro err: 3.879
VALUE        d_eff -6.047e+03 - loss -6.199e+03 - trace -1.470e-02. tot -1.225e+04
Epoch 1 ( 0.69s) - Sigma 8.199 - Penalty 4.49e-01 - Tr  nrmse = 0.3927 - Ts  nrmse = 0.3913
Fro err: 3.544
VALUE        d_eff -6.200e+03 - loss -5.949e+03 - trace -1.182e-02. tot -1.215e+04
Epoch 2 ( 0.99s) - Sigma 8.298 - Penalty 4.97e-01 - Tr  nrmse = 0.4125 - Ts  nrmse = 0.4112
Fro err: 3.273
VALUE        d_eff -6.354e+03 - loss -5.701e+03 - trace -9.675e-03. tot -1.205e+04
Epoch 3 ( 1.26s) - Sigma 8.395 - Penalty 5.49e-01 - Tr  nrmse = 0.4331 - Ts  nrmse = 0.4319
Fro err: 3.026
VALUE        d_eff -6.508e+03 - loss -5.454e+03 - trace -7.972e-03. tot -1.196e+04
Epoch 4 ( 1.57s) - Sigma 8.491 - Penalty 6.06e-01 - Tr  nrmse = 0.4543 - Ts  nrmse = 0.4533
Fr

Fro err: 2.466
VALUE        d_eff -3.600e+03 - loss -1.686e+03 - trace -2.592e-03. tot -5.287e+03
Epoch 13 ( 4.04s) - Sigma 9.271 - Penalty 1.48e+00 - Tr  nrmse = 0.6565 - Ts  nrmse = 0.6559
Fro err: 2.388
VALUE        d_eff -3.649e+03 - loss -1.585e+03 - trace -2.264e-03. tot -5.234e+03
Epoch 14 ( 4.34s) - Sigma 9.343 - Penalty 1.62e+00 - Tr  nrmse = 0.6776 - Ts  nrmse = 0.6770
Fro err: 2.317
VALUE        d_eff -3.697e+03 - loss -1.488e+03 - trace -1.984e-03. tot -5.185e+03
Epoch 15 ( 4.72s) - Sigma 9.412 - Penalty 1.79e+00 - Tr  nrmse = 0.6979 - Ts  nrmse = 0.6973
Fro err: 2.253
VALUE        d_eff -3.744e+03 - loss -1.396e+03 - trace -1.744e-03. tot -5.140e+03
Epoch 16 ( 4.96s) - Sigma 9.478 - Penalty 1.96e+00 - Tr  nrmse = 0.7174 - Ts  nrmse = 0.7167
Fro err: 2.193
VALUE        d_eff -3.790e+03 - loss -1.308e+03 - trace -1.539e-03. tot -5.098e+03
Epoch 17 ( 5.20s) - Sigma 9.540 - Penalty 2.15e+00 - Tr  nrmse = 0.7359 - Ts  nrmse = 0.7352
Fro err: 2.138
VALUE        d_eff -3.836e+03 

KeyboardInterrupt: 

In [37]:
plot_multidset({"Falkon": flk_100_l10, "SGPR": sgpr_100_l10}, M=100, only_trace=False)

<IPython.core.display.Javascript object>

(<Figure size 1000x1200 with 16 Axes>,
 array([[<AxesSubplot:title={'center':'cpusmall - Falkon - M=100'}, xlabel='Epoch', ylabel='MSE'>,
         <AxesSubplot:title={'center':'cpusmall - SGPR - M=100'}, xlabel='Epoch', ylabel='MSE'>],
        [<AxesSubplot:title={'center':'space_ga - Falkon - M=100'}, xlabel='Epoch', ylabel='MSE'>,
         <AxesSubplot:title={'center':'space_ga - SGPR - M=100'}, xlabel='Epoch', ylabel='MSE'>],
        [<AxesSubplot:title={'center':'cadata - Falkon - M=100'}, xlabel='Epoch', ylabel='MSE'>,
         <AxesSubplot:title={'center':'cadata - SGPR - M=100'}, xlabel='Epoch', ylabel='MSE'>],
        [<AxesSubplot:title={'center':'svmguide1 - Falkon - M=100'}, xlabel='Epoch', ylabel='MSE'>,
         <AxesSubplot:title={'center':'svmguide1 - SGPR - M=100'}, xlabel='Epoch', ylabel='MSE'>]],
       dtype=object))

In [33]:
plot_multidset({"Falkon": flk_100, "SGPR": sgpr_100}, M=100, only_trace=False)

<IPython.core.display.Javascript object>

(<Figure size 1000x1200 with 16 Axes>,
 array([[<AxesSubplot:title={'center':'cpusmall - Falkon - M=100'}, xlabel='Epoch', ylabel='MSE'>,
         <AxesSubplot:title={'center':'cpusmall - SGPR - M=100'}, xlabel='Epoch', ylabel='MSE'>],
        [<AxesSubplot:title={'center':'space_ga - Falkon - M=100'}, xlabel='Epoch', ylabel='MSE'>,
         <AxesSubplot:title={'center':'space_ga - SGPR - M=100'}, xlabel='Epoch', ylabel='MSE'>],
        [<AxesSubplot:title={'center':'cadata - Falkon - M=100'}, xlabel='Epoch', ylabel='MSE'>,
         <AxesSubplot:title={'center':'cadata - SGPR - M=100'}, xlabel='Epoch', ylabel='MSE'>],
        [<AxesSubplot:title={'center':'svmguide1 - Falkon - M=100'}, xlabel='Epoch', ylabel='MSE'>,
         <AxesSubplot:title={'center':'svmguide1 - SGPR - M=100'}, xlabel='Epoch', ylabel='MSE'>]],
       dtype=object))

In [31]:
results_flk = results_100_flk
results_sgpr = results_100_sgpr
M = 100
fig, ax = plt.subplots(nrows=len(results_flk), ncols=2, figsize=(10, 12))

for i, key in enumerate(results_flk.keys()):
    title_flk = get_title("Falkon", M, train_only_trace, key)
    opt_plot(results_flk[key], M, title_flk, figax=(fig, ax[i,0]), legend=(i==0))
    
    title_sgpr = get_title("SGPR", M, train_only_trace, key)
    opt_plot(results_sgpr[key], M, title_sgpr, figax=(fig, ax[i,1]), legend=False)
fig.tight_layout();

<IPython.core.display.Javascript object>

In [22]:
results_flk = results_20_flk
results_sgpr = results_20_sgpr
M = 20
fig, ax = plt.subplots(nrows=len(results_flk), ncols=2, figsize=(10, 12))

for i, key in enumerate(results_flk.keys()):
    title_flk = get_title("Falkon", M, train_only_trace, key)
    opt_plot(results_flk[key], M, title_flk, figax=(fig, ax[i,0]), legend=(i==0))
    
    title_sgpr = get_title("SGPR", M, train_only_trace, key)
    opt_plot(results_sgpr[key], M, title_sgpr, figax=(fig, ax[i,1]), legend=False)

fig.tight_layout();

<IPython.core.display.Javascript object>