In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from scipy.linalg import block_diag, toeplitz
from sklearn.cluster import KMeans
from sklearn.gaussian_process.kernels import RBF
from sklearn.linear_model import Lasso, LinearRegression

from data_generation import create_clus_split, gen_cov_mat, gen_rbf_X
from mse_estimator import ErrorComparer
from spe.estimators import (
    better_test_est_split,
    cp_linear_train_test,
    cp_rf_train_test,
    kfoldcv,
    kmeanscv,
)
from spe.relaxed_lasso import RelaxedLasso

In [2]:
# Seed NumPy's legacy global RNG so draws made through np.random are repeatable.
np.random.seed(seed=1)

In [3]:
# Comparison helper; its .compare(...) method is called below to run the estimators.
err_cmp = ErrorComparer()

In [4]:
# --- Simulation settings (all forwarded to err_cmp.compare further down) ---

# Number of repetitions requested from the comparison.
niter = 100

# Problem size: n points (a 50 x 50 grid is built from it below), p features,
# and s, which is passed through as `s` (presumably the number of active
# coefficients -- confirm against ErrorComparer.compare).
n = 50 ** 2
p = 5
s = 5

# `k` handed to both kfoldcv and kmeanscv (presumably fold / cluster count).
k = 10

# Signal-to-noise ratio and training fraction, passed as `snr` and `tr_frac`.
snr = 0.4
tr_frac = 0.2

In [5]:
# Lay out a regular nx-by-ny lattice on [0, 100] x [0, 100] and keep the
# coordinates as two flat vectors (one entry per grid point).
nx = ny = int(np.sqrt(n))
xs, ys = (np.linspace(0, 100, num_pts) for num_pts in (nx, ny))
c_x, c_y = (grid.flatten() for grid in np.meshgrid(xs, ys))

In [6]:
# Noise covariance over the grid locations, built with a unit-length-scale RBF kernel.
Sigma_t = gen_cov_mat(c_x, c_y, RBF(length_scale=1.0))
# Lower Cholesky factor of (Sigma_t + I); the identity is added to the
# diagonal before factorising. Forwarded to err_cmp.compare as `Chol_t`.
Chol_t = np.linalg.cholesky(Sigma_t + np.identity(n))

In [7]:
# Design matrix generated from the grid coordinates and p.
# NOTE(review): presumably shape (n, p) with RBF-based spatially smooth
# columns -- confirm against data_generation.gen_rbf_X.
X = gen_rbf_X(c_x, c_y, p)

In [None]:
# Run the error-estimator comparison for a RelaxedLasso model.
#
# The second argument lists the estimators and the third lists one kwargs dict
# per estimator (presumably matched by position -- confirm against
# ErrorComparer.compare). The four results are unpacked in the same order:
# split-based test error, K-fold CV, k-means (spatial) CV and the Cp-style
# estimator.
#
# `cp_linear_train_test` must be imported from spe.estimators in the import
# cell; referencing it without the import raises NameError on a fresh kernel.
# NOTE(review): the model is RelaxedLasso while the Cp estimator is the
# "linear" variant -- confirm this pairing is intended.
test_err, kfcv_err, spcv_err, lin_err = err_cmp.compare(
    RelaxedLasso(fit_intercept=False),
    [better_test_est_split, kfoldcv, kmeanscv, cp_linear_train_test],
    [{}, {"k": k}, {"k": k}, {}],
    niter=niter,
    n=n,
    p=p,
    s=s,
    snr=snr,
    X=X,
    beta=None,
    # One (x, y) coordinate pair per row.
    coord=np.stack([c_x, c_y]).T,
    Chol_t=Chol_t,
    tr_frac=tr_frac,
    friedman_mu=True,
)
    

In [None]:
# Average each error array over everything it contains; `risk` (from the
# test-split errors) is used as the reference value in the cells below.
risk, risk_kfcv, risk_spcv, risk_lin = (
    err.mean() for err in (test_err, kfcv_err, spcv_err, lin_err)
)
risk, risk_kfcv, risk_spcv, risk_lin

In [None]:
# Collect the raw error estimates into one frame, one column per method.
df = pd.DataFrame(
    {
        "GenCp": lin_err.T,
        "KFCV": kfcv_err.T,
        "SPCV": spcv_err.T,
    }
)

In [None]:
# Mean of each method's estimate after dividing by the reference risk
# (a value of 1 means the estimate matches `risk` on average).
df.div(risk).mean()

In [None]:
# Bar chart of each method's error estimate relative to `risk`, with whiskers
# spanning the inter-quartile range of the per-iteration relative estimates.
rel_err = df / risk
rel_mean = rel_err.mean()
rel_q25 = rel_err.quantile(.25)
rel_q75 = rel_err.quantile(.75)

fig = go.Figure()
fig.add_trace(go.Bar(
    name='Control',
    # Take the labels from the frame so they always line up with the bar heights.
    x=list(rel_err.columns), y=rel_mean,
    marker_color=px.colors.qualitative.Plotly,
    text=np.around(rel_mean, 3),
    textposition='outside',
    error_y=dict(
        type='data',
        color='black',
        symmetric=False,
        # Plotly treats `array` / `arrayminus` as lengths measured from the bar
        # value, not as absolute positions, so pass offsets from the mean. The
        # whiskers then end exactly at the 75th and 25th percentiles.
        array=rel_q75 - rel_mean,
        arrayminus=rel_mean - rel_q25)
))
# Reference line: a relative estimate of 1 equals the reference risk.
fig.add_hline(y=1., line_color='red')
# NOTE(review): the model fitted above is RelaxedLasso; confirm the title wording.
fig.update_layout(
    title="Linear Regression Test Error Estimates",
    xaxis_title="Method",
    yaxis_title="Relative MSE",
)
fig.show()