In [1]:
import numpy as np
import pandas as pd
from sklearn.gaussian_process.kernels import Matern, RBF

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from doc.mse_estimator import ErrorComparer
from doc.data_generation import gen_rbf_X, gen_matern_X, create_clus_split, gen_cov_mat
from doc.plotting_utils import gen_model_barplots
from spe.estimators import kfoldcv, kmeanscv, better_test_est_split, cp_smoother_train_test, by_spatial 
from spe.smoothers import LinearRegression, BSplineRegressor

In [2]:
# Fix NumPy's global (legacy) random state so the np.random.* draws made in
# later cells repeat on a fresh Restart & Run All.
np.random.seed(seed=1)

In [3]:
## number of realizations to run
niter = 100

## data generation parameters
n = 10 ** 2     # number of locations; the grid cell takes int(sqrt(n)) points per axis
p = 5           # number of columns of X / length of beta
s = 5           # number of entries of beta that receive a nonzero draw
delta = 0.75    # weight on the kernel covariance; (1 - delta) goes to the identity
snr = 0.4       # forwarded to ErrorComparer.compare as `snr`
tr_frac = 0.25  # only referenced by the commented-out create_clus_split call

## noise covariance: 'rbf' or 'matern' select a kernel, anything else -> identity
noise_kernel, noise_length_scale, noise_nu = 'matern', 1.0, 0.5

## feature generation: 'rbf' or 'matern' select a helper, anything else -> randn
X_kernel, X_length_scale, X_nu = 'matern', 5.0, 2.5

## ErrorComparer parameters
nboot = 100  # not passed to any call in the cells shown here
k = 5        # forwarded as `k` to kfoldcv and kmeanscv (presumably the fold/cluster count)
models = [LinearRegression(fit_intercept=False)]

# Each estimator is written next to its keyword arguments and the pairs are
# then unzipped, so `ests` and `est_kwargs` always line up index by index.
# `by_spatial` is currently left out of the comparison.
ests, est_kwargs = (
    list(column)
    for column in zip(
        (better_test_est_split, {}),
        (cp_smoother_train_test, {}),
        (kfoldcv, {'k': k}),
        (kmeanscv, {'k': k}),
    )
)

## plot parameters
title = 'Repeated Measurements Model'
model_names = ['Linear Regression']  # one label per entry of `models`
# NOTE(review): three labels for the four entries of `ests` -- presumably the
# first estimator (better_test_est_split) serves as the reference and gets no
# label of its own; confirm against gen_model_barplots.
est_names = ['GenCp', 'KFCV', 'SPCV']

In [4]:
# Comparison driver; its `compare` method is called once per model further down.
err_cmp = ErrorComparer()

In [5]:
# Build a square nx-by-ny lattice of locations covering [0, 10] x [0, 10].
nx = ny = int(np.sqrt(n))
# int() truncates, so a non-square n would silently produce fewer than n
# locations and mismatch the n-sized arrays built in later cells
# (np.eye(n), np.ones(n)). Fail here with a clear message instead.
if nx * ny != n:
    raise ValueError(
        f"n must be a perfect square to form a square grid, got n={n}"
    )
xs = np.linspace(0, 10, nx)
ys = np.linspace(0, 10, ny)
c_x, c_y = np.meshgrid(xs, ys)
# Flatten the two coordinate grids into per-location vectors of length nx*ny.
c_x = c_x.flatten()
c_y = c_y.flatten()
# One row per location, columns are (x, y).
coord = np.stack([c_x, c_y]).T

In [6]:
# Base covariance over the grid locations: built from the requested kernel,
# or the identity for any other value of noise_kernel.
if noise_kernel == 'matern':
    Sigma_t = gen_cov_mat(
        c_x, c_y, Matern(length_scale=noise_length_scale, nu=noise_nu)
    )
elif noise_kernel == 'rbf':
    Sigma_t = gen_cov_mat(c_x, c_y, RBF(length_scale=noise_length_scale))
else:
    Sigma_t = np.eye(n)

# Kernel share of the covariance, then the delta / (1 - delta) blend of that
# share with an identity component.
Cov_st = delta * Sigma_t
Sigma_t = Cov_st + (1 - delta) * np.eye(n)

# Factorise only when a kernel was used; otherwise the identity is taken
# directly as the factor.
Chol_t = (
    np.linalg.cholesky(Sigma_t)
    if noise_kernel in ('rbf', 'matern')
    else np.eye(n)
)

In [7]:
# Design matrix: produced by the kernel-specific helper when X_kernel names
# one, otherwise n-by-p standard normal draws.
if X_kernel == 'matern':
    X = gen_matern_X(c_x, c_y, p, length_scale=X_length_scale, nu=X_nu)
elif X_kernel == 'rbf':
    X = gen_rbf_X(c_x, c_y, p)
else:
    X = np.random.randn(n, p)

# Coefficients: s distinct positions are picked at random and filled with
# Uniform(-1, 1) draws; every other entry stays at zero.
idx = np.random.choice(p, size=s, replace=False)
beta = np.zeros(p)
beta[idx] = np.random.uniform(low=-1, high=1, size=s)
# Alternative kept for reference (magnitudes in [1, 3) with random signs):
# beta[idx] = np.random.uniform(1,3,size=s) * np.random.choice([-1,1],size=s,replace=True)

In [8]:
# Boolean mask later passed to compare() as `tr_idx`; every entry is True.
# The split based on create_clus_split / tr_frac is currently disabled:
# tr_idx = create_clus_split(
#             int(np.sqrt(n)), int(np.sqrt(n)), tr_frac
#         )
tr_idx = np.full(n, True, dtype=bool)

# Simulate $Y, Y^* \overset{iid}{\sim} \mathcal{N}(\mu, \Sigma_Y)$

In [9]:
# Keyword arguments that are the same for every model's comparison run.
compare_kwargs = dict(
    niter=niter,
    n=n,
    p=p,
    s=s,
    snr=snr,
    X=X,
    beta=beta,
    coord=coord,
    Chol_y=Chol_t,
    Chol_ystar=None,
    Cov_y_ystar=None,
    tr_idx=tr_idx,
    fair=False,
    est_sigma=False,
    # est_sigma_model=model,  (disabled; would have to be set per model)
)

# One compare() call per model; results are collected in model order.
model_errs = []
for model in models:
    errs = err_cmp.compare(model, ests, est_kwargs, **compare_kwargs)
    model_errs.append(errs)

  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 100/100 [00:01<00:00, 54.55it/s]


In [10]:
# Build the figure from the collected per-model results and the label lists
# defined in the configuration cell, then render it.
fig = gen_model_barplots(model_errs, model_names, est_names, title)
fig.show()