In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso, LinearRegression

import plotly.express as px
import plotly.graph_objects as go

from scipy.linalg import toeplitz

from spe.mse_estimator import ErrorComparer

In [2]:
# Comparer object from spe.mse_estimator, built with its default constructor
# arguments; its compareBlurLinear method drives the simulation further down.
err_cmp = ErrorComparer()

In [9]:
# ---------------------------------------------------------------------------
# Simulation settings
# ---------------------------------------------------------------------------
# niter, n and p are forwarded to compareBlurLinear under the same names;
# beta has length p and s of its coordinates are drawn as non-zero.
niter = 100
n, p, s = 100, 100, 20

# Step sizes handed to compareBlurLinear one at a time
# (a denser grid tried earlier: [0.05, 0.1, 0.2, 0.5, 0.8, 1.]).
alphas = [0.05, 0.5, 1.]
na = len(alphas)

# Banded Toeplitz matrix: entry (i, j) is rho**|i - j| when |i - j| < 20
# and 0 otherwise.  Only the first 20 entries of the first column are set.
rho = 0.5
c = np.zeros(n)
c[:20] = rho ** np.arange(20)
Sigma_t = toeplitz(c)

# No Cholesky factor is supplied (alternative: np.linalg.cholesky(Sigma_t)).
Chol_t = None

# Theta_p is the plain inverse of Sigma_t.  Alternatives explored before:
# inv(Chol_t @ Chol_t.T), None, or a random 0/1 diagonal matrix.
Theta_p = np.linalg.inv(Sigma_t)

In [10]:
# Result buffers: row i holds the niter per-repetition values returned by
# compareBlurLinear for alphas[i].
test_err = np.zeros((na, niter))
test_err_alpha = np.zeros((na, niter))
cb_err = np.zeros((na, niter))
blur_err = np.zeros((na, niter))

# Identity design matrix (alternative tried earlier: np.random.randn(n, p)).
X = np.eye(n)

# Sparse coefficient vector with exactly s non-zero coordinates drawn from
# Uniform(-1, 1).  replace=False matters: np.random.choice samples WITH
# replacement by default, which can repeat an index and silently leave fewer
# than s non-zero entries in beta.  Requires s <= p.
# NOTE(review): no random seed is set, so beta differs on every run.
beta = np.zeros(p)
idx = np.random.choice(p, size=s, replace=False)
beta[idx] = np.random.uniform(-1, 1, size=s)

# One call per alpha; the four returned sequences are unpacked into row i of
# the buffers above.  The `snr` argument is intentionally not passed.
# NOTE(review): the stored outputs show test_err_alpha and cb_err averaging to
# exactly 0 for every alpha -- presumably a consequence of est_risk=False;
# confirm against compareBlurLinear before interpreting those two arrays.
for i, alpha in enumerate(alphas):
    print(i)
    (test_err[i, :],
     test_err_alpha[i, :],
     cb_err[i, :],
     blur_err[i, :]) = err_cmp.compareBlurLinear(
        n=n,
        p=p,
        X=X,
        beta=beta,
        Chol_t=Chol_t,
        Theta_p=Theta_p,
        model=LinearRegression(fit_intercept=False),
        alpha=alpha,
        niter=niter,
        est_risk=False,
    )
    

0
1
2


In [11]:
# Average every error matrix over the repetition axis (axis 1), leaving one
# value per alpha, then display the four summaries as the cell output.
risk, risk_alpha, risk_cb, risk_blur = (
    np.mean(errs, axis=1)
    for errs in (test_err, test_err_alpha, cb_err, blur_err)
)
risk, risk_alpha, risk_cb, risk_blur

(array([3.34299241, 3.35571575, 3.30590215]),
 array([0., 0., 0.]),
 array([0., 0., 0.]),
 array([3.21153195, 3.32687961, 3.32076459]))

In [15]:
# Wide-form frames for plotting: one column per alpha, one row per repetition.
cb_df, blur_df = (
    pd.DataFrame(np.transpose(errs)) for errs in (cb_err, blur_err)
)

In [13]:
# Box plot of the coupled-bootstrap errors, one box per column of cb_df
# (i.e. per alpha), overlaid with two reference traces built from test_err
# and risk_alpha.
fig_cb = (
    px.box(
        cb_df,
        title="Coupled Bootstrap",
        labels={"variable": "Alpha", "value": "Risk"},
    )
    # Applied while the figure still contains only the box traces.
    .update_traces(boxmean=True)
    # Horizontal line at the grand mean of test_err (all alphas pooled).
    .add_trace(
        go.Scatter(
            name='err',
            mode='lines',
            x=[0, na],
            y=[test_err.mean()] * 2,
        )
    )
    # One marker per alpha at the corresponding entry of risk_alpha.
    .add_trace(
        go.Scatter(
            name='err_alpha',
            mode='markers',
            x=np.arange(na),
            y=risk_alpha,
        )
    )
    # Show the alpha values instead of the column positions on the x axis.
    .update_layout(
        xaxis=dict(
            tickmode='array',
            tickvals=np.arange(na),
            ticktext=alphas,
        )
    )
)
fig_cb

In [17]:
# Box plot of the blurred errors, normalised per alpha: each column of
# blur_df is divided by the mean of the matching row of test_err, so a value
# of 1 (red reference line) means the blurred error equals the mean test
# error.  The earlier `- test_err.mean(1)*0` term was a no-op and is dropped;
# the y-axis label now names the ratio that is actually plotted.
fig_blur = px.box(
    blur_df / test_err.mean(axis=1),
    labels={
        "variable": "Alpha",
        "value": "Risk / mean test error",
    },
    title="Blurred",
)
fig_blur.update_traces(boxmean=True)

# Reference level for the ratio.
fig_blur.add_hline(y=1., line_color='red')

# Show the alpha values instead of the column positions on the x axis;
# update_layout returns the figure, which is what the cell displays.
fig_blur.update_layout(
    xaxis=dict(
        tickmode='array',
        tickvals=np.arange(na),
        ticktext=alphas,
    )
)