In [17]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso, LinearRegression
from sklearn.cluster import KMeans
from sklearn.ensemble import BaggingRegressor

import plotly.express as px

import plotly.graph_objects as go

from scipy.linalg import toeplitz, block_diag

from spe.mse_estimator import ErrorComparer, gen_rbf_X, create_clus_split
from spe.relaxed_lasso import RelaxedLasso, BaggedRelaxedLasso

In [18]:
err_cmp = ErrorComparer()

In [34]:
niter = 200

n=20**2
p=200
s=30

k=3
n_estimators=100
lambd=.45

base_estimator=RelaxedLasso(lambd=lambd, fit_intercept=False)

In [35]:
nx = ny = int(np.sqrt(n))
xs = np.linspace(0, 10, nx)
ys = np.linspace(0, 10, ny)
c_x, c_y = np.meshgrid(xs, ys)
c_x = c_x.flatten()
c_y = c_y.flatten()

In [36]:
rho = 0.5
delta = 0.8

D = np.abs(c_x - c_x[:,None]) + np.abs(c_y - c_y[:,None])
Sigma_t = rho**D
Sigma_t = delta*Sigma_t + (1-delta) * np.eye(n)

# m = 20
# c = np.zeros(n)
# c[:m] = [rho**i for i in np.arange(m)]
# Sigma_t = toeplitz(c)

# b = n / m
# c = [rho**i for i in np.arange(m)] 
# Sigma_t = block_diag(*[toeplitz(c) for _ in np.arange(b)])

Chol_t = np.linalg.cholesky(Sigma_t)
# Sigma_t = np.eye(n)
# Chol_t = np.eye(n)

In [37]:
# c_x = np.random.uniform(0,10,size=n)
# c_y = np.random.uniform(0,10,size=n)

# rn = int(np.sqrt(n))
# ctr_x = np.random.uniform(0,10,size=rn)
# ctr_y = np.random.uniform(0,10,size=rn)
# c_x = np.concatenate([c + .35 * np.random.randn(rn) for c in ctr_x])
# c_y = np.concatenate([c + .35 * np.random.randn(rn) for c in ctr_y])

X = gen_rbf_X(c_x, c_y, p)

beta = np.zeros(p)
idx = np.random.choice(p,size=s)
beta[idx] = np.random.uniform(-1,1,size=s)

In [38]:
# xv, yv = np.meshgrid(np.arange(nx), np.arange(ny))
# pts = np.stack([xv.ravel(), yv.ravel()]).T
# n = nx*ny
# rn = int(np.sqrt(n))
# ctr = np.random.choice(pts.shape[0], size=rn, replace=True)
# ctr = pts[ctr]
# tr_idx = np.vstack([[pt + np.array((1.25*np.random.randn(2)).astype(int)) for _ in np.arange(rn)] for pt in ctr])
# tr_idx = np.maximum(0, tr_idx)
# tr_idx[:,0] = cx = np.minimum(nx-1, tr_idx[:,0])
# tr_idx[:,1] = cy = np.minimum(ny-1, tr_idx[:,1])
# tr_idx = np.unique(np.ravel_multi_index(tr_idx.T, (nx,ny)))
# tr_idx.shape

In [39]:
(test_err,
 kfcv_err,
 spcv_err,
 brel_err) = err_cmp.compareBaggedTrTs(base_estimator=base_estimator,
                                       n=n,
                                       p=p,
                                       X=X,
                                       beta=beta,
                                       coord=np.stack([c_x, c_y]).T,
                                       Chol_t=Chol_t,
                                       niter=niter,
                                       n_estimators=n_estimators,
                                       k=k)
    

0
0.245


KeyboardInterrupt: 

In [25]:
%debug

ERROR:root:No traceback has been produced, nothing to debug.


In [26]:
# groups = KMeans(n_clusters=10).fit(tr_idx).labels_

In [27]:
# px.scatter(pd.DataFrame({'x': tr_idx[:,0], 'y': tr_idx[:,1], 'g': groups}), x='x', y='y', color='g')

In [28]:
risk = test_err.mean()
risk_kfcv = kfcv_err.mean()
risk_spcv = spcv_err.mean()
# risk_rela = rela_err.mean()
risk_brel = brel_err.mean()
risk, risk_kfcv, risk_spcv, risk_brel

(73723906183.32756, 4905.566980741616, 481245915127.4649, 42757508137.69975)

In [29]:
df = pd.DataFrame({'GenCp': (brel_err.T),# - test_err.T) / test_err.T,
                   'KFCV': (kfcv_err.T),# - test_err.T) / test_err.T,
                   'SPCV': (spcv_err.T)})# - test_err.T) / test_err.T})

In [30]:
fig_rela = px.box((df - risk) / risk, 
             labels={
                     "variable": "Method",
                     "value": "Relative MSE"
                     },
             title="lin correction",
             points=False)
fig_rela.update_traces(boxmean=True)

fig_rela.add_hline(y=0., line_color='red')

In [31]:
fig = go.Figure(data=go.Scatter(
        x=[0, 1, 2],
        y=((df - risk)/risk).mean(),
        error_y=dict(
            type='data', # value of error bar given in data coordinates
            array=((df - risk)/risk).std(),
            visible=True)
    ))
fig.show()

In [32]:
fig = go.Figure()
fig.add_trace(go.Bar(
    name='Control',
    x=['GenCp', 'KFCV', 'SPCV'], y=(df/risk).mean(),
    marker_color=px.colors.qualitative.Plotly,
    text=np.around((df/risk).mean(),3),
    textposition='outside',
    error_y=dict(
        type='data',
        color='black',
        symmetric=False,
        array=(df/risk).quantile(.9),
        arrayminus=(df/risk).quantile(.1))
))
# fig.add_trace(go.Bar(
#     name='Experimental',
#     x=['GenCp', 'KFCV', 'SPCV'], y=(df/risk).mean(),
#     error_y=dict(type='data', array=[1, 2])
# ))
# fig.update_layout(barmode='group')
fig.add_hline(y=1., line_color='red')
fig.update_layout(
    title="Bagged Relaxed Lasso Test Error Estimates",
    xaxis_title="Method",
    yaxis_title="Relative MSE",
#     legend_title="Legend Title",
#     font=dict(
#         family="Courier New, monospace",
#         size=18,
#         color="RebeccaPurple"
#     )
)
fig.show()

In [33]:
kfcv_err

array([4.28691286e+01, 7.50622090e+01, 7.83861295e+02, 3.72502893e+00,
       3.25491688e+02, 1.80114246e+02, 2.11572447e+01, 9.70348609e+00,
       1.26759615e+02, 5.00442809e+02, 6.07257265e+01, 3.93339286e+02,
       1.47354867e+03, 1.09445433e+00, 3.24362881e+03, 1.82038853e+03,
       4.12633299e+01, 7.14094981e+01, 9.38131513e+02, 9.92172990e+00,
       6.35720597e+01, 1.05756169e+02, 8.61504028e+00, 2.68738899e+01,
       3.94925526e+02, 7.95054201e+02, 1.90294294e+02, 1.74943609e+02,
       3.21518869e+03, 6.71950449e+01, 7.02318735e+01, 2.43979053e+01,
       7.22171048e+02, 1.58736990e+02, 7.42509437e-01, 6.53931418e+03,
       1.29226941e+02, 8.36731491e+00, 2.33862642e+02, 1.03823536e+01,
       3.17355017e+03, 8.28596369e+00, 1.04676797e+02, 3.04027694e+01,
       2.26324213e+02, 6.98939248e+01, 1.70493742e+03, 1.54474362e+02,
       2.40552367e+04, 5.09774999e+01, 2.34216634e+01, 4.29220674e+01,
       1.09785013e+02, 2.31902433e+01, 6.61115706e+00, 3.29674046e+01,
      