In [1]:
import numpy as np
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go

from spe.mse_estimator import ErrorComparer
from spe.forest import BlurredForest

In [2]:
# Shared ErrorComparer instance; its compareForestIID method is what every
# simulation cell in this notebook calls.
err_cmp = ErrorComparer()

In [19]:
# Simulation: sweep alpha for a BlurredForest with rand_type='full' and
# use_expectation=True, on a fixed standard-normal X and a sparse beta.
alphas = [0.05, 0.1, 0.2, 0.5, 0.8, 1.]
na = len(alphas)
niter = 100

# Result buffers: one row per alpha, one column per iteration.
test_err = np.zeros((na, niter))
blur_err = np.zeros((na, niter))

n = 100  # rows of X
p = 30   # columns of X / length of beta
s = 10   # number of nonzero entries placed in beta
X = np.random.randn(n, p)
beta = np.zeros(p)
# replace=False: np.random.choice samples WITH replacement by default, which
# can repeat an index and leave beta with fewer than s nonzero coefficients.
idx = np.random.choice(p, size=s, replace=False)
beta[idx] = np.random.uniform(-1, 1, size=s)

for i, alpha in enumerate(alphas):
    print(i)  # progress: index of the alpha currently being run
    # NOTE(review): presumably returns (test error, blurred estimate), each of
    # length niter -- confirm against ErrorComparer.compareForestIID.
    test_err[i, :], blur_err[i, :] = err_cmp.compareForestIID(
        n=n,
        p=p,
        X=X,
        beta=beta,
        model=BlurredForest(max_depth=4,
                            max_features="sqrt",
                            n_estimators=100),
        alpha=alpha,
        niter=niter,
        use_expectation=True,
        rand_type='full',
        est_risk=False,
    )
    

0
1
2
3
4
5


In [20]:
# Average test_err across iterations (axis 1), giving one value per alpha.
risk = np.mean(test_err, axis=1)

In [21]:
# Per-iteration difference blur_err - test_err, transposed so that rows are
# iterations and columns are alpha indices.
blur_df = pd.DataFrame((blur_err - test_err).T)

In [22]:
# Box plot of blur_df (one box per column, i.e. per alpha index), showing all
# points and with boxmean enabled on every trace.
fig_blur = px.box(
    blur_df,
    labels={"variable": "Alpha", "value": "Risk"},
    points='all',
    title=f"Blurred Full refit n={n}, p={p}",
).update_traces(boxmean=True)

# Disabled overlay: horizontal line at the overall mean of test_err.
# fig_blur.add_trace(go.Scatter(x=[-1, na],
#                               y=[test_err.mean(), test_err.mean()],
#                               mode='lines',
#                               name=f'err: {test_err.mean():.2f}'))

# Relabel x ticks at positions 0..na-1 with the alpha values; the figure
# returned by update_layout is the cell's displayed output.
fig_blur.update_layout(xaxis={
    "tickmode": 'array',
    "tickvals": np.arange(na),
    "ticktext": alphas,
})

In [23]:
# Simulation: sweep alpha for a BlurredForest with rand_type='full' and
# use_expectation=False, on a fixed standard-normal X (p > n) and a sparse beta.
alphas = [0.05, 0.1, 0.2, 0.5, 0.8, 1.]
na = len(alphas)
niter = 200

# Result buffers: one row per alpha, one column per iteration.
test_err = np.zeros((na, niter))
blur_err = np.zeros((na, niter))

n = 100  # rows of X
p = 200  # columns of X / length of beta
s = 30   # number of nonzero entries placed in beta
X = np.random.randn(n, p)
beta = np.zeros(p)
# replace=False: np.random.choice samples WITH replacement by default, which
# can repeat an index and leave beta with fewer than s nonzero coefficients.
idx = np.random.choice(p, size=s, replace=False)
beta[idx] = np.random.uniform(-1, 1, size=s)

for i, alpha in enumerate(alphas):
    print(i)  # progress: index of the alpha currently being run
    # NOTE(review): presumably returns (test error, blurred estimate), each of
    # length niter -- confirm against ErrorComparer.compareForestIID.
    test_err[i, :], blur_err[i, :] = err_cmp.compareForestIID(
        n=n,
        p=p,
        X=X,
        beta=beta,
        model=BlurredForest(max_depth=4,
                            max_features="sqrt",
                            n_estimators=100),
        alpha=alpha,
        niter=niter,
        use_expectation=False,
        rand_type='full',
        est_risk=False,
    )
    

0
1
2
3
4
5


In [24]:
# Average test_err across iterations (axis 1), giving one value per alpha.
risk = np.mean(test_err, axis=1)

In [25]:
# Per-iteration difference blur_err - test_err, transposed so that rows are
# iterations and columns are alpha indices.
blur_df = pd.DataFrame((blur_err - test_err).T)

In [26]:
# Box plot of blur_df (one box per column, i.e. per alpha index), without
# individual points and with boxmean enabled on every trace.
fig_blur = px.box(
    blur_df,
    labels={"variable": "Alpha", "value": "Risk"},
    points=False,
    title=f"Blurred Full refit n={n}, p={p}",
).update_traces(boxmean=True)

# Disabled overlay: horizontal line at the overall mean of test_err.
# fig_blur.add_trace(go.Scatter(x=[-1, na],
#                               y=[test_err.mean(), test_err.mean()],
#                               mode='lines',
#                               name=f'err: {test_err.mean():.2f}'))

# Relabel x ticks at positions 0..na-1 with the alpha values; the figure
# returned by update_layout is the cell's displayed output.
fig_blur.update_layout(xaxis={
    "tickmode": 'array',
    "tickvals": np.arange(na),
    "ticktext": alphas,
})

In [27]:
# Simulation: sweep alpha for a BlurredForest with rand_type='rand' and
# use_expectation=True, on a fixed standard-normal X (p > n) and a sparse beta.
alphas = [0.05, 0.1, 0.2, 0.5, 0.8, 1.]
na = len(alphas)
niter = 200

# Result buffers: one row per alpha, one column per iteration.
test_err_w = np.zeros((na, niter))
blur_err_w = np.zeros((na, niter))

n = 100  # rows of X
p = 200  # columns of X / length of beta
s = 30   # number of nonzero entries placed in beta
X = np.random.randn(n, p)
beta = np.zeros(p)
# replace=False: np.random.choice samples WITH replacement by default, which
# can repeat an index and leave beta with fewer than s nonzero coefficients.
idx = np.random.choice(p, size=s, replace=False)
beta[idx] = np.random.uniform(-1, 1, size=s)

for i, alpha in enumerate(alphas):
    print(i)  # progress: index of the alpha currently being run
    # NOTE(review): presumably returns (test error, blurred estimate), each of
    # length niter -- confirm against ErrorComparer.compareForestIID.
    test_err_w[i, :], blur_err_w[i, :] = err_cmp.compareForestIID(
        n=n,
        p=p,
        X=X,
        beta=beta,
        model=BlurredForest(max_depth=4,
                            max_features="sqrt",
                            n_estimators=100),
        alpha=alpha,
        niter=niter,
        use_expectation=True,
        rand_type='rand',
        est_risk=False,
    )
    

0
1
2
3
4
5


In [28]:
# Average test_err_w across iterations (axis 1), giving one value per alpha.
risk_w = np.mean(test_err_w, axis=1)

In [29]:
# Per-iteration difference blur_err_w - test_err_w, transposed so that rows
# are iterations and columns are alpha indices.
blur_df_w = pd.DataFrame((blur_err_w - test_err_w).T)

In [30]:
# Box plot of blur_df_w (one box per column, i.e. per alpha index), without
# individual points and with boxmean enabled on every trace.
fig_blur_w = px.box(
    blur_df_w,
    labels={"variable": "Alpha", "value": "Risk"},
    points=False,
    title=f"Blurred W refit n={n}, p={p}",
).update_traces(boxmean=True)

# Disabled overlay: horizontal line at the overall mean of test_err_w.
# fig_blur_w.add_trace(go.Scatter(x=[-1, na],
#                                 y=[test_err_w.mean(), test_err_w.mean()],
#                                 mode='lines',
#                                 name=f'err: {test_err_w.mean():.2f}'))

# Relabel x ticks at positions 0..na-1 with the alpha values; the figure
# returned by update_layout is the cell's displayed output.
fig_blur_w.update_layout(xaxis={
    "tickmode": 'array',
    "tickvals": np.arange(na),
    "ticktext": alphas,
})

In [31]:
# Simulation: sweep alpha for a BlurredForest with rand_type='rand' and
# use_expectation=False, on a fixed standard-normal X (p > n) and a sparse beta.
alphas = [0.05, 0.1, 0.2, 0.5, 0.8, 1.]
na = len(alphas)
niter = 200

# Result buffers: one row per alpha, one column per iteration.
test_err_w = np.zeros((na, niter))
blur_err_w = np.zeros((na, niter))

n = 100  # rows of X
p = 200  # columns of X / length of beta
s = 30   # number of nonzero entries placed in beta
X = np.random.randn(n, p)
beta = np.zeros(p)
# replace=False: np.random.choice samples WITH replacement by default, which
# can repeat an index and leave beta with fewer than s nonzero coefficients.
idx = np.random.choice(p, size=s, replace=False)
beta[idx] = np.random.uniform(-1, 1, size=s)

for i, alpha in enumerate(alphas):
    print(i)  # progress: index of the alpha currently being run
    # NOTE(review): presumably returns (test error, blurred estimate), each of
    # length niter -- confirm against ErrorComparer.compareForestIID.
    test_err_w[i, :], blur_err_w[i, :] = err_cmp.compareForestIID(
        n=n,
        p=p,
        X=X,
        beta=beta,
        model=BlurredForest(max_depth=4,
                            max_features="sqrt",
                            n_estimators=100),
        alpha=alpha,
        niter=niter,
        use_expectation=False,
        rand_type='rand',
        est_risk=False,
    )
    

0
1
2
3
4
5


In [32]:
# Average test_err_w across iterations (axis 1), giving one value per alpha.
risk_w = np.mean(test_err_w, axis=1)

In [33]:
# Per-iteration difference blur_err_w - test_err_w, transposed so that rows
# are iterations and columns are alpha indices.
blur_df_w = pd.DataFrame((blur_err_w - test_err_w).T)

In [34]:
# Box plot of blur_df_w (one box per column, i.e. per alpha index), without
# individual points and with boxmean enabled on every trace.
fig_blur_w = px.box(
    blur_df_w,
    labels={"variable": "Alpha", "value": "Risk"},
    points=False,
    title=f"Blurred W refit n={n}, p={p}",
).update_traces(boxmean=True)

# Disabled overlay: horizontal line at the overall mean of test_err_w.
# fig_blur_w.add_trace(go.Scatter(x=[-1, na],
#                                 y=[test_err_w.mean(), test_err_w.mean()],
#                                 mode='lines',
#                                 name=f'err: {test_err_w.mean():.2f}'))

# Relabel x ticks at positions 0..na-1 with the alpha values; the figure
# returned by update_layout is the cell's displayed output.
fig_blur_w.update_layout(xaxis={
    "tickmode": 'array',
    "tickvals": np.arange(na),
    "ticktext": alphas,
})