In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso, LinearRegression
from sklearn.cluster import KMeans
from sklearn.gaussian_process.kernels import Matern, RBF
from sklearn.ensemble import RandomForestRegressor

import plotly.express as px

import plotly.graph_objects as go

from scipy.linalg import toeplitz, block_diag

from spe.mse_estimator import ErrorComparer
from spe.data_generation import gen_rbf_X, gen_matern_X, create_clus_split, gen_cov_mat
from spe.forest import BlurredForest
from spe.estimators import kfoldcv, kmeanscv, better_test_est_split, cp_rf_train_test, cp_general_train_test, bag_kfoldcv, bag_kmeanscv

import os

In [2]:
# --- Default run parameters (a later parameters cell may override them) ---

# Simulation size: iterations passed to the comparer, sample count, feature
# count and number of coefficient indices drawn for beta.
niter = 100
n = 30 * 30
p = 30
s = 30

# Fold count handed to the k-fold / k-means cross-validation estimators.
k = 10

# Weight of the kernel covariance when it is blended with the identity.
delta = 0.8

# Recorded in params.csv and in the plot title; bootstrap count for the
# cp_general_train_test estimators.
alpha = .05
nboot = 100

# Random forest configuration.
n_estimators = 100
max_depth = 6

# Kernel family and hyper-parameters shared by the noise and feature processes.
kernel = 'matern'
length_scale = 1
nu = 0.5

# Row label used for this run in params.csv.
idx = -1

# Directory that receives params.csv, err_df.csv and the bar chart.
savedir = '~'

In [3]:
# Parameters
# NOTE(review): this appears to be the cell papermill injects at run time (the
# "# Parameters" marker and the papermill_templates path suggest so) - confirm.
# The assignments below override the defaults from the previous cell, e.g.
# delta (0.8 -> 0.5), nu (0.5 -> 2.5) and idx (-1 -> 1).
niter = 100
n = 900
p = 30
s = 30
k = 10
delta = 0.5
alpha = 0.05
nboot = 100
kernel = "matern"
length_scale = 1.0
nu = 2.5
# NOTE(review): absolute, machine-specific output directory; it only resolves
# on the machine that produced this run.
savedir = "/Users/kevinfry/Documents/GitHub/StructuredPredictionError/papermill_templates/output/RF/RF_our_vs_CV_alpha05/run_1/"
idx = 1


In [4]:
# The noise process and the feature process use the same kernel family and
# hyper-parameters; keep separate names so they can be decoupled later.
noise_kernel, X_kernel = kernel, kernel
noise_length_scale, X_length_scale = length_scale, length_scale
noise_nu, X_nu = nu, nu

In [5]:
# One random forest configuration shared by every estimator.
models = RandomForestRegressor(max_depth=max_depth, n_estimators=n_estimators)

# Each error estimator next to the keyword arguments passed alongside it to
# err_cmp.compare; the two parallel lists are derived from these pairs so
# they cannot drift out of step.
_estimator_specs = [
    (better_test_est_split, {'w_fit': False}),
    (cp_general_train_test, {'use_trace_corr': False, 'nboot': nboot}),
    (cp_general_train_test, {'use_trace_corr': True, 'nboot': nboot}),
    (kfoldcv, {'k': k}),
    (kmeanscv, {'k': k}),
]
ests = [estimator for estimator, _ in _estimator_specs]
est_kwargs = [kwargs for _, kwargs in _estimator_specs]

In [6]:
# Resolve the output directory once and build every file path with
# os.path.join. Plain string concatenation breaks when `savedir` has no
# trailing separator: '~' + 'params.csv' becomes '~params.csv', which
# expanduser() cannot resolve, so the file lands in the working directory.
out_dir = os.path.expanduser(savedir)
os.makedirs(out_dir, exist_ok=True)  # no exists()/makedirs() race

# One-row record of this run's configuration, labelled by the run index.
params = pd.DataFrame({'niter': niter,
                       'n': n,
                       'p': p,
                       's': s,
                       'k': k,
                       'n_estimators': n_estimators,
                       'max_depth': max_depth,
                       'alpha': alpha,
                       'delta': delta,
                       'nboot': nboot,
                       'nk': noise_kernel,
                       'nls': noise_length_scale,
                       'nn': noise_nu,
                       'xk': X_kernel,
                       'xls': X_length_scale,
                       'xn': X_nu}, index=[idx])
params.to_csv(os.path.join(out_dir, 'params.csv'))

# Destinations for the per-iteration error table and the summary bar chart.
dffp = os.path.join(out_dir, 'err_df.csv')
barfp = os.path.join(out_dir, 'barchart.jpeg')

In [7]:
# Comparer object; its `compare` method is invoked further down with the
# model, the estimators and the simulation settings.
err_cmp = ErrorComparer()

In [8]:
# Lay the n locations out on a square lattice over [0, 10] x [0, 10].
nx = ny = int(np.sqrt(n))
xs, ys = np.linspace(0, 10, nx), np.linspace(0, 10, ny)

# Flattened x / y coordinates of every lattice point.
c_x, c_y = (axis_grid.flatten() for axis_grid in np.meshgrid(xs, ys))

# One row per location: (x, y).
coord = np.stack((c_x, c_y)).T

In [9]:
# Base covariance over the lattice points: kernel-driven when a kernel is
# selected, identity otherwise.
if noise_kernel == 'matern':
    Sigma_t = gen_cov_mat(c_x, c_y, Matern(length_scale=noise_length_scale, nu=noise_nu))
elif noise_kernel == 'rbf':
    Sigma_t = gen_cov_mat(c_x, c_y, RBF(length_scale=noise_length_scale))
else:
    Sigma_t = np.eye(n)

# Blend the base covariance with the identity using weight delta.
Sigma_t = delta * Sigma_t + (1 - delta) * np.eye(n)

# Cholesky factor handed to the comparer; the identity case needs no
# factorisation.
Chol_t = np.linalg.cholesky(Sigma_t) if noise_kernel in ('rbf', 'matern') else np.eye(n)

In [10]:
# Feature matrix: drawn from the selected kernel's generator over the lattice
# coordinates, or i.i.d. standard normal when no kernel is selected.
if X_kernel == 'rbf':
    X = gen_rbf_X(c_x, c_y, p)
elif X_kernel == 'matern':
    X = gen_matern_X(c_x, c_y, p, length_scale=X_length_scale, nu=X_nu)
else:
    X = np.random.randn(n,p)

# Coefficient vector with exactly `s` non-zero entries drawn from U(-1, 1).
# - The positions are stored under their own name so the run-index parameter
#   `idx` (used to label the params row) is not overwritten.
# - replace=False keeps the positions distinct; with the default sampling with
#   replacement, duplicates would leave fewer than `s` non-zero coefficients.
beta = np.zeros(p)
nonzero_idx = np.random.choice(p, size=s, replace=False)
beta[nonzero_idx] = np.random.uniform(-1,1,size=s)

In [11]:
# Run every estimator through the comparer. Five result arrays come back, one
# per entry of `ests`; the names below presumably follow that order
# (held-out test error, the two cp_general_train_test variants, k-fold CV,
# k-means CV) - confirm against ErrorComparer.compare.
(test_err,
 rnd_err,
 trc_err,
 kfcv_err,
 spcv_err) = err_cmp.compare(models,
                             ests,
                             est_kwargs,
                             niter=niter,
                             n=n,
                             p=p,
                             s=s,  # was `s=p`: pass the configured sparsity, not the feature count
                             snr=0.4,
                             X=X,
                             beta=beta,
                             coord=coord,
                             Chol_t=Chol_t,
                             Chol_s=None,
                             tr_idx=None,
                             fair=False,
                             )

[RandomForestRegressor(max_depth=6), RandomForestRegressor(max_depth=6), RandomForestRegressor(max_depth=6), RandomForestRegressor(max_depth=6), RandomForestRegressor(max_depth=6)]
0
0.3788888888888889


10


20


30


40


50


60


70


80


KeyboardInterrupt: 

In [None]:
# Average of each error series; `risk` (mean held-out test error) is the
# reference the estimators are normalised by below.
risk, risk_kfcv, risk_spcv, risk_rnd, risk_trc = (
    errors.mean()
    for errors in (test_err, kfcv_err, spcv_err, rnd_err, trc_err)
)

In [None]:
# Persist all error series, including the test error, one column per series.
save_df = pd.DataFrame(dict(
    Rand=rnd_err.T,
    Trace=trc_err.T,
    KFCV=kfcv_err.T,
    SPCV=spcv_err.T,
    Test=test_err.T,
))
save_df.to_csv(dffp)

In [None]:
# Estimator outputs only (no test error), one column per estimator.
_estimate_columns = (
    ('Rand', rnd_err),
    ('Trace', trc_err),
    ('KFCV', kfcv_err),
    ('SPCV', spcv_err),
)
df = pd.DataFrame({label: errors.T for label, errors in _estimate_columns})

In [None]:
# Mean of each estimator relative to the mean test error (1.0 = on target).
df.div(risk).mean()

In [None]:
# Per-estimator variance, raw and after normalising by the mean test error.
df.var(), df.div(risk).var()

In [None]:
# Each estimate relative to the mean test error; the bar height is its mean.
rel_err = df / risk
rel_mean = rel_err.mean()

# Plotly draws error bars as offsets from the bar height, not as absolute
# values. Convert the quartiles into distances from the mean so the whiskers
# span the inter-quartile range; clip at zero for the case where the mean
# falls outside the quartiles (a bar length cannot be negative).
upper_offset = (rel_err.quantile(.75) - rel_mean).clip(lower=0)
lower_offset = (rel_mean - rel_err.quantile(.25)).clip(lower=0)

fig = go.Figure()
fig.add_trace(go.Bar(
    name='Control',
    x=['Rand', 'Trace', 'KFCV', 'SPCV'], y=rel_mean,
    marker_color=px.colors.qualitative.Plotly,
    text=np.around(rel_mean, 3),
    textposition='outside',
    error_y=dict(
        type='data',
        color='black',
        symmetric=False,
        array=upper_offset,
        arrayminus=lower_offset)
))
# Reference line: a ratio of 1 means the estimator matches the test error.
fig.add_hline(y=1., line_color='red')
fig.update_layout(
    title=f"VRF: \u03B1={alpha}, \u03B4={delta}, kern={kernel}, ls={length_scale}, v={nu}",
    xaxis_title="Method",
    yaxis_title="Relative MSE",
)
fig.write_image(os.path.expanduser(barfp))
fig.show()