In [1]:
import functools
import os
import sys

# DeepGMM is vendored next to this notebook; it must be on sys.path before
# the project-local imports below are resolved.
sys.path.extend(['DeepGMM'])

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from joblib import Parallel, delayed

from DeepGMM.methods.toy_model_selection_method import ToyModelSelectionMethod
from kernel import CategoryKernel, RBFKernel
from model import train_HSIC_IV, NonlinearModel, train_mse
from utils import med_sigma, to_torch, gen_data, fit_restart, gen_radial_fn

In [2]:
# Seed NumPy's global RNG before anything below draws from it.
np.random.seed(1)

# Experiment size: replicates per alpha value and samples per generated data set.
n_rep = 8
n = 1000

# Arguments forwarded to gen_radial_fn when building the target function `f`.
num_basis = 10
data_limits = (-7, 7)

# Training settings handed to train_HSIC_IV.
config_hsic = dict(batch_size=256, lr=0.05, max_epoch=700, num_restart=1)

# Training settings handed to train_mse.
config_mse = dict(batch_size=256, lr=0.05, max_epoch=300)

# Target function used to generate data and to score predictions.
f = gen_radial_fn(num_basis=num_basis, data_limits=data_limits)

In [3]:
def _fit_hsic_iv(init_net, kernel_e, kernel_z, lmd, X, Y, Z, X_test):
    """Train one HSIC-IV network and return its intercept-adjusted predictions.

    Parameters
    ----------
    init_net : network whose weights are loaded into the new model before
        training (passed straight to ``NonlinearModel.load_state_dict``).
    kernel_e, kernel_z : kernel objects forwarded to ``NonlinearModel``.
    lmd : value forwarded as ``NonlinearModel(lmd=...)``; 0 for the
        non-regularized fit, a small positive value for the regularized one.
    X, Y, Z : training sample passed to ``train_HSIC_IV``.
    X_test : inputs at which predictions are returned.

    Returns
    -------
    numpy array with the predictions at ``X_test``, shifted so that the mean
    prediction on ``X`` equals ``Y.mean()``.
    """
    hsic_net = NonlinearModel(input_dim=1,
                              lr=config_hsic['lr'],
                              kernel_e=kernel_e,
                              kernel_z=kernel_z,
                              lmd=lmd)

    # NOTE(review): the model object itself (not a state dict) is passed here,
    # exactly as before; presumably NonlinearModel overrides load_state_dict
    # to accept a model -- confirm in model.py.
    hsic_net.load_state_dict(init_net)
    hsic_net = train_HSIC_IV(hsic_net, config_hsic, X, Y, Z, verbose=False)

    # Re-centre the predictions on the training response mean.
    intercept_adjust = Y.mean() - hsic_net(to_torch(X)).mean()
    y_hat = intercept_adjust + hsic_net(to_torch(X_test))
    return y_hat.detach().numpy().copy()


# Create the output directory up front so a long run cannot fail at the very
# end just because 'results/' is missing.
os.makedirs('results', exist_ok=True)

for instrument in ['Gaussian', 'Binary']:
    iv_type = 'mix_{}'.format(instrument)

    # Reference draw; only its number of rows is used below (size of X_test).
    _, _, _, X_vis = gen_data(f, n, iv_type)

    alphas = np.linspace(0, 1, 5)
    alpha_frames = []

    for alpha in alphas:

        def rep_function(i):
            """Run one replicate for the current `alpha` and return a DataFrame
            with the true f(x) and every method's prediction at the test inputs.

            `i` is only recorded as 'run_id'.
            NOTE(review): the replicates run in joblib worker processes, so the
            np.random.seed call in the config cell presumably does not make
            them reproducible -- confirm and seed per replicate if needed.
            """
            X, Y, Z, _ = gen_data(f, n, iv_type, alpha=alpha)
            # dev set for DeepGMM
            X_dev, Y_dev, Z_dev, _ = gen_data(f, n, iv_type, alpha=alpha)
            # oracle set
            X_o, Y_o, Z_o, _ = gen_data(f, n, iv_type, alpha=alpha, oracle=True)
            X_test, _, _, _ = gen_data(f, X_vis.shape[0], iv_type, alpha=alpha)

            # Pure predictive baseline.
            # NOTE(review): lmd=-99 looks like a sentinel meaning "no HSIC
            # term" -- confirm against NonlinearModel.
            mse_net = NonlinearModel(input_dim=1,
                                     lr=config_mse['lr'],
                                     lmd=-99)
            mse_net = train_mse(mse_net, config_mse, X, Y, Z)
            y_hat_mse = mse_net(to_torch(X_test)).detach().numpy()

            # Oracle baseline: a separate network trained on the oracle sample.
            # Previously mse_net itself was passed to train_mse again and then
            # used for the oracle prediction, so the oracle was not an
            # independent model and the HSIC initial weights below were taken
            # from mse_net after that oracle training call.
            oracle_net = NonlinearModel(input_dim=1,
                                        lr=config_mse['lr'],
                                        lmd=-99)
            oracle_net = train_mse(oracle_net, config_mse, X_o, Y_o, Z_o)
            y_hat_oracle = oracle_net(to_torch(X_test)).detach().numpy()

            # HSIC IV: kernel on the residuals and kernel on the instrument.
            s_z = med_sigma(Z)
            kernel_e = RBFKernel(sigma=1)

            if instrument == 'Binary':
                kernel_z = CategoryKernel()
            else:
                kernel_z = RBFKernel(sigma=s_z)

            # Non-regularized (lmd=0) and regularized (lmd=5e-5) fits, both
            # initialised from the predictive network's weights.
            y_hat_hsic = _fit_hsic_iv(mse_net, kernel_e, kernel_z, 0,
                                      X, Y, Z, X_test)
            y_hat_hsic_pen = _fit_hsic_iv(mse_net, kernel_e, kernel_z, 5e-5,
                                          X, Y, Z, X_test)

            # DeepGMM takes double-precision tensors in the order
            # (X, Z, Y, X_dev, Z_dev, Y_dev).
            dat = [to_torch(arr).double()
                   for arr in (X, Z, Y, X_dev, Z_dev, Y_dev)]

            deepGMM = ToyModelSelectionMethod()
            deepGMM.fit(*dat, g_dev=None, verbose=True)
            y_hat_deepGMM = deepGMM.predict(to_torch(X_test).double()).flatten().detach().numpy()

            inner_df = pd.DataFrame()
            inner_df['f_x'] = f(X_test)
            inner_df['Pred'] = y_hat_mse
            inner_df['HSIC-IV'] = y_hat_hsic
            inner_df['HSIC-AR'] = y_hat_hsic_pen
            inner_df['D-GMM'] = y_hat_deepGMM
            inner_df['Oracle'] = y_hat_oracle
            inner_df['alpha'] = alpha
            inner_df['run_id'] = i

            return inner_df

        ret_df = Parallel(n_jobs=4)(
            delayed(rep_function)(i=i) for i in range(n_rep))

        # Serial alternative, handy for debugging:
        # ret_df = [rep_function(i) for i in range(n_rep)]

        # One concat per alpha instead of chained DataFrame.append calls
        # (append is deprecated/removed in recent pandas and copies each time).
        ret_df = pd.concat(ret_df, ignore_index=True)
        alpha_frames.append(ret_df)

    res_df = pd.concat(alpha_frames, ignore_index=True)

    # Long format: one row per (test point, method) with its prediction.
    melt_res_df = res_df.melt(id_vars=['f_x', 'alpha', 'run_id'], var_name='Method',
                              value_name='y_pred')
    # Squared error per test point; averaged per (Method, alpha, run_id) below.
    melt_res_df['MISE'] = (melt_res_df['f_x'] - melt_res_df['y_pred']) ** 2
    final_df = melt_res_df.groupby(['Method', 'alpha', 'run_id'])['MISE'].mean().reset_index()
    final_df['alpha'] = np.round(final_df.alpha, 2)
    final_df.to_csv("results/compare_df_NN_ins_{}.csv".format(instrument),
                    index=False)

    # Pass the palette by name; sns.set_palette() returns None, so feeding its
    # result to `palette=` only worked through its side effect.
    sns.set(font_scale=1.4, style='white', palette="tab10")

    # The log scale is applied on the axis below; catplot has no `log` argument.
    g = sns.catplot(data=final_df, kind="point",
                    x='alpha', y='MISE', hue='Method',
                    markers=["o", "x", "d", "s", "v"], linestyles=[':', '--', '-', '-.', ':'],
                    capsize=.07, aspect=1.5, height=3.2, ci=95)
    g.fig.get_axes()[0].set_yscale('log')
    # Drop the default FacetGrid legend; a horizontal one is drawn above the axes.
    g._legend.remove()

    plt.xlabel(r'$\alpha$')
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15),
               ncol=5, fancybox=True, shadow=False, prop={'size': 10})
    plt.savefig('results/compare_NN_ins_{}.pdf'.format(instrument),
                bbox_inches="tight")
    plt.close()


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"
GPU available: False, used: False
TPU available: False, using: 0 TPU core

p-value: 0.9491878725332269
iteration 1, accept the test!
p-value: 0.7973788055822338
iteration 1, accept the test!
starting learning args eval 0
starting learning args eval 1
starting learning args eval 2
learning eval: -0.0008624847958145626 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0025:'betas'=(0.5, 0.9)
learning eval: -0.0003339656069959308 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0002:'betas'=(0.5, 0.9) OAdam:::'lr'=0.001:'betas'=(0.5, 0.9)
learning eval: -0.0003749015649348753 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0001:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9)
size of f_z collection: 300
iteration 0, dev-MSE=0.000000, train-loss=-0.584403, dev-loss=-0.661850, mean-recent-eval=-0.221306
iteration 100, dev-MSE=0.000000, train-loss=0.271197, dev-loss=0.277111, mean-recent-eval=-0.204337
iteration 200, dev-MSE=0.000000, train-loss=0.086104, dev-loss=0.095005, mean-recent-eval=-0.093789
iteration 3

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"
GPU available: False, used: False
TPU

p-value: 0.8987982886495935
iteration 1, accept the test!
p-value: 0.8856752610415047
iteration 1, accept the test!
starting learning args eval 0
starting learning args eval 1
starting learning args eval 2
learning eval: -0.000152941788921957 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0025:'betas'=(0.5, 0.9)
learning eval: -0.000186436692880201 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0002:'betas'=(0.5, 0.9) OAdam:::'lr'=0.001:'betas'=(0.5, 0.9)
learning eval: -0.00022527235791368195 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0001:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9)
size of f_z collection: 300
iteration 0, dev-MSE=0.000000, train-loss=1.096508, dev-loss=0.972433, mean-recent-eval=-0.042228
iteration 100, dev-MSE=0.000000, train-loss=0.000174, dev-loss=0.002517, mean-recent-eval=-0.005997
iteration 200, dev-MSE=0.000000, train-loss=0.001226, dev-loss=0.003522, mean-recent-eval=-0.000402
iteration 300,

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  /opt/conda/conda-bld/pytorch_1634272178570/work/torch/csrc/utils/python_arg_parser.cpp:1050.)
  exp_avg.mul_(beta1).add_(1 - beta1, grad)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU 

p-value: 0.7871027341716591
iteration 1, accept the test!
p-value: 0.7277282544123775
iteration 1, accept the test!
starting learning args eval 0
starting learning args eval 1
starting learning args eval 2
learning eval: -4.9698088391228647e-05 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0025:'betas'=(0.5, 0.9)
learning eval: -0.00019463940916631333 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0002:'betas'=(0.5, 0.9) OAdam:::'lr'=0.001:'betas'=(0.5, 0.9)
learning eval: -0.00013508531632816447 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0001:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9)
size of f_z collection: 300
iteration 0, dev-MSE=0.000000, train-loss=0.388351, dev-loss=0.372539, mean-recent-eval=-0.154714
iteration 100, dev-MSE=0.000000, train-loss=0.004729, dev-loss=0.002104, mean-recent-eval=-0.035392
iteration 200, dev-MSE=0.000000, train-loss=0.000488, dev-loss=-0.000293, mean-recent-eval=-0.000112
iteration

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs



iteration 3300, dev-MSE=0.000000, train-loss=0.003842, dev-loss=-0.005849, mean-recent-eval=0.047642
iteration 3400, dev-MSE=0.000000, train-loss=0.003403, dev-loss=-0.006222, mean-recent-eval=0.047634
iteration 3500, dev-MSE=0.000000, train-loss=0.003415, dev-loss=-0.008689, mean-recent-eval=0.047578
iteration 3600, dev-MSE=0.000000, train-loss=0.004254, dev-loss=-0.000181, mean-recent-eval=0.047551
best iteration: 3280


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU availab

p-value: 0.9738824830035783
iteration 1, accept the test!
p-value: 0.8367527485277695
iteration 1, accept the test!
starting learning args eval 0
starting learning args eval 1
starting learning args eval 2
learning eval: -0.0005543783576054621 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0025:'betas'=(0.5, 0.9)
learning eval: -0.0005989390793960116 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0002:'betas'=(0.5, 0.9) OAdam:::'lr'=0.001:'betas'=(0.5, 0.9)
learning eval: -0.0007222860306172411 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0001:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9)
size of f_z collection: 300
iteration 0, dev-MSE=0.000000, train-loss=0.041075, dev-loss=0.027373, mean-recent-eval=-0.001746
iteration 100, dev-MSE=0.000000, train-loss=0.014725, dev-loss=0.009169, mean-recent-eval=-0.000672
iteration 200, dev-MSE=0.000000, train-loss=0.010527, dev-loss=0.009957, mean-recent-eval=-0.000914
iteration 300

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs



iteration 2800, dev-MSE=0.000000, train-loss=0.008588, dev-loss=-0.000436, mean-recent-eval=0.040833
iteration 2900, dev-MSE=0.000000, train-loss=0.008273, dev-loss=-0.000443, mean-recent-eval=0.040722
iteration 3000, dev-MSE=0.000000, train-loss=0.008469, dev-loss=-0.000421, mean-recent-eval=0.040413
iteration 3100, dev-MSE=0.000000, train-loss=0.008462, dev-loss=-0.000426, mean-recent-eval=0.040459
best iteration: 2760


  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU core

p-value: 0.9583857261020582
iteration 1, accept the test!
p-value: 0.8789992895895922
iteration 1, accept the test!
starting learning args eval 0
starting learning args eval 1
starting learning args eval 2
learning eval: 0.0018222967259547505 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0025:'betas'=(0.5, 0.9)
learning eval: 0.003482763562217564 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0002:'betas'=(0.5, 0.9) OAdam:::'lr'=0.001:'betas'=(0.5, 0.9)
learning eval: 0.003466911865742205 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0001:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9)
size of f_z collection: 300
iteration 0, dev-MSE=0.000000, train-loss=-0.616575, dev-loss=-0.600690, mean-recent-eval=-0.075143
iteration 100, dev-MSE=0.000000, train-loss=0.146404, dev-loss=0.133531, mean-recent-eval=-0.087276
iteration 200, dev-MSE=0.000000, train-loss=0.023714, dev-loss=0.025817, mean-recent-eval=-0.022646
iteration 300, d

	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  /opt/conda/conda-bld/pytorch_1634272178570/work/torch/csrc/utils/python_arg_parser.cpp:1050.)
  exp_avg.mul_(beta1).add_(1 - beta1, grad)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  /opt/conda/conda-bld/pytorch_1634272178570/work/torch/csrc/utils/python_arg_parser.cpp:1050.)
  exp_avg.mul_(beta1).add_(1 - beta1, grad)
GPU available: False, used: False
IPU available: False, using: 0 IPUs
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU availa

p-value: 0.9935162281193584
iteration 1, accept the test!
p-value: 0.9050864775532997
iteration 1, accept the test!
starting learning args eval 0
starting learning args eval 1
starting learning args eval 2
learning eval: 0.03202152125471611 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0025:'betas'=(0.5, 0.9)
learning eval: 0.03130912863828343 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0002:'betas'=(0.5, 0.9) OAdam:::'lr'=0.001:'betas'=(0.5, 0.9)
learning eval: 0.020462903033106297 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0001:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9)
size of f_z collection: 300
iteration 0, dev-MSE=0.000000, train-loss=-2.206342, dev-loss=-2.001995, mean-recent-eval=-0.175585
iteration 100, dev-MSE=0.000000, train-loss=0.350049, dev-loss=0.299371, mean-recent-eval=-0.193315
iteration 200, dev-MSE=0.000000, train-loss=0.077495, dev-loss=0.043871, mean-recent-eval=-0.000668
iteration 300, dev-

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"The dataloader, {name}, does not 

p-value: 0.7359668691807557
iteration 1, accept the test!
p-value: 0.6355771383948758
iteration 1, accept the test!
starting learning args eval 0
starting learning args eval 1
starting learning args eval 2
learning eval: 0.020969233584873133 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0025:'betas'=(0.5, 0.9)
learning eval: 0.020845683242439964 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0002:'betas'=(0.5, 0.9) OAdam:::'lr'=0.001:'betas'=(0.5, 0.9)
learning eval: 0.01608064728297983 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0001:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9)
size of f_z collection: 300
iteration 0, dev-MSE=0.000000, train-loss=0.422914, dev-loss=0.421512, mean-recent-eval=-0.032962
iteration 100, dev-MSE=0.000000, train-loss=0.060912, dev-loss=0.042246, mean-recent-eval=0.001107
iteration 200, dev-MSE=0.000000, train-loss=0.055069, dev-loss=0.035726, mean-recent-eval=0.004822
iteration 300, dev-MSE

	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  /opt/conda/conda-bld/pytorch_1634272178570/work/torch/csrc/utils/python_arg_parser.cpp:1050.)
  exp_avg.mul_(beta1).add_(1 - beta1, grad)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  /opt/conda/conda-bld/pytorch_1634272178570/work/torch/csrc/utils/python_arg_parser.cpp:1050.)
  exp_avg.mul_(beta1).add_(1 - beta1, grad)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU availa


iteration 4600, dev-MSE=0.000000, train-loss=0.004579, dev-loss=0.000528, mean-recent-eval=0.029885
iteration 4700, dev-MSE=0.000000, train-loss=0.003621, dev-loss=0.010166, mean-recent-eval=0.029950
iteration 4800, dev-MSE=0.000000, train-loss=0.004087, dev-loss=0.002427, mean-recent-eval=0.030023
iteration 4900, dev-MSE=0.000000, train-loss=0.003658, dev-loss=0.005290, mean-recent-eval=0.030034
iteration 5000, dev-MSE=0.000000, train-loss=0.003237, dev-loss=0.012285, mean-recent-eval=0.030135
iteration 5100, dev-MSE=0.000000, train-loss=0.003356, dev-loss=0.007131, mean-recent-eval=0.030096
iteration 5200, dev-MSE=0.000000, train-loss=0.003910, dev-loss=0.001006, mean-recent-eval=0.030207
iteration 5300, dev-MSE=0.000000, train-loss=0.003224, dev-loss=0.006337, mean-recent-eval=0.030240
iteration 5400, dev-MSE=0.000000, train-loss=0.003139, dev-loss=0.010489, mean-recent-eval=0.030211
iteration 5500, dev-MSE=0.000000, train-loss=0.004632, dev-loss=-0.003605, mean-recent-eval=0.03035

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The number of training samples ({self.num_training_batches}) 

p-value: 0.9944610431097385
iteration 1, accept the test!
p-value: 0.829127597085213
iteration 1, accept the test!
starting learning args eval 0
starting learning args eval 1
starting learning args eval 2
learning eval: 0.019081567754038582 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0025:'betas'=(0.5, 0.9)
learning eval: 0.01821704294103877 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0002:'betas'=(0.5, 0.9) OAdam:::'lr'=0.001:'betas'=(0.5, 0.9)
learning eval: 0.016879208384971055 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0001:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9)
size of f_z collection: 300
iteration 0, dev-MSE=0.000000, train-loss=1.522975, dev-loss=1.302034, mean-recent-eval=-0.001414
iteration 100, dev-MSE=0.000000, train-loss=0.153918, dev-loss=0.179193, mean-recent-eval=0.008774
iteration 200, dev-MSE=0.000000, train-loss=0.142161, dev-loss=0.164497, mean-recent-eval=0.011412
iteration 300, dev-MSE=

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


p-value: 0.9748746083294929
iteration 1, accept the test!
p-value: 0.8795372541996855
iteration 1, accept the test!
starting learning args eval 0
starting learning args eval 1
starting learning args eval 2
learning eval: 0.011353091912318525 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0025:'betas'=(0.5, 0.9)
learning eval: 0.012627235167457842 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0002:'betas'=(0.5, 0.9) OAdam:::'lr'=0.001:'betas'=(0.5, 0.9)
learning eval: 0.013341108616626733 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0001:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9)
size of f_z collection: 300
iteration 0, dev-MSE=0.000000, train-loss=-0.353197, dev-loss=-0.284220, mean-recent-eval=0.001347
iteration 100, dev-MSE=0.000000, train-loss=0.244036, dev-loss=0.285732, mean-recent-eval=-0.000390
iteration 200, dev-MSE=0.000000, train-loss=0.207456, dev-loss=0.238471, mean-recent-eval=0.001647
iteration 300, dev-

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs



iteration 3400, dev-MSE=0.000000, train-loss=0.005509, dev-loss=0.008128, mean-recent-eval=0.007656
iteration 3500, dev-MSE=0.000000, train-loss=0.003645, dev-loss=0.001186, mean-recent-eval=0.007840
iteration 3600, dev-MSE=0.000000, train-loss=0.003717, dev-loss=0.003760, mean-recent-eval=0.007904
iteration 3700, dev-MSE=0.000000, train-loss=0.002848, dev-loss=-0.000103, mean-recent-eval=0.007956
iteration 3800, dev-MSE=0.000000, train-loss=0.004000, dev-loss=0.004320, mean-recent-eval=0.007972
iteration 3900, dev-MSE=0.000000, train-loss=0.004117, dev-loss=0.002237, mean-recent-eval=0.008089
iteration 4000, dev-MSE=0.000000, train-loss=0.003366, dev-loss=0.002402, mean-recent-eval=0.008073
iteration 4100, dev-MSE=0.000000, train-loss=0.003762, dev-loss=0.003296, mean-recent-eval=0.008241
iteration 4200, dev-MSE=0.000000, train-loss=0.003421, dev-loss=0.003389, mean-recent-eval=0.008252
iteration 4300, dev-MSE=0.000000, train-loss=0.002856, dev-loss=0.003198, mean-recent-eval=0.00834

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs



iteration 900, dev-MSE=0.000000, train-loss=0.004297, dev-loss=0.003298, mean-recent-eval=0.001604
iteration 1000, dev-MSE=0.000000, train-loss=0.004409, dev-loss=0.003491, mean-recent-eval=0.001631
iteration 1100, dev-MSE=0.000000, train-loss=0.004380, dev-loss=0.003528, mean-recent-eval=0.001643
iteration 1200, dev-MSE=0.000000, train-loss=0.003402, dev-loss=0.002949, mean-recent-eval=0.001698
iteration 1300, dev-MSE=0.000000, train-loss=0.003779, dev-loss=0.003449, mean-recent-eval=0.001763
iteration 1400, dev-MSE=0.000000, train-loss=0.004267, dev-loss=0.002940, mean-recent-eval=0.001762
iteration 1500, dev-MSE=0.000000, train-loss=0.004562, dev-loss=0.002337, mean-recent-eval=0.001832
iteration 1600, dev-MSE=0.000000, train-loss=0.003965, dev-loss=0.002652, mean-recent-eval=0.001868
iteration 1700, dev-MSE=0.000000, train-loss=0.003633, dev-loss=0.002921, mean-recent-eval=0.001909
iteration 1800, dev-MSE=0.000000, train-loss=0.003778, dev-loss=0.002946, mean-recent-eval=0.001940


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"The number of training samples ({self.num_training_batches}) is small

p-value: 0.9593187184913574
iteration 1, accept the test!
p-value: 0.9569135492735874
iteration 1, accept the test!
starting learning args eval 0
starting learning args eval 1
starting learning args eval 2
learning eval: 0.020363230355783545 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0025:'betas'=(0.5, 0.9)
learning eval: 0.020197229602722022 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0002:'betas'=(0.5, 0.9) OAdam:::'lr'=0.001:'betas'=(0.5, 0.9)
learning eval: 0.018602877866458728 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0001:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9)
size of f_z collection: 300
iteration 0, dev-MSE=0.000000, train-loss=0.182078, dev-loss=0.152674, mean-recent-eval=0.000746
iteration 100, dev-MSE=0.000000, train-loss=0.171015, dev-loss=0.157464, mean-recent-eval=0.001661
iteration 200, dev-MSE=0.000000, train-loss=0.154413, dev-loss=0.159351, mean-recent-eval=0.003477
iteration 300, dev-MSE

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  /opt/conda/conda-bld/pytorch_1634272178570/work/torch/csrc/utils/python_arg_parser.cpp:1050.)
  exp_avg.mul_(beta1).add_(1 - beta1, grad)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  /opt/conda/conda-bld/pytorch_1634272178570/work/torch/csrc/utils/python_arg_parser.cpp:1050.)
  exp_avg.mul_(beta1).add_(1 - beta1, grad)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU availa


iteration 1100, dev-MSE=0.000000, train-loss=0.000992, dev-loss=0.001704, mean-recent-eval=-0.002338
iteration 1200, dev-MSE=0.000000, train-loss=0.000874, dev-loss=0.000580, mean-recent-eval=-0.002390
iteration 1300, dev-MSE=0.000000, train-loss=0.000408, dev-loss=0.006981, mean-recent-eval=-0.002414
best iteration: 80
p-value: 0.9931832644184525
iteration 1, accept the test!
p-value: 0.9657190377029107
iteration 1, accept the test!
starting learning args eval 0
starting learning args eval 1
starting learning args eval 2
learning eval: 0.014234800720000936 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0025:'betas'=(0.5, 0.9)
learning eval: 0.013718134264150358 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0002:'betas'=(0.5, 0.9) OAdam:::'lr'=0.001:'betas'=(0.5, 0.9)
learning eval: 0.009256831884668002 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0001:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9)
size of f_z collection

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  /opt/conda/conda-bld/pytorch_1634272178570/work/torch/csrc/utils/python_arg_parser.cpp:1050.)
  exp_avg.mul_(beta1).add_(1 - beta1, grad)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU 

p-value: 0.9344191196439596
iteration 1, accept the test!
p-value: 0.8763396548871347
iteration 1, accept the test!
starting learning args eval 0
starting learning args eval 1
starting learning args eval 2
learning eval: 0.0009149615913210649 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0025:'betas'=(0.5, 0.9)
learning eval: 0.0005282470099901622 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0002:'betas'=(0.5, 0.9) OAdam:::'lr'=0.001:'betas'=(0.5, 0.9)
learning eval: 0.00023660637882131068 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0001:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9)
size of f_z collection: 300
iteration 0, dev-MSE=0.000000, train-loss=0.902881, dev-loss=0.828786, mean-recent-eval=-0.214023
iteration 100, dev-MSE=0.000000, train-loss=0.043852, dev-loss=0.032383, mean-recent-eval=-0.073744
iteration 200, dev-MSE=0.000000, train-loss=0.004172, dev-loss=0.001366, mean-recent-eval=-0.000257
iteration 300, 

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs



iteration 4700, dev-MSE=0.000000, train-loss=0.000371, dev-loss=-0.008819, mean-recent-eval=0.035627
iteration 4800, dev-MSE=0.000000, train-loss=0.000071, dev-loss=0.004043, mean-recent-eval=0.035646
iteration 4900, dev-MSE=0.000000, train-loss=0.000015, dev-loss=-0.004406, mean-recent-eval=0.035651
iteration 5000, dev-MSE=0.000000, train-loss=-0.000002, dev-loss=-0.001036, mean-recent-eval=0.035695
iteration 5100, dev-MSE=0.000000, train-loss=0.000050, dev-loss=0.004976, mean-recent-eval=0.035694
iteration 5200, dev-MSE=0.000000, train-loss=-0.000001, dev-loss=-0.001030, mean-recent-eval=0.035667
iteration 5300, dev-MSE=0.000000, train-loss=-0.000003, dev-loss=0.001957, mean-recent-eval=0.035673
iteration 5400, dev-MSE=0.000000, train-loss=0.000041, dev-loss=-0.001802, mean-recent-eval=0.035622
best iteration: 5060


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU availab

p-value: 0.9392268000825471
iteration 1, accept the test!
p-value: 0.8439039758525884
iteration 1, accept the test!
starting learning args eval 0
starting learning args eval 1
starting learning args eval 2
learning eval: -0.0014996922593132163 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0025:'betas'=(0.5, 0.9)
learning eval: -0.0010203999578972482 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0002:'betas'=(0.5, 0.9) OAdam:::'lr'=0.001:'betas'=(0.5, 0.9)
learning eval: -0.0008749241709093852 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0001:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9)
size of f_z collection: 300
iteration 0, dev-MSE=0.000000, train-loss=-1.414829, dev-loss=-1.676892, mean-recent-eval=-0.204927
iteration 100, dev-MSE=0.000000, train-loss=-0.606414, dev-loss=-0.705825, mean-recent-eval=-0.252921
iteration 200, dev-MSE=0.000000, train-loss=0.334492, dev-loss=0.356881, mean-recent-eval=-0.302815
iteration

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  /opt/conda/conda-bld/pytorch_1634272178570/work/torch/csrc/utils/python_arg_parser.cpp:1050.)
  exp_avg.mul_(beta1).add_(1 - beta1, grad)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  /opt/conda/conda-bld/pytorch_1634272178570/work/torch/csrc/utils/python_arg_parser.cpp:1050.)
  exp_avg.mul_(beta1).add_(1 - beta1, grad)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU availa

p-value: 0.9938568895159945
iteration 1, accept the test!
p-value: 0.9596121248915656
iteration 1, accept the test!
starting learning args eval 0
starting learning args eval 1
starting learning args eval 2
learning eval: 0.018368100836687763 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0025:'betas'=(0.5, 0.9)
learning eval: 0.018215694890044983 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0002:'betas'=(0.5, 0.9) OAdam:::'lr'=0.001:'betas'=(0.5, 0.9)
learning eval: 0.01223937883790488 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0001:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9)
size of f_z collection: 300
iteration 0, dev-MSE=0.000000, train-loss=0.916286, dev-loss=0.802977, mean-recent-eval=-0.088721
iteration 100, dev-MSE=0.000000, train-loss=0.041731, dev-loss=0.045592, mean-recent-eval=-0.025341
iteration 200, dev-MSE=0.000000, train-loss=0.034154, dev-loss=0.044134, mean-recent-eval=0.005669
iteration 300, dev-MS

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


p-value: 0.9525171711503417
iteration 1, accept the test!
p-value: 0.8804906509727313
iteration 1, accept the test!
starting learning args eval 0
starting learning args eval 1
starting learning args eval 2
learning eval: 0.02718680805039092 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0025:'betas'=(0.5, 0.9)
learning eval: 0.02689226146333839 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0002:'betas'=(0.5, 0.9) OAdam:::'lr'=0.001:'betas'=(0.5, 0.9)
learning eval: 0.0184883164839197 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0001:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9)
size of f_z collection: 300
iteration 0, dev-MSE=0.000000, train-loss=0.579177, dev-loss=0.665519, mean-recent-eval=-0.012136
iteration 100, dev-MSE=0.000000, train-loss=0.071374, dev-loss=0.089033, mean-recent-eval=-0.004005
iteration 200, dev-MSE=0.000000, train-loss=0.052911, dev-loss=0.051558, mean-recent-eval=0.005518
iteration 300, dev-MSE=0

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"
GPU available: False, used: False
TPU available: False, using: 0 TPU core

p-value: 0.9612865208177804
iteration 1, accept the test!
p-value: 0.955347253427315
iteration 1, accept the test!
starting learning args eval 0
starting learning args eval 1
starting learning args eval 2
learning eval: 0.03385290286843744 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0025:'betas'=(0.5, 0.9)
learning eval: 0.033194830407223395 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0002:'betas'=(0.5, 0.9) OAdam:::'lr'=0.001:'betas'=(0.5, 0.9)
learning eval: 0.02240688177046745 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0001:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9)
size of f_z collection: 300
iteration 0, dev-MSE=0.000000, train-loss=1.363118, dev-loss=1.434632, mean-recent-eval=-0.110840
iteration 100, dev-MSE=0.000000, train-loss=0.047877, dev-loss=0.035779, mean-recent-eval=-0.026387
iteration 200, dev-MSE=0.000000, train-loss=0.045150, dev-loss=0.029664, mean-recent-eval=0.011157
iteration 300, dev-MSE=

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  /opt/conda/conda-bld/pytorch_1634272178570/work/torch/csrc/utils/python_arg_parser.cpp:1050.)
  exp_avg.mul_(beta1).add_(1 - beta1, grad)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  /opt/conda/conda-bld/pytorch_1634272178570/work/torch/csrc/utils/python_arg_parser.cpp:1050.)
  exp_avg.mul_(beta1).add_(1 - beta1, grad)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU availa


iteration 600, dev-MSE=0.000000, train-loss=0.144561, dev-loss=0.140735, mean-recent-eval=0.007521
iteration 700, dev-MSE=0.000000, train-loss=0.140143, dev-loss=0.137365, mean-recent-eval=0.008436
iteration 800, dev-MSE=0.000000, train-loss=0.132129, dev-loss=0.130141, mean-recent-eval=0.009371
iteration 900, dev-MSE=0.000000, train-loss=0.130657, dev-loss=0.128891, mean-recent-eval=0.010314
iteration 1000, dev-MSE=0.000000, train-loss=0.124184, dev-loss=0.120040, mean-recent-eval=0.011276
iteration 1100, dev-MSE=0.000000, train-loss=0.117039, dev-loss=0.117169, mean-recent-eval=0.012263
iteration 1200, dev-MSE=0.000000, train-loss=0.113622, dev-loss=0.111625, mean-recent-eval=0.013276
iteration 1300, dev-MSE=0.000000, train-loss=0.106545, dev-loss=0.107204, mean-recent-eval=0.014319
iteration 1400, dev-MSE=0.000000, train-loss=0.100025, dev-loss=0.099648, mean-recent-eval=0.015395
iteration 1500, dev-MSE=0.000000, train-loss=0.095089, dev-loss=0.095835, mean-recent-eval=0.015618
ite

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
G

p-value: 0.9856915213749604
iteration 1, accept the test!
p-value: 0.9442955582900491
iteration 1, accept the test!
starting learning args eval 0
starting learning args eval 1
starting learning args eval 2
learning eval: 0.015138919867112952 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0025:'betas'=(0.5, 0.9)
learning eval: 0.026146167662252598 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0002:'betas'=(0.5, 0.9) OAdam:::'lr'=0.001:'betas'=(0.5, 0.9)
learning eval: 0.026741148466115597 OptimalObjective::lambda_1=0.250000 OAdam:::'lr'=0.0001:'betas'=(0.5, 0.9) OAdam:::'lr'=0.0005:'betas'=(0.5, 0.9)
size of f_z collection: 300
iteration 0, dev-MSE=0.000000, train-loss=-1.156920, dev-loss=-1.423919, mean-recent-eval=-0.162247
iteration 100, dev-MSE=0.000000, train-loss=-0.168074, dev-loss=-0.233347, mean-recent-eval=-0.191579
iteration 200, dev-MSE=0.000000, train-loss=0.307966, dev-loss=0.358036, mean-recent-eval=-0.206005
iteration 300, 

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"
GPU available: False, used: False
TPU


iteration 1800, dev-MSE=0.000000, train-loss=0.000018, dev-loss=0.000504, mean-recent-eval=0.151420
iteration 1900, dev-MSE=0.000000, train-loss=0.000058, dev-loss=0.001081, mean-recent-eval=0.152581
iteration 2000, dev-MSE=0.000000, train-loss=0.000014, dev-loss=0.001026, mean-recent-eval=0.152678
iteration 2100, dev-MSE=0.000000, train-loss=0.000184, dev-loss=-0.001179, mean-recent-eval=0.152623
iteration 2200, dev-MSE=0.000000, train-loss=0.000000, dev-loss=0.000501, mean-recent-eval=0.152517
iteration 2300, dev-MSE=0.000000, train-loss=-0.000007, dev-loss=0.000180, mean-recent-eval=0.152564
iteration 2400, dev-MSE=0.000000, train-loss=0.000009, dev-loss=0.000629, mean-recent-eval=0.152620
iteration 2500, dev-MSE=0.000000, train-loss=0.000008, dev-loss=0.000409, mean-recent-eval=0.152622
iteration 2600, dev-MSE=0.000000, train-loss=-0.000000, dev-loss=-0.001113, mean-recent-eval=0.152518
iteration 2700, dev-MSE=0.000000, train-loss=0.000005, dev-loss=0.000135, mean-recent-eval=0.15

  zorder=z)


In [9]:
# Build one long-format table from the per-instrument result files and draw
# the method-comparison figure (one facet per instrument type).

# Map the method names stored in the result files to the labels shown in the
# figure legend.
method_labels = {"HSIC-AR": "HSIC-X-pen",
                 "HSIC-IV": "HSIC-X",
                 "Pred": "OLS",
                 "D-GMM": "DeepGMM"}

instrument_frames = []
for instrument in ['Binary', 'Gaussian']:
    final_df = pd.read_csv("results/compare_df_NN_ins_{}.csv".format(instrument))
    final_df = final_df.replace(method_labels)
    # Tag every row with its instrument so it can be used as the facet column.
    final_df[r'$Z$'] = instrument
    instrument_frames.append(final_df)

# Concatenate once: DataFrame.append was removed in pandas 2.0 and copied the
# accumulated frame on every iteration.
all_df = pd.concat(instrument_frames, ignore_index=True)

# Pass the palette name directly; sns.set_palette(...) returns None, so using
# its return value as the `palette` argument only worked through its side effect.
sns.set(font_scale=1.8, style='white', palette="tab10")

# The y axis is switched to log scale explicitly below; the former `log=True`
# keyword is not a catplot/pointplot parameter and has been dropped.
g = sns.catplot(data=all_df, kind="point",
                x='alpha', y='MISE', hue='Method',
                hue_order=['DeepGMM', 'OLS', 'Oracle', 'HSIC-X', 'HSIC-X-pen'],
                markers=["o", "x", "d", "s", "v"], linestyles=[':', '-', '--', '-.', ':'],
                capsize=.07, aspect=1.2, height=4, ci=95,
                col=r'$Z$', sharey=False)

# Replace the figure-level legend created by catplot with a single horizontal
# legend placed above the panels. plt.legend attaches to the current axes; the
# bbox offsets are presumably tuned for that being the last facet.
g._legend.remove()

plt.legend(loc='upper center', bbox_to_anchor=(-.25, 1.43),
           ncol=5, fancybox=True, shadow=True, prop={'size': 15.5})

for ax in g.axes.flat:
    ax.set_yscale('log')

g.set_xlabels(r'$\alpha$')
# NOTE(review): the plotted column is 'MISE' but the axis is labelled 'MSE' --
# confirm that this label is intended.
g.set_ylabels('MSE')

plt.savefig('results/compare_NN.pdf',
            bbox_inches="tight")
plt.close()

  zorder=z)
  zorder=z)
