In [2]:
import tensorflow as tf
import tensorflow.keras.optimizers as optimizers

import sys
sys.path.append("..")

from models.regression_model import RegressionDGP
from likelihoods import Gaussian
from utils_dataset import load_UCI_dataset
from utils_training import MCEM_sampler, MCEM_Q_maximizer, MCEM, MCEM_windows

In [3]:
# Dataset metadata.
# The loader is called here only for the two shape entries it returns in
# positions 3 and 4; the first two returned values are discarded.
dataset_name = 'boston'
_, _, train_shape, test_shape = load_UCI_dataset(
    dataset_name,
    data_dir='./data/',
)
# Total size = leading entry of the train shape + leading entry of the test shape.
dataset_size = train_shape[0] + test_shape[0]
print(f"Dataset {dataset_name}, total size:{dataset_size}")

############################## Getting data info:dataset name: boston ##############################
D: 13, N: 455, Ns: 51
X_mean: [1.7378345e+00 1.1138461e+01 1.1224440e+01 7.2527476e-02 5.5657738e-01
 6.2923098e+00 6.9054504e+01 3.6594815e+00 4.3472528e+00 4.0854724e+02
 1.8481098e+01 3.5551254e+02 1.2646418e+01], Y_mean: [22.656263], Y_std: [9.32293]
######################################################################
Dataset boston, total size:506


In [4]:
# Model configuration: a RegressionDGP with two hidden layers, RBF kernels
# (trainable) and a trainable Gaussian likelihood.
d_in = train_shape[1]  # input width is read from the second entry of the train shape
d_out = 1
n_gp = 10
model = RegressionDGP(
    d_in,
    d_out,
    n_hidden_layers=2,
    n_rf=100,
    n_gp=n_gp,
    likelihood=Gaussian(variance=0.1, trainable=True),
    kernel_type_list=['RBF', 'RBF'],
    kernel_trainable=True,
    random_fixed=True,
    input_cat=True,
)

# Report what was built, using the values stored on the model itself.
layer_count = model.n_hidden_layers
first_kernel = model.kernel_type_list[0]
print(f"{layer_count}-layer DGPs with Kernel type {first_kernel}")

2-layer DGPs with Kernel type RBF


In [5]:
# Keyword arguments that are identical for both samplers built in this cell.
# Keeping them in one place prevents the two configurations from drifting apart.
batch_size = 200
shared_sampler_options = dict(
    dataset_name=dataset_name,
    batch_size=batch_size,
    data_dir='./data/',
    precond_type='identity',
    K_batches=None,
    second_moment_centered=None,
    resample_in_cycle_head=False,
    start_sampling_epoch=2000,
    epochs_per_cycle=50,
)

# EM sampler settings
lr_mcmc_0 = 0.01
beta_mcmc = 0.9
sampler_EM = MCEM_sampler(
    model,
    lr_0=lr_mcmc_0,
    momentum_decay=beta_mcmc,
    **shared_sampler_options,
)

# Maximizer settings
lr_maximizer = 0.01
optimizer = optimizers.Adam(learning_rate=lr_maximizer)
maximizer = MCEM_Q_maximizer(model, dataset_size, optimizer)

# Sampler settings used after the hyper-parameters are fixed
lr_fixing_hyper_0 = 0.01
beta_fixing = 0.9
sampler_fixing_hyper = MCEM_sampler(
    model,
    lr_0=lr_fixing_hyper_0,
    momentum_decay=beta_fixing,
    **shared_sampler_options,
)

############################## Getting data info:dataset name: boston ##############################
D: 13, N: 455, Ns: 51
X_mean: [1.7378345e+00 1.1138461e+01 1.1224440e+01 7.2527476e-02 5.5657738e-01
 6.2923098e+00 6.9054504e+01 3.6594815e+00 4.3472528e+00 4.0854724e+02
 1.8481098e+01 3.5551254e+02 1.2646418e+01], Y_mean: [22.656263], Y_std: [9.32293]
######################################################################
Training size is 400 after remainder dropping. 
############################## Getting data info:dataset name: boston ##############################
D: 13, N: 455, Ns: 51
X_mean: [1.7378345e+00 1.1138461e+01 1.1224440e+01 7.2527476e-02 5.5657738e-01
 6.2923098e+00 6.9054504e+01 3.6594815e+00 4.3472528e+00 4.0854724e+02
 1.8481098e+01 3.5551254e+02 1.2646418e+01], Y_mean: [22.656263], Y_std: [9.32293]
######################################################################
Training size is 400 after remainder dropping. 


In [None]:
# MCEM settings and training.
# A separate training dataset (batch size 100) is loaded for the maximizer;
# the remaining three values returned by the loader are not needed here.
total_EM_steps = 20000
ds_train_M, _, _, _ = load_UCI_dataset(
    dataset_name,
    batch_size=100,
    data_dir='./data/',
)
# MCEM returns two values, kept as `log_p` and `mse` for the evaluation cells.
log_p, mse = MCEM(
    sampler_EM,
    maximizer,
    sampler_fixing_hyper,
    total_EM_steps,
    ds_train_M,
    num_samples_EM=100,
    num_samples_fixing_hyper=200,
    print_epoch_cycle_EM=200,
    print_epoch_cycle_fixing=100,
)

In [None]:
# Display `log_p`, the first of the two values returned by the MCEM call above.
log_p

In [None]:
# Summarize the predictions of the sampled models produced by the MCEM run.
# `log_p` and `mse` are indexed with two axes below; the first axis is treated
# as the sample axis.
# NOTE(review): presumably both are [num_samples, num_test_points] - confirm against MCEM.

# Number of leading samples to discard before averaging. It is defined in this
# cell so that the cell works on a fresh kernel (Restart & Run All); previously
# the name only existed after the multi-dataset experiment cell had been run.
# 50 is the value that experiment cell uses.
drop_mixing = 50

log_p_droped = log_p[drop_mixing:, :]
mse_droped = mse[drop_mixing:, :]

n_models_droped = tf.shape(mse_droped)[0]
# Log of the sample-averaged likelihood per column: logsumexp over the sample
# axis minus log(number of kept samples), then averaged over the remaining axis.
predict_log_p = tf.reduce_logsumexp(log_p_droped, axis=0) - tf.math.log(tf.cast(n_models_droped, tf.float32))
predict_log_p = tf.reduce_mean(predict_log_p)
# Root of the mean of all kept squared-error entries.
predict_rmse = tf.math.sqrt(tf.reduce_mean(mse_droped))

print(f"Dataset: {dataset_name}")
print(f"Number of sampled models(after dropping {drop_mixing} samples): {n_models_droped}")
print(f"Test Log Likelihood of all sampled models: {predict_log_p}")
print(f"Test Root MSE of all sampled models: {predict_rmse}")

# The per-run accumulator appends (`log_p_coll` / `rmse_coll`) were removed:
# those lists are not defined at this point of the notebook, so the appends
# raised NameError, and nothing reads them for this single run.
print('*' * 70)
print('*' * 70)
print(' ')


In [None]:
# Multi-dataset experiment: for every UCI dataset, train a fresh 2-layer DGP
# `repeat` times, evaluate the sampled models, and append a per-dataset summary
# (individual values, mean and std of test log-likelihood and RMSE) to `record_file`.
#
# NOTE(review): `regression_train` is not imported in the visible part of this
# notebook - make sure it is imported before running this cell.
record_file = 'train_regression_results_2.txt'
repeat = 3 # run the task several times
print('#'*40, f"Results stored in {record_file}.", '#'*40)

dataset_name_list = ['boston', 'wine_red', 'wine_white', 'concrete',
                     'energy', 'kin8nm', 'naval', 'power','protein']
# Input dimension of each dataset, in the same order as `dataset_name_list`.
d_in_list = [13, 11, 11, 8, 8, 8, 16, 4, 9]
d_out = 1
batch_size = 200
lr_0 = 0.01
beta = 0.99
total_epochs = 50000
start_sampling_epoch = 40000
epochs_per_cycle = 100
print_epoch_cycle = 200
drop_mixing = 50  # number of leading samples discarded before averaging

# The two lists are consumed in lockstep; fail early if they get out of sync
# instead of silently skipping datasets.
assert len(dataset_name_list) == len(d_in_list), "dataset_name_list and d_in_list must have the same length"

for dataset_name, dataset_d_in in zip(dataset_name_list, d_in_list):
    # Hidden-layer GP count: the input dimension, capped at 30.
    hid_n_gp = min(dataset_d_in, 30)

    # Per-run scalar results for this dataset.
    log_p_coll = []
    rmse_coll = []
    for run_index in range(repeat):
        print('*' * 70)
        print('*' * 30, '2-layer DGPs ', f'Run {run_index}', '*' * 30)
        # A new model is built for every run so runs do not share parameters.
        model = RegressionDGP(dataset_d_in, d_out, n_hidden_layers=2, n_rf=300,
                              n_gp=[hid_n_gp, d_out], likelihood=Gaussian(),
                              kernel_type_list=['RBF', 'RBF'], kernel_trainable=True,
                              random_fixed=True, set_nonzero_mean=False, input_cat=True)
        # return matrix [S, N]
        log_p, mse = regression_train(model, dataset_name=dataset_name, batch_size=batch_size,
                                      lr_0=lr_0, momentum_decay=beta,
                                      precond_type='rmsprop', K_batches=5, second_moment_centered=False,
                                      resample_in_cycle_head=False,
                                      total_epochs=total_epochs, start_sampling_epoch=start_sampling_epoch,
                                      epochs_per_cycle=epochs_per_cycle,
                                      print_epoch_cycle=print_epoch_cycle)
        # Discard the first `drop_mixing` samples along the sample axis.
        log_p_droped = log_p[drop_mixing:, :]
        mse_droped = mse[drop_mixing:, :]

        n_models_droped = tf.shape(mse_droped)[0]
        # Log of the sample-averaged likelihood per test point, then mean over test points.
        predict_log_p = tf.reduce_logsumexp(log_p_droped, axis=0) - tf.math.log(tf.cast(n_models_droped, tf.float32))
        predict_log_p = tf.reduce_mean(predict_log_p)
        predict_rmse = tf.math.sqrt(tf.reduce_mean(mse_droped))

        print(f"Dataset: {dataset_name}")
        print(f"Number of sampled models(after dropping {drop_mixing} samples): {n_models_droped}")
        print(f"Test Log Likelihood of all sampled models: {predict_log_p}")
        print(f"Test Root MSE of all sampled models: {predict_rmse}")

        log_p_coll.append(predict_log_p)
        rmse_coll.append(predict_rmse)
        print('*' * 70)
        print('*' * 70)
        print(' ')

    # The collected values are rank-0 tensors (results of axis-less reductions).
    # tf.concat rejects scalars, so tf.stack is used to build the [repeat] vectors.
    log_p_runs = tf.stack(log_p_coll, axis=0)
    rmse_runs = tf.stack(rmse_coll, axis=0)
    # Append mode: summaries from earlier datasets / earlier executions are kept.
    with open(record_file, 'a') as f:
        f.write(f"Dataset: {dataset_name}, \n")
        f.write(f"Predict Mean Log Likelihood: {log_p_runs},\n  ")
        f.write(f"Their mean is: {tf.reduce_mean(log_p_runs)}, ")
        f.write(f"std is: {tf.math.reduce_std(log_p_runs)}\n")
        f.write(f"Predict RMSE: {rmse_runs},\n  ")
        f.write(f"Their mean is: {tf.reduce_mean(rmse_runs)}, ")
        f.write(f"std is: {tf.math.reduce_std(rmse_runs)}\n\n")
