### Robust regressions with different methods

In [1]:
import numpy as np
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
import tensorflow.contrib.slim as slim
import matplotlib.pyplot as plt
%matplotlib inline  
%config InlineBackend.figure_format = 'retina'
from cn_reg_class import cn_reg_class
from mlp_reg_class import mlp_reg_class
from sklearn.datasets import load_boston
from util import nzr,get_train_test_datasets,gpusession,print_n_txt
print ("Packages loaded")

Packages loaded


### Run tests with different configurations

In [None]:
# Robust-regression sweep.
# For every (random seed, outlier rate) pair this cell builds a train/test
# split with get_train_test_datasets, trains ChoiceNet and four MLP variants
# (L2, L1, robust and leaky-robust losses), evaluates each one with .test()
# and stores the returned RMSE in the matching rmses_* table
# (rows: outlier rates, columns: seeds). Every result line is also written
# to txt_name through print_n_txt.

# Configurations
dataset,dataset_name = load_boston(),'boston' # select dataset to use
test_size = 0.2
outlier_rates = [0,0.05,0.1,0.15,0.2,0.3,0.4,0.5]
rseeds = [0,1,2,3,4,5]
h_dims = [256,128]
actv = tf.nn.relu
l2_reg_coef = 1e-4

# Flags
RUN_CN = 1
RUN_L2 = 1
RUN_L1 = 1
RUN_RBST = 1
RUN_LRBST = 1
SAVE_TXT = True
SAVE_FINAL = False
SAVE_BEST = True # NOTE: each run restores 'net/net_<name>_best.npz' after training,
                 # which presumably only exists when this flag is on - confirm before disabling
REMOVE_PREVS = True

# RMSEs to save
rmses_cn = np.zeros(shape=(len(outlier_rates),len(rseeds)))
rmses_l2 = np.zeros(shape=(len(outlier_rates),len(rseeds)))
rmses_l1 = np.zeros(shape=(len(outlier_rates),len(rseeds)))
rmses_robust = np.zeros(shape=(len(outlier_rates),len(rseeds)))
rmses_leaky_robust = np.zeros(shape=(len(outlier_rates),len(rseeds)))

# The four MLP baselines differ only in these fields:
# (run flag, name prefix, label used in the log line,
#  _L1_LOSS, _ROBUST_LOSS, _LEAKY_ROBUST_LOSS, table receiving the RMSE)
mlp_variants = [
    (RUN_L2,   'l2',   'L2 loss',          0,0,0,rmses_l2),
    (RUN_L1,   'l1',   'L1 loss',          1,0,0,rmses_l1),
    (RUN_RBST, 'rbst', 'robust loss',      0,1,0,rmses_robust),
    (RUN_LRBST,'lrbst','leaky robust loss',0,0,1,rmses_leaky_robust),
]

# Save to txt
txt_name = 'res/robust_regression_results.txt'
# The context manager guarantees the results file is flushed and closed,
# even when the (long) sweep is interrupted or one run raises.
with open(txt_name,'w') as f:
    for s_idx,rseed in enumerate(rseeds): # for different random seeds
        for o_idx,outlier_rate in enumerate(outlier_rates): # for different outlier rates
            x_train,x_test,y_train,y_test = get_train_test_datasets(
                dataset,dataset_name,_test_size=test_size,_outlier_rate=outlier_rate,
                _seed=rseed,_SAVE_MAT=False,_VERBOSE=False) # get training data with outliers
            x_dim = np.shape(x_train)[1]
            # Round before formatting: '%02d' truncates floats, so a rate whose
            # product with 100 lands just below an integer (e.g. 0.29*100)
            # would otherwise be named one percent too low.
            err_pct = int(round(outlier_rate*100))
            # Two decimals so that rates such as 0.05 and 0.15 are shown exactly
            # (same precision as the log lines below).
            title_str = 'Outlier rate:[%.2f]'%(outlier_rate)

            # 1. Run ChoiceNet
            if RUN_CN:
                tf.reset_default_graph(); sess = gpusession()
                tf.set_random_seed(rseed); np.random.seed(rseed) # fix seeds
                CN = cn_reg_class(_name='cn_%s_err%02d_seed%d'%(dataset_name,err_pct,rseed),
                                  _x_dim=x_dim,_y_dim=1,
                                  _h_dims=h_dims,_k_mix=10,_actv=actv,_bn=slim.batch_norm,
                                  _rho_ref_train=0.95,_tau_inv=1e-2,_var_eps=1e-4,
                                  _pi1_bias=0.0,_log_sigma_Z_val=-1,
                                  _kl_reg_coef=1e-6,_l2_reg_coef=l2_reg_coef,
                                  _SCHEDULE_MDN_REG=0,_GPU_ID=1,_VERBOSE=0)
                sess.run(tf.global_variables_initializer()) # initialize variables
                CN.train(_sess=sess,_x_train=x_train,_y_train=y_train,
                         _lr=1e-3,_batch_size=256,_max_epoch=1e4,_kp=1.0,
                         _LR_SCHEDULE=1,_PRINT_EVERY=0,_PLOT_EVERY=0,
                         _SAVE_TXT=SAVE_TXT,_SAVE_BEST_NET=SAVE_BEST,_SAVE_FINAL=SAVE_FINAL,
                         _REMOVE_PREVS=REMOVE_PREVS) # train
                # evaluate the '_best' weights rather than the final ones
                CN.restore_from_npz(sess,_loadname='net/net_%s_best.npz'%(CN.name))
                rmse = CN.test(_sess=sess,_x_train=x_train,_y_train=y_train,_x_test=x_test,_y_test=y_test,
                               _PLOT_TRAIN=0,_PLOT_TEST=0,_SAVE_FIG=0,
                               _title_str4data=title_str,
                               _x_dim4plot=5,_x_name4plot='Average number of rooms per dwelling') # test
                sess.close() # release this run's session before the next one is created
                rmses_cn[o_idx,s_idx] = rmse
                chars = ("rseed:[%d] outlier rate:[%.2f] choicenet:[%.4f]"%(rseed,outlier_rate,rmse))
                print_n_txt(_f=f,_chars=chars,_DO_PRINT=True)

            # 2.-5. Run the MLP baselines (L2, L1, robust, leaky robust), in that order
            for run_flag,prefix,label,l1_loss,robust_loss,leaky_robust_loss,rmses_mlp in mlp_variants:
                if not run_flag:
                    continue
                tf.reset_default_graph(); sess = gpusession()
                tf.set_random_seed(rseed); np.random.seed(rseed) # fix seeds
                MLP = mlp_reg_class(_name='%s_%s_err%02d_seed%d'%(prefix,dataset_name,err_pct,rseed),
                                    _x_dim=x_dim,_y_dim=1,
                                    _h_dims=h_dims,_actv=actv,_bn=slim.batch_norm,#slim.batch_norm/None
                                    _l2_reg_coef=l2_reg_coef,_GPU_ID=1,
                                    _L1_LOSS=l1_loss,_ROBUST_LOSS=robust_loss,
                                    _LEAKY_ROBUST_LOSS=leaky_robust_loss,_VERBOSE=0)
                sess.run(tf.global_variables_initializer()) # initialize variables
                MLP.train(_sess=sess,_x_train=x_train,_y_train=y_train,
                          _lr=1e-3,_batch_size=256,_max_epoch=1e4,_kp=1.0,
                          _LR_SCHEDULE=0,_PRINT_EVERY=0,_PLOT_EVERY=0,
                          _SAVE_TXT=SAVE_TXT,_SAVE_BEST_NET=SAVE_BEST,_SAVE_FINAL=SAVE_FINAL,
                          _REMOVE_PREVS=REMOVE_PREVS,
                          _x_dim4plot=5,_x_name4plot='Average number of rooms per dwelling') # train
                # evaluate the '_best' weights rather than the final ones
                MLP.restore_from_npz(sess,_loadname='net/net_%s_best.npz'%(MLP.name))
                rmse = MLP.test(_sess=sess,_x_train=x_train,_y_train=y_train,_x_test=x_test,_y_test=y_test,
                                _PLOT_TRAIN=0,_PLOT_TEST=0,_SAVE_FIG=0,
                                _title_str4data=title_str,
                                _x_dim4plot=5,_x_name4plot='Average number of rooms per dwelling') # test
                sess.close() # release this run's session before the next one is created
                rmses_mlp[o_idx,s_idx] = rmse
                chars = ("rseed:[%d] outlier rate:[%.2f] %s:[%.4f]"%(rseed,outlier_rate,label,rmse))
                print_n_txt(_f=f,_chars=chars,_DO_PRINT=True)

            # blank line between (seed, outlier rate) groups in the log
            chars = ''
            print_n_txt(_f=f,_chars=chars,_DO_PRINT=True)

print ("Done.")

rseed:[0] outlier rate:[0.00] choicenet:[3.5240]
rseed:[0] outlier rate:[0.00] L2 loss:[3.7152]
rseed:[0] outlier rate:[0.00] L1 loss:[3.6175]
rseed:[0] outlier rate:[0.00] robust loss:[7.7333]
rseed:[0] outlier rate:[0.00] leaky robust loss:[3.9667]

rseed:[0] outlier rate:[0.05] choicenet:[4.0288]
rseed:[0] outlier rate:[0.05] L2 loss:[4.9779]
rseed:[0] outlier rate:[0.05] L1 loss:[4.3208]
rseed:[0] outlier rate:[0.05] robust loss:[5.5505]
rseed:[0] outlier rate:[0.05] leaky robust loss:[4.5531]

rseed:[0] outlier rate:[0.10] choicenet:[4.2907]
rseed:[0] outlier rate:[0.10] L2 loss:[5.6332]
rseed:[0] outlier rate:[0.10] L1 loss:[5.4147]
rseed:[0] outlier rate:[0.10] robust loss:[6.2274]
rseed:[0] outlier rate:[0.10] leaky robust loss:[5.6169]

rseed:[0] outlier rate:[0.15] choicenet:[5.0174]
rseed:[0] outlier rate:[0.15] L2 loss:[6.7583]
rseed:[0] outlier rate:[0.15] L1 loss:[6.6024]
rseed:[0] outlier rate:[0.15] robust loss:[6.5461]
rseed:[0] outlier rate:[0.15] leaky robust loss:[6