In [1]:
import numpy as np
import tensorflow as tf
import gzip
import pickle
import sys
import copy
import os.path
from ddm.run_split import SplitMnistGenerator
from ddm.alg.cla_models_multihead import MFVI_IBP_NN, Vanilla_NN
from ddm.alg.utils import get_scores, concatenate_results
from ddm.alg.vcl import run_vcl
from copy import deepcopy

from bayes_opt import BayesianOptimization

import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt

W0814 18:28:57.083678 140369993201472 deprecation_wrapper.py:119] From /home/skessler/Projects/IBP_BNN/ddm/alg/cla_models_multihead.py:10: The name tf.set_random_seed is deprecated. Please use tf.compat.v1.set_random_seed instead.



# IBP network

In [2]:
# Hyperparameters for the fixed-setting IBP run in this cell.
hidden_size = [100]
batch_size = 128
no_epochs = 100
alpha0 = 1.0
tau0=1.0 # initial temperature
ANNEAL_RATE=0.000
MIN_TEMP=0.1

# Run VCL with the IBP prior (MFVI_IBP_NN) task by task and score it on the
# validation sets of every task seen so far.
np.random.seed(1)

ibp_acc = np.array([])

coreset_size = 0
val = True
data_gen = SplitMnistGenerator(val)
single_head=False
in_dim, out_dim = data_gen.get_dims()
x_testsets, y_testsets = [], []
x_valsets, y_valsets = [], []
for task_id in range(data_gen.max_iter):
    
    tf.reset_default_graph()
    # The graph-level seed belongs to the current default graph, so it has to
    # be set again after every reset; seeding once before the loop is discarded
    # by the reset above.
    tf.set_random_seed(12)
    if val:
        x_train, y_train, x_test, y_test, x_val, y_val = data_gen.next_task()
        x_valsets.append(x_val)
        y_valsets.append(y_val)
    else:    
        x_train, y_train, x_test, y_test = data_gen.next_task()
    x_testsets.append(x_test)
    y_testsets.append(y_test)

    # Set the readout head to train
    head = 0 if single_head else task_id
    # batch_size=None means "use the whole task as one batch"
    bsize = x_train.shape[0] if (batch_size is None) else batch_size
    
    # Train network with maximum likelihood to initialize first model
    if task_id == 0:
        ml_model = Vanilla_NN(in_dim, hidden_size, out_dim, x_train.shape[0])
        ml_model.train(x_train, y_train, task_id, no_epochs, bsize)
        mf_weights = ml_model.get_weights()
        mf_variances = None
        mf_betas = None
        ml_model.close_session()

    # Train on non-coreset data; the weights/variances/betas returned by the
    # previous task are handed to the next model through the prev_* arguments.
    # NOTE(review): the BO cell further down passes lambda_1/lambda_2 instead of
    # temp/temp_prior -- confirm which keyword names MFVI_IBP_NN currently accepts.
    mf_model = MFVI_IBP_NN(in_dim, hidden_size, out_dim, x_train.shape[0], prev_means=mf_weights, 
                           prev_log_variances=mf_variances, prev_betas=mf_betas,alpha0=alpha0,
                           learning_rate=0.01, temp=tau0, temp_prior=1.0, no_pred_samples=100)
    mf_model.train(x_train, y_train, head, no_epochs, bsize,
                   anneal_rate=ANNEAL_RATE, min_temp=MIN_TEMP)
    mf_weights, mf_variances, mf_betas = mf_model.get_weights()

    # Accuracy on the validation sets collected so far, accumulated across tasks.
    acc = get_scores(mf_model, x_valsets, y_valsets, single_head)
    ibp_acc = concatenate_results(acc, ibp_acc)
    
    mf_model.close_session()
    
ibp_acc

W0813 18:37:30.424512 140408376342336 deprecation_wrapper.py:119] From /home/skessler/Projects/IBP_BNN/ddm/alg/cla_models_multihead.py:53: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0813 18:37:30.427169 140408376342336 deprecation_wrapper.py:119] From /home/skessler/Projects/IBP_BNN/ddm/alg/cla_models_multihead.py:164: The name tf.truncated_normal is deprecated. Please use tf.random.truncated_normal instead.

W0813 18:37:30.482261 140408376342336 deprecation_wrapper.py:119] From /home/skessler/Projects/IBP_BNN/ddm/alg/cla_models_multihead.py:58: The name tf.train.AdamOptimizer is deprecated. Please use tf.compat.v1.train.AdamOptimizer instead.

W0813 18:37:30.588547 140408376342336 deprecation_wrapper.py:119] From /home/skessler/Projects/IBP_BNN/ddm/alg/cla_models_multihead.py:62: The name tf.global_variables_initializer is deprecated. Please use tf.compat.v1.global_variables_initializer instead.

W0813 18:37:30.590138 140408376342336 deprecat

Epoch: 0001 cost= 0.059754695
Epoch: 0006 cost= 0.000913144
Epoch: 0011 cost= 0.000234285
Epoch: 0016 cost= 0.000082365
Epoch: 0021 cost= 0.000042895
Epoch: 0026 cost= 0.000023777
Epoch: 0031 cost= 0.000014887
Epoch: 0036 cost= 0.000009923
Epoch: 0041 cost= 0.000006549
Epoch: 0046 cost= 0.000004535
Epoch: 0051 cost= 0.000003333
Epoch: 0056 cost= 0.000002337
Epoch: 0061 cost= 0.000001674
Epoch: 0066 cost= 0.000001251
Epoch: 0071 cost= 0.000000906
Epoch: 0076 cost= 0.000000680
Epoch: 0081 cost= 0.000000512
Epoch: 0086 cost= 0.000000387
Epoch: 0091 cost= 0.000000281
Epoch: 0096 cost= 0.000000211


W0813 18:37:40.962682 140408376342336 deprecation.py:323] From /home/skessler/Projects/IBP_BNN/ddm/alg/cla_models_multihead.py:525: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.cast` instead.


Optimization Finished!
z_discrete: (1, ?, 100)
biases: <unknown>
pre: <unknown>


W0813 18:37:41.177778 140408376342336 deprecation.py:323] From /home/skessler/Projects/IBP_BNN/ddm/alg/cla_models_multihead.py:496: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


z_discrete: (1, ?, 100)
biases: <unknown>
pre: <unknown>
z_discrete: (1, ?, 100)
biases: <unknown>
pre: <unknown>


W0813 18:37:41.663980 140408376342336 deprecation.py:323] From /home/skessler/anaconda2/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/math_grad.py:1205: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
W0813 18:37:42.680103 140408376342336 deprecation_wrapper.py:119] From /home/skessler/Projects/IBP_BNN/ddm/alg/cla_models_multihead.py:503: The name tf.train.Saver is deprecated. Please use tf.compat.v1.train.Saver instead.

W0813 18:37:42.716534 140408376342336 deprecation_wrapper.py:119] From /home/skessler/Projects/IBP_BNN/ddm/alg/cla_models_multihead.py:621: The name tf.summary.scalar is deprecated. Please use tf.compat.v1.summary.scalar instead.

W0813 18:37:42.732589 140408376342336 deprecation_wrapper.py:119] From /home/skessler/Projects/IBP_BNN/ddm/alg/cla_models_multihead.py:630: The name tf

_Z: (1, ?, 100)


W0813 18:37:42.987454 140408376342336 deprecation_wrapper.py:119] From /home/skessler/Projects/IBP_BNN/ddm/alg/cla_models_multihead.py:913: The name tf.summary.FileWriter is deprecated. Please use tf.compat.v1.summary.FileWriter instead.



Epoch: 0001 cost= 19.072743214
Epoch: 0006 cost= 5.168781454
Epoch: 0011 cost= 3.800158814
Epoch: 0016 cost= 3.674991938
Epoch: 0021 cost= 3.574922829
Epoch: 0026 cost= 3.511328907
Epoch: 0031 cost= 3.403818682
Epoch: 0036 cost= 3.414693139
Epoch: 0041 cost= 3.346116540
Epoch: 0046 cost= 3.362689755
Epoch: 0051 cost= 3.332217416
Epoch: 0056 cost= 3.280205180
Epoch: 0061 cost= 3.252052155
Epoch: 0066 cost= 3.273583114
Epoch: 0071 cost= 3.220419619
Epoch: 0076 cost= 3.239492937
Epoch: 0081 cost= 3.198088482
Epoch: 0086 cost= 3.186336327
Epoch: 0091 cost= 3.173645304
Epoch: 0096 cost= 3.183016286
Optimization Finished!
z_discrete: (1, ?, 100)
biases: <unknown>
pre: <unknown>
z_discrete: (1, ?, 100)
biases: <unknown>
pre: <unknown>
z_discrete: (1, ?, 100)
biases: <unknown>
pre: <unknown>
_Z: (1, ?, 100)
Epoch: 0001 cost= 1.909118086
Epoch: 0006 cost= 1.566231799
Epoch: 0011 cost= 1.535252309
Epoch: 0016 cost= 1.504906576
Epoch: 0021 cost= 1.506916551
Epoch: 0026 cost= 1.488153032
Epoch: 00

array([[0.9607565 ,        nan,        nan,        nan,        nan],
       [0.8141844 , 0.92654261,        nan,        nan,        nan],
       [0.6212766 , 0.87267385, 0.92902882,        nan,        nan],
       [0.62222222, 0.85651322, 0.90394877, 0.94561934,        nan],
       [0.46146572, 0.67825661, 0.91141942, 0.82678751, 0.9293999 ]])

# Bayesian optimisation (BO) of the IBP hyperparameters

In [3]:
# Settings for the Bayesian-optimisation run itself.
bo_params = dict(acq='ei', init_points=5, n_iter=5)

# Search interval (low, high) for each hyperparameter that cv_exp receives.
param_bounds = dict(
    alpha=(1, 10),
    beta=(1, 10),
    lambda_1=(0.1, 5.0),
    lambda_2=(0.1, 5.0),
)

# Fixed (non-optimised) network and training settings read by cv_exp.
model_params = dict(
    hidden_size=[100],
    batch_size=128,
    no_epochs=200,
    learning_rate=0.001,
    anneal_rate=0.0,
    pred_samples=100,
)

def cv_exp(alpha, beta, lambda_1, lambda_2):
    """Objective for Bayesian optimisation: one continual-learning run on Split MNIST.

    Trains the BNN with the IBP prior (``MFVI_IBP_NN``) on every task produced by
    ``SplitMnistGenerator`` in sequence, handing the weights returned for one task
    to the model of the next, and scores each model on the validation sets of all
    tasks seen so far. Fixed settings are read from the module-level
    ``model_params`` dict.

    :param alpha: forwarded to the model as ``alpha0``
    :param beta: forwarded to the model as ``beta0``
    :param lambda_1: initial temperature of the variational posterior for task 1;
        also used as ``min_temp`` during training
    :param lambda_2: temperature of the Concrete prior
    :returns: NaN-ignoring mean of the accumulated validation-accuracy results
    """
    np.random.seed(1)

    ibp_acc = np.array([])

    # Private copy so a run can never alter the shared settings dict.
    model_params_cv = copy.deepcopy(model_params)

    val = True
    data_gen = SplitMnistGenerator(val)
    single_head = False
    in_dim, out_dim = data_gen.get_dims()
    x_valsets, y_valsets = [], []
    for task_id in range(data_gen.max_iter):

        tf.reset_default_graph()
        # The graph-level seed is attached to the current default graph, so it
        # must be re-applied after each reset to have any effect.
        tf.set_random_seed(12)

        # Only the training and validation splits are needed here; the test
        # split is deliberately left untouched during hyperparameter search.
        x_train, y_train, _, _, x_val, y_val = data_gen.next_task()
        x_valsets.append(x_val)
        y_valsets.append(y_val)

        # Set the readout head to train
        head = 0 if single_head else task_id
        # batch_size=None means "use the whole task as one batch"
        if model_params_cv['batch_size'] is None:
            bsize = x_train.shape[0]
        else:
            bsize = model_params_cv['batch_size']

        # Train network with maximum likelihood to initialize first model
        # (fixed 100 epochs, independent of model_params['no_epochs']).
        if task_id == 0:
            ml_model = Vanilla_NN(in_dim, model_params_cv['hidden_size'], out_dim, x_train.shape[0])
            ml_model.train(x_train, y_train, task_id, 100, bsize)
            mf_weights = ml_model.get_weights()
            mf_variances = None
            mf_betas = None
            ml_model.close_session()

        # Train on non-coreset data
        # NOTE(review): the saved output of this cell ends in
        # "TypeError: Cannot interpret feed_dict key as Tensor"; where it is
        # raised is not visible from this notebook -- confirm the keyword
        # arguments MFVI_IBP_NN and its train() expect.
        mf_model = MFVI_IBP_NN(in_dim,
                               model_params_cv['hidden_size'],
                               out_dim,
                               x_train.shape[0],
                               prev_means=mf_weights,
                               prev_log_variances=mf_variances,
                               prev_betas=mf_betas,
                               alpha0=alpha,
                               beta0=beta,
                               learning_rate=model_params_cv['learning_rate'],
                               lambda_1=lambda_1, # initial temperature of the variational posterior for task 1
                               lambda_2=lambda_2, # temperature of the Concrete prior
                               no_pred_samples=model_params_cv['pred_samples'],
                               name='ibp_bo_alpha_{:.02}_beta_{:.02}_lambda_1_{:.02}_lambda_2_{:.02}'.format(
                                   alpha, beta, lambda_1, lambda_2))

        mf_model.train(x_train, y_train, head, model_params_cv['no_epochs'],
                       bsize,
                       anneal_rate=model_params_cv['anneal_rate'],
                       min_temp=lambda_1)
        mf_weights, mf_variances, mf_betas = mf_model.get_weights()

        acc = get_scores(mf_model, x_valsets, y_valsets, single_head)
        ibp_acc = concatenate_results(acc, ibp_acc)

        mf_model.close_session()

    # NaN entries are skipped by nanmean rather than counted as zero.
    return np.nanmean(ibp_acc)

# Maximise the mean validation accuracy returned by cv_exp inside param_bounds.
bo = BayesianOptimization(cv_exp, param_bounds)
# Pass the configured acquisition function and iteration budget; a bare
# maximize() would silently ignore bo_params and fall back to library defaults.
bo.maximize(**bo_params)

|   iter    |  target   |   alpha   |   beta    | lambda_1  | lambda_2  |
-------------------------------------------------------------------------
Epoch: 0001 cost= 0.060023063
Epoch: 0006 cost= 0.001070564
Epoch: 0011 cost= 0.000234583
Epoch: 0016 cost= 0.000098851
Epoch: 0021 cost= 0.000055228
Epoch: 0026 cost= 0.000034481
Epoch: 0031 cost= 0.000022698
Epoch: 0036 cost= 0.000014909
Epoch: 0041 cost= 0.000010818
Epoch: 0046 cost= 0.000007686
Epoch: 0051 cost= 0.000005569
Epoch: 0056 cost= 0.000004187
Epoch: 0061 cost= 0.000003140
Epoch: 0066 cost= 0.000002421
Epoch: 0071 cost= 0.000001876
Epoch: 0076 cost= 0.000001440
Epoch: 0081 cost= 0.000001133
Epoch: 0086 cost= 0.000000870
Epoch: 0091 cost= 0.000000686
Epoch: 0096 cost= 0.000000534


W0814 18:29:17.721094 140369993201472 deprecation.py:323] From /home/skessler/Projects/IBP_BNN/ddm/alg/cla_models_multihead.py:531: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.cast` instead.


Optimization Finished!
z_discrete: (1, ?, 100)
biases: <unknown>
pre: <unknown>


W0814 18:29:17.947698 140369993201472 deprecation.py:323] From /home/skessler/Projects/IBP_BNN/ddm/alg/cla_models_multihead.py:502: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


z_discrete: (1, ?, 100)
biases: <unknown>
pre: <unknown>
z_discrete: (1, ?, 100)
biases: <unknown>
pre: <unknown>


W0814 18:29:18.463605 140369993201472 deprecation.py:323] From /home/skessler/anaconda2/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/math_grad.py:1205: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
W0814 18:29:19.341619 140369993201472 deprecation_wrapper.py:119] From /home/skessler/Projects/IBP_BNN/ddm/alg/cla_models_multihead.py:509: The name tf.train.Saver is deprecated. Please use tf.compat.v1.train.Saver instead.

W0814 18:29:19.383440 140369993201472 deprecation_wrapper.py:119] From /home/skessler/Projects/IBP_BNN/ddm/alg/cla_models_multihead.py:627: The name tf.summary.scalar is deprecated. Please use tf.compat.v1.summary.scalar instead.

W0814 18:29:19.401348 140369993201472 deprecation_wrapper.py:119] From /home/skessler/Projects/IBP_BNN/ddm/alg/cla_models_multihead.py:636: The name tf

_Z: (1, ?, 100)


TypeError: Cannot interpret feed_dict key as Tensor: Can not convert a float64 into a Tensor.

In [None]:
def folder_name(experiment_name, param_bounds, bo_params, data_params=None, model_params=None,
                train_params=None, optim_params=None, results_folder="./results"):
    """Build the results directory path for one BO experiment.

    The path is ``results_folder/experiment_name/<param_bounds>/<bo_params>/<model_params>``
    where each ``<...>`` segment is the dict rendered as ``key:value`` pairs,
    sorted by key and joined with ``|``.

    :param experiment_name: name of the experiment sub-folder
    :param param_bounds: dict encoded into the first settings segment
    :param bo_params: dict encoded into the second settings segment
    :param data_params: accepted for interface compatibility; not part of the path
    :param model_params: dict encoded into the last settings segment; the segment
        is omitted when this is None
    :param train_params: accepted for interface compatibility; not part of the path
    :param optim_params: accepted for interface compatibility; not part of the path
    :param results_folder: root directory of all results
    :returns: the joined path as a string
    """
    def _encode(params):
        # Sorting makes the folder name independent of dict insertion order.
        return '|'.join('{}:{}'.format(key, val) for key, val in sorted(params.items()))

    segments = [_encode(params)
                for params in (param_bounds, bo_params, model_params)
                if params is not None]
    return os.path.join(results_folder, experiment_name, *segments)

######################
## Store BO results ##
######################

# Folder for storing results
results_folder = "./results/"

experiment_name = 'ibp_split_mnist_bo'
# data_params / train_params / optim_params are part of folder_name's signature
# but do not contribute to the path, so they are passed explicitly as None.
folder = folder_name(results_folder=results_folder,
                     experiment_name=experiment_name,
                     param_bounds=param_bounds,
                     bo_params=bo_params,
                     data_params=None,
                     model_params=model_params,
                     train_params=None,
                     optim_params=None)

os.makedirs(folder, exist_ok=True)

# Every evaluated point of the optimisation.
with open(os.path.join(folder, 'res_all.pkl'), 'wb') as output_file:
    pickle.dump(bo.res, output_file)

# The best point found.
with open(os.path.join(folder, 'res_max.pkl'), 'wb') as output_file:
    pickle.dump(bo.max, output_file)

# Pull the best hyperparameters out for the final run in the next cell.
alpha_opt = bo.max['params']['alpha']
beta_opt = bo.max['params']['beta']
lambda_1_opt = bo.max['params']['lambda_1']
lambda_2_opt = bo.max['params']['lambda_2']
print("alpha_opt: {}".format(alpha_opt))
print("beta_opt: {}".format(beta_opt))
print("lambda_1_opt: {}".format(lambda_1_opt))
print("lambda_2_opt: {}".format(lambda_2_opt))

In [None]:
########################################
## Experiment with Optimal Parameters ##
########################################

# Re-run the IBP model with the hyperparameters selected by BO and score it on
# the held-out test sets. Every setting is taken from model_params / *_opt so
# the run mirrors cv_exp and does not depend on globals left over from other cells.
np.random.seed(1)

ibp_acc = np.array([])

coreset_size = 0
val = True
data_gen = SplitMnistGenerator(val)
single_head=False
in_dim, out_dim = data_gen.get_dims()
x_testsets, y_testsets = [], []
x_valsets, y_valsets = [], []
for task_id in range(data_gen.max_iter):
    
    tf.reset_default_graph()
    # The graph-level seed is attached to the current default graph, so it
    # must be re-applied after each reset to have any effect.
    tf.set_random_seed(12)
    if val:
        x_train, y_train, x_test, y_test, x_val, y_val = data_gen.next_task()
        x_valsets.append(x_val)
        y_valsets.append(y_val)
    else:    
        x_train, y_train, x_test, y_test = data_gen.next_task()
    x_testsets.append(x_test)
    y_testsets.append(y_test)

    # Set the readout head to train
    head = 0 if single_head else task_id
    # batch_size=None means "use the whole task as one batch"
    if model_params['batch_size'] is None:
        bsize = x_train.shape[0]
    else:
        bsize = model_params['batch_size']
    
    # Train network with maximum likelihood to initialize first model
    # (same layer sizes and 100 epochs as the initialisation inside cv_exp).
    if task_id == 0:
        ml_model = Vanilla_NN(in_dim, model_params['hidden_size'], out_dim, x_train.shape[0])
        ml_model.train(x_train, y_train, task_id, 100, bsize)
        mf_weights = ml_model.get_weights()
        mf_variances = None
        mf_betas = None
        ml_model.close_session()

    # Train
    # Keyword names follow the cv_exp call that produced the optimum.
    # NOTE(review): an earlier cell passes temp/temp_prior instead -- confirm
    # which names MFVI_IBP_NN currently accepts.
    mf_model = MFVI_IBP_NN(in_dim,
                           model_params['hidden_size'],
                           out_dim,
                           x_train.shape[0],
                           prev_means=mf_weights,
                           prev_log_variances=mf_variances,
                           prev_betas=mf_betas,
                           alpha0=alpha_opt,
                           beta0=beta_opt,
                           learning_rate=model_params['learning_rate'],
                           lambda_1=lambda_1_opt,
                           lambda_2=lambda_2_opt,
                           no_pred_samples=model_params['pred_samples'],
                           name='ibp_bo_opt')
        
    # model_params has no 'min_temp' entry; cv_exp used lambda_1 as min_temp,
    # so the optimal lambda_1 is used here as well.
    mf_model.train(x_train, y_train, head, model_params['no_epochs'], 
                   bsize,
                   anneal_rate=model_params['anneal_rate'], 
                   min_temp=lambda_1_opt)
    
    mf_weights, mf_variances, mf_betas = mf_model.get_weights()

    # Final evaluation uses the test sets, not the validation sets used by BO.
    acc = get_scores(mf_model, x_testsets, y_testsets, single_head)
    ibp_acc = concatenate_results(acc, ibp_acc)
    
    mf_model.close_session()
    
ibp_acc

In [None]:
# Baseline: plain VCL without the IBP prior, on a fresh graph with fixed seeds.
tf.reset_default_graph()
np.random.seed(1)
tf.set_random_seed(12)

coreset_size = 0
hidden_size = [10]

data_gen = SplitMnistGenerator()
# The identity lambda fills run_vcl's fourth positional argument;
# no_epochs, batch_size and single_head come from earlier cells.
vcl_result = run_vcl(
    hidden_size,
    no_epochs,
    data_gen,
    lambda a: a,
    coreset_size,
    batch_size,
    single_head,
)
print(vcl_result)

In [None]:
# # Run IBP VCL
# tf.reset_default_graph()
# tf.set_random_seed(12)
# np.random.seed(1)
# coreset_size = 0

# hidden_size = [50]
# batch_size = 128
# no_epochs = 100
# alpha0 = 5.0
# tau0=0.1 # initial temperature
# temp_prior=1.0
# ANNEAL_RATE=0.000
# MIN_TEMP=0.1
# single_head=False

# # data_gen = SplitMnistGenerator()
# # vcl_ibp_result = vcl.run_vcl_ibp(hidden_size=hidden_size, no_epochs=no_epochs, data_gen=data_gen,
# #                                   batch_size=batch_size, single_head=single_head, alpha0=alpha0,
# #                                   learning_rate=0.01, temp_prior=temp_prior, no_pred_samples=100,
# #                                   tau0=tau0, tau_anneal_rate=ANNEAL_RATE, tau_min=MIN_TEMP)
# # print(vcl_ibp_result)

In [None]:
# Row-wise means that skip NaN entries, one value per row of each result matrix
# (presumably one row per training step -- confirm for vcl_result).
_ibp_acc = np.nanmean(ibp_acc, axis=1)
_vcl_result = np.nanmean(vcl_result, axis=1)

In [None]:
# Display the NaN-ignoring row means of the VCL baseline results.
_vcl_result

In [None]:
# Compare the average accuracy of VCL + IBP against the VCL baseline as tasks
# are added. Rendering labels through LaTeX requires a working TeX installation.
plt.rc('text', usetex=True)
#plt.rc('font', family='serif')

fig, ax = plt.subplots(figsize=(7, 3))
ax.plot(np.arange(len(_ibp_acc))+1, _ibp_acc, label='VCL + IBP', marker='o')
ax.plot(np.arange(len(_vcl_result))+1, _vcl_result, label='VCL', marker='o')
# One tick per task, sized to the longer curve so no point is left unlabelled.
ax.set_xticks(range(1, max(len(_ibp_acc), len(_vcl_result))+1))
ax.set_ylabel('Average accuracy')
# Raw string: '\#' is not a valid Python escape, but LaTeX needs the backslash.
ax.set_xlabel(r'\# tasks')
ax.legend()
# plt.show() renders with the inline backend; Figure.show() only emits a
# non-GUI-backend warning there.
plt.show()