In [3]:
import numpy as np
import tensorflow as tf
import gzip
import pickle
import sys
from ddm.run_split import SplitMnistGenerator
from ddm.alg.cla_models_multihead import MFVI_IBP_NN, Vanilla_NN
from ddm.alg.utils import get_scores, concatenate_results
from ddm.alg.vcl import run_vcl
from copy import deepcopy

import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt

# IBP network

In [None]:
# --- Hyperparameters for the IBP network run ---
hidden_size = [50]
batch_size = 128
no_epochs = 100
alpha0 = 5.0
tau0 = 0.1  # initial temperature
ANNEAL_RATE = 0.000
MIN_TEMP = 0.1

# Run VCL with the IBP network (one MFVI_IBP_NN trained per task).
TF_SEED = 12
tf.set_random_seed(TF_SEED)
np.random.seed(1)

# Accumulates the per-task accuracy rows returned by get_scores.
ibp_acc = np.array([])

coreset_size = 0
data_gen = SplitMnistGenerator()
single_head = False
in_dim, out_dim = data_gen.get_dims()
x_testsets, y_testsets = [], []
for task_id in range(data_gen.max_iter):

    tf.reset_default_graph()
    # The graph-level seed belongs to the default graph, and the reset above
    # replaces that graph with a fresh, unseeded one. Re-seed here so that the
    # models built in this iteration are actually reproducible.
    tf.set_random_seed(TF_SEED)

    x_train, y_train, x_test, y_test = data_gen.next_task()
    # Keep every test set seen so far; all of them are scored after each task.
    x_testsets.append(x_test)
    y_testsets.append(y_test)

    # Set the readout head to train
    head = 0 if single_head else task_id
    # Fall back to full-batch training when no batch size is configured.
    bsize = x_train.shape[0] if (batch_size is None) else batch_size

    # Train network with maximum likelihood to initialize first model
    if task_id == 0:
        ml_model = Vanilla_NN(in_dim, hidden_size, out_dim, x_train.shape[0])
        ml_model.train(x_train, y_train, task_id, no_epochs, bsize)
        mf_weights = ml_model.get_weights()
        # No previous variances/betas exist before the first IBP model is trained.
        mf_variances = None
        mf_betas = None
        ml_model.close_session()

    # Train on non-coreset data, starting from the previous task's parameters.
    mf_model = MFVI_IBP_NN(in_dim, hidden_size, out_dim, x_train.shape[0], prev_means=mf_weights,
                           prev_log_variances=mf_variances, prev_betas=mf_betas, alpha0=alpha0,
                           learning_rate=0.01, temp_prior=1.0, no_pred_samples=100)
    mf_model.train(x_train, y_train, head, no_epochs, bsize, tau0=tau0,
                   anneal_rate=ANNEAL_RATE, min_temp=MIN_TEMP)
    # These become the prev_* arguments of the next task's model.
    mf_weights, mf_variances, mf_betas = mf_model.get_weights()

    acc = get_scores(mf_model, x_testsets, y_testsets, single_head)
    ibp_acc = concatenate_results(acc, ibp_acc)

    mf_model.close_session()

ibp_acc

Epoch: 0001 cost= 0.089797305
Epoch: 0006 cost= 0.001424488
Epoch: 0011 cost= 0.000447167
Epoch: 0016 cost= 0.000168873
Epoch: 0021 cost= 0.000087530
Epoch: 0026 cost= 0.000049515
Epoch: 0031 cost= 0.000031621
Epoch: 0036 cost= 0.000020822
Epoch: 0041 cost= 0.000013849
Epoch: 0046 cost= 0.000009798
Epoch: 0051 cost= 0.000007353
Epoch: 0056 cost= 0.000005071
Epoch: 0061 cost= 0.000003618
Epoch: 0066 cost= 0.000002720
Epoch: 0071 cost= 0.000001976
Epoch: 0076 cost= 0.000001494
Epoch: 0081 cost= 0.000001120
Epoch: 0086 cost= 0.000000879
Epoch: 0091 cost= 0.000000618
Epoch: 0096 cost= 0.000000470
Optimization Finished!
beta a: (50,)
beta b: (50,)
logpis: (1, ?, 50)
act: (100, ?, 50, 1)
pre: <unknown>
beta a: (50,)
beta b: (50,)
logpis: (1, 784, 50)
beta a: ()
beta b: ()
logpis: (1, 784, 50)
beta a: (2,)
beta b: (2,)
logpis: (1, 50, 2)
beta a: ()
beta b: ()
logpis: (1, 50, 2)
beta a: (50,)
beta b: (50,)
logpis: (1, ?, 50)
act: (10, ?, 50, 1)
pre: <unknown>
beta a: (50,)
beta b: (50,)
logpis

In [None]:
# Run Vanilla VCL
tf.reset_default_graph()
tf.set_random_seed(12)
np.random.seed(1)

coreset_size = 0

data_gen = SplitMnistGenerator()
# `run_vcl` is imported directly (`from ddm.alg.vcl import run_vcl`); the
# module name `vcl` is never bound in this notebook, so `vcl.run_vcl(...)`
# raises NameError on a fresh kernel.
# NOTE(review): the identity lambda is presumably the coreset-selection
# argument, unused because coreset_size is 0 - confirm against run_vcl.
vcl_result = run_vcl(hidden_size, no_epochs, data_gen,
                     lambda a: a, coreset_size, batch_size, single_head)
print(vcl_result)

In [None]:
# Run IBP VCL
# Start from a clean default graph with fixed seeds.
tf.reset_default_graph()
tf.set_random_seed(12)
np.random.seed(1)

# Experiment setup.
coreset_size = 0
single_head = False

# Network / optimisation settings.
hidden_size = [50]
batch_size = 128
no_epochs = 100

# IBP-specific settings.
alpha0 = 5.0
temp_prior = 1.0
tau0 = 0.1  # initial temperature
ANNEAL_RATE = 0.000
MIN_TEMP = 0.1

# The call below is disabled; this cell currently only sets configuration.
# NOTE(review): the module name `vcl` is not imported in this notebook, so the
# call needs a matching import before it can be re-enabled.
# data_gen = SplitMnistGenerator()
# vcl_ibp_result = vcl.run_vcl_ibp(hidden_size=hidden_size, no_epochs=no_epochs, data_gen=data_gen,
#                                   batch_size=batch_size, single_head=single_head, alpha0=alpha0,
#                                   learning_rate=0.01, temp_prior=temp_prior, no_pred_samples=100,
#                                   tau0=tau0, tau_anneal_rate=ANNEAL_RATE, tau_min=MIN_TEMP)
# print(vcl_ibp_result)

In [None]:
# Collapse each accuracy matrix to one NaN-ignoring mean per row (axis 1).
# Note: `ibp_acc` is overwritten by its own row means, so run this cell only
# once after the training cell.
ibp_acc = np.nanmean(ibp_acc, axis=1)
rand_vcl_result = np.nanmean(vcl_result, axis=1)

In [None]:
# Render all figure text through LaTeX with a serif font.
plt.rc('text', usetex=True)
plt.rc('font', family='serif')

fig, ax = plt.subplots(figsize=(7, 3))

# x positions are 1-based task counts.
ax.plot(np.arange(len(ibp_acc)) + 1, ibp_acc, label='VCL + IBP', marker='o')
# Plot the averaged VCL accuracies (`rand_vcl_result`), not the raw
# `vcl_result` matrix, so both curves show one mean accuracy per task count.
ax.plot(np.arange(len(rand_vcl_result)) + 1, rand_vcl_result, label='VCL', marker='o')
# One tick per task; the previous `len(vcl)` referred to an undefined name.
ax.set_xticks(range(1, len(rand_vcl_result) + 1))
ax.set_ylabel('Average accuracy')
# Raw string: '#' must stay escaped for LaTeX, and '\#' is not a valid Python
# escape sequence in a normal string literal.
ax.set_xlabel(r'\# tasks')
ax.legend()
# plt.show() works with the inline backend; fig.show() only warns there.
plt.show()