In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys
import numpy as np
import tensorflow as tf
# -- `contrib` module in TF 1.3
from tensorflow.contrib.distributions import (
    NormalWithSoftplusScale, Categorical,
)
from sklearn.utils import shuffle

from nn4post import Inferencer
from nn4post.utils import get_param_shape, get_param_space_dim
from nn4post.utils.posterior import get_log_posterior
from nn4post.utils.tf_trainer import SimpleTrainer

import mnist

# Reset TensorFlow's global default graph so the cells below build their ops
# on a clean graph when the notebook is re-run in the same kernel.
tf.reset_default_graph()

  return f(*args, **kwds)


In [2]:
# PARAMETERS
# `N_C` is passed to `Inferencer` as its first argument and is the leading
# dimension of the variational variables `a`, `mu` and `zeta`
# (presumably the number of mixture components -- TODO confirm in nn4post).
N_C = 1
# `NOISE_STD` and `BATCH_SIZE` are forwarded, in this order, to `mnist.MNIST`.
NOISE_STD = 0.0
BATCH_SIZE = 64


# DATA
mnist_ = mnist.MNIST(NOISE_STD, BATCH_SIZE)


# MODEL
n_inputs = 28 * 28  # number of input features.
n_hiddens = 200  # number of perceptrons in the (single) hidden layer.
n_outputs = 10  # number of perceptrons in the output layer.

# Placeholders for one batch: float32 inputs with `n_inputs` columns and one
# int32 label per row.
with tf.name_scope('data'):
    x = tf.placeholder(shape=[None, n_inputs], dtype=tf.float32, name='x')
    y = tf.placeholder(shape=[None], dtype=tf.int32, name='y')

# The dict keys are the names used by `model` ('x' as input) and by the
# distribution dict it returns ('y').
input_ = {'x': x}
observed = {'y': y}

def model(input_, param):
    """Shallow neural network with one sigmoid hidden layer, built with
    TensorFlow ops, whose output logits parameterize a categorical
    distribution.

    Args:
        input_:
            `dict` of Tensors; only the entry `'x'` is read here.
        param:
            `dict` of Tensors with the keys `'w_h'`, `'b_h'` (hidden layer)
            and `'w_a'`, `'b_a'` (output layer).

    Returns:
        `dict` `{'y': Y}`, where `Y` is the `Categorical` distribution built
        from the output-layer logits.
    """
    features = input_['x']

    # Hidden layer, shape: `[None, n_hiddens]`
    hidden_pre_activation = tf.matmul(features, param['w_h']) + param['b_h']
    hidden = tf.sigmoid(hidden_pre_activation)

    # Output layer, shape: `[None, n_outputs]`
    logits = tf.matmul(hidden, param['w_a']) + param['b_a']

    return {'y': Categorical(logits=logits)}


# PRIOR
# Zero-centred Normal priors for every weight matrix and bias vector, with
# shapes matching the layers used in `model`.
# `NormalWithSoftplusScale` applies a softplus to its `scale` argument, so
# the effective standard deviations are softplus(10) and softplus(100),
# which are numerically ~10 (weights) and ~100 (biases).
with tf.name_scope('prior'):
    w_h = NormalWithSoftplusScale(
        loc=tf.zeros([n_inputs, n_hiddens]),
        scale=tf.ones([n_inputs, n_hiddens]) * 10,
        name="w_h")
    w_a = NormalWithSoftplusScale(
        loc=tf.zeros([n_hiddens, n_outputs]),
        scale=tf.ones([n_hiddens, n_outputs]) * 10,
        name="w_a")
    b_h = NormalWithSoftplusScale(
        loc=tf.zeros([n_hiddens]),
        scale=tf.ones([n_hiddens]) * 100,
        name="b_h")
    b_a = NormalWithSoftplusScale(
        loc=tf.zeros([n_outputs]),
        scale=tf.ones([n_outputs]) * 100,
        name="b_a")

# Keys are the parameter names that `model` looks up in its `param` argument.
param_prior = {
    'w_h': w_h, 'w_a': w_a,
    'b_h': b_h, 'b_a': b_a,
}


# POSTERIOR
# Ratio of the dataset size to the mini-batch size, handed to
# `get_log_posterior` as `scale` (presumably used to rescale the mini-batch
# log-likelihood to the full dataset -- TODO confirm against nn4post).
scale = mnist_.n_data / mnist_.batch_size
log_posterior = get_log_posterior(
    model, input_, observed, param_prior, scale=scale)

In [3]:
# INFERENCE
# Parameter shapes are derived from the prior; `param_space_dim` is the value
# printed below (159010 in the recorded output, which equals
# 784*200 + 200 + 200*10 + 10 for the shapes declared in the prior).
param_shape = get_param_shape(param_prior)
param_space_dim = get_param_space_dim(param_shape)
print('\n-- Dimension of parameter-space: {}.\n'.format(param_space_dim))

inferencer = Inferencer(N_C, param_space_dim, log_posterior)

# Trainable variables handed to the inferencer: `a` has one entry per
# component, `mu` and `zeta` have one row of `param_space_dim` values per
# component (presumably mixture weights, means and unconstrained scales --
# TODO confirm in nn4post).
# NOTE(review): `mu` is initialized from `np.random.normal` without a seed,
# so a fresh (non-restored) run is not reproducible.
with tf.name_scope('variables'):
    a = tf.Variable(
        np.zeros([N_C]),
        dtype='float32',
        name='a')
    mu = tf.Variable(
        np.random.normal(size=[N_C, param_space_dim]),
        dtype='float32',
        name='mu')
    zeta = tf.Variable(
        np.zeros([N_C, param_space_dim]),
        dtype='float32',
        name='zeta')

# The dict keys must match the keyword names accepted by
# `make_loss_and_gradients`, since the dict is unpacked with `**`.
var = {'a': a, 'mu': mu, 'zeta': zeta}
loss, gradients = inferencer.make_loss_and_gradients(**var)
samples, weights = inferencer.make_samples_and_weights()


-- Dimension of parameter-space: 159010.



In [4]:
# TRAIN
# One batch generator is shared by the training feed defined below and by
# the later sampling cell, which pulls one more batch from it.
batch_generator = mnist_.batch_generator()
def get_feed_dict_generator():
    """Yield one training feed-dict per batch drawn from `batch_generator`.

    The labels of each batch are reduced with a per-row `argmax` and cast to
    `int32`, matching the `int32` placeholder `y`. Only the first two fields
    of a batch are used; any further fields are ignored, in the same way the
    sampling cell unpacks a batch.

    Yields:
        `dict` mapping the placeholders `x` and `y` to the inputs and the
        class-index labels of one batch.
    """
    while True:
        try:
            x_train, y_train, *_ = next(batch_generator)
        except StopIteration:
            # A `StopIteration` escaping from a generator body becomes a
            # `RuntimeError` on Python 3.7+ (PEP 479); finish cleanly instead
            # if the underlying batch generator is ever exhausted.
            return
        y_train = np.argmax(y_train, axis=1).astype('int32')
        yield {x: x_train, y: y_train}
# Bundle the loss, its gradients and an Adam optimizer (learning rate 0.005)
# into the project's `SimpleTrainer`. The log and checkpoint directories are
# relative paths, so they resolve against the notebook's working directory.
trainer = SimpleTrainer(
    loss=loss,
    gvs=gradients,
    optimizer=tf.train.AdamOptimizer(0.005),
    logdir='../dat/logs/nn4post_advi_on_mnist',
    dir_to_ckpt='../dat/checkpoints/nn4post_advi_on_mnist/',
)
# Iteration count for a full training run (currently disabled):
#n_iters = 30000
# Zero iterations: no optimization steps are requested. The recorded output
# shows an existing checkpoint (global step 120) being restored, which is
# what the later cells read their variable values from.
n_iters = 0  # test!
feed_dict_generator = get_feed_dict_generator()
trainer.train(n_iters, feed_dict_generator)

INFO:tensorflow:Summary name nn4post/loss/loss/add:0 is illegal; using nn4post/loss/loss/add_0 instead.
INFO:tensorflow:Summary name nn4post/loss/loss/add:0 is illegal; using nn4post/loss/loss/add_0 instead.
INFO:tensorflow:Restoring parameters from ../dat/checkpoints/nn4post_advi_on_mnist/checkpoint-120


0it [00:00, ?it/s]

INFO - Restored from ../dat/checkpoints/nn4post_advi_on_mnist/.
INFO - Start training at global step 120.





In [8]:
# SAMPLING
# Read the current values of the variational variables from the trainer's
# session.
a_val, mu_val, zeta_val = trainer.sess.run([a, mu, zeta])

# Pull one more batch; labels are reduced per row with `argmax` to `int32`
# class indices, as in the training feed.
x_val, y_val, *rests = next(batch_generator)
y_val = np.argmax(y_val, axis=1).astype('int32')

# Feed the data batch and the number of samples to draw ...
feed_dict = {
    x: x_val,
    y: y_val,
    inferencer.n_pred_samples: 10,
}
# ... together with the variable values read above.
feed_dict.update({
    inferencer.q_parameters.a: a_val,
    inferencer.q_parameters.mu: mu_val,
    inferencer.q_parameters.zeta: zeta_val,
})

sample_vals, weight_vals = trainer.sess.run(
    [samples, weights], feed_dict=feed_dict)

In [9]:
# Shape check: one row per requested sample (10 were fed as
# `n_pred_samples`), each with `param_space_dim` entries, and one weight per
# sample.
sample_vals.shape, weight_vals.shape

((10, 159010), (10,))

In [10]:
# Display the per-sample weights returned alongside the samples.
weight_vals

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.], dtype=float32)

In [None]:
# PREDICTION

# Get test data of MNIST
x_test, y_test, y_err_test = mnist_.test_data
# Cast the labels to `int32` so that their dtype matches the voted
# predictions (`get_accuracy` asserts equal dtypes).
# NOTE(review): the training and sampling labels go through
# `np.argmax(..., axis=1)` but the test labels do not -- confirm that
# `test_data` already holds class indices rather than one row per label.
y_test = y_test.astype('int32')


# Get the trained variables.
trained_var = {
    name:
        trainer.sess.run(v)
    for name, v in var.items()
}
print('a: ', trained_var['a'])
print('zeta mean: ', np.mean(trained_var['zeta']))
print('zeta std: ', np.std(trained_var['zeta']))

# NOTE(review): `build_prediction` is neither imported nor defined in any of
# the cells shown in this notebook, so this cell raises `NameError` on a
# fresh kernel -- add the missing import (presumably from `nn4post.utils`;
# confirm the module path).
predictions_dict = build_prediction(
    trained_var, model, param_shape, input_, n_samples=100)
predictions = tf.stack(predictions_dict['y'], axis=0)

# NOTE(review): this opens a new session instead of reusing `trainer.sess`;
# that only works if the prediction ops do not read TensorFlow variables
# (they are built from the NumPy values in `trained_var`) -- confirm.
# The name `predictions` is rebound from the stacked tensor to its evaluated
# value, so this `with` block cannot be re-run on its own.
with tf.Session() as sess:
    feed_dict = {x: x_test}
    # shape: `[n_samples, n_data]`
    predictions = sess.run(predictions, feed_dict=feed_dict)

def get_most_freq(array):
    """Return the value that occurs most often in `array`.

    `array` is counted with `np.bincount`, so it must be a 1-D array of
    non-negative integers. When several values share the highest count, the
    smallest of them is returned.
    """
    counts = np.bincount(array)
    return counts.argmax()
# Majority vote over the sampled predictions: `predictions` has one row per
# sample, so each row of its transpose holds all votes for one data point.
# Resulting shape `[n_data]`.
voted_predictions = np.array(
    [get_most_freq(votes) for votes in predictions.T])

def get_accuracy(xs, ys):
    """Fraction of positions at which two arrays hold equal values.

    Args:
        xs:
            Numpy array.
        ys:
            Numpy array with the same shape and dtype as `xs`.

    Returns:
        `float`, the proportion of elements of `xs` that are equal to the
        element of `ys` at the same position.

    Raises:
        AssertionError:
            If `xs` and `ys` differ in dtype or in shape.
        ZeroDivisionError:
            If the arrays are empty, for which the accuracy is undefined.
    """
    assert xs.dtype == ys.dtype
    # Enforce the documented same-shape contract; pairing the elements with
    # `zip` would silently drop the tail of the longer array instead.
    assert xs.shape == ys.shape
    n_total = xs.size
    n_correct = int(np.count_nonzero(xs == ys))
    # Plain Python ints on both sides: the result is a built-in `float`, and
    # empty input raises `ZeroDivisionError`.
    return n_correct / n_total

# Score the majority vote against the test labels. The vote is cast to
# `int32` first because `get_accuracy` asserts that both dtypes agree.
voted_predictions = voted_predictions.astype('int32')
targets = y_test  # shape `[n_data]`, dtype `int32`.
accuracy = get_accuracy(voted_predictions, targets)
print('Accuracy: ', accuracy)