In [2]:
#SETTING UP AN RBM
import tensorflow as tf
import numpy as np
import tensorflow.examples.tutorials.mnist.input_data as input_data
from PIL import Image
from utils import tile_raster_images

In [3]:
#helper functions and constants
def sample_prob(probs):
    '''Draw a binary (0.0 / 1.0) sample for every entry of `probs`.

    Each entry is compared against its own uniform random draw of the same
    shape: the output is 1.0 where the probability is strictly greater than
    the draw and 0.0 everywhere else.
    '''
    uniform_noise = tf.random_uniform(tf.shape(probs))
    # sign() yields -1 / 0 / +1; relu() then clamps the -1 case down to 0.
    signed = tf.sign(probs - uniform_noise)
    return tf.nn.relu(signed)

# Step size: multiplies the weight and bias update terms of the RBM.
alpha = 1.0
# Number of training rows taken per mini-batch by the training loop.
batchsize = 100

In [4]:
#read the MNIST data sets from MNIST_data/ with one-hot encoded labels
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

#training split: images and labels
trX = mnist.train.images
trY = mnist.train.labels

#test split: images and labels
teX = mnist.test.images
teY = mnist.test.labels

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [5]:
#defining the network inputs (everything is fed in through placeholders)

#X: batch of visible vectors, 784 values per row
X = tf.placeholder(tf.float32, shape=[None, 784])
#Y: batch of 10-wide rows -- presumably the one-hot labels; not used by the
#RBM ops in this part of the notebook
Y = tf.placeholder(tf.float32, shape=[None, 10])

#RBM parameters, fed from NumPy arrays on every run
rbm_w = tf.placeholder(tf.float32, shape=[784, 500])  #visible-to-hidden weights
rbm_vb = tf.placeholder(tf.float32, shape=[784])  #bias added on the visible side
rbm_hb = tf.placeholder(tf.float32, shape=[500])  #bias added on the hidden side

In [6]:
#building blocks: conditional activation probabilities of each layer
def _hidden_probs(visible):
    '''Sigmoid activation of the hidden units given a batch of visible rows.'''
    return tf.nn.sigmoid(tf.matmul(visible, rbm_w) + rbm_hb)


def _visible_probs(hidden):
    '''Sigmoid activation of the visible units given a batch of hidden rows.'''
    return tf.nn.sigmoid(tf.matmul(hidden, tf.transpose(rbm_w)) + rbm_vb)


#one Gibbs step starting from the data:
#  h0 = hidden sample driven by X
#  v1 = visible sample reconstructed from h0
#  h1 = hidden probabilities driven by v1 (left unsampled)
h0 = sample_prob(_hidden_probs(X))
v1 = sample_prob(_visible_probs(h0))
h1 = _hidden_probs(v1)

#contrastive-divergence statistics: data-driven term minus reconstruction term
#(CD stands in for the gradient of the intractable partition function)
w_positive_grad = tf.matmul(tf.transpose(X), h0)
w_negative_grad = tf.matmul(tf.transpose(v1), h1)

#updated parameter values; the weight statistics are averaged over the batch
n_examples = tf.to_float(tf.shape(X)[0])
update_w = rbm_w + (alpha * (w_positive_grad - w_negative_grad)) / n_examples
update_vb = rbm_vb + alpha * tf.reduce_mean(X - v1, axis=0)
update_hb = rbm_hb + alpha * tf.reduce_mean(h0 - h1, axis=0)

#independent sampled reconstruction of X, used only for the error metric
h_sample = sample_prob(_hidden_probs(X))
v_sample = sample_prob(_visible_probs(h_sample))

#error: mean squared difference between the input and its sampled reconstruction
err = X - v_sample
err_sum = tf.reduce_mean(tf.square(err))

#parameter buffers start at all zeros (a normal init could be tried instead);
#the n_* / o_* pairs are separate arrays of matching shape

#visible-side bias
n_vb = np.zeros((784,), dtype=np.float32)
o_vb = np.zeros((784,), dtype=np.float32)

#hidden-side bias
n_hb = np.zeros((500,), dtype=np.float32)
o_hb = np.zeros((500,), dtype=np.float32)

#weight matrix
n_w = np.zeros((784, 500), dtype=np.float32)
o_w = np.zeros((784, 500), dtype=np.float32)

In [7]:
#training the network
with tf.Session() as sess:
    #running the session, initializing (the parameters themselves live in the
    #NumPy arrays o_w / o_vb / o_hb and are fed through placeholders)
    init = tf.global_variables_initializer()
    sess.run(init)

    #reconstruction error on the full training set before any update
    print(sess.run(
        err_sum, feed_dict={X: trX, rbm_w: o_w, rbm_vb: o_vb, rbm_hb: o_hb}))

    #running mini-batches; stepping over `start` alone and slicing to
    #start + batchsize visits every row, including the final batch
    for start in range(0, len(trX), batchsize):
        batch = trX[start:start + batchsize]
        feed = {X: batch, rbm_w: o_w, rbm_vb: o_vb, rbm_hb: o_hb}

        #updates for CD: fetch all three in ONE run call. sample_prob draws
        #new random numbers on every run, so separate calls would compute the
        #weight and bias updates from different Gibbs samples.
        n_w, n_vb, n_hb = sess.run(
            [update_w, update_vb, update_hb], feed_dict=feed)
        o_w, o_vb, o_hb = n_w, n_vb, n_hb

        #printing the full-training-set loss every 10,000 examples
        if start % 10000 == 0:
            print(sess.run(
                err_sum, feed_dict={X: trX, rbm_w: n_w, rbm_vb: n_vb, rbm_hb: n_hb}))

    #render the learned filters once, from the final weights: one 28x28 tile
    #per column of the weight matrix, laid out on a 25x20 grid
    image = Image.fromarray(
        tile_raster_images(
            X=o_w.T,
            img_shape=(28, 28),
            tile_shape=(25, 20),
            tile_spacing=(1, 1)
        )
    )

    #display generated image at end of training
    image.show()

0.481281
0.242306
0.0946451
0.0789359
0.0714885
0.0677386
0.0641841
