In [3]:
from __future__ import division, print_function

import numpy as np
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import datetime

In [4]:
# Short aliases for the two TF distribution classes the model uses below.
Normal, Bernoulli = tf.distributions.Normal, tf.distributions.Bernoulli

In [5]:
# Set Eager API
#print("Setting Eager mode...")
#tf.enable_eager_execution()
#tfe = tf.contrib.eager

### Hyper-parameters

### Import MNIST

In [6]:
# Handle to the MNIST dataset module bundled with tf.keras; load_data() is called on it below.
mnist = tf.keras.datasets.mnist

### Load data

In [7]:
# Pool the loader's own train/test partitions; a fresh split is drawn further down.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X = np.concatenate([X_train, X_test])
X = X.reshape(len(X), -1)  # one flat row per image
y = np.concatenate([y_train, y_test], axis=0).astype(np.float32)

In [8]:
# One-hot encode the labels; the encoder wants a 2-D column, hence the reshape.
label_reshaped = y.reshape(-1, 1)
onehot_encoder = OneHotEncoder(categories='auto', sparse=False)
onehot_encoded = onehot_encoder.fit_transform(label_reshaped)

# Binarize the pixels.
# NOTE(review): no rescaling of X is visible before this point, so if the
# loader returns raw 0-255 intensities this threshold maps every non-zero
# pixel to 1 -- confirm that is intended.
X = np.asarray(X > 0.5, dtype=np.float32)

# Keep features and one-hot labels side by side in one matrix so a single
# split (and, later, a single shuffle) keeps rows and labels aligned.
data = np.hstack((X, onehot_encoded))
train_data, test_data = train_test_split(data, test_size=0.1, random_state=40)
train_data, test_data = train_data.astype('float'), test_data.astype('float')

In [17]:
# Number of one-hot label columns, i.e. the width of the classifier output.
y_dim = onehot_encoded.shape[-1]

### Utility functions

In [9]:
def get_log_folder(base_dir="/tmp/class_vae/deep"):
    """Build a fresh, timestamped log directory path for one training run.

    The path has the form ``<base_dir>/<DD_MM_YYYY>/<YYYYMMDDHHMMSS>`` so that
    runs are grouped by day and each run gets its own sub-folder. Nothing is
    created on disk; only the path string is returned.

    Args:
        base_dir: Root under which the per-day / per-run folders live. The
            default reproduces the location used originally; pass another
            root to keep logs out of ``/tmp``. A trailing ``/`` is ignored.

    Returns:
        The log folder path as a string.
    """
    # Read the clock once so the day folder and the run suffix always agree.
    now = datetime.datetime.now()
    day_of_the_year = now.strftime("%d_%m_%Y")
    suff = now.strftime("%Y%m%d%H%M%S")
    return "{}/{}/{}".format(str(base_dir).rstrip("/"), day_of_the_year, suff)

### Build neural network

In [10]:
class DenseLayer:
    """Fully connected layer ``f(X . W + b)`` with TensorBoard histograms.

    Weights start from a truncated normal (stddev 0.1) and biases from the
    constant 0.1. Histogram summaries are registered for the weights, the
    bias and, on every ``forward`` call, the activations.
    """

    def __init__(self, in_dim, out_dim, f=tf.nn.relu, name="dense"):
        """Create the layer's variables.

        Args:
            in_dim: Width of the incoming tensor's last axis.
            out_dim: Number of units produced by the layer.
            f: Activation applied to the affine output.
            name: Name scope under which the variables and ops are grouped.
        """
        self.name = name
        self.f = f
        with tf.name_scope(self.name):
            w_init = tf.truncated_normal(shape=(in_dim, out_dim), stddev=0.1)
            self.W = tf.Variable(w_init, name="W")
            b_init = tf.constant(0.1, shape=[out_dim])
            self.b = tf.Variable(b_init, name="bias")
            tf.summary.histogram("weights", self.W)
            tf.summary.histogram("bias", self.b)

    def forward(self, X):
        """Apply the layer to ``X`` and return the activated output tensor."""
        with tf.name_scope(self.name):
            pre_activation = tf.matmul(X, self.W) + self.b
            act = self.f(pre_activation)
            tf.summary.histogram("activation", act)
        return act

In [24]:
class VClassifier:
    """Classifier with a stochastic Gaussian bottleneck (TF1 graph mode).

    An encoder MLP maps the input features to the mean and standard deviation
    of a diagonal Gaussian. One sample ``Z`` drawn from it is pushed through a
    decoder MLP that emits one logit per label column. The logits parameterize
    independent Bernoulli distributions over the one-hot label entries, and
    training maximizes the mean label log-likelihood, optionally minus a
    weighted KL term towards a standard normal prior.

    Because ``Z`` is sampled on every ``session.run``, predictions and
    accuracies vary slightly from call to call.
    """

    def encode(self, X, input_dim, hidden_dims):
        """Build the encoder and return ``(means, stdevs)`` of the latent Gaussian.

        Args:
            X: Input tensor whose last axis has width ``input_dim``.
            input_dim: Number of input features.
            hidden_dims: Layer widths; every entry but the last is a ReLU
                hidden layer, the last entry is the latent dimension.

        Returns:
            Two tensors of width ``hidden_dims[-1]``: the means and the
            strictly positive standard deviations.
        """
        encoder_layers = []
        in_dim = input_dim
        for h_dim in hidden_dims[:-1]:
            h = DenseLayer(in_dim, h_dim)
            encoder_layers.append(h)
            in_dim = h_dim

        # The final linear layer emits means and raw stdevs side by side.
        middle_layer_dim = hidden_dims[-1]
        encoder_layers.append(DenseLayer(in_dim, 2 * middle_layer_dim, f=lambda x: x))

        current_value = X
        for layer in encoder_layers:
            current_value = layer.forward(current_value)

        means = current_value[:, :middle_layer_dim]
        # softplus keeps the scale positive; the epsilon keeps it away from 0.
        stdevs = tf.nn.softplus(current_value[:, middle_layer_dim:]) + 1e-6
        return means, stdevs

    def decode(self, Z, output_dim, hidden_dims):
        """Build the decoder and return its logits.

        The hidden layers mirror the encoder (``hidden_dims[:-1]`` reversed)
        and are followed by a linear layer of width ``output_dim``.

        Args:
            Z: Latent tensor of width ``hidden_dims[-1]``.
            output_dim: Number of logits to produce.
            hidden_dims: The same layer widths that were given to ``encode``.

        Returns:
            Tensor of un-activated logits with last-axis width ``output_dim``.
        """
        decoder_layers = []

        in_dim = hidden_dims[-1]
        for hidden_dim in reversed(hidden_dims[:-1]):
            h = DenseLayer(in_dim, hidden_dim)
            decoder_layers.append(h)
            in_dim = hidden_dim

        decoder_layers.append(DenseLayer(in_dim, output_dim, f=lambda x: x))

        current_value = Z
        for decoder_layer in decoder_layers:
            current_value = decoder_layer.forward(current_value)

        return current_value

    def calculateKL(self, mean, std):
        """Batch mean of ``KL( N(mean, std^2) || N(0, I) )``.

        Uses the closed form for diagonal Gaussians, per example:
        ``0.5 * sum(std^2 + mean^2 - 1 - log(std^2))``, which is always >= 0.

        Args:
            mean: Latent means, one row per example.
            std: Latent standard deviations (strictly positive), same shape.

        Returns:
            Scalar tensor holding the KL divergence averaged over the batch.
        """
        # log(std^2) == 2 * log(std)
        per_dim = tf.square(std) + tf.square(mean) - 1.0 - 2.0 * tf.math.log(std)
        kls = 0.5 * tf.math.reduce_sum(per_dim, axis=1)
        return tf.math.reduce_mean(kls)

    def create_accuracy_node(self, y_true, y_pred):
        """Return a scalar tensor: the fraction of rows whose argmax matches."""
        correct_mask_node = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
        accuracy_node = tf.reduce_mean(tf.cast(correct_mask_node, tf.float32))
        return accuracy_node

    def __init__(self, x_dim, y_dim, hidden_dims, log_folder, kl_weight=0.0):
        """Build the graph, open a session and initialize all variables.

        Note that this resets the default TF graph, so any previously built
        graph in the same process is discarded.

        Args:
            x_dim: Number of input feature columns.
            y_dim: Number of one-hot label columns.
            hidden_dims: Encoder layer widths; the last entry is the latent
                size. The decoder mirrors them.
            log_folder: Directory that receives the TensorBoard event files.
            kl_weight: Multiplier of the KL term subtracted from the
                log-likelihood. The default 0.0 trains on the log-likelihood
                alone; 1.0 gives the standard ELBO.
        """
        self.x_dim = x_dim
        self.xy_dim = x_dim + y_dim
        tf.reset_default_graph()
        self.X = tf.placeholder(tf.float32, shape=(None, x_dim), name="x")
        # NOTE: this placeholder carries the labels only, despite its graph name.
        self.y = tf.placeholder(tf.float32, shape=(None, y_dim), name="xy")

        # Encoder -> one latent sample per example.
        means, stdevs = self.encode(self.X, x_dim, hidden_dims)
        n = Normal(
            loc=means,
            scale=stdevs,
        )
        Z = n.sample()

        # Decoder -> one logit per label column.
        self.logits = self.decode(Z, y_dim, hidden_dims)

        # Each one-hot entry is modelled as an independent Bernoulli
        # (sigmoid per class) rather than one categorical over all classes.
        self.Y_hat_distribution = Bernoulli(logits=self.logits)
        self.posterior_predictive_probs = tf.nn.sigmoid(self.logits)

        with tf.name_scope('COST'):
            expected_log_likelihood = tf.reduce_sum(
                self.Y_hat_distribution.log_prob(self.y),
                axis=1
            )

            kl = self.calculateKL(means, stdevs)
            tf.summary.scalar("KL", kl)

            exp_loglik = tf.reduce_mean(expected_log_likelihood)
            tf.summary.scalar("loglik", exp_loglik)

            # With a zero weight the KL node is left out of the objective
            # entirely, so the optimized graph matches plain max-likelihood.
            if kl_weight:
                elbo = exp_loglik - kl_weight * kl
            else:
                elbo = exp_loglik
            tf.summary.scalar("ELBO", elbo)

        self.accuracy_node = self.create_accuracy_node(self.y, self.posterior_predictive_probs)
        tf.summary.scalar("accuracy", self.accuracy_node)

        self.train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(-elbo)

        # The initializer is created after the optimizer so that the Adam
        # slot variables are initialized as well.
        self.init_op = tf.global_variables_initializer()
        self.sess = tf.InteractiveSession()
        self.sess.run(self.init_op)

        self.merged_summary = tf.summary.merge_all()

        self.writer_train = tf.summary.FileWriter(log_folder)
        self.writer_train.add_graph(self.sess.graph)

    def _make_feed(self, Xy, with_labels=True):
        """Split a combined ``[features | labels]`` matrix into a feed dict."""
        feed = {self.X: Xy[:, 0:self.x_dim]}
        if with_labels:
            feed[self.y] = Xy[:, self.x_dim:self.xy_dim]
        return feed

    def _check_x_dim(self, x_dim):
        """Reject an explicit ``x_dim`` that disagrees with the built graph."""
        if x_dim is not None and x_dim != self.x_dim:
            raise ValueError(
                "x_dim={} does not match the model input width {}".format(x_dim, self.x_dim))

    def fit(self, X_train, X_test, epochs=30, batch_sz=64):
        """Train with mini-batches and print train/test accuracy after each epoch.

        Args:
            X_train: 2-D NumPy array ``[features | one-hot labels]``. It is
                only read; rows are visited in a fresh random order each
                epoch without reordering the caller's array.
            X_test: Array with the same column layout, used for evaluation.
            epochs: Number of passes over ``X_train``.
            batch_sz: Rows per mini-batch; a trailing partial batch is skipped.
        """
        n_examples = len(X_train)
        n_batches = n_examples // batch_sz
        print("n_batches:", n_batches)

        global_step = 1
        for epoch in range(epochs):
            print("epoch: %d" % epoch)
            # Shuffle indices rather than the data itself, so the caller's
            # array is not mutated and no full copy is made.
            order = np.random.permutation(n_examples)
            for j in range(n_batches):
                batch = X_train[order[j * batch_sz:(j + 1) * batch_sz]]
                feed = self._make_feed(batch)
                self.sess.run(self.train_op, feed_dict=feed)
                if j % 100 == 0:
                    s = self.sess.run(self.merged_summary, feed_dict=feed)
                    self.writer_train.add_summary(s, global_step)
                global_step += 1
            train_accuracy = self.calculate_accuracy(X_train, self.x_dim)
            print("Train accuracy {}".format(train_accuracy))
            test_accuracy = self.calculate_accuracy(X_test, self.x_dim)
            print("Test accuracy {}".format(test_accuracy))

        # Make sure the summaries written so far reach the event file.
        self.writer_train.flush()

    def calculate_accuracy(self, Xy, x_dim=None):
        """Return the argmax accuracy on a ``[features | labels]`` matrix.

        Args:
            Xy: 2-D array with feature columns followed by one-hot labels.
            x_dim: Optional, kept for backward compatibility. Columns are
                always split with the dims given at construction.

        Raises:
            ValueError: If ``x_dim`` is given and differs from the model's.
        """
        self._check_x_dim(x_dim)
        accuracy = self.sess.run(self.accuracy_node, feed_dict=self._make_feed(Xy))
        return accuracy

    def predict(self, Xy, x_dim=None, xy_dim=None):
        """Return per-class probabilities for the feature columns of ``Xy``.

        Args:
            Xy: 2-D array whose first ``x_dim`` columns are the features; any
                further columns (e.g. labels) are ignored.
            x_dim: Optional, kept for backward compatibility; must match the
                model if given.
            xy_dim: Accepted for backward compatibility and not used.

        Raises:
            ValueError: If ``x_dim`` is given and differs from the model's.
        """
        self._check_x_dim(x_dim)
        y_pred = self.sess.run(
            self.posterior_predictive_probs,
            feed_dict=self._make_feed(Xy, with_labels=False))
        return y_pred

    def predict_probs(self, X):
        """Return per-class probabilities for a features-only matrix ``X``."""
        return self.sess.run(self.posterior_predictive_probs, feed_dict={self.X: X})

    def close(self):
        """Close the event writer and the TF session held by this model."""
        self.writer_train.close()
        self.sess.close()
    
    
    

In [26]:
# Build the classifier (five encoder widths, the last being the latent size)
# and train it for 10 epochs, logging TensorBoard summaries to a fresh folder.
log_folder = get_log_folder()
print('Starting autoencoder. Log folder={}'.format(log_folder))
model = VClassifier(
    x_dim=X.shape[1],
    y_dim=y_dim,
    hidden_dims=[1024, 512, 256, 128, 64],
    log_folder=log_folder,
)
model.fit(train_data, test_data, epochs=10)

Starting autoencoder. Log folder=/tmp/class_vae/deep/10_02_2019/20190210182124




n_batches: 984
epoch: 0
Train accuracy 0.9585396647453308
Test accuracy 0.9438571333885193
epoch: 1
Train accuracy 0.9715079069137573
Test accuracy 0.959857165813446
epoch: 2
Train accuracy 0.9731746315956116
Test accuracy 0.9584285616874695
epoch: 3
Train accuracy 0.9770317673683167
Test accuracy 0.9582856893539429
epoch: 4
Train accuracy 0.9847142696380615
Test accuracy 0.9677143096923828
epoch: 5
Train accuracy 0.9828730225563049
Test accuracy 0.9645714163780212
epoch: 6
Train accuracy 0.9886031746864319
Test accuracy 0.9712857007980347
epoch: 7
Train accuracy 0.9893015623092651
Test accuracy 0.9729999899864197
epoch: 8
Train accuracy 0.9895238280296326
Test accuracy 0.9728571176528931
epoch: 9
Train accuracy 0.990746021270752
Test accuracy 0.972000002861023


In [27]:
# Accuracy on the training split. The latent code is re-sampled on every
# evaluation, so the value can differ slightly between calls.
model.calculate_accuracy(train_data, X.shape[1])

0.99073017

In [28]:
# Accuracy on the held-out split (same sampling caveat as any evaluation
# of this model: the latent code is re-drawn on each call).
model.calculate_accuracy(test_data, X.shape[1])


0.97185713