In [1]:
import numpy as np
import tensorflow as tf

In [2]:
# Tab-separated dump files consumed through buffered_gen. VAE.train reads
# these two module-level names as its training and evaluation inputs.
train = '/data/dumps/bitfpt.train.txt'
test = '/data/dumps/bitfpt.test.txt'

In [3]:
def batch_gen(data, batch_n):
    """Yield randomly ordered mini-batches of rows from a 2-D array.

    The row order is shuffled once per call using NumPy's global random
    state, then consecutive slices of ``batch_n`` shuffled rows are yielded.
    Rows that do not fill a complete final batch are dropped, so every
    yielded batch has exactly ``batch_n`` rows.

    Args:
        data: 2-D NumPy array; rows are samples.
        batch_n: number of rows per batch (must be non-zero).

    Yields:
        Arrays of shape ``(batch_n, data.shape[1])``.
    """
    n_rows = data.shape[0]
    # permutation() avoids shuffling a `range` object in place, which only
    # works where `range` returns a list (Python 2).
    order = np.random.permutation(n_rows)
    # Floor division keeps the batch count an integer on Python 2 and 3.
    for i in range(n_rows // batch_n):
        rows = order[i * batch_n:(i + 1) * batch_n]
        yield data[rows, :]

In [4]:
def buffered_gen(f, batch_n=1024, buffer_size=2000):
    """Stream shuffled mini-batches from a tab-separated text file.

    Lines are accumulated until ``buffer_size * batch_n`` rows are buffered;
    that buffer is then shuffled and emitted through ``batch_gen`` before
    reading continues. Whatever remains at end of file is emitted the same
    way. Shuffling is therefore local to each buffer, not global.

    Each line's second tab-separated field is converted character by
    character with ``float`` (presumably a string of '0'/'1' bits -- TODO
    confirm against the dump format).

    Args:
        f: path of the input file.
        batch_n: rows per yielded batch.
        buffer_size: number of batches worth of rows to buffer before
            shuffling.

    Yields:
        2-D float arrays with ``batch_n`` rows.
    """
    chunk_rows = buffer_size * batch_n
    data = []
    # The context manager closes the file when the generator is exhausted
    # or closed early by the consumer.
    with open(f) as inp:
        for i, line in enumerate(inp):
            bits = line.strip().split('\t')[1]
            # An explicit list keeps this a numeric vector even where map()
            # returns a lazy iterator.
            data.append(np.array([float(ch) for ch in bits]))
            if (i + 1) % chunk_rows == 0:
                for batch in batch_gen(np.vstack(data), batch_n):
                    yield batch
                data = []

    # NOTE(review): the final buffered row is discarded, as in the original
    # code; confirm whether this guards against a truncated last line or is
    # unintended data loss.
    tail = data[:-1]
    # np.vstack raises ValueError on an empty sequence, which happens when
    # the file has 0 or 1 lines or an exact multiple of chunk_rows lines.
    if tail:
        for batch in batch_gen(np.vstack(tail), batch_n):
            yield batch

In [5]:
def he_initializer(size):
    """Return a zero-mean normal initializer with std ``sqrt(1 / size)``.

    Args:
        size: value the variance is scaled by; callers in this file pass the
            layer's input width.

    Returns:
        A ``tf.random_normal_initializer`` producing float32 values, unseeded.

    NOTE(review): despite the name, the usual He scheme uses
    ``sqrt(2 / fan_in)``; confirm whether the factor of 1 is intentional.
    """
    spread = np.sqrt(1. / size)
    return tf.random_normal_initializer(
        mean=0.0,
        stddev=spread,
        seed=None,
        dtype=tf.float32,
    )

In [6]:
def linear_layer(tensor, input_size, out_size, init_fn=he_initializer):
    """Apply an affine map ``tensor @ W + b`` with freshly created variables.

    Variables named 'W' and 'b' are created in the current variable scope,
    so each call must happen inside its own scope.

    Args:
        tensor: input tensor whose last dimension is ``input_size``.
        input_size: number of input features.
        out_size: number of output features.
        init_fn: callable taking the input size and returning the
            initializer for 'W'.

    Returns:
        Tensor with ``out_size`` features per row.
    """
    weight_shape = [input_size, out_size]
    weights = tf.get_variable('W', shape=weight_shape, initializer=init_fn(input_size))
    # Bias starts at a constant 0.1.
    bias = tf.get_variable('b', shape=[out_size], initializer=tf.constant_initializer(0.1))
    projected = tf.matmul(tensor, weights)
    return tf.add(projected, bias)

def bn_layer(tensor, size, epsilon=0.0001):
    """Batch-normalise ``tensor`` with a learned scale and shift.

    Mean and variance are computed over axis 0 of the tensor actually fed
    on every run; no running averages are kept, so the same statistics path
    is used regardless of whether the graph is being trained or queried.

    Args:
        tensor: input tensor, normalised per feature.
        size: number of features (length of the 'scale' and 'beta'
            variables created in the current variable scope).
        epsilon: small constant added to the variance.

    Returns:
        The normalised, scaled and shifted tensor.
    """
    mean, variance = tf.nn.moments(tensor, [0])
    gamma = tf.get_variable('scale', shape=[size], initializer=tf.constant_initializer(1.))
    offset = tf.get_variable('beta', shape=[size], initializer=tf.constant_initializer(0.))
    return tf.nn.batch_normalization(
        tensor,
        mean,
        variance,
        offset,
        gamma,
        epsilon,
    )

In [7]:
class VAE:
    """Variational autoencoder built as a TensorFlow 1.x graph.

    The encoder passes ``input_x`` through a stack of linear layers (each
    hidden layer applies the activation, then batch normalisation, then the
    next linear map) and produces the mean and log-variance of a diagonal
    Gaussian over the latent space. A latent sample ``z`` is decoded by the
    mirrored stack into logits over the input dimensions.

    Constructing an instance resets the default graph, builds the network
    and loss, opens a session and initialises all variables.
    """

    def __init__(self,
                 gpu_config = None,
                 batch_size=1024, 
                 input_space=166,
                 middle_layers=None,
                 latent_space=10,
                 learning_rate=0.001,
                 activation_fn=tf.nn.relu,
                 initializer=he_initializer):
        """Build the graph and start a session.

        Args:
            gpu_config: ``tf.ConfigProto`` for the session; when None a
                config limiting the process to 40% of GPU memory is used.
            batch_size: batch size used for training data and for the
                default number of samples drawn by ``generate``.
            input_space: width of the input vectors.
            middle_layers: hidden layer widths of the encoder (the decoder
                uses them reversed); defaults to ``[256, 256]``.
            latent_space: dimensionality of the latent code.
            learning_rate: Adam learning rate.
            activation_fn: activation applied before each hidden/latent
                linear layer.
            initializer: callable ``size -> initializer`` used for every
                weight matrix.
        """
        self.batch_size = batch_size
        self.input_space = input_space
        if middle_layers is None:
            self.middle_layers = [256, 256]
        else:
            self.middle_layers = middle_layers
        self.latent_space = latent_space
        self.learning_rate = learning_rate
        self.activation_fn = activation_fn
        self.initializer = initializer

        # Side effect: discards any graph previously built in this process.
        tf.reset_default_graph()
        self.input_x = tf.placeholder(tf.float32, [None, input_space])

        self._create_network()
        self._loss()

        if gpu_config is None:
            gpu_config = tf.ConfigProto()
            gpu_config.gpu_options.per_process_gpu_memory_fraction = 0.4

        self.sess = tf.Session(config=gpu_config)
        self.init_net()

    def _create_network(self):
        """Wire encoder, reparameterised sampling step and decoder."""
        with tf.variable_scope("encoder"):
            self.encoder_layers, self.z_mean, self.z_log_sigma_sq = self._encoder()

        with tf.variable_scope("sample"):
            # Noise takes the runtime shape of z_mean so that the sampling
            # path works for any fed batch size, matching the None batch
            # dimension of input_x.
            self.eps = tf.random_normal(tf.shape(self.z_mean), 0, 1, dtype=tf.float32)
            # z = mean + sigma * eps, with sigma = exp(log_sigma_sq / 2).
            self.z = tf.add(self.z_mean, tf.multiply(tf.exp(tf.divide(self.z_log_sigma_sq, 2.0)), self.eps), name="z")

        with tf.variable_scope("decoder"):
            self.decoder_layers = self._decoder()
            self.decoded = self.decoder_layers[-1]
            # Built once here so generate() does not add a new op to the
            # graph on every call.
            self.reconstruction = tf.nn.sigmoid(self.decoded)

    def _encoder(self):
        """Build the encoder.

        Returns:
            Tuple ``(encoder_layers, z_mean, z_log_sigma_sq)`` where
            ``encoder_layers`` lists the pre-activation output of each
            hidden linear layer.
        """
        with tf.variable_scope("layer-0"):
            encoder_layers = [linear_layer(self.input_x, self.input_space, self.middle_layers[0],
                                           init_fn=self.initializer)]

        for i in range(len(self.middle_layers) - 1):
            with tf.variable_scope("layer-%i" % (i+1)):
                activated = self.activation_fn(encoder_layers[-1])
                normed = bn_layer(activated, self.middle_layers[i])
                next_layer = linear_layer(normed, self.middle_layers[i], self.middle_layers[i+1],
                                          init_fn=self.initializer)
            encoder_layers.append(next_layer)

        with tf.variable_scope("latent"):
            # Uses the configured activation, consistent with the hidden
            # layers (identical to ReLU under the default arguments).
            activated = self.activation_fn(encoder_layers[-1])
            with tf.variable_scope("mean"):
                z_mean = linear_layer(activated, self.middle_layers[-1], self.latent_space,
                                      init_fn=self.initializer)
            with tf.variable_scope("log_sigma_sq"):
                z_log_sigma_sq = linear_layer(activated, self.middle_layers[-1], self.latent_space,
                                              init_fn=self.initializer)

        return encoder_layers, z_mean, z_log_sigma_sq

    def _decoder(self):
        """Build the decoder from ``self.z``.

        Returns:
            List of pre-activation layer outputs; the last entry holds the
            logits over the input dimensions.
        """
        # Hidden widths mirrored, followed by the output width.
        sizes = self.middle_layers[::-1] + [self.input_space]
        with tf.variable_scope("layer-0"):
            decoder_layers = [linear_layer(self.z, self.latent_space, sizes[0],
                                           init_fn=self.initializer)]

        for i in range(len(sizes) - 1):
            with tf.variable_scope("layer-%i" % (i+1)):
                activated = self.activation_fn(decoder_layers[-1])
                normed = bn_layer(activated, sizes[i])
                next_layer = linear_layer(normed, sizes[i], sizes[i+1],
                                          init_fn=self.initializer)
            decoder_layers.append(next_layer)

        return decoder_layers

    def _loss(self):
        """Define reconstruction loss, latent loss, total cost and train op."""
        elementwise_logloss = tf.nn.sigmoid_cross_entropy_with_logits(logits=self.decoded, labels=self.input_x)
        # Sum over features, giving one reconstruction loss per sample.
        batch_logloss = tf.reduce_sum(elementwise_logloss, 1)
        self.ae_loss = tf.reduce_mean(batch_logloss)

        # Closed-form KL divergence between N(z_mean, exp(z_log_sigma_sq))
        # and the standard normal prior, per sample.
        self.latent_loss = -0.5 * tf.reduce_sum(1 + self.z_log_sigma_sq - tf.square(self.z_mean) - \
                                                tf.exp(self.z_log_sigma_sq), 1)
        self.cost = tf.reduce_mean(tf.add(batch_logloss, self.latent_loss))
        self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = self.optimizer.minimize(self.cost)

    def generate(self, z_mu=None):
        """Generate data by decoding points of the latent space.

        Args:
            z_mu: array of latent points, one per row. When None,
                ``batch_size`` points are drawn from a standard normal.

        Returns:
            Array of sigmoid-activated decoder outputs, one row per latent
            point.
        """
        if z_mu is None:
            z_mu = np.random.normal(size=(self.batch_size, self.latent_space))
        # Note: This maps to mean of distribution, we could alternatively
        # sample from Gaussian distribution
        return self.sess.run(self.reconstruction, feed_dict={self.z: z_mu})

    def init_net(self):
        """Run the global variable initializer in the instance's session."""
        init = tf.global_variables_initializer()
        self.sess.run(init)

    def train(self, log, epochs=100, train_path=None, test_path=None):
        """Train the model, checkpointing and logging after every epoch.

        After each epoch a checkpoint named ``./fpt.vae.<e>e.model.ckpt``
        (``e`` is the zero-based epoch index) is written and the
        reconstruction loss on one batch of the evaluation file is logged.

        Args:
            log: writable text file object receiving progress lines.
            epochs: number of passes over the training file.
            train_path: training file; defaults to the module-level
                ``train`` path.
            test_path: evaluation file; defaults to the module-level
                ``test`` path.

        Returns:
            List with the evaluation ``ae_loss`` of each epoch.
        """
        if train_path is None:
            train_path = train
        if test_path is None:
            test_path = test

        sess = self.sess
        saver = tf.train.Saver()
        hist = []

        for e in range(epochs):
            for batch_x in buffered_gen(train_path, batch_n=self.batch_size):
                sess.run(self.train_op, feed_dict={self.input_x: batch_x})

            log.write("epoch #%d\n" % (e+1))
            saver.save(sess, './fpt.vae.%de.model.ckpt' % e)

            test_gen = buffered_gen(test_path, batch_n=self.batch_size)
            try:
                test_x = next(test_gen)
            finally:
                # Only one batch is used; close the generator so it can
                # release its input file promptly.
                test_gen.close()
            loss = sess.run(self.ae_loss, feed_dict={self.input_x: test_x})
            log.write("ae_loss: %f\n" % loss)
            log.flush()
            hist.append(loss)
        return hist

    def load(self, model):
        """Restore all variables from the checkpoint at path ``model``."""
        saver = tf.train.Saver()
        saver.restore(self.sess, model)

In [8]:
# Build the graph and session with the default architecture; the batch size
# is spelled out explicitly and equals the constructor default.
vae = VAE(batch_size=1024)

In [None]:
# Run training with progress lines written to ./fpt.vae.log; hist1 receives
# the list of per-epoch evaluation losses returned by train().
with open('./fpt.vae.log', 'w') as log:
    hist1 = vae.train(log)

In [9]:
# Restore a saved checkpoint. train() names checkpoints by the zero-based
# epoch index, so this is the state after the 21st epoch.
vae.load('./fpt.vae.20e.model.ckpt')

In [10]:
# Open the NumPy .npz archive (presumably fingerprints for the Delaney data
# set, judging by the file name -- confirm).
npzfile = np.load('./delaney.fpts.npz')

In [11]:
# Pull out the array stored under the 'data' key of the archive.
data = npzfile['data']

In [12]:
# Inspect the array dimensions (recorded output: 1144 rows, 168 columns).
data.shape

(1144, 168)

In [13]:
# Encode every row. The first two columns are skipped, leaving 166 columns
# to match the model's input width; only the encoder outputs (latent mean
# and log-variance) are fetched.
encoded_m, encoded_s = vae.sess.run([vae.z_mean, vae.z_log_sigma_sq], feed_dict={vae.input_x: data[:, 2:]})

In [16]:
# Concatenate means and log-variances column-wise: each row holds the latent
# means followed by the latent log-variances.
encoded = np.hstack([encoded_m, encoded_s])

In [17]:
# Persist the encodings. The array is wrapped in a list, so the saved array
# has an extra leading axis of length 1 -- NOTE(review): confirm downstream
# readers expect that. np.save appends '.npy' to the given file name.
np.save('./vae.encoded.fpt', [encoded])