In [1]:
import numpy as np
import tensorflow as tf

In [2]:
# Input files, read line by line by buffered_gen() and load_test().
# Each line is tab-separated; only the second field is parsed (one float per character).
train = '/data/dumps/shuf.train.txt'
test = '/data/dumps/bitfpt.test.txt'

In [3]:
def batch_gen(data, batch_n):
    """Yield randomly ordered, full-size mini-batches of rows from ``data``.

    The row order is drawn from NumPy's global random state. Only complete
    batches are produced: the trailing ``data.shape[0] % batch_n`` rows of
    the shuffled order are skipped.

    Args:
        data: array with at least two dimensions; rows are selected with
            ``data[rows, :]``.
        batch_n: number of rows per batch.

    Yields:
        Arrays holding ``batch_n`` rows of ``data`` each.
    """
    n_rows = data.shape[0]
    # permutation() returns an index array, which works on Python 2 and 3
    # (shuffling a Python 3 ``range`` object in place is not possible).
    order = np.random.permutation(n_rows)
    # Floor division keeps the batch count an integer on Python 3 as well.
    for i in range(n_rows // batch_n):
        rows = order[i * batch_n:(i + 1) * batch_n]
        yield data[rows, :]

In [4]:
def buffered_gen(f, batch_n=1024, buffer_size=2000):
    """Stream shuffled mini-batches from a text file without loading it whole.

    Lines are accumulated until ``buffer_size * batch_n`` rows are buffered;
    that buffer is then shuffled and split by ``batch_gen`` and the buffer is
    reset. Whatever is left when the file ends is handled the same way.

    Args:
        f: path of a tab-separated text file. Only the second field of each
            line is used, and every character of that field is converted
            with ``float``.
        batch_n: rows per yielded batch.
        buffer_size: number of batches' worth of rows to hold in memory
            before shuffling.

    Yields:
        Arrays of ``batch_n`` rows, as produced by ``batch_gen``.
    """
    rows_per_buffer = buffer_size * batch_n
    data = []
    # The context manager closes the file once the generator is exhausted
    # or finalized; the handle used to be left open.
    with open(f) as inp:
        for i, line in enumerate(inp):
            bits = line.strip().split('\t')[1]
            # A list comprehension (not map) so the result is a numeric
            # vector on Python 3 too, where map() returns an iterator.
            data.append(np.array([float(ch) for ch in bits]))
            if (i + 1) % rows_per_buffer == 0:
                for batch in batch_gen(np.vstack(data), batch_n):
                    yield batch
                data = []

    # NOTE(review): the last buffered row is dropped, exactly as before;
    # presumably a guard against a truncated final line - confirm.
    remainder = data[:-1]
    # np.vstack raises on an empty list, so skip the tail when nothing is left.
    if remainder:
        for batch in batch_gen(np.vstack(remainder), batch_n):
            yield batch

In [5]:
def load_test():
    """Load the whole test file into one 2-D float array.

    Reads the path held in the module-level ``test`` variable. For every
    line, the second tab-separated field is taken and each of its characters
    is converted with ``float``; the per-line vectors are stacked row-wise.

    Returns:
        Array with one row per line of the file.
    """
    with open(test) as inp:
        # A list comprehension (not map) so each row is a numeric vector on
        # Python 3 too, where np.array(map(...)) wraps the iterator object.
        data = [np.array([float(ch) for ch in line.strip().split('\t')[1]])
                for line in inp]
    return np.vstack(data)

In [6]:
def he_initializer(size):
    """Build a zero-mean normal initializer scaled by the layer fan-in.

    The standard deviation is ``sqrt(1 / size)``.
    NOTE(review): He initialization is usually stated as ``sqrt(2 / size)``;
    the factor here is 1 - confirm this is intended.

    Args:
        size: fan-in used to scale the standard deviation.

    Returns:
        A ``tf.random_normal_initializer`` producing float32 values.
    """
    stddev = np.sqrt(1. / size)
    return tf.random_normal_initializer(
        mean=0.0,
        stddev=stddev,
        seed=None,
        dtype=tf.float32,
    )

In [7]:
def linear_layer(tensor, input_size, out_size, init_fn=he_initializer):
    """Apply an affine map ``tensor @ W + b`` using variables from the current scope.

    Variables named ``'W'`` and ``'b'`` are fetched with ``tf.get_variable``,
    so the caller's variable scope decides whether they are created or reused.

    Args:
        tensor: input tensor multiplied on the left of ``W``.
        input_size: number of rows of ``W``; also passed to ``init_fn``.
        out_size: number of columns of ``W`` and length of ``b``.
        init_fn: callable taking the fan-in and returning the initializer for ``W``.

    Returns:
        The tensor ``tf.add(tf.matmul(tensor, W), b)``.
    """
    weights = tf.get_variable(
        'W',
        shape=[input_size, out_size],
        initializer=init_fn(input_size),
    )
    # Biases start at a small positive constant.
    bias = tf.get_variable(
        'b',
        shape=[out_size],
        initializer=tf.constant_initializer(0.1),
    )
    projected = tf.matmul(tensor, weights)
    return tf.add(projected, bias)

In [8]:
def bn_layer(tensor, size, epsilon=0.0001):
    """Batch-normalize ``tensor`` over axis 0 with a learned scale and offset.

    Mean and variance are always computed from the tensor that is passed in;
    no moving averages are kept, so evaluation uses the statistics of
    whatever batch is fed.

    Args:
        tensor: input tensor; moments are taken over axis 0.
        size: length of the ``'scale'`` and ``'beta'`` variables.
        epsilon: variance epsilon passed to ``tf.nn.batch_normalization``.

    Returns:
        The normalized tensor.
    """
    mean, variance = tf.nn.moments(tensor, [0])
    # Creation order (scale, then beta) is kept so variable ordering is unchanged.
    gamma = tf.get_variable(
        'scale', shape=[size], initializer=tf.constant_initializer(1.))
    offset = tf.get_variable(
        'beta', shape=[size], initializer=tf.constant_initializer(0.))
    return tf.nn.batch_normalization(
        tensor, mean, variance, offset, gamma, epsilon)

In [9]:
def sample_prior(loc=0., scale=1., size=(64, 10)):
    """Draw samples from a normal distribution via NumPy's global random state.

    Args:
        loc: mean of the distribution.
        scale: standard deviation of the distribution.
        size: shape of the returned array.

    Returns:
        Array of the requested shape filled with normal samples.
    """
    return np.random.normal(loc, scale, size)

In [10]:
class AAE(object):
    """Adversarial autoencoder built with the TensorFlow 1.x graph API.

    Three networks are created in the default graph (which is reset first):

    * encoder:        input_space -> middle_layers -> latent_space
    * decoder:        latent_space -> reversed middle_layers -> input_space (logits)
    * discriminator:  latent_space -> 64 -> 64 -> 8 -> 1 (logit)

    The decoder is instantiated twice with shared weights: once on the
    encoder output (reconstruction) and once on ``z_tensor`` (generation).
    The discriminator is likewise applied to both the encoder output
    ("negative" samples) and ``z_tensor`` ("positive" prior samples).
    """

    def __init__(self,
                 gpu_config=None,
                 batch_size=1024,
                 input_space=166,
                 latent_space=20,
                 middle_layers=None,
                 activation_fn=tf.nn.tanh,
                 learning_rate=0.001,
                 initializer=he_initializer):
        """Build the graph, create the session and initialize all variables.

        Args:
            gpu_config: ``tf.ConfigProto`` for the session. When None, a
                config limiting the per-process GPU memory fraction to 0.4
                is used.
            batch_size: mini-batch size used by ``train``.
            input_space: width of the input vectors.
            latent_space: width of the latent code.
            middle_layers: hidden layer widths of the encoder (the decoder
                uses them reversed). Defaults to ``[256, 256]``.
            activation_fn: activation used in the encoder and decoder.
            learning_rate: learning rate of the three Adam optimizers.
            initializer: callable taking a fan-in and returning the weight
                initializer handed to every ``linear_layer``.
        """
        self.batch_size = batch_size
        self.input_space = input_space
        self.latent_space = latent_space
        self.middle_layers = [256, 256] if middle_layers is None else middle_layers
        self.activation_fn = activation_fn
        self.learning_rate = learning_rate
        self.initializer = initializer

        # Start from an empty default graph so the variable scopes below
        # do not clash with a previously built model.
        tf.reset_default_graph()

        self.input_x = tf.placeholder(tf.float32, [None, input_space])
        self.z_tensor = tf.placeholder(tf.float32, [None, latent_space])

        # Encoder net: input_space -> middle_layers -> latent_space
        with tf.variable_scope("encoder"):
            self.encoder_layers = self.encoder()
            self.encoded = self.encoder_layers[-1]

        # Decoder net: latent_space -> reversed middle_layers -> input_space.
        # The second instantiation reuses the same weights on z_tensor.
        with tf.variable_scope("decoder"):
            self.decoder_layers = self.decoder(self.encoded)
            self.decoded = self.decoder_layers[-1]
            tf.get_variable_scope().reuse_variables()
            self.generator_layers = self.decoder(self.z_tensor)
            self.generated = tf.nn.sigmoid(self.generator_layers[-1])

        # Discriminator net: latent_space -> 64 -> 64 -> 8 -> 1
        sizes = [64, 64, 8, 1]
        with tf.variable_scope("discriminator"):
            self.disc_layers_neg = self.discriminator(self.encoded, sizes)
            self.disc_neg = self.disc_layers_neg[-1]
            tf.get_variable_scope().reuse_variables()
            self.disc_layers_pos = self.discriminator(self.z_tensor, sizes)
            self.disc_pos = self.disc_layers_pos[-1]

        # Discriminator: prior samples should score 1, encoded samples 0.
        self.pos_loss = self._logit_loss(self.disc_pos, True)
        self.neg_loss = self._logit_loss(self.disc_neg, False)
        self.disc_loss = tf.reduce_mean(tf.add(self.pos_loss, self.neg_loss))

        # Encoder (adversarial part): encoded samples should score 1.
        self.enc_loss = tf.reduce_mean(self._logit_loss(self.disc_neg, True))

        # Reconstruction: cross-entropy summed over features, averaged over the batch.
        batch_logloss = tf.reduce_sum(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=self.decoded, labels=self.input_x), 1)
        self.dec_loss = tf.reduce_mean(batch_logloss)

        disc_ws = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='discriminator')
        enc_ws = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='encoder')
        ae_ws = enc_ws + tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='decoder')

        # Each objective only updates the variables of its own sub-network(s).
        self.train_discriminator = tf.train.AdamOptimizer(self.learning_rate).minimize(
            self.disc_loss, var_list=disc_ws)
        self.train_encoder = tf.train.AdamOptimizer(self.learning_rate).minimize(
            self.enc_loss, var_list=enc_ws)
        self.train_autoencoder = tf.train.AdamOptimizer(self.learning_rate).minimize(
            self.dec_loss, var_list=ae_ws)

        if gpu_config is None:
            gpu_config = tf.ConfigProto()
            gpu_config.gpu_options.per_process_gpu_memory_fraction = 0.4

        self.sess = tf.Session(config=gpu_config)
        self.init_net()

    @staticmethod
    def _logit_loss(logits, target_is_one):
        """Element-wise sigmoid cross-entropy of ``logits`` against a constant 0/1 target.

        Written in the overflow-safe form
        ``relu(x) - x * target + log(1 + exp(-|x|))``.
        """
        softplus_tail = tf.log(1.0 + tf.exp(-tf.abs(logits)))
        if target_is_one:
            return tf.nn.relu(logits) - logits + softplus_tail
        return tf.nn.relu(logits) + softplus_tail

    def _dense_stack(self, tensor, input_size, sizes, activation_fn):
        """Build linear -> (activation -> batch norm -> linear)* layers.

        Variables live in scopes ``layer-0`` .. ``layer-N`` below the
        caller's current variable scope. The last layer is left linear.

        Returns:
            List with the output of every linear layer, in order.
        """
        with tf.variable_scope("layer-0"):
            layers = [linear_layer(tensor, input_size, sizes[0], init_fn=self.initializer)]
        for i in range(len(sizes) - 1):
            with tf.variable_scope("layer-%i" % (i + 1)):
                activated = activation_fn(layers[-1])
                normed = bn_layer(activated, sizes[i])
                layers.append(
                    linear_layer(normed, sizes[i], sizes[i + 1], init_fn=self.initializer))
        return layers

    def encoder(self):
        """Build the encoder on ``self.input_x``; returns the list of linear-layer outputs."""
        sizes = self.middle_layers + [self.latent_space]
        return self._dense_stack(self.input_x, self.input_space, sizes, self.activation_fn)

    def decoder(self, tensor):
        """Build the decoder on ``tensor`` (a latent code); the last output holds logits."""
        sizes = self.middle_layers[::-1] + [self.input_space]
        return self._dense_stack(tensor, self.latent_space, sizes, self.activation_fn)

    def discriminator(self, tensor, sizes):
        """Build the discriminator on ``tensor`` with layer widths ``sizes``.

        The discriminator always uses tanh, independent of ``self.activation_fn``.
        """
        return self._dense_stack(tensor, self.latent_space, sizes, tf.nn.tanh)

    def init_net(self):
        """Run the global variable initializer in the model's session."""
        init = tf.global_variables_initializer()
        self.sess.run(init)

    def _report_losses(self, log, test_data):
        """Evaluate the three losses on ``test_data``, write one line to ``log``.

        The discriminator loss is halved in the log line because
        ``disc_loss`` is the mean of the sum of two cross-entropy terms.

        Returns:
            Tuple ``(discriminator_loss, encoder_loss, decoder_loss)`` with
            the raw (un-halved) values.
        """
        batch_z = sample_prior(scale=1.0, size=(len(test_data), self.latent_space))
        discriminator_loss, encoder_loss, decoder_loss = self.sess.run(
            [self.disc_loss, self.enc_loss, self.dec_loss],
            feed_dict={self.input_x: test_data, self.z_tensor: batch_z})
        log.write("disc: %f, encoder : %f, decoder : %f\n"
                  % (discriminator_loss / 2., encoder_loss, decoder_loss))
        log.flush()
        return discriminator_loss, encoder_loss, decoder_loss

    def train(self, log, epochs=100, log_every=10000):
        """Train the model, cycling discriminator / encoder / autoencoder updates.

        Batch ``i`` updates the discriminator when ``i % 3 == 0``, the
        encoder when ``i % 3 == 1`` and the autoencoder otherwise. A
        checkpoint ``./fpt.aae.<epoch index>e.model.ckpt`` is written after
        every epoch.

        Args:
            log: writable file-like object receiving progress lines.
            epochs: number of passes over the training file.
            log_every: test losses are logged on every batch whose index is
                a multiple of this value (including batch 0).

        Returns:
            List with the decoder loss on the test set after each epoch.
        """
        sess = self.sess
        saver = tf.train.Saver()
        hist = []
        test_data = load_test()

        for e in range(epochs):
            log.write("epoch #%d\n" % (e + 1))
            log.flush()
            # `train` here is the module-level training-file path, not this method.
            train_gen = buffered_gen(train, batch_n=self.batch_size)
            for i, batch_x in enumerate(train_gen):
                phase = i % 3
                if phase == 0:
                    batch_z = sample_prior(scale=1.0, size=(len(batch_x), self.latent_space))
                    sess.run(self.train_discriminator,
                             feed_dict={self.input_x: batch_x, self.z_tensor: batch_z})
                elif phase == 1:
                    sess.run(self.train_encoder, feed_dict={self.input_x: batch_x})
                else:
                    sess.run(self.train_autoencoder, feed_dict={self.input_x: batch_x})
                if i % log_every == 0:
                    self._report_losses(log, test_data)

            # End of epoch: checkpoint, then record the test reconstruction loss.
            saver.save(sess, './fpt.aae.%de.model.ckpt' % e)
            _, _, decoder_loss = self._report_losses(log, test_data)
            hist.append(decoder_loss)
        return hist

    def load(self, model):
        """Restore all saved variables from the checkpoint path ``model``."""
        saver = tf.train.Saver()
        saver.restore(self.sess, model)

In [11]:
# Build the graph and session with the default architecture.
aae = AAE(batch_size=1024)
# Training is started in the next cell, because train() needs an open log file.

In [None]:
# Train with progress lines written to ./fpt.aae.log; hist1 receives the
# per-epoch decoder losses on the test set returned by train().
with open('./fpt.aae.log', 'w') as log:
    hist1 = aae.train(log)

In [35]:
# Restore the checkpoint saved for epoch index 2 (train() numbers checkpoint files from 0).
aae.load('./fpt.aae.2e.model.ckpt')

In [26]:
ws = aae.sess.run(tf.trainable_variables())

In [27]:
ws

[array([[-0.02988241,  0.0789177 ,  0.06428494, ..., -0.05050565,
          0.02660783,  0.07714392],
        [ 0.13195825,  0.20851606,  0.11670304, ..., -0.09745704,
         -0.01301303,  0.14002393],
        [-0.09290713,  0.15929775,  0.23436712, ...,  0.13923636,
         -0.05457636,  0.10620905],
        ..., 
        [ 0.07905301,  0.4065133 , -0.02141998, ..., -0.15664491,
         -0.07319986, -0.07860199],
        [ 0.6967414 ,  0.17786947, -0.01133258, ..., -0.08539762,
         -0.14626184,  0.12514989],
        [ 0.03546661,  0.0097551 , -0.08404708, ...,  0.07114545,
         -0.03902735, -0.05109206]], dtype=float32),
 array([ 0.88371348,  1.03677714, -0.00928048, -0.22330049,  0.45081276,
         1.53937411,  0.26727837, -0.37089479,  0.53884244,  0.48833209,
        -0.09171111, -0.05464429, -3.50060058,  0.45245078,  0.3595593 ,
         0.76105917,  1.73395813,  0.15793318, -1.22789121, -2.28505898,
         0.16804178, -1.01070058,  0.74529505,  1.95301104,  0.00

In [28]:
test_data = load_test()

In [29]:
len(test_data)

71880

In [30]:
# Evaluate the three losses on the full test set against fresh prior samples.
# Note: `aae.disc_loss/2.` adds a new division op to the graph each time this cell runs.
batch_z = sample_prior(scale=1.0, size=(len(test_data), aae.latent_space))
losses = aae.sess.run([aae.disc_loss/2., aae.enc_loss, aae.dec_loss],
                  feed_dict={aae.input_x: test_data, aae.z_tensor: batch_z})

In [32]:
losses

[0.6931473, 0.69316441, 75.652428]

In [28]:
train_gen = buffered_gen(train, batch_n=aae.batch_size)

In [30]:
# Pull one batch; the next() builtin works on Python 2.6+ and Python 3,
# whereas the generator's .next() method exists only on Python 2.
batch_x = next(train_gen)

In [39]:
batch_z = sample_prior(scale=1.0, size=(aae.batch_size, aae.latent_space))

In [31]:
batch_x.shape

(1024, 166)

In [32]:
batch_x[0]

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,
        0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        1.,  0.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,
        1.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  0.,  0.,  0.,  0.,  1.,
        0.,  1.,  0.,  0.,  1.,  1.,  1.,  0.,  1.,  0.])

In [37]:
enc = aae.sess.run(aae.encoded, feed_dict={aae.input_x: test_data})

In [38]:
enc.shape

(71880, 20)

In [41]:
np.mean(enc, 0)

array([-0.06156453,  0.56594604, -0.20208864, -0.49380505, -0.62298375,
        1.70972967,  0.73637378,  0.62847859, -0.52843928,  1.57693613,
        0.30669901, -0.16800718, -0.10944746, -0.26041141,  0.12158084,
        1.17953598, -0.4970009 , -0.04014739, -0.17497876,  1.85869741], dtype=float32)

In [42]:
np.std(enc, 0)

array([ 1.71210229,  2.14550352,  1.79688513,  1.63827419,  1.67093217,
        1.66237223,  1.72700679,  2.06072617,  1.93848264,  1.95043862,
        2.21549845,  1.79797781,  2.44386959,  1.65755057,  1.63922477,
        1.97857976,  1.74846339,  2.11193728,  2.56701946,  1.94563019], dtype=float32)

In [36]:
dec = aae.sess.run(aae.decoded, feed_dict={aae.input_x: batch_x})

In [37]:
dec

array([[ 1.14352655, -0.10726269, -0.36635625, ..., -0.90651453,
         0.07476324,  1.34869421],
       [ 0.81175113, -0.24366948,  0.3815487 , ...,  1.0751555 ,
         0.79615617,  1.05575943],
       [-0.47874931,  1.0017159 ,  1.01733637, ...,  0.18819515,
        -0.33772081, -0.7150383 ],
       ..., 
       [-1.10746121,  0.25566137, -0.79130036, ...,  0.31022674,
        -0.38892552,  0.77254754],
       [-1.96231949,  0.84362745, -0.16631824, ..., -0.00241159,
         1.37921131,  1.3523339 ],
       [-0.75460851,  1.25448716,  1.31985617, ...,  1.49427366,
         0.24458474, -0.60305464]], dtype=float32)

In [40]:
losses = aae.sess.run([aae.disc_loss, aae.enc_loss, aae.dec_loss],
                  feed_dict={aae.input_x: batch_x, aae.z_tensor: batch_z})

In [41]:
losses

[1.7572014, 0.81400335, 137.1125]

In [42]:
aae.sess.run(aae.train_discriminator, feed_dict={aae.input_x: batch_x, aae.z_tensor: batch_z})

In [1]:
# NOTE(review): this cell carries execution count 1 and its saved output is a
# NameError, i.e. it was run in a kernel where `aae` had not been created yet.
weights = aae.sess.run(tf.trainable_variables())
weights

NameError: name 'aae' is not defined

In [45]:
# One manual encoder (adversarial) update on a fresh batch.
# next() is used instead of the Python-2-only generator method .next().
batch_x = next(train_gen)
aae.sess.run(aae.train_encoder, feed_dict={aae.input_x: batch_x})

In [48]:
# One manual autoencoder (reconstruction) update on a fresh batch.
# next() is used instead of the Python-2-only generator method .next().
batch_x = next(train_gen)
aae.sess.run(aae.train_autoencoder, feed_dict={aae.input_x: batch_x})

In [30]:
np.mean(gm)

0.33970061

In [31]:
# Cosine similarity between gm and tm: sum(gm * tm) / (||gm|| * ||tm||).
# NOTE(review): gm and tm are not defined in any visible cell - hidden kernel state.
np.sum(gm * tm / np.sqrt(np.sum(gm * gm)) / np.sqrt(np.sum(tm * tm)))

0.99872087572005985

In [32]:
# Standard deviation along axis 0 of `gen` and `train`.
# NOTE(review): `gen` is not defined in any visible cell, and `train` is bound to a
# file path in the config cell; presumably both were arrays in an earlier session - confirm.
gv = np.std(gen, 0)
tv = np.std(train, 0)

In [33]:
np.mean(gv)

0.25929013

In [20]:
# Cosine similarity between gv and tv: sum(gv * tv) / (||gv|| * ||tv||).
np.sum(gv * tv / np.sqrt(np.sum(gv * gv)) / np.sqrt(np.sum(tv * tv)))

0.98712256425707801

In [59]:
# Stack `test` on top of `train` row-wise.
# NOTE(review): the config cell binds both names to file paths; the saved shape
# below suggests they were arrays in the session that produced it - confirm.
data = np.vstack([test, train])

In [60]:
data.shape

(9003, 152)

In [61]:
# Element-wise log of gen.T and of (1 - gen.T); the argument is floored at 1e-4
# so the logarithm stays finite.
lg_plus = np.log(np.maximum(gen.T, 0.0001))
lg_minus = np.log(np.maximum(1.-gen.T, 0.0001))

In [62]:
# ll[i, j] = sum_k data[i, k] * lg_plus[k, j] + (1 - data[i, k]) * lg_minus[k, j],
# i.e. the Bernoulli log-likelihood of data row i under the (floored) values of gen row j.
ll = np.matmul(data, lg_plus) + np.matmul(1-data, lg_minus)

In [63]:
ll.shape

(9003, 1000)

In [64]:
# Number of distinct data rows that are the highest-likelihood match for at least one column of ll.
len(set(np.argmax(ll, 0)))

834

In [65]:
np.max(ll)

-9.3983239759245976

In [66]:
# Mean of the 10 largest per-column maxima of ll.
np.mean(np.sort(np.max(ll, 0))[-10:])

-11.125198687051745

In [67]:
np.mean(np.max(ll, 0))

-32.793119482221705

In [68]:
# Encode `test` with the model's encoder.
# NOTE(review): `test` must be an array here, but the config cell binds it to a file path - confirm.
lat = aae.sess.run(aae.encoded, feed_dict={aae.input_x: test})

In [69]:
lat.shape

(1000, 10)

In [70]:
np.mean(lat)

-0.16402109

In [71]:
np.std(lat)

5.4336224

In [72]:
# Standard deviation of 640x10 prior samples drawn with scale=5.5;
# presumably a sanity check against np.std(lat) computed above - confirm.
np.std(sample_prior(scale=5.5, size=(640, 10)))

5.5231681402600721