In [None]:
""" Auto Encoder Example.
Using an auto encoder on MNIST handwritten digits.
References:
    Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. "Gradient-based
    learning applied to document recognition." Proceedings of the IEEE,
    86(11):2278-2324, November 1998.
Links:
    [MNIST Dataset] http://yann.lecun.com/exdb/mnist/
"""

In [None]:
from __future__ import division, print_function, absolute_import

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from tensorflow.contrib import rnn as contrib_rnn
from tensorflow.python.framework import ops
import tensorflow.contrib.slim as slim


# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)

In [None]:
# Run-mode flags. Leave both False to train the networks from scratch.
# generate_from_onehots: when True the model cell builds only `decoder_op`, fed from
# the `onehot_test` placeholder, and the one-hot generation cell is run.
generate_from_onehots = False  #set true to generate from custom one hots while testing
# _test_: when True the training loop is skipped and the saver cell restores the
# checkpoint instead of writing it; the py_func helpers also switch from sampling
# to deterministic thresholding/argmax.
_test_ = False

def deconv2d(x, W, stride):
    """Transposed 2-D convolution with SAME padding that upsamples by `stride`.

    Args:
        x: 4-D input tensor; dimensions 1 and 2 are scaled by `stride`.
        W: filter tensor; its dimension at index 2 is used as the number of
            output channels (the `conv2d_transpose` filter layout).
        stride: integer factor applied to both spatial dimensions.

    Returns:
        The tensor produced by `tf.nn.conv2d_transpose`.
    """
    strides = [1, stride, stride, 1]
    out_channels = W.get_shape().as_list()[2]
    # Batch and spatial sizes are read at run time so the batch dimension may
    # stay unknown when the graph is built.
    dyn_shape = tf.shape(x)
    output_shape = tf.pack([dyn_shape[0],
                            dyn_shape[1] * stride,
                            dyn_shape[2] * stride,
                            out_channels])
    return tf.nn.conv2d_transpose(x, W, output_shape=output_shape,
                                  strides=strides, padding='SAME')
    

def weight_variable(shape, name=None):
    """Fetch (or create) a variable called `name` with Xavier initialisation.

    Goes through `tf.get_variable`, so the enclosing variable scope's reuse
    setting decides whether the variable is created or looked up.
    """
    xavier = tf.contrib.layers.xavier_initializer()
    return tf.get_variable(name, shape=shape, initializer=xavier)

def bias_variable(shape, name=None):
    """Fetch (or create) a variable called `name`, initialised to the constant 0.1."""
    constant_init = tf.constant_initializer(0.1)
    return tf.get_variable(name, shape=shape, initializer=constant_init)

def conv2d(x, W):
    """Stride-1, SAME-padded 2-D convolution of `x` with filter `W`."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool `x` with a 2x2 window and stride 2 (SAME padding)."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

def py_func(func, inp, Tout, stateful=True, name=None, grad=None):
    """Wrap `tf.py_func` so that the resulting op uses `grad` as its gradient.

    Args:
        func: Python callable executed by the op.
        inp: list of input tensors handed to `func`.
        Tout: list of output dtypes.
        stateful: forwarded to `tf.py_func`.
        name: optional op name.
        grad: gradient function `(op, grad) -> grad(s)` registered for the op.

    Returns:
        Whatever `tf.py_func` returns for the given arguments.
    """
    # Gradient registrations are keyed by name, so draw a random suffix to
    # avoid clashing with an earlier registration.
    grad_name = 'PyFuncGrad' + str(np.random.randint(0, 1E+8))
    tf.RegisterGradient(grad_name)(grad)  # see _MyonehotGrad for a grad example

    override = {"PyFunc": grad_name}
    with tf.get_default_graph().gradient_override_map(override):
        return tf.py_func(func, inp, Tout, stateful=stateful, name=name)

def mycustomfunc(x):
    """NumPy-side body of `myonehot`: turn class probabilities into one-hot vectors.

    Args:
        x: numpy array whose last axis holds per-class probabilities.

    Returns:
        A float32 array with the shape of `x`, one-hot along the last axis.
        When the module flag `_test_` is true the hot index is the argmax;
        otherwise it is sampled from the probabilities.
    """
    one_hot_size_ = x.shape[-1]
    # Build the helper ops in a private graph and close the session when done:
    # this function runs once per py_func evaluation, so adding ops to the
    # default graph and leaving sessions open would leak on every call.
    with tf.Graph().as_default():
        probs = tf.reshape(tf.constant(x), [-1, one_hot_size_])
        if _test_:
            indexes = tf.argmax(probs, 1)
        else:
            # tf.multinomial expects unnormalised log-probabilities, so pass
            # log(p) (floored to avoid log(0)) rather than the log-odds
            # log(p / (1 - p)), which would not sample according to p.
            logits = tf.log(tf.maximum(probs, 1e-20))
            indexes = tf.multinomial(logits, 1)
        oneHots = tf.reshape(tf.one_hot(indexes, one_hot_size_), list(x.shape))
        with tf.Session() as sess_:
            return sess_.run(oneHots)
    
def myonehot(x, name=None):
    """One-hot op built on `mycustomfunc`, with `_MyonehotGrad` as its gradient.

    Returns a tensor reshaped to the dynamic shape of `x`.
    """
    with ops.name_scope(name, "Myonehot", [x]) as name:
        outputs = py_func(mycustomfunc, [x], [tf.float32],
                          name=name,
                          grad=_MyonehotGrad)  # custom gradient hooked in here
        # py_func returns a list (one entry per Tout); restore the shape that
        # is not carried through the Python call.
        one_hot = outputs[0]
        return tf.reshape(one_hot, tf.shape(x))
    
def _MyonehotGrad(op, grad):
    """Gradient for `myonehot`: hand the incoming gradient back unchanged."""
    forward_input = op.inputs[0]  # looked up but not used by the pass-through
    return grad

def _getBinary(x):
    """Binarise one probability `x`.

    While the module flag `_test_` is false, draws 0 or 1 with P(1) = x;
    otherwise returns the comparison `x > 0.5`.
    """
    if not _test_:
        return np.random.choice(np.arange(2), p=[1.0-x,x])
    return x>0.5

def mycustombinarizer(x):
    """NumPy-side body of `binarizer`: turn probabilities into 0/1 values.

    Args:
        x: numpy array of probabilities.

    Returns:
        A float32 array with the shape of `x`. When the module flag `_test_`
        is true each entry is `x > 0.5`; otherwise each entry is drawn as 1
        with probability `x`.
    """
    if _test_:
        # The wrapping py_func declares a float32 output, so cast the boolean mask.
        return np.float32(x > 0.5)
    # Private graph + context-managed session: this runs on every py_func
    # evaluation and must not grow the default graph or leave sessions open.
    with tf.Graph().as_default():
        probs = tf.reshape(tf.constant(x), [-1])
        probs = tf.pack([1 - probs, probs], axis=1)
        # tf.multinomial expects unnormalised log-probabilities: log([1-p, p])
        # (floored to avoid log(0)) gives P(1) = p. The log-odds of each
        # column would not.
        logits = tf.log(tf.maximum(probs, 1e-20))
        indexes = tf.multinomial(logits, 1)
        indexes = tf.cast(tf.reshape(indexes, list(x.shape)), tf.float32)
        with tf.Session() as sess_:
            return sess_.run(indexes)

def binarizer(x, name=None):
    """Binarising op built on `mycustombinarizer`, with `_MyBinarizerGrad` as its gradient.

    Returns a tensor reshaped to the dynamic shape of `x`.
    """
    with ops.name_scope(name, "Binarizer", [x]) as name:
        outputs = py_func(mycustombinarizer, [x], [tf.float32],
                          name=name,
                          grad=_MyBinarizerGrad)  # custom gradient hooked in here
        binary = outputs[0]
        return tf.reshape(binary, tf.shape(x))
    
def _MyBinarizerGrad(op, grad):
    """Gradient for `binarizer`: hand the incoming gradient back unchanged."""
    forward_input = op.inputs[0]  # looked up but not used by the pass-through
    return grad

def newmyonehot(x):
    """Sample one-hot vectors from the class probabilities on the last axis of `x`.

    Args:
        x: tensor of probabilities whose last dimension is statically known.

    Returns:
        A tensor with the dynamic shape of `x`, one-hot along the last axis,
        with the hot index drawn from the corresponding probabilities.
    """
    one_hot_size_ = x.get_shape().as_list()[-1]
    probs = tf.reshape(x, [-1, one_hot_size_])
    # tf.multinomial takes unnormalised log-probabilities. Passing the log-odds
    # log(p / (1 - p)) samples proportionally to p / (1 - p) instead of p and
    # yields inf when a probability saturates at 1, so use log(p) with a small
    # floor to keep log(0) finite.
    logits = tf.log(tf.maximum(probs, 1e-20))
    indexes = tf.multinomial(logits, 1)
    oneHots = tf.one_hot(indexes, one_hot_size_)
    return tf.reshape(oneHots, tf.shape(x))

def makeonehot(x):
    """Sampled one-hot of `x` in the forward pass, identity of `x` for gradients.

    The difference between the sample and `x` is wrapped in `tf.stop_gradient`,
    so only the identity term contributes to back-propagation.
    """
    passthrough = tf.identity(x)
    correction = tf.stop_gradient(newmyonehot(x) - passthrough)
    return passthrough + correction

#im = tf.constant(0.1, shape=[1,5,5,1])
#net = slim.conv2d(im, 1,[3, 3], scope='aaa')
#net = slim.conv2d(net, 1,[3, 3], scope='aaa')



In [None]:
# Parameters
learning_rate = 0.0003   # passed to every optimizer created in the loss cell
training_epochs = 500    # NOTE: overwritten with 200 at the top of the training cell
batch_size = 256         # images per call to mnist.train.next_batch
display_step = 1         # print costs every `display_step` epochs; plots every 10*display_step
examples_to_show = 10    # NOTE: overwritten with 11 at the top of the training cell

# Network Parameters
#n_hidden_1 = 256 # 1st layer num features
#n_hidden_2 = 128 # 2nd layer num features
n_fc1 = 1024        # width of encoder 'fc1' and decoder 'fc4'
n_fc2 = 256         # width of the encoder's continuous code ('fc2')
n_fc3 = 3           # not referenced by the code visible in this notebook
n_fc4 = 64          # not referenced by the code visible in this notebook
n_fc5 = 256         # width of the RNN decoder's 'dfc3'/'dfc4' layers
rnn_size = 256      # GRU units in the encoder and decoder RNNs
max_length = 5      # number of time steps the encoder RNN emits
one_hot_size = 26   # number of classes per emitted symbol
n_input = 784 # MNIST data input (img shape: 28*28)

# tf Graph input (only pictures)
X = tf.placeholder(tf.float32, shape=[None, n_input])
# Scalar that multiplies the uniform noise added inside decoder_FC (fed as 1.0 or 0.0).
TrainTest = tf.placeholder(tf.float32,shape=[])
# Digit labels; fed during training but not used by any active cost term.
y_classify = tf.placeholder(tf.float32, [None, 10])
# Hand-written one-hot codes for the generate_from_onehots mode.
onehot_test = tf.placeholder(tf.int32, shape=[None, None, one_hot_size])
# Current epoch / total epochs; only cast to float in the loss cell (xv, xm).
epoch_no = tf.placeholder(tf.int32,shape=[])
max_epoch = tf.placeholder(tf.int32,shape=[])

In [None]:
def encoder_FC(x, netname, reuse):
    """Convolutional encoder: flat image batch -> `n_fc2`-wide continuous code.

    Args:
        x: tensor reshaped here to [-1, 28, 28, 1].
        netname: prefix of the variable scope (`netname + 'encoder_FC'`).
        reuse: when equal to True, existing variables in the scope are reused.

    Returns:
        Output of the final fully connected layer 'fc2' (no activation).
    """
    with tf.variable_scope(netname+'encoder_FC') as scope:
        if reuse == True:
            scope.reuse_variables()
        net = tf.reshape(x, [-1,28,28,1])
        # Two conv(5x5) -> relu -> max-pool(2x2) stages with 32 then 64 filters.
        for stage, depth in enumerate((32, 64), start=1):
            net = tf.nn.relu(slim.conv2d(net, depth, [5, 5], scope='conv%d' % stage))
            net = slim.max_pool2d(net, [2, 2], scope='pool%d' % stage)
        net = slim.flatten(net, scope='pool2_flat')
        net = tf.nn.relu(slim.fully_connected(net, n_fc1, scope='fc1'))
        return slim.fully_connected(net, n_fc2, scope='fc2', activation_fn=None)
    
def encoder_RNN(x, netname, reuse):
    """GRU encoder: continuous code -> `max_length` symbols over `one_hot_size` classes.

    Args:
        x: 2-D code tensor; it is repeated as the RNN input at every time step.
        netname: prefix of the variable scope (`netname + 'encoder_RNN'`).
        reuse: when equal to True, existing variables in the scope are reused.

    Returns:
        (rnn_one_hots, rnn_predictions): the `makeonehot` output and the softmax
        probabilities, both reshaped to [-1, max_length, one_hot_size].
    """
    with tf.variable_scope(netname+'encoder_RNN') as scope:
        if reuse == True:
            scope.reuse_variables()
        code = tf.cast(x,tf.float32)
        # Stack max_length copies on a new leading axis, then swap the first two axes.
        steps = tf.transpose(tf.pack([code]*max_length), [1,0,2])
        gru = tf.nn.rnn_cell.GRUCell(rnn_size)
        rnn_out, _ = tf.nn.dynamic_rnn(gru, steps, dtype=tf.float32)
        flat_out = tf.reshape(rnn_out, [-1, rnn_size])
        # slim.fully_connected is called with its default activation here.
        scores = slim.fully_connected(flat_out, one_hot_size, scope='rnn_softmax')
        rnn_predictions = tf.reshape(tf.nn.softmax(scores),
                                     [-1, max_length, one_hot_size])
        rnn_one_hots = makeonehot(rnn_predictions)
        print (rnn_one_hots.get_shape())
        return rnn_one_hots, rnn_predictions
    
def encoder(x, netname, reuse):
    """Chain `encoder_FC` and `encoder_RNN`.

    Returns:
        (one-hot symbols, continuous code from encoder_FC, symbol probabilities).
    """
    code = encoder_FC(x, netname, reuse)
    one_hots, probabilities = encoder_RNN(code, netname, reuse)
    return one_hots, code, probabilities
    

# Building the decoder
def decoder_FC(x, netname, train, reuse):
    """Deconvolutional decoder: continuous code -> flat 784-pixel image.

    Args:
        x: code tensor.
        netname: prefix of the variable scope (`netname + 'decoder_FC'`).
        train: scalar multiplier for the uniform noise in [-0.5, 0.5) added to
            the code; 0 disables the noise.
        reuse: when equal to True, existing variables in the scope are reused.

    Returns:
        Sigmoid output of the second transposed convolution, reshaped to [-1, 784].
    """
    with tf.variable_scope(netname+'decoder_FC')as scope:
        if reuse == True:
            scope.reuse_variables()
        code = tf.cast(x,tf.float32)
        noise = tf.random_uniform(tf.shape(code))-0.5
        code = code + noise*train
        # 'fc4' is still built (so its variables exist in the scope), but its
        # output is not consumed: 'fc5' below reads the noisy code directly.
        h_fc4 = tf.nn.relu(slim.fully_connected(code, n_fc1, scope='fc4'))
        h_fc5 = tf.nn.relu(slim.fully_connected(code, 7*7*64, scope='fc5'))
        feature_map = tf.reshape(h_fc5, [-1, 7, 7, 64])

        # First transposed convolution (stride 2): 64 -> 32 channels.
        w_deconv1 = weight_variable([5, 5, 32, 64],'deconv1')
        b_deconv1 = bias_variable([32],'deconv1_b')
        h_deconv1 = tf.nn.relu(deconv2d(feature_map, w_deconv1, 2) + b_deconv1)

        # Second transposed convolution (stride 2): 32 -> 1 channel.
        w_deconv2 = weight_variable([5, 5, 1, 32],'deconv2')
        b_deconv2 = bias_variable([1],'deconv2_b')
        h_deconv2 = tf.nn.sigmoid(deconv2d(h_deconv1, w_deconv2, 2) + b_deconv2)
        return tf.reshape(h_deconv2, [-1, 784])
    
   
# Building the decoder
def decoder_RNN(x, netname, reuse):
    """GRU decoder: symbol sequence -> `n_fc5`-wide continuous code.

    Args:
        x: sequence tensor fed to the RNN after a cast to float32.
        netname: prefix of the variable scope (`netname + 'decoder_RNN'`).
        reuse: when equal to True, existing variables in the scope are reused.

    Returns:
        Output of the 'dfc4' layer (no activation) applied to the RNN's final
        time step.
    """
    with tf.variable_scope(netname+'decoder_RNN')as scope:
        if reuse == True:
            scope.reuse_variables()
        gru = tf.nn.rnn_cell.GRUCell(rnn_size)
        symbols = tf.cast(x,tf.float32)
        rnn_out, _ = tf.nn.dynamic_rnn(gru, symbols, dtype=tf.float32)
        # Swap the first two axes and pick the last entry along the new
        # leading axis, i.e. the output of the final time step.
        time_major = tf.transpose(rnn_out, [1, 0, 2])
        last_step = tf.gather(time_major, tf.shape(time_major)[0] - 1)
        hidden = slim.fully_connected(last_step, n_fc5, scope='dfc3')
        return slim.fully_connected(hidden, n_fc5, scope='dfc4',activation_fn=None)

    
def decoder_RNN_FC(x, netname, reuse):
    """Decode a symbol sequence to an image: `decoder_RNN` then noise-free `decoder_FC`."""
    code = decoder_RNN(x, netname, reuse)
    # train=0 switches off the noise that decoder_FC adds to its input.
    return decoder_FC(code, netname, 0, reuse)

In [None]:
if generate_from_onehots == False:
    # Construct model: two independent networks ('net1', 'net2') over the same input.
    # Per network:
    #   encoder              : image -> 256-d code -> sampled one-hot symbols
    #   decoder_RNN          : symbols -> reconstructed 256-d code
    #   decoder_FC           : 256-d code (+ noise scaled by TrainTest) -> image
    #   decoder_RNN_FC       : symbols -> image, reusing the variables created above
    _1_hot_net1, encoder_FC_256_net1, predprob_net1 = encoder(X, 'net1', False)
    decoder_RNN_256_net1 = decoder_RNN(_1_hot_net1, 'net1', False)
    #encoder_op_net1 = encoder_FC(X, 'net1', False)
    decoder_FC_image_net1 = decoder_FC(encoder_FC_256_net1, 'net1', TrainTest, False)
    decoder_RNN_FC_image_net1 = decoder_RNN_FC(_1_hot_net1, 'net1', True)


    _1_hot_net2, encoder_FC_256_net2, predprob_net2 = encoder(X, 'net2', False)
    decoder_RNN_256_net2 = decoder_RNN(_1_hot_net2, 'net2', False)
    decoder_FC_image_net2 = decoder_FC(encoder_FC_256_net2, 'net2', TrainTest, False)
    decoder_RNN_FC_image_net2 = decoder_RNN_FC(_1_hot_net2, 'net2', True)

    # Prediction
    y_pred_image_net1 = decoder_FC_image_net1
    #y_pred_image_net1 = decoder_RNN_FC_image_net1
    y_pred_RNN_256_net1 = decoder_RNN_256_net1

    y_pred_image_net2 = decoder_FC_image_net2
    #y_pred_image_net2 = decoder_RNN_FC_image_net2
    y_pred_RNN_256_net2 = decoder_RNN_256_net2
    # Targets (Labels) are the input data.
    y_true_image = X

else:
    # Generation mode: only the symbols -> image path is needed. decoder_RNN
    # requires (x, netname, reuse), and the generation cell reshapes the result
    # to 28x28, so build the full RNN + FC decoder. The 'net1' scopes are
    # created fresh here (reuse=False) so their variable names match the ones
    # written by training.
    decoder_op = decoder_RNN_FC(onehot_test, 'net1', False)

In [None]:
if generate_from_onehots == False:
     # Define loss and optimizer, minimize the squared error
    #cost = cost1+cost2
    #_1_hot__ = tf.reshape(_1_hot, [-1, one_hot_size])   # this is weird .. uncommenting this line prevents convergence
    #cost3 = tf.reduce_mean(tf.pow(1-(tf.reduce_sum(_1_hot__,reduction_indices=[1])),2))
    #cost3 = tf.reduce_mean(-tf.log(tf.reduce_sum(_1_hot__,reduction_indices=[1])))
    # Image reconstruction error (mean squared) of each network's FC decoder.
    cost_image_net1 = tf.reduce_mean(tf.pow(y_true_image - y_pred_image_net1, 2))
    cost_image_net2 = tf.reduce_mean(tf.pow(y_true_image - y_pred_image_net2, 2))
    
    
    # Element-wise L2 / L1 distance between the encoder's 256-d code and the
    # code reconstructed by the RNN decoder. Only the L2 terms feed the active
    # costs below; the L1 terms are used solely by the commented-out blend.
    cost_256_l2_net1 = tf.pow(encoder_FC_256_net1 - y_pred_RNN_256_net1, 2)
    cost_256_l1_net1 = tf.abs(encoder_FC_256_net1 - y_pred_RNN_256_net1)
    cost_256_l2_net2 = tf.pow(encoder_FC_256_net2 - y_pred_RNN_256_net2, 2)
    cost_256_l1_net2 = tf.abs(encoder_FC_256_net2 - y_pred_RNN_256_net2)
##    (1-sigmoid(1500(abs(x)-1)))*abs(x) + (sigmoid(1500(abs(x)-1)))*(x*x/2+0.5)
#     cost_256_net1= tf.reduce_mean(
#                     (1.0-tf.sigmoid(1500*(cost_256_l1_net1-1.0)))*cost_256_l1_net1 + 
#                     (  tf.sigmoid(1500*(cost_256_l1_net1-1.0)))*(cost_256_l2_net1/2.0+0.5)
#                     )
#     cost_256_net2= tf.reduce_mean(
#                     (1.0-tf.sigmoid(1500*(cost_256_l1_net2-1.0)))*cost_256_l1_net2 + 
#                     (  tf.sigmoid(1500*(cost_256_l1_net2-1.0)))*(cost_256_l2_net2/2.0+0.5)
#                     )

    cost_256_net1= tf.reduce_mean(cost_256_l2_net1)
    cost_256_net2= tf.reduce_mean(cost_256_l2_net2)    
    #cost_1hot_match = tf.reduce_mean(tf.pow(_1_hot_net1 - _1_hot_net2, 2))
    # Mean squared difference between the two networks' symbol probabilities.
    cost_1hot_match = tf.reduce_mean(tf.pow(predprob_net1 - predprob_net2, 2))

    
    # Float copies of the epoch placeholders; only the commented-out schedule
    # below uses them.
    xv=tf.cast(epoch_no,tf.float32)
    xm=tf.cast(max_epoch,tf.float32)
#     startLow= tf.sigmoid((xv-xm/2)/5)
    # Fixed weights: code-reconstruction terms x2, image terms x1.
    startLow= 2
    startHigh= 1 
    
    # `cost` is the unweighted sum (not passed to any optimizer below);
    # `costdisjoint` is the weighted sum without the matching term;
    # `totalcost` adds the matching term and is what `optimizer` minimises.
    cost = cost_image_net1 + cost_256_net1 + cost_image_net2 + cost_256_net2 + cost_1hot_match
    costdisjoint = cost_image_net1*startHigh + cost_256_net1*startLow + cost_image_net2*startHigh + cost_256_net2*startLow
    totalcost = costdisjoint+cost_1hot_match
    #cost = cost + tf.reduce_mean(-tf.reduce_sum(y_classify * tf.log(y_pred_classify), reduction_indices=[1]))
    #optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)
    #optimizerdj = tf.train.AdamOptimizer(learning_rate).minimize(costdisjoint)
    
    # Trainable variables grouped by scope prefix:
    #   var1s = net1 conv encoder + FC decoder, var2s = net1 RNN encoder + decoder
    var1s = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='net1encoder_FC')
    var1s = var1s+tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='net1decoder_FC')
    
    var2s = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='net1encoder_RNN')
    var2s = var2s+tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='net1decoder_RNN')
    # Per-network optimizer for net1 (not run by the training cell in this notebook).
    optimizer1 = tf.train.AdamOptimizer(learning_rate).minimize(
        cost_image_net1 + cost_256_net1, var_list=var1s+var2s)
    #optimizer2 = tf.train.AdamOptimizer(learning_rate).minimize(cost2,var_list=var2s)
    
    #   var3s = net2 conv encoder + FC decoder, var4s = net2 RNN encoder + decoder
    var3s = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='net2encoder_FC')
    var3s = var3s+tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='net2decoder_FC')
    
    var4s = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='net2encoder_RNN')
    var4s = var4s+tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='net2decoder_RNN')
    # Per-network optimizer for net2 (not run by the training cell in this notebook).
    optimizer3 = tf.train.AdamOptimizer(learning_rate).minimize(
        cost_image_net2 + cost_256_net2, var_list=var3s+var4s)
    #optimizer4 = tf.train.AdamOptimizer(learning_rate).minimize(cost4,var_list=var4s)
    
    # Optimizer restricted to both networks' RNN variables (matching + code terms).
    var5s = var2s+var4s
    optimizer5 = tf.train.AdamOptimizer(learning_rate).minimize(
        cost_1hot_match + cost_256_net1 + cost_256_net2, var_list=var5s)
    #optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    
    #cost2 = tf.reduce_mean(tf.pow(decoder_op_one_hot_only - y_pred, 2))
    # Joint optimizers over all four variable groups; the training cell runs `optimizer`.
    optimizerdj = tf.train.AdamOptimizer(learning_rate).minimize(costdisjoint,var_list = var1s+var3s+var2s+var4s)
    optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(totalcost,var_list = var1s+var3s+var2s+var4s)


In [None]:
# Initializing the variables
# Must be created after the model and optimizer cells so that the op covers
# every variable defined there.
init = tf.initialize_all_variables()

In [None]:
# Launch the graph
# Using InteractiveSession (more convenient while using Notebooks)
# `sess` is the module-level session used by every later sess.run call and by the saver.
sess = tf.InteractiveSession()
sess.run(init)


In [None]:
def drawoutput():
    """Print net1's symbols for the first test images and plot reconstructions.

    Evaluates both networks on `mnist.test.images[:examples_to_show]` with the
    decoder noise disabled (TrainTest = 0.0), prints net1's symbol indices and
    their maximum probabilities, and draws a 4-row figure:
    original / net1 FC decoder / net1 RNN+FC decoder / net2 RNN+FC decoder.
    """
    fetches = [y_pred_image_net1, decoder_RNN_FC_image_net1, _1_hot_net1, predprob_net1,
               y_pred_image_net2, decoder_RNN_FC_image_net2, _1_hot_net2, predprob_net2]
    feed = {X: mnist.test.images[:examples_to_show], TrainTest:0.0}
    (encode_decode, encode_decode2, onehots, predprob_,
     encode_decode_net2, encode_decode2_net2, onehots_net2, predprob__net2) = sess.run(
        fetches, feed_dict=feed)

    print(np.argmax(onehots,2))
    print(np.max(predprob_,2))

    # Compare original images with their reconstructions, one source per row.
    f, a = plt.subplots(4, examples_to_show, figsize=(examples_to_show, 3))
    row_sources = (mnist.test.images, encode_decode, encode_decode2, encode_decode2_net2)
    for i in range(examples_to_show):
        for row, source in enumerate(row_sources):
            a[row][i].imshow(np.reshape(source[i], (28, 28)))
    f.show()
    plt.draw()

In [None]:
if generate_from_onehots == False and _test_==False:
    # These two assignments override the values set in the parameters cell.
    training_epochs = 200
    examples_to_show = 11

    total_batch = int(mnist.train.num_examples/batch_size)
    # Training cycle
    for epoch in range(training_epochs):
        # Loop over all batches
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost ops (to get loss values).
            # TrainTest=1.0 switches on the noise added inside decoder_FC.
            _,c1,c2,c3,c4,c5 = sess.run(
                [optimizer,cost_image_net1,cost_256_net1,cost_image_net2,cost_256_net2,cost_1hot_match],
                feed_dict={X: batch_xs, y_classify: batch_ys, 
                           TrainTest:1.0, epoch_no:epoch, max_epoch: training_epochs})
        # Display logs per epoch step (costs of the last batch of the epoch)
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1),
                  #"cost=", "{:.9f}".format(c),
                  "cost1=", "{:.9f}".format(c1),
                  "cost2=", "{:.9f}".format(c2),
                  "cost3=", "{:.9f}".format(c3),
                  "cost4=", "{:.9f}".format(c4),
                  "cost5=", "{:.9f}".format(c5),
                  )
        if (epoch+1) % (display_step*10) == 0:
            drawoutput()

    print("Optimization Finished!")

    # Applying encode and decode over test set.
    # Fetch net1's FC reconstruction (there is no tensor named `y_pred` in this
    # notebook) and feed TrainTest=0.0: the decoder graph depends on that
    # placeholder, and 0.0 disables the training noise.
    encode_decode = sess.run(
        y_pred_image_net1,
        feed_dict={X: mnist.test.images[:examples_to_show], TrainTest: 0.0})
    # Compare original images with their reconstructions
    f, a = plt.subplots(2, examples_to_show, figsize=(examples_to_show, 2))
    for i in range(examples_to_show):
        a[0][i].imshow(np.reshape(mnist.test.images[i], (28, 28)))
        a[1][i].imshow(np.reshape(encode_decode[i], (28, 28)))
    f.show()
    plt.draw()
    #plt.waitforbuttonpress() 

In [None]:
# Checkpoint handling: restore when generating or testing, otherwise save the
# freshly trained model.
saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
if generate_from_onehots or _test_:
    saver.restore(sess, "./model_256_msarkar_lang.ckpt")
else:
    save_path = saver.save(sess, "./model_256_msarkar_lang.ckpt")

In [None]:
if generate_from_onehots == True:
    # TESTING CUSTOM
    # Decode a hand-written symbol sequence.
    # The last axis must be `one_hot_size` wide, because that is the width of
    # the `onehot_test` placeholder the array is fed into.

    # Random 3-symbol code: each position is either a shared random symbol or
    # an independent random one. It is replaced by the manual sentence below.
    onehots = np.zeros((1,3,one_hot_size))
    pos = np.random.randint(one_hot_size)
    for step in range(3):
        prob = np.random.randint(2)
        onehots[0][step][(1-prob)*pos + prob*np.random.randint(one_hot_size)] = 1

    # Manually set the sentence 
    # edit this to generate new sentence .. the values are in range(one_hot_size) 
    # sentence length is not bounded
    sentence = [1]
    onehots = np.zeros((1,len(sentence),one_hot_size))
    for i in range(len(sentence)):
        onehots[0][i][sentence[i]] = 1

    print(np.argmax(onehots,2))
    encode_decode = sess.run(
        decoder_op, feed_dict={onehot_test: onehots})
    #print(np.argmax(onehots,2))
    # Show the generated image(s); one extra axis is requested so that `a` is
    # always an array of axes, even for a single image.
    f, a = plt.subplots(1, onehots.shape[0]+1, figsize=(onehots.shape[0]*3, 2))
    for i in range(onehots.shape[0]):
        #a[0][i].imshow(np.reshape(mnist.test.images[i], (28, 28)))
        a[i].imshow(np.reshape(encode_decode[i], (28, 28)))
    f.show()
    plt.draw()

In [None]:
if generate_from_onehots == False:
    # Result
    # Test-set visualisation: print net1's symbols and plot the original images
    # next to the reconstructions. This is what drawoutput() does, so call the
    # helper instead of repeating its body.
    drawoutput()

In [None]:

#cost = costdisjoint + 0.1*cost5  after 20 iterations

"""
Epoch: 0001 cost1= 0.060610365 cost2= 0.001456601 cost3= 0.063119449 cost4= 0.000905316 cost5= 0.048858173
Epoch: 0002 cost1= 0.038265381 cost2= 0.003614630 cost3= 0.044919856 cost4= 0.003158854 cost5= 0.020252405
Epoch: 0003 cost1= 0.024723999 cost2= 0.003737362 cost3= 0.026506023 cost4= 0.003780622 cost5= 0.026562501
Epoch: 0004 cost1= 0.018576309 cost2= 0.002980163 cost3= 0.019376988 cost4= 0.003027871 cost5= 0.021814905
Epoch: 0005 cost1= 0.015814539 cost2= 0.002824710 cost3= 0.016395586 cost4= 0.002846907 cost5= 0.019350965
Epoch: 0006 cost1= 0.012713787 cost2= 0.002390323 cost3= 0.013109592 cost4= 0.002493173 cost5= 0.019350963
Epoch: 0007 cost1= 0.011025983 cost2= 0.002131242 cost3= 0.011436282 cost4= 0.002160230 cost5= 0.020372596
Epoch: 0008 cost1= 0.010698613 cost2= 0.001968088 cost3= 0.011302998 cost4= 0.001989576 cost5= 0.027043272
Epoch: 0009 cost1= 0.009618663 cost2= 0.001801275 cost3= 0.010104033 cost4= 0.001836486 cost5= 0.023918271
Epoch: 0010 cost1= 0.008725640 cost2= 0.001585961 cost3= 0.009267062 cost4= 0.001644548 cost5= 0.023557693
Epoch: 0011 cost1= 0.008295461 cost2= 0.001504187 cost3= 0.008877432 cost4= 0.001551532 cost5= 0.024519233
Epoch: 0012 cost1= 0.007812739 cost2= 0.001404112 cost3= 0.008684461 cost4= 0.001432679 cost5= 0.024519231
Epoch: 0013 cost1= 0.007785469 cost2= 0.001362351 cost3= 0.008305097 cost4= 0.001438453 cost5= 0.026081732
Epoch: 0014 cost1= 0.007566055 cost2= 0.001269917 cost3= 0.008127766 cost4= 0.001308516 cost5= 0.026622597
Epoch: 0015 cost1= 0.006859084 cost2= 0.001158703 cost3= 0.007357582 cost4= 0.001225068 cost5= 0.026382212
Epoch: 0016 cost1= 0.006784092 cost2= 0.001126843 cost3= 0.007232355 cost4= 0.001184428 cost5= 0.025600962
Epoch: 0017 cost1= 0.006202817 cost2= 0.000993841 cost3= 0.006698935 cost4= 0.001042053 cost5= 0.027403846
Epoch: 0018 cost1= 0.006432699 cost2= 0.000996745 cost3= 0.007024150 cost4= 0.001016795 cost5= 0.027824519
Epoch: 0019 cost1= 0.005964880 cost2= 0.000916859 cost3= 0.006544244 cost4= 0.000965654 cost5= 0.028185096
Epoch: 0020 cost1= 0.005557414 cost2= 0.000886401 cost3= 0.005953393 cost4= 0.000921355 cost5= 0.027103368
Optimization Finished!
"""

if generate_from_onehots == False:
    # Result
    # Test-set visualisation: print net1's symbols and plot the original images
    # next to the reconstructions. This is what drawoutput() does, so call the
    # helper instead of repeating its body.
    drawoutput()

In [None]:
if generate_from_onehots == False:
    # display plots in this notebook
    # Reconstruct some TRAINING images for comparison with the test-set plots.
    print('****Looking at some TRAINING samples as well : useful to check overfitting')
    #ind = range(examples_to_show);
    #print(ind)
    #np.random.shuffle(ind);
    #print(ind)
    images__ = mnist.train.images[:examples_to_show]
    # Fetch net1's two reconstruction tensors. The names `y_pred` (never
    # defined) and `decoder_RNN_FC` (a function, not a tensor) cannot be run.
    encode_decode, encode_decode2 = sess.run(
        [y_pred_image_net1, decoder_RNN_FC_image_net1],
        feed_dict={X: images__, TrainTest:0.0})
    #print(cost2_)
    #print(sess.run(cost, feed_dict={X: mnist.train.images[:examples_to_show]}))
    # Rows: original / FC-decoder reconstruction / RNN+FC-decoder reconstruction
    f, a = plt.subplots(3, examples_to_show, figsize=(examples_to_show, 2))
    for i in range(examples_to_show):
        a[0][i].imshow(np.reshape(images__[i], (28, 28)))
        a[1][i].imshow(np.reshape(encode_decode[i], (28, 28)))
        a[2][i].imshow(np.reshape(encode_decode2[i], (28, 28)))
    f.show()
    plt.draw()
    #plt.waitforbuttonpress()

In [None]:




#cost = costdisjoint + 10*cost5   after 30 iterations

"""
Epoch: 0001 cost1= 0.062787324 cost2= 0.001301013 cost3= 0.062241048 cost4= 0.001284453 cost5= 0.074579328
Epoch: 0002 cost1= 0.041112404 cost2= 0.003722330 cost3= 0.035201248 cost4= 0.004214905 cost5= 0.073677883
Epoch: 0003 cost1= 0.025945349 cost2= 0.003724482 cost3= 0.023045212 cost4= 0.003668737 cost5= 0.074038468
Epoch: 0004 cost1= 0.018444441 cost2= 0.003338973 cost3= 0.017491074 cost4= 0.003249121 cost5= 0.073798075
Epoch: 0005 cost1= 0.014570432 cost2= 0.002813716 cost3= 0.014376300 cost4= 0.002773168 cost5= 0.074579328
Epoch: 0006 cost1= 0.013128826 cost2= 0.002536717 cost3= 0.013258265 cost4= 0.002525580 cost5= 0.074278846
Epoch: 0007 cost1= 0.011637468 cost2= 0.002239972 cost3= 0.011527496 cost4= 0.002250951 cost5= 0.073437497
Epoch: 0008 cost1= 0.010718588 cost2= 0.002066593 cost3= 0.010292327 cost4= 0.002039759 cost5= 0.073858172
Epoch: 0009 cost1= 0.009763879 cost2= 0.001874423 cost3= 0.009422855 cost4= 0.001834712 cost5= 0.074278846
Epoch: 0010 cost1= 0.008507368 cost2= 0.001708979 cost3= 0.008298178 cost4= 0.001659426 cost5= 0.074399039
Epoch: 0011 cost1= 0.008081965 cost2= 0.001573510 cost3= 0.007576883 cost4= 0.001523594 cost5= 0.074459136
Epoch: 0012 cost1= 0.007761152 cost2= 0.001510136 cost3= 0.007446318 cost4= 0.001423335 cost5= 0.074038461
Epoch: 0013 cost1= 0.007857277 cost2= 0.001412459 cost3= 0.007283356 cost4= 0.001346507 cost5= 0.074158661
Epoch: 0014 cost1= 0.007329797 cost2= 0.001313280 cost3= 0.006817589 cost4= 0.001240621 cost5= 0.073918283
Epoch: 0015 cost1= 0.006638652 cost2= 0.001229314 cost3= 0.006049460 cost4= 0.001169476 cost5= 0.073677897
Epoch: 0016 cost1= 0.006283126 cost2= 0.001150892 cost3= 0.006064247 cost4= 0.001075986 cost5= 0.073918268
Epoch: 0017 cost1= 0.006149262 cost2= 0.001107690 cost3= 0.005871051 cost4= 0.001026551 cost5= 0.074038461
Epoch: 0018 cost1= 0.005928580 cost2= 0.001031580 cost3= 0.005745222 cost4= 0.000952911 cost5= 0.074278846
Epoch: 0019 cost1= 0.005887466 cost2= 0.000971276 cost3= 0.005562180 cost4= 0.000896822 cost5= 0.073918268
Epoch: 0020 cost1= 0.005591392 cost2= 0.000946584 cost3= 0.005346459 cost4= 0.000866824 cost5= 0.074098557
Epoch: 0021 cost1= 0.005831487 cost2= 0.000879509 cost3= 0.005578282 cost4= 0.000799240 cost5= 0.073918268
Epoch: 0022 cost1= 0.005558632 cost2= 0.000845184 cost3= 0.005242722 cost4= 0.000771731 cost5= 0.073557705
Epoch: 0023 cost1= 0.005799457 cost2= 0.000819442 cost3= 0.005414273 cost4= 0.000747545 cost5= 0.073137015
Epoch: 0024 cost1= 0.005268201 cost2= 0.000785169 cost3= 0.004954715 cost4= 0.000703301 cost5= 0.074519232
Epoch: 0025 cost1= 0.005229989 cost2= 0.000735353 cost3= 0.005001098 cost4= 0.000662482 cost5= 0.074098557
Epoch: 0026 cost1= 0.005055581 cost2= 0.000710857 cost3= 0.004737457 cost4= 0.000655101 cost5= 0.074579328
Epoch: 0027 cost1= 0.004539810 cost2= 0.000665494 cost3= 0.004221438 cost4= 0.000596062 cost5= 0.073798075
Epoch: 0028 cost1= 0.004902592 cost2= 0.000651156 cost3= 0.004573327 cost4= 0.000580378 cost5= 0.074158654
Epoch: 0029 cost1= 0.004658089 cost2= 0.000632667 cost3= 0.004485802 cost4= 0.000567020 cost5= 0.073437497
Epoch: 0030 cost1= 0.004598193 cost2= 0.000599556 cost3= 0.004344812 cost4= 0.000534149 cost5= 0.074399039
Optimization Finished!

"""



if generate_from_onehots == False:
    # Result
    # Test-set visualisation: print net1's symbols and plot the original images
    # next to the reconstructions. This is what drawoutput() does, so call the
    # helper instead of repeating its body.
    drawoutput()

In [None]:


# TRAINING SET




if generate_from_onehots == False:
    # Result
    # Run both networks on the first training images with the decoder noise
    # disabled (TrainTest = 0.0).
    (encode_decode, encode_decode2, onehots, predprob_,
     encode_decode_net2, encode_decode2_net2, onehots_net2, predprob__net2) = sess.run(
        [y_pred_image_net1, decoder_RNN_FC_image_net1, _1_hot_net1, predprob_net1,
         y_pred_image_net2, decoder_RNN_FC_image_net2, _1_hot_net2, predprob_net2],
        feed_dict={X: mnist.train.images[:examples_to_show], TrainTest:0.0})

    # net1's symbol indices and the probability of the most likely symbol.
    print(np.argmax(onehots,2))
    print(np.max(predprob_,2))

    # Rows: original / net1 FC decoder / net1 RNN+FC decoder / net2 RNN+FC decoder
    f, a = plt.subplots(4, examples_to_show, figsize=(examples_to_show, 3))
    for i in range(examples_to_show):
        column = (mnist.train.images[i], encode_decode[i],
                  encode_decode2[i], encode_decode2_net2[i])
        for row, image in enumerate(column):
            a[row][i].imshow(np.reshape(image, (28, 28)))
    f.show()
    plt.draw()