In [1]:
#
#   mnist_ae2.py   date. 7/4/2016
#   
#   Autoencoder tutorial code - trial of convolutional AE
#
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow.examples.tutorials.mnist import input_data
from my_nn_lib import Convolution2D, MaxPooling2D
from my_nn_lib import FullConnected, ReadOutLayer
%matplotlib inline

# Up-sampling 2-D layer (deconvolutional / transposed-convolution layer)
class Conv2Dtranspose(object):
    '''
      Up-sampling layer: tf.nn.conv2d_transpose with stride 2 and 'SAME'
      padding, followed by a bias add and an optional activation.

      constructor's args:
          input      : input tensor, 4-D (batch, rows, cols, in_ch)
          output_siz : output image size as (rows, cols)
          in_ch      : number of incoming image channel
          out_ch     : number of outgoing image channel
          patch_siz  : filter(patch) size as (rows, cols)
          activation : 'relu', 'sigmoid', or anything else for a linear output
    '''
    def __init__(self, input, output_siz, in_ch, out_ch, patch_siz, activation='relu'):
        self.input = input
        self.rows = output_siz[0]
        self.cols = output_siz[1]
        self.out_ch = out_ch
        self.activation = activation

        # conv2d_transpose expects its filter as
        # [height, width, output_channels, in_channels] -- note that out_ch
        # comes BEFORE in_ch, the reverse of a regular conv2d filter.
        wshape = [patch_siz[0], patch_siz[1], out_ch, in_ch]

        self.w = tf.Variable(tf.truncated_normal(wshape, stddev=0.1),
                             trainable=True)
        self.b = tf.Variable(tf.constant(0.1, shape=[out_ch]),
                             trainable=True)
        # Dynamic batch size, so the layer works with a `None` batch dimension.
        self.batsiz = tf.shape(input)[0]
        self.params = [self.w, self.b]

    def output(self):
        '''
          Build and return the layer's output tensor.

          The result is kept in a local variable instead of being assigned to
          `self.output`: that assignment used to replace this method with a
          Tensor on the instance, so a second `layer.output()` call failed
          with "'Tensor' object is not callable".
        '''
        shape4D = [self.batsiz, self.rows, self.cols, self.out_ch]
        linout = tf.nn.conv2d_transpose(self.input, self.w, output_shape=shape4D,
                            strides=[1, 2, 2, 1], padding='SAME') + self.b
        if self.activation == 'relu':
            result = tf.nn.relu(linout)
        elif self.activation == 'sigmoid':
            result = tf.sigmoid(linout)
        else:
            # Any other value (including None) means "no activation".
            result = linout

        return result

# Create the model
def model(X, w_e, b_e, w_d, b_d):
    """Fully connected autoencoder with one sigmoid layer on each side.

    Args:
        X: input tensor, multiplied on the right by `w_e`.
        w_e, b_e: encoder weight matrix and bias.
        w_d, b_d: decoder weight matrix and bias.

    Returns:
        (encoded, decoded): the sigmoid hidden code and the sigmoid
        reconstruction computed from it.
    """
    hidden_logits = tf.matmul(X, w_e) + b_e
    encoded = tf.sigmoid(hidden_logits)

    recon_logits = tf.matmul(encoded, w_d) + b_d
    decoded = tf.sigmoid(recon_logits)

    return encoded, decoded

def encoding_CNN(x_inp, starting_channels=3):
    """Stack five ReLU Convolution2D layers, max-pooling after the last four.

    Args:
        x_inp: input tensor handed to the first Convolution2D layer.
        starting_channels: channel count passed to the first layer
            (third positional argument of Convolution2D).

    Returns:
        The output of the final MaxPooling2D layer.

    NOTE(review): Convolution2D and MaxPooling2D come from my_nn_lib, which is
    not visible here. The size tuples presumably give each layer's input
    resolution (64 -> 32 -> 16 -> 8, i.e. each pool halving it) and the two
    integers the in/out channel counts -- confirm against my_nn_lib.
    """
    # (size, channels-in, channels-out, patch size, pool afterwards?)
    layer_specs = [
        ((64, 64), starting_channels, 4, (5, 5), False),
        ((64, 64), 4, 4, (3, 3), True),
        ((32, 32), 4, 8, (3, 3), True),
        ((16, 16), 8, 16, (3, 3), True),
        ((8, 8), 16, 16, (3, 3), True),
    ]

    net = x_inp
    for size, ch_in, ch_out, patch, pool_after in layer_specs:
        net = Convolution2D(net, size, ch_in, ch_out,
                            patch, activation='relu').output()
        if pool_after:
            net = MaxPooling2D(net).output()

    return net

def discrim_model(img_inp):
    """Discriminator head: conv encoder -> dense(32, ReLU) -> dense(1).

    Args:
        img_inp: image tensor; encoded with `encoding_CNN` using 3 input
            channels.

    Returns:
        A tensor with one un-activated output unit per example (the final
        dense layer has `activation=None`), i.e. a logit.
    """
    features = encoding_CNN(img_inp, starting_channels=3)

    flat = tf.contrib.layers.flatten(features)
    hidden = tf.layers.dense(flat, 32, activation=tf.nn.relu)

    return tf.layers.dense(hidden, 1, activation=None)

def mk_nn_model(x, sentp=None, y=None, real_imgp=None, bs=8):
    """Build the convolutional encoder/decoder graph and the discriminator losses.

    Args:
        x: input image tensor, passed to `encoding_CNN`.
        sentp: optional 2-D tensor concatenated (axis 1) onto the flattened
            encoder output before the dense layers.
        y: target tensor for the reconstruction loss; defaults to `x`.
        real_imgp: optional tensor of real images for the discriminator's
            "real" branch; when None that branch is not built.
        bs: unused; kept so existing callers that pass it keep working.

    Returns:
        (loss, decoded, dec_loss, sent_loss):
            loss      -- (4 * dec_loss + sent_loss) / 5
            decoded   -- sigmoid of the decoder logits
            dec_loss  -- mean sigmoid cross-entropy between decoder logits and y
            sent_loss -- constant 0.0 (see NOTE below)

    NOTE(review): the sentence-prediction head is commented out in this
    notebook, so `sent_loss` had no definition and the function raised
    NameError. It is a zero constant here so the four-element return stays
    intact; restore a real definition if the head comes back.

    NOTE(review): `d_loss` was left as an unfinished assignment (a
    SyntaxError). It is completed as real + fake discriminator loss but is
    deliberately NOT added to `loss`: minimising it with the generator's
    optimizer would not be adversarial training. Also, each `discrim_model`
    call builds its own layers (`tf.layers.dense` is called without `reuse`),
    so the real and fake branches presumably do not share weights -- confirm
    and add variable sharing before wiring `d_loss` into training.
    """
    # Encoding phase
    x_image = x
    # `is None` rather than `== None`: comparing a Tensor with `==` is not a
    # reliable None check (newer TensorFlow overloads it element-wise).
    if y is None:
        y = x

    enc_inp = encoding_CNN(x_image)

    # At this point the representation is reshaped below to (4, 4, 16),
    # i.e. 128*2 = 256 values per example.
    flatenned = tf.contrib.layers.flatten(enc_inp)
    if sentp is not None:
        flatenned = tf.concat([flatenned, sentp], 1)
    flatenned = tf.layers.dense(flatenned, 128*2, activation=tf.nn.relu)
    flatenned = tf.layers.dense(flatenned, 128*2, activation=tf.nn.relu)

    net = tf.reshape(flatenned, [-1, 4, 4, 16])

    # Decoding phase: four stride-2 transposed convolutions, 4x4 -> 64x64.
    conv_t1 = Conv2Dtranspose(net, (8, 8), 16, 16,
                         (3, 3), activation='relu')
    conv_t1_out = conv_t1.output()

    conv_t2 = Conv2Dtranspose(conv_t1_out, (16, 16), 16, 8,
                         (3, 3), activation='relu')
    conv_t2_out = conv_t2.output()

    conv_t3 = Conv2Dtranspose(conv_t2_out, (32, 32), 8, 4,
                         (3, 3), activation='relu')
    conv_t3_out = conv_t3.output()

    conv_t35 = Conv2Dtranspose(conv_t3_out, (64, 64), 4, 4,
                         (3, 3), activation='relu')
    conv_t35_out = conv_t35.output()

    # Final convolution produces 3-channel logits (no activation).
    conv_last = Convolution2D(conv_t35_out, (64, 64), 4, 3, (5, 5),
                         activation=None)
    decoded_raw = conv_last.output()

    dec_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=decoded_raw, labels=y))

    decoded = tf.nn.sigmoid(decoded_raw)

    # Discriminator: generated images are labelled 0, real images 1.
    d_fake_pred = discrim_model(decoded)
    d_loss_fake = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=d_fake_pred, labels=tf.zeros_like(d_fake_pred)))

    if real_imgp is not None:
        d_real_pred = discrim_model(real_imgp)
        d_loss_real = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=d_real_pred, labels=tf.ones_like(d_real_pred)))
        d_loss = d_loss_real + d_loss_fake
    else:
        # No real images supplied: only the "fake" term can be built.
        d_loss = d_loss_fake

    # Placeholder for the disabled sentence-prediction loss (see docstring).
    sent_loss = tf.constant(0.0)

    loss = (4*dec_loss + sent_loss)/5

    return loss, decoded, dec_loss, sent_loss


In [2]:
bs = 8    # mini-batch size
hs = 64   # image side length in pixels

# Graph inputs: source image, target image, a real image for the
# discriminator, and a 10-element vector per example.
x = tf.placeholder(tf.float32, shape=[None, hs, hs, 3])
y = tf.placeholder(tf.float32, shape=[None, hs, hs, 3])
real_imgp = tf.placeholder(tf.float32, shape=[None, hs, hs, 3])
sentp = tf.placeholder(tf.float32, shape=[None, 10])

# Build the model, then an Adam training op on the combined loss.
model_outputs = mk_nn_model(x, sentp=sentp, y=y, real_imgp=real_imgp, bs=bs)
loss, decoded, dec_loss, sent_loss = model_outputs

optimizer = tf.train.AdamOptimizer(0.0001)
train_step = optimizer.minimize(loss)


In [3]:
# len(mnist.train.images)

In [4]:
# Load the three dataset arrays from the working directory.
sentences = np.load('sentences_64.npy')
original_imgs = np.load('original_imgs_64.npy')
translated_imgs = np.load('translated_imgs_64.npy')

# float32 copies, matching the tf.float32 placeholders they are fed into.
sentences_float, original_imgs_float, translated_imgs_float = (
    np.array(raw, dtype=np.float32)
    for raw in (sentences, original_imgs, translated_imgs)
)

In [5]:


# onesar = np.ones((55000, hs, hs, 3))
# onesar[:,hs//2:,hs//2:,0:2] = 0

# onesar = onesar.reshape(55000, hs*hs)
# onesar = mnist.train.images

In [6]:
train_max = 95000   # first `train_max` examples train, the rest are held out

train_part = slice(None, train_max)
test_part = slice(train_max, None)

# Training split
input_img = original_imgs_float[train_part]
output_img = translated_imgs_float[train_part]
sent_inp = sentences_float[train_part]

# "Real" images for the discriminator: an independent copy of the training
# inputs, shuffled in place so it is not aligned with `input_img`.
d_real_img = np.array(input_img, copy=True)
np.random.shuffle(d_real_img)

# Held-out split
input_test = original_imgs_float[test_part]
output_test = translated_imgs_float[test_part]
sent_test = sentences_float[test_part]


In [7]:
# Cap this process at 30% of GPU memory.
gpu_opts = tf.GPUOptions(per_process_gpu_memory_fraction=0.3)
config = tf.ConfigProto(gpu_options=gpu_opts)

sess = tf.Session(config=config)
init_op = tf.global_variables_initializer()
sess.run(init_op)

# Batch cursor into the training arrays, advanced by the training loop.
j = 0

In [None]:

print('Training...')
n_train = len(input_img)
for i in range(100000):
    # Wrap the batch cursor at the end of the TRAINING set. This used to
    # compare against len(input_test), which restricted training to the first
    # len(input_test) examples.
    if j + bs > n_train:
        j = 0
    batch_xs = input_img[j:j+bs]
    batch_ys = output_img[j:j+bs]
    batch_sent = sent_inp[j:j+bs]
    batch_real_img = d_real_img[j:j+bs]
    # Dict entries need `key: value`; `real_imgp = ...` was a SyntaxError.
    train_fd = {x: batch_xs, y: batch_ys, sentp: batch_sent,
                real_imgp: batch_real_img}
    train_step.run(train_fd, session=sess)
    if i % 500 == 0:
        # Reconstruction loss on the batch just trained on.
        train_loss = dec_loss.eval(train_fd, session=sess)
        print('  step, loss = %7d: %6.5f' % (i, train_loss))
    j += bs

# generate decoded image with test data
test_fd = {x: input_test[:bs], y: output_test[:bs], sentp: sent_test[:bs],
           real_imgp: d_real_img[:bs]}
decoded_imgs = decoded.eval(test_fd, session=sess)
print('loss (test) = ', dec_loss.eval(test_fd, session=sess))



Training...
  step, loss =       0: 0.09601
  step, loss =     500: 0.08532
  step, loss =    1000: 0.09882


In [None]:
# Evaluate the reconstruction loss over the held-out set in full batches of
# `bs`, collecting the decoded images along the way.
test_loss = 0.0
nbatches = 0.0
decoded_imgs = []

j = 0
# `<=` so the final full batch is included when len(input_test) is an exact
# multiple of bs (the previous `<` skipped it).
while j + bs <= len(input_test):
    test_fd = {x: input_test[j:j+bs], y: output_test[j:j+bs], sentp: sent_test[j:j+bs]}
    # One session run for both tensors instead of two separate forward passes.
    batch_decoded, batch_loss = sess.run([decoded, dec_loss], feed_dict=test_fd)
    decoded_imgs.append(batch_decoded)
    test_loss += batch_loss

    nbatches += 1
    j += bs

# Mean loss per batch; NaN (rather than ZeroDivisionError) when the test set
# is smaller than one batch.
if nbatches:
    test_loss /= nbatches
else:
    test_loss = float('nan')
decoded_imgs = np.array(decoded_imgs)

print (test_loss)

In [None]:
# Collapse any leading batch grouping so decoded_imgs is a flat stack of
# 64x64x3 images.
decoded_imgs = np.reshape(decoded_imgs, (-1, 64, 64, 3))

In [None]:

x_test = input_test
y_test = output_test
# How many samples to display: at most one batch, and never more than exist.
n = min(bs, len(x_test), len(y_test), len(decoded_imgs))

if n > 0:
    # One figure, three rows: input / target / reconstruction.
    fig, axes = plt.subplots(3, n, figsize=(2 * n, 6), squeeze=False)
    for i in range(n):
        for row, images in enumerate((x_test, y_test, decoded_imgs)):
            ax = axes[row, i]
            ax.imshow(np.array(images[i]*255, dtype=np.uint8))
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)

    # Save BEFORE show: once plt.show() has displayed the figure, a following
    # plt.savefig() writes a new, empty figure.
    fig.savefig('mnist_ae2.png')
    plt.show()
else:
    print('No images available to display.')


In [None]:
# Count the scalar parameters across all trainable variables in the graph.
per_variable_counts = []
for var in tf.trainable_variables():
    # get_shape() yields tf.Dimension objects; .value is the static size.
    n_params = 1
    for dim in var.get_shape():
        n_params *= dim.value
    per_variable_counts.append(n_params)

total_parameters = sum(per_variable_counts)
print(total_parameters)

In [19]:
# Write the current session's variables to a checkpoint.
saver = tf.train.Saver()
checkpoint_path = 'train808RemakeSentMoretrain2/train808RemakeSentMoretrain2.chkp'
saver.save(sess, checkpoint_path)

# To load the model again later:
#
# sess = tf.Session()
# saver = tf.train.Saver()
# saver.restore(sess, 'filename.chkp')

'train808RemakeSentMoretrain/train808RemakeSentMoretrain.chkp'

In [14]:
# sess.close()