In [34]:
from __future__ import division, print_function, absolute_import

import tensorflow as tf
import os
import numpy as np
import matplotlib.pyplot as plt

import sys
from six.moves import urllib
import tarfile

def tfconvert(image):
    #return tf.divide(tf.subtract(image, 127.5), 255.0)
    return tf.subtract(tf.divide(image, 127.5), 1)

def tfrevert(image):
#     return tf.add(tf.multiply(image, 255.0), 127.5)
    return tf.clip_by_value(tf.multiply(tf.add(image, 1), 127.5), 0, 255)


In [35]:
class Cifar10data:

    IMAGE_SIZE = 24

    # Global constants describing the CIFAR-10 data set.
    NUM_CLASSES = 10
    NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
    NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000

    DATA_URL = 'http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'
    datadir = '/tmp'

    def __init__(self, batch_size=10):
        self.batch_size = batch_size
        bindir = self.maybe_download_and_extract()
        self.dataset = self.distorted_inputs(bindir)
#         self.dataset = self.dataset.batch(batch_size)
        self.dataset = self.dataset.apply(tf.contrib.data.shuffle_and_repeat(buffer_size=1000))
        self.dataset = self.dataset.prefetch(buffer_size=batch_size)
        self.iterator = self.dataset.make_one_shot_iterator()
        self.next_batch = self.iterator.get_next()

        self.test_dataset = self.distorted_test_inputs(bindir)
#         self.test_dataset = self.test_dataset.batch(batch_size)
        self.test_dataset = self.test_dataset.apply(tf.contrib.data.shuffle_and_repeat(buffer_size=1000))
#         self.test_iterator = self.test_dataset.make_initializable_iterator()
        self.test_iterator = self.test_dataset.make_one_shot_iterator()
        self.test_next_batch = self.test_iterator.get_next()

    def next_train_batch(self, sess):
        train_images, train_labels = sess.run(self.next_batch)
        return train_images

    def next_test_batch(self, sess):
        test_images, test_labels = sess.run(self.test_next_batch)
        return test_images

    @staticmethod
    def maybe_download_and_extract(data_dir=datadir, DATA_URL=DATA_URL):
        """Download and extract the tarball from Alex's website."""
        dest_directory = data_dir
        if not os.path.exists(dest_directory):
            os.makedirs(dest_directory)
        filename = DATA_URL.split('/')[-1]
        filepath = os.path.join(dest_directory, filename)
        if not os.path.exists(filepath):
            def _progress(count, block_size, total_size):
                sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
                                                                 float(count * block_size) / float(total_size) * 100.0))
                sys.stdout.flush()
            filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
            print()
            statinfo = os.stat(filepath)
            print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
        extracted_dir_path = os.path.join(dest_directory, 'cifar-10-batches-bin')
        if not os.path.exists(extracted_dir_path):
            tarfile.open(filepath, 'r:gz').extractall(dest_directory)
        return extracted_dir_path

    def get_train_inputs(self, data_dir=datadir):

        """Construct distorted input for CIFAR training using the Reader ops.
        Returns:
          images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
          labels: Labels. 1D tensor of [batch_size] size.
        Raises:
          ValueError: If no data_dir
        """
        if not data_dir:
            raise ValueError('Please supply a data_dir')
        data_dir = os.path.join(data_dir, 'cifar-10-batches-bin')
        dataset = self.distorted_inputs(data_dir=data_dir)
        return dataset

    def get_test_inputs(self, data_dir=datadir):
        """Construct distorted input for CIFAR test using the Reader ops.
        Returns:
          images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
          labels: Labels. 1D tensor of [batch_size] size.
        Raises:
          ValueError: If no data_dir
        """
        if not data_dir:
            raise ValueError('Please supply a data_dir')
        data_dir = os.path.join(data_dir, 'cifar-10-batches-bin')
        dataset = self.distorted_test_inputs(data_dir=data_dir)
        return dataset



    def distorted_inputs(self, data_dir):
        """Construct distorted input for CIFAR training using the Reader ops.
        Args:
          data_dir: Path to the CIFAR-10 data directory.
          batch_size: Number of images per batch.
        Returns:
          images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
          labels: Labels. 1D tensor of [batch_size] size.
        """
        filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i)
                     for i in range(1, 6)]

        return self.__get_dataset(filenames, augmentation=True)

    def distorted_test_inputs(self, data_dir):
        """Construct distorted input for CIFAR training using the Reader ops.
        Args:
          data_dir: Path to the CIFAR-10 data directory.
          batch_size: Number of images per batch.
        Returns:
          images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
          labels: Labels. 1D tensor of [batch_size] size.
        """
        filenames = [os.path.join(data_dir, 'test_batch.bin')]

        return self.__get_dataset(filenames)


    def __get_dataset(self, filenames, augmentation=False):
        for f in filenames:
            if not tf.gfile.Exists(f):
                raise ValueError('Failed to find file: ' + f)

        # Dimensions of the images in the CIFAR-10 dataset.
        # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
        # input format.
        label_bytes = 1  # 2 for CIFAR-100
        height = 32
        width = 32
        depth = 3
        image_bytes = height * width * depth
        # Every record consists of a label followed by the image, with a
        # fixed number of bytes for each.
        record_bytes = label_bytes + image_bytes

        new_height = 28
        new_width = 28

        dataset = tf.data.FixedLengthRecordDataset(filenames, record_bytes=record_bytes)

        def transform(value):
            # Convert from a string to a vector of uint8 that is record_bytes long.
            record_bytes = tf.decode_raw(value, tf.uint8)

            # The first bytes represent the label, which we convert from uint8->int32.
            label = tf.strided_slice(record_bytes, [0], [label_bytes])
            label = tf.cast(label, tf.uint8)
            label = tf.reshape(label, shape=[])
            label = tf.one_hot(label, depth=10)

            # label = tf.one_hot(label, depth=NUM_CLASSES)

            # The remaining bytes after the label represent the image, which we reshape
            # from [depth * height * width] to [depth, height, width].
            image = tf.strided_slice(record_bytes, [label_bytes], [label_bytes + image_bytes])


            # Convert from [depth, height, width] to [height, width, depth].
            image = tf.reshape(image, [depth, height, width])
            image = tf.transpose(image, [1, 2, 0])

            ### resize image
            # image_file = tf.gfile.FastGFile(filenames[i], 'rb').read()
            # image = tf.image.decode_jpeg(image)
#             image = tf.image.rgb_to_grayscale(image)
#             image = tf.image.resize_images(image, [new_height, new_width])
            # image_data = tf.image.convert_image_dtype(image_data, dtype=tf.uint8)
            image = tf.cast(image, dtype=tf.float32)
            # image = tf.cast(image, dtype=tf.uint8)

            # if augmentation:
            #
            #     # Image processing for training the network. Note the many random
            #     # distortions applied to the image.
            #
            #     # Randomly crop a [height, width] section of the image.
            #     distorted_image = tf.random_crop(image, [height, width, 3])
            #
            #     # Randomly flip the image horizontally.
            #     distorted_image = tf.image.random_flip_left_right(distorted_image)
            #
            #     # Because these operations are not commutative, consider randomizing
            #     # the order their operation.
            #     # NOTE: since per_image_standardization zeros the mean and makes
            #     # the stddev unit, this likely has no effect see tensorflow#1458.
            #     distorted_image = tf.image.random_brightness(distorted_image,
            #                                                  max_delta=63)
            #     distorted_image = tf.image.random_contrast(distorted_image,
            #                                                lower=0.2, upper=1.8)
            #     image = distorted_image

#             image = tf.image.per_image_standardization(image)
            image = tfconvert(image)
            return image, label

#         dataset = dataset.map(map_func=transform, num_parallel_calls=8)
        dataset = dataset.apply(tf.contrib.data.map_and_batch(map_func=transform, batch_size=self.batch_size, num_parallel_batches=8))
                                
        return dataset

In [36]:
class InputData:
    def __init__(self):
        # Import MNIST data
        from tensorflow.examples.tutorials.mnist import input_data
        self.mnist = input_data.read_data_sets("/tmp/data/", one_hot=True, source_url='http://yann.lecun.com/exdb/mnist/')

        self.Input_shape = [-1, 28, 28, 1]
        self.X = tf.placeholder(tf.float32, [None, 28, 28, 1])

    def next_train_batch(self, n=32):
        next, _ = self.mnist.train.next_batch(n)
        return np.reshape(next, (-1, 28, 28, 1))

    def next_test_batch(self, n=32):
        next, _ = self.mnist.train.next_batch(n)
        return np.reshape(next, (-1, 28, 28, 1))

In [37]:
# """ Auto Encoder Example.
# Build a 2 layers auto-encoder with TensorFlow to compress images to a
# lower latent space and then reconstruct them.
# References:
#     Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. "Gradient-based
#     learning applied to document recognition." Proceedings of the IEEE,
#     86(11):2278-2324, November 1998.
# Links:
#     [MNIST Dataset] http://yann.lecun.com/exdb/mnist/
# Author: Aymeric Damien
# Project: https://github.com/aymericdamien/TensorFlow-Examples/
# """

# print('start')


# TENSORBOARD_PATH = '/tmp/tensorboard/log'
# # Training Parameters
# learning_rate = 0.01
# num_steps = 1
# batch_size = 32

# display_step = 1000
# examples_to_show = 10

# # Network Parameters
# num_hidden_1 = 256 # 1st layer num features
# num_hidden_2 = 128 # 2nd layer num features (the latent dim)
# num_input = 784 # MNIST data input (img shape: 28*28)

# tf.reset_default_graph()
# # tf Graph input (only pictures)

# mode = tf.placeholder(tf.bool)

# weights = {
#     'encoder_h1': tf.Variable(tf.random_normal([num_input, num_hidden_1])),
#     'encoder_h2': tf.Variable(tf.random_normal([num_hidden_1, num_hidden_2])),
#     'decoder_h1': tf.Variable(tf.random_normal([num_hidden_2, num_hidden_1])),
#     'decoder_h2': tf.Variable(tf.random_normal([num_hidden_1, num_input])),
# }
# biases = {
#     'encoder_b1': tf.Variable(tf.random_normal([num_hidden_1])),
#     'encoder_b2': tf.Variable(tf.random_normal([num_hidden_2])),
#     'decoder_b1': tf.Variable(tf.random_normal([num_hidden_1])),
#     'decoder_b2': tf.Variable(tf.random_normal([num_input])),
# }

# def max_unpool_2x2(x, output_shape):
#     out = tf.concat([x, tf.zeros_like(x)], 3)
#     out = tf.concat([out, tf.zeros_like(out)], 2)
#     out_size = output_shape
#     return tf.reshape(out, out_size)

# def max_pool_2x2(x):
#     _, argmax = tf.nn.max_pool_with_argmax(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding = 'SAME')
#     pool = tf.nn.max_pool(x, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'SAME')
#     return pool, argmax

# # Building the encoder
# def encoder(x):
#     # Encoder Hidden layer with sigmoid activation #1
#     layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['encoder_h1']),
#                                    biases['encoder_b1']))
#     # Encoder Hidden layer with sigmoid activation #2
#     layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['encoder_h2']),
#                                    biases['encoder_b2']))
#     return layer_2


# # Building the decoder
# def decoder(x):
#     # Decoder Hidden layer with sigmoid activation #1
#     layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['decoder_h1']),
#                                    biases['decoder_b1']))
#     # Decoder Hidden layer with sigmoid activation #2
#     layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['decoder_h2']),
#                                    biases['decoder_b2']))
#     return layer_2

# # Building the encoder
# def encoder_cnn(layer):
#     # Encoder Hidden layer with sigmoid activation #1
#     layer = tf.layers.conv2d(inputs=layer, filters=64, kernel_size=[3, 3], strides=[1, 1], padding='same', activation=tf.nn.relu, kernel_regularizer=None, name='conv1')
#     layer = tf.layers.max_pooling2d(inputs=layer, pool_size=[2, 2], strides=[2, 2], padding='same', name = 'max_pool')
#     layer = tf.layers.conv2d(inputs=layer, filters=32, kernel_size=[3, 3], strides=[1, 1], padding='same', activation=tf.nn.relu, kernel_regularizer=None, name='conv2')
#     layer = tf.layers.max_pooling2d(inputs=layer, pool_size=[2, 2], strides=[2, 2], padding='same', name = 'max_pool')
#     layer = tf.layers.flatten(inputs=layer, name='flatten_c')
#     layer = tf.layers.dense(inputs=layer, activation=tf.nn.relu, units=10, name='fc_e1')

#     # layer = tf.layers.batch_normalization(inputs=x, training=mode)
#     return layer


# # Building the decoder
# def decoder_cnn(layer):
#     layer = tf.layers.dense(inputs=layer, activation=tf.nn.sigmoid, units=49, name='fc_d1')
#     layer = tf.reshape(tensor=layer, shape=[-1, 7, 7, 1])
#     layer = tf.image.resize_nearest_neighbor(images=layer, size=[layer.shape[1] * 2, layer.shape[2] * 2])
#     layer = tf.layers.conv2d_transpose(inputs=layer, filters=32, kernel_size=[3, 3], strides=[1, 1], padding='same', activation=tf.nn.sigmoid, name='conv_trans1')
#     layer = tf.image.resize_nearest_neighbor(images=layer, size=[layer.shape[1] * 2, layer.shape[2] * 2])
#     layer = tf.layers.conv2d_transpose(inputs=layer, filters=64, kernel_size=[3, 3], strides=[1, 1], padding='same', activation=tf.nn.sigmoid, name='conv_trans2')
#     layer = tf.layers.flatten(inputs=layer, name='flatten_d')
#     layer = tf.layers.dense(inputs=layer, activation=tf.nn.sigmoid, units=784, name='fc_d2')
#     layer = tf.reshape(tensor=layer, shape=Input_shape)
#     return layer

# input = InputData()
# Input_shape = input.Input_shape

# # Construct model
# encoder_op = encoder_cnn(input.X)
# decoder_op = decoder_cnn(encoder_op)

# # Prediction
# y_pred = decoder_op
# # Targets (Labels) are the input data.
# y_true = input.X

# # Define loss and optimizer, minimize the squared error
# loss = tf.losses.mean_squared_error(y_pred, y_true)
# optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)

# # Initialize the variables (i.e. assign their default value)
# init = tf.global_variables_initializer()

# # Start Training
# # Start a new TF session


# with tf.Session() as sess:

#     if tf.gfile.Exists(TENSORBOARD_PATH):
#         tf.gfile.DeleteRecursively(TENSORBOARD_PATH)
#     tf.gfile.MakeDirs(TENSORBOARD_PATH)

#     summary_writer = tf.summary.FileWriter(TENSORBOARD_PATH, sess.graph)

#     # Run the initializer
#     sess.run(init)

#     # Training
#     for i in range(1, num_steps+1):
#         # Prepare Data
#         # Get the next batch of MNIST data (only images are needed, not labels)
#         batch_x = input.next_train_batch(batch_size)

#         # Run optimization op (backprop) and cost op (to get loss value)
#         _, l = sess.run([optimizer, loss], feed_dict={input.X: batch_x})
#         # Display logs per step
#         if i % display_step == 0 or i == 1:
#             print('Step %i: Minibatch Loss: %f' % (i, l))

#         loss_summary = tf.Summary()
#         loss_summary.value.add(tag='loss', simple_value = l)
#         summary_writer.add_summary(loss_summary, global_step=i)

#     # Testing
#     # Encode and decode images from test set and visualize their reconstruction.
#     n = 4
#     input_cifar = Cifar10data(n)
#     # MNIST test set
#     batch_x = input.next_test_batch(n)
#     # Encode and decode the digit image
#     recontruct = sess.run(decoder_op, feed_dict={input.X: batch_x})
    
#     tf.summary.image('mnist_original', batch_x, collections=['image_mnist'])
#     tf.summary.image('mnist_reconstruct', recontruct, collections=['image_mnist'])          
#     merge = tf.summary.merge_all(key='image_mnist')
    
#     summary = sess.run(merge)
#     summary_writer.add_summary(summary)    

#     # cifar test set
#     cifar_x = input_cifar.next_train_batch(sess)
#     # Encode and decode the digit image
#     reconstruct_cifar = sess.run(decoder_op, feed_dict={input.X: cifar_x})    

#     tf.summary.image('cifar_original', cifar_x, collections=['image_cifar'])    
#     tf.summary.image('cifar_reconstruct', reconstruct_cifar, collections=['image_cifar'])       
#     merge_cifar = tf.summary.merge_all(key='image_cifar')
     
#     summary_cifar = sess.run(merge_cifar)
#     summary_writer.add_summary(summary_cifar)
    
#     summary_writer.flush()
# print('end')

# Training

In [38]:
import sys

#git clone
#https://github.com/tensorflow/compression

sys.path.append('/home/ubuntu/github/compression')


In [39]:
def forward_v1(layer):    
    #shape (-1, w, h, c)
    with tf.variable_scope("forward_v1", reuse=tf.AUTO_REUSE):
        layer = tf.layers.conv2d(inputs=layer, filters=64, kernel_size=[3, 3], strides=[1, 1], padding='same', activation=tf.nn.relu, kernel_regularizer=None, name='conv1')
        layer = tf.layers.max_pooling2d(inputs=layer, pool_size=[2, 2], strides=[2, 2], padding='same', name = 'max_pool1')
        layer = tf.layers.conv2d(inputs=layer, filters=32, kernel_size=[3, 3], strides=[1, 1], padding='same', activation=tf.nn.relu, kernel_regularizer=None, name='conv2')
        layer = tf.layers.max_pooling2d(inputs=layer, pool_size=[2, 2], strides=[2, 2], padding='same', name = 'max_pool2')
        #shape (-1, w/4, h/4, 32)    
        layer = tf.layers.conv2d(inputs=layer, filters=3, kernel_size=[3, 3], strides=[1, 1], padding='valid', activation=None, kernel_regularizer=None, name='conv3')
        #shape (-1, w/4 - 2, h/4 -2, 3)
    #    layer = tf.layers.flatten(inputs=layer, name='flatten_c')
    #    layer = tf.layers.dense(inputs=layer, activation=tf.nn.relu, units=10, name='fc_e1')
    return layer

def backward_v1(layer):
#     layer = tf.layers.dense(inputs=layer, activation=tf.nn.sigmoid, units=49, name='fc_d1')
#     layer = tf.reshape(tensor=layer, shape=[-1, 7, 7, 1])
    with tf.variable_scope("backward_v1", reuse=tf.AUTO_REUSE):
        layer = tf.layers.conv2d_transpose(inputs=layer, filters=32, kernel_size=[3, 3], strides=[1, 1], padding='valid', activation=None, name='conv_trans3')
        layer = tf.image.resize_nearest_neighbor(images=layer, size=[tf.shape(layer)[1] * 2, tf.shape(layer)[2] * 2])
        layer = tf.layers.conv2d_transpose(inputs=layer, filters=64, kernel_size=[3, 3], strides=[1, 1], padding='same', activation=tf.nn.sigmoid, name='conv_trans1')
        layer = tf.image.resize_nearest_neighbor(images=layer, size=[tf.shape(layer)[1] * 2, tf.shape(layer)[2] * 2])
        layer = tf.layers.conv2d_transpose(inputs=layer, filters=1, kernel_size=[3, 3], strides=[1, 1], padding='same', activation=tf.nn.sigmoid, name='conv_trans2')
    #     layer = tf.layers.flatten(inputs=layer, name='flatten_d')
    #     layer = tf.layers.dense(inputs=layer, activation=tf.nn.sigmoid, units=784, name='fc_d2')
        layer = tf.reshape(tensor=layer, shape=Input_shape)
    return layer


In [51]:
def forward_v2(layer):
    #shape (-1, w, h, c)
    with tf.variable_scope("forward_v2", reuse=tf.AUTO_REUSE):
        layer = tf.layers.conv2d(inputs=layer, filters=64, kernel_size=[5, 5], strides=[2, 2], padding='same', activation=tf.nn.relu, kernel_regularizer=None, name='conv1')
#         layer = tf.layers.max_pooling2d(inputs=layer, pool_size=[2, 2], strides=[2, 2], padding='same', name = 'max_pool1')
        layer = tf.contrib.layers.gdn(layer, name='gdn1')

        layer = tf.layers.conv2d(inputs=layer, filters=64, kernel_size=[5, 5], strides=[2, 2], padding='same', activation=tf.nn.relu, kernel_regularizer=None, name='conv2')
#         layer = tf.layers.max_pooling2d(inputs=layer, pool_size=[2, 2], strides=[2, 2], padding='same', name = 'max_pool2')
        layer = tf.contrib.layers.gdn(layer, name='gdn2')

#         layer = tf.layers.conv2d(inputs=layer, filters=64, kernel_size=[3, 3], strides=[1, 1], padding='same', activation=tf.nn.relu, kernel_regularizer=None, name='conv3')
        # layer = tf.layers.max_pooling2d(inputs=layer, pool_size=[2, 2], strides=[2, 2], padding='same', name = 'max_pool2')
#         layer = tf.contrib.layers.gdn(layer, name='gdn3')
        
        layer = tf.layers.conv2d(inputs=layer, filters=3, kernel_size=[5, 5], strides=[2, 2], padding='same', activation=tf.identity, kernel_regularizer=None, name='conv4')
        # [-1, 7, 7, 5]
        # [-1, 8, 8, 5]
    return layer

def backward_v2(layer):
    with tf.variable_scope("backward_v2", reuse=tf.AUTO_REUSE):
        layer = tf.layers.conv2d_transpose(inputs=layer, filters=64, kernel_size=[5, 5], strides=[2, 2], padding='same', activation=tf.identity, kernel_regularizer=None, name='conv4_transpose')

#         layer = tf.contrib.layers.gdn(layer, inverse=True, name='igdn3')
        # layer = tf.image.resize_nearest_neighbor(images=layer, size=[tf.shape(layer)[1] * 2, tf.shape(layer)[2] * 2])
#         layer = tf.layers.conv2d_transpose(inputs=layer, filters=64, kernel_size=[3, 3], strides=[1, 1], padding='same', activation=tf.nn.sigmoid, kernel_regularizer=None, name='conv3_transpose')
        
        layer = tf.contrib.layers.gdn(layer, inverse=True, name='igdn2')
#         layer = tf.image.resize_nearest_neighbor(images=layer, size=[tf.shape(layer)[1] * 2, tf.shape(layer)[2] * 2])
        layer = tf.layers.conv2d_transpose(inputs=layer, filters=64, kernel_size=[5, 5], strides=[2, 2], padding='same', activation=tf.nn.sigmoid, kernel_regularizer=None, name='conv2_transpose')

#         layer = tf.layers.conv2d_transpose(inputs=layer, filters=20, kernel_size=[3, 3], strides=[1, 1], padding='same', activation=tf.nn.sigmoid, kernel_regularizer=None, name='conv1_5_transpose')
        
        layer = tf.contrib.layers.gdn(layer, inverse=True, name='igdn1')
#         layer = tf.image.resize_nearest_neighbor(images=layer, size=[tf.shape(layer)[1] * 2, tf.shape(layer)[2] * 2])
        layer = tf.layers.conv2d_transpose(inputs=layer, filters=3, kernel_size=[5, 5], strides=[2, 2], padding='same', activation=tf.identity, kernel_regularizer=None, name='conv1_transpose')
#         layer = tf.reshape(tensor=layer, shape=Input_shape)

    return layer


In [52]:
def forward_v3(layer):
    #shape (-1, w, h, c)
    with tf.variable_scope("forward_v3", reuse=tf.AUTO_REUSE):
        layer = tf.layers.conv2d(inputs=layer, filters=16, kernel_size=[5, 5], strides=[1, 1], padding='same', activation=tf.nn.relu, kernel_regularizer=None, name='conv1')
        layer = tf.layers.max_pooling2d(inputs=layer, pool_size=[2, 2], strides=[2, 2], padding='same', name = 'max_pool1')
        layer = tf.contrib.layers.gdn(layer, name='gdn1')

        layer = tf.layers.conv2d(inputs=layer, filters=32, kernel_size=[5, 5], strides=[1, 1], padding='same', activation=tf.nn.relu, kernel_regularizer=None, name='conv2')
        layer = tf.layers.max_pooling2d(inputs=layer, pool_size=[2, 2], strides=[2, 2], padding='same', name = 'max_pool2')
        layer = tf.contrib.layers.gdn(layer, name='gdn2')

        layer = tf.layers.conv2d(inputs=layer, filters=5, kernel_size=[3, 3], strides=[1, 1], padding='same', activation=None, kernel_regularizer=None, name='conv3')
        # [-1, 7, 7, 5]
        # [-1, 8, 8, 5]
    return layer

def backward_v3(layer):
    with tf.variable_scope("backward_v3", reuse=tf.AUTO_REUSE):
        layer = tf.layers.conv2d_transpose(inputs=layer, filters=32, kernel_size=[3, 3], strides=[1, 1], padding='same', activation=None, kernel_regularizer=None, name='conv3_transpose')

        layer = tf.contrib.layers.gdn(layer, inverse=True, name='igdn2')
        layer = tf.image.resize_nearest_neighbor(images=layer, size=[tf.shape(layer)[1] * 2, tf.shape(layer)[2] * 2])
        layer = tf.layers.conv2d_transpose(inputs=layer, filters=16, kernel_size=[5, 5], strides=[1, 1], padding='same', activation=tf.nn.sigmoid, kernel_regularizer=None, name='conv2_transpose')

        layer = tf.contrib.layers.gdn(layer, inverse=True, name='igdn1')
        layer = tf.image.resize_nearest_neighbor(images=layer, size=[tf.shape(layer)[1] * 2, tf.shape(layer)[2] * 2])
        layer = tf.layers.conv2d_transpose(inputs=layer, filters=3, kernel_size=[5, 5], strides=[1, 1], padding='same', activation=None, kernel_regularizer=None, name='conv1_transpose')
#         layer = tf.reshape(tensor=layer, shape=Input_shape)

    return layer

In [53]:

import compression.python.layers.entropybottleneck
from compression.python.layers.entropybottleneck import EntropyBottleneck

tf.reset_default_graph()

global_step = tf.train.get_or_create_global_step()

def forward_transform(layer):    
    return forward_v2(layer)

def backward_transform(layer):
    return backward_v2(layer)

Input_shape = [-1, 32, 32, 3]

# Build autoencoder.
x = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])
y = forward_transform(x)

# with tf.variable_scope("entropy_bottleneck", reuse=tf.AUTO_REUSE):
entropy_bottleneck = EntropyBottleneck()
y_, likelihoods = entropy_bottleneck(y, training=True)

# y_ = y   
x_ = backward_transform(y_)

# print(likelihoods)
# Information content (= predicted codelength) in bits of each batch element
# (note that taking the natural logarithm and dividing by `log(2)` is
# equivalent to taking base-2 logarithms):
bits = tf.reduce_sum(tf.log(likelihoods), axis=(1, 2, 3)) / -np.log(2)

# Squared difference of each batch element:
squared_error = tf.reduce_sum(tf.squared_difference(x, x_), axis=(1, 2, 3))

learning_rate_decay = tf.train.exponential_decay(1e-4, global_step=0, decay_steps=5000, decay_rate=0.95, staircase=True, name='exponential_decay_learning_rate')
learning_rate_summary_op = tf.summary.scalar('learning rate decay', learning_rate_decay, collections=['learning_rate'])

# The loss is a weighted sum of mean squared error and entropy (average
# information content), where the weight controls the trade-off between
# approximation error and entropy.
# main_loss = 0.5 * tf.reduce_mean(squared_error) + tf.reduce_mean(bits)
main_loss = tf.reduce_mean(squared_error) + 0.1 * tf.reduce_mean(bits)
# main_loss = tf.reduce_mean(squared_error)
# main_loss = tf.losses.mean_squared_error(x, x_)

# Training operations
# decay_learning_rate = tf.train.exponential_decay(learning_rate=1e-4, 
#                                            global_step=global_step, 
#                                            decay_steps=100), 
#                                            decay_rate=0.95, 
#                                            staircase=True)
    
# Minimize loss and auxiliary loss, and execute update op.
main_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate_decay)
main_step = main_optimizer.minimize(main_loss, global_step=global_step)
# 1e-3 is a good starting point for the learning rate of the auxiliary loss,
# assuming Adam is used.
aux_optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
aux_step = aux_optimizer.minimize(entropy_bottleneck.losses[0], global_step=global_step)
group_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])

INFO:tensorflow:Summary name learning rate decay is illegal; using learning_rate_decay instead.


In [59]:
from time import gmtime, strftime
SAVE_PATH = '/tempssd/mnist_autoencoder/save/'

timestring = strftime("%Y_%b_%d_%H_%M_%S", gmtime())
default_dir = os.path.join(SAVE_PATH, timestring)
summmary_path = os.path.join(default_dir, 'summary')
if not (os.path.isdir(summmary_path)):
    os.makedirs(summmary_path)

model_path = os.path.join(default_dir, 'model')
if not (os.path.isdir(model_path)):
    os.makedirs(model_path)

best_test_accuracy_dir = os.path.join(model_path, "best_test_accuracy")
if not (os.path.isdir(best_test_accuracy_dir)):
    os.makedirs(best_test_accuracy_dir)
    
best_test_accuracy_model_name = os.path.join(best_test_accuracy_dir, "step")


# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

# Start Training
# Start a new TF session

num_steps = 60000
TENSORBOARD_PATH = '/tmp/tensorboard/log'


batch_size = 32
display_step = 1000

input = InputData()
input_cifar = Cifar10data(batch_size)


with tf.Session() as sess:

    summary_writer = tf.summary.FileWriter(summmary_path, sess.graph)
        

    # Run the initializer
    sess.run(init)
    
    best_test_accuracy = 10000

    # Training
    for i in range(1, num_steps+1):
        # Prepare Data
        # Get the next batch of MNIST data (only images are needed, not labels)
        batch_x = input_cifar.next_train_batch(sess)

        # Run optimization op (backprop) and cost op (to get loss value)
        _, ml, bnl, learning_rate_summary = sess.run([group_op, main_loss, entropy_bottleneck.losses[0], learning_rate_summary_op], feed_dict={x: batch_x})
#         _, ml = sess.run([main_step, main_loss], feed_dict={x: batch_x})
        # Display logs per step
        if i % display_step == 0 or i == 1:
            print('Step %i: Minibatch main loss: %f, bottleneck loss: %f' % (i, ml, bnl))
#             print('Step %i: Minibatch main loss: %f' % (i, ml))

        step = global_step.eval()

        summary_writer.add_summary(learning_rate_summary, global_step=step)
    
        main_loss_summary = tf.Summary()
        main_loss_summary.value.add(tag='main loss', simple_value = ml)
        summary_writer.add_summary(main_loss_summary, global_step=step)
        
        bottleneck_loss_summary = tf.Summary()
        bottleneck_loss_summary.value.add(tag='bottleneck loss', simple_value = bnl)
        summary_writer.add_summary(bottleneck_loss_summary, global_step=step)
        
        if best_test_accuracy > ml:
            best_test_accuracy = ml
            best_test_step = step
            tf.train.Saver().save(sess, save_path=best_test_accuracy_model_name, global_step=step)
    
    # MNIST test set
    
    # Encode and decode the digit image
    #x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    #y = forward_transform(x)

    strings = entropy_bottleneck.compress(y)
# #     tf.cast(strings, tf.string)
    
    
    shape = tf.shape(y)[1:]
    print("shape {}".format(shape))
    
# #     tf.cast(shape, tf.int32)    

# #     with tf.variable_scope("entropy_bottleneck", reuse=tf.AUTO_REUSE):
# #         entropy_bottleneck_evak = EntropyBottleneck()(y, training=False)
# #         y_, likelihoods = entropy_bottleneck(y, training=True)

#     ry_ = entropy_bottleneck.decompress(strings, shape, channels=3)   
#     rx_ = backward_transform(ry_)
    
    ry_, likelihoods = entropy_bottleneck(y, training=False)
#     ry_ = y_
    rx_ = backward_transform(ry_)
#     rx_ = x_

    
#     n = 3
# #     input_cifar = Cifar10data(n)
#     batch_x = input.next_test_batch(n)
    
#     recontruct= sess.run(rx_, feed_dict={x: batch_x})

#     tf.summary.image('mnist_original', batch_x, collections=['image_mnist'])
#     tf.summary.image('mnist_reconstruct', recontruct, collections=['image_mnist'])          
#     merge = tf.summary.merge_all(key='image_mnist')

#     summary = sess.run(merge)
#     summary_writer.add_summary(summary) 

    # cifar test set
    cifar_x = input_cifar.next_test_batch(sess)
    # Encode and decode the digit image
    reconstruct_cifar, string_value = sess.run([rx_, strings], feed_dict={x: cifar_x})    
    
    string_value = [len(s) for s in string_value]
    print("string length: {} * {}".format(string_value, len(string_value)))
    
    tf.summary.image('cifar_original', tfrevert(cifar_x), collections=['image_cifar'])    
    tf.summary.image('cifar_reconstruct', tfrevert(reconstruct_cifar), collections=['image_cifar'])       
    merge_cifar = tf.summary.merge_all(key='image_cifar')

    summary_cifar = sess.run(merge_cifar)
    summary_writer.add_summary(summary_cifar)
    

    summary_writer.flush()    
        
    if not (os.path.isdir(os.path.dirname(TENSORBOARD_PATH))):
        os.makedirs(os.path.dirname(TENSORBOARD_PATH))
    
    if os.path.exists(TENSORBOARD_PATH):
        os.unlink(TENSORBOARD_PATH)  
    
    os.symlink(summmary_path, TENSORBOARD_PATH)

print("model: {} \nsummary: {}".format(best_test_accuracy_dir, summmary_path))

print('end')

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Step 1: Minibatch main loss: 1389.603516, bottleneck loss: 123.001389
Step 1000: Minibatch main loss: 166.540878, bottleneck loss: 117.652985
Step 2000: Minibatch main loss: 165.353180, bottleneck loss: 111.154053
Step 3000: Minibatch main loss: 150.643997, bottleneck loss: 104.424881
Step 4000: Minibatch main loss: 162.383820, bottleneck loss: 98.555153
Step 5000: Minibatch main loss: 164.846512, bottleneck loss: 93.745583
Step 6000: Minibatch main loss: 156.305237, bottleneck loss: 89.812317
Step 7000: Minibatch main loss: 144.610489, bottleneck loss: 85.625969
Step 8000: Minibatch main loss: 145.794388, bottleneck loss: 81.347725
Step 9000: Minibatch main loss: 151.148438, bottleneck loss: 76.860649
Step 10000: Minibatch main loss: 158.819809, bottleneck loss: 72.130005
Step 11000: Minibatch main