In [1]:
#!wget --directory-prefix=downloads http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/stuffthingmaps_trainval2017.zip
#!mkdir -p dataset/annotations
#!unzip downloads/stuffthingmaps_trainval2017.zip -d dataset/annotations/

In [2]:
import tensorflow as tf
import numpy as np
import logging
import time
import os

logging.basicConfig(level=logging.DEBUG)

In [3]:
import zipfile
import sys
from six.moves import urllib

URL = 'http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/stuffthingmaps_trainval2017.zip'
src_path = '/tmp/dataset/annotations'


def list_files(src_path):
    name = []
    for filename in os.listdir(src_path):
        path = os.path.join(src_path, filename)
        name.append(path)
    print(name)
    return name


def tfconvert(image):
    #return tf.divide(tf.subtract(image, 127.5), 255.0)
    return tf.subtract(tf.divide(image, 127.5), 1)


def tfrevert(image):
    #     return tf.add(tf.multiply(image, 255.0), 127.5)
    return tf.clip_by_value(tf.multiply(tf.add(image, 1), 127.5), 0, 255)


class CocoAnnotationData:
    def __init__(self, src_path):
        self.src_path = src_path
        self.maybe_download_and_extract(src_path)
        pass

    @staticmethod
    def maybe_download_and_extract(data_dir=src_path, DATA_URL=URL):
        """Download and extract the tarball from Alex's website."""
        dest_directory = data_dir
        if not os.path.exists(dest_directory):
            os.makedirs(dest_directory)
        filename = DATA_URL.split('/')[-1]
        filepath = os.path.join(dest_directory, filename)
        if not os.path.exists(filepath):
            def _progress(count, block_size, total_size):
                sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
                                                                 float(count * block_size) / float(total_size) * 100.0))
                sys.stdout.flush()
            filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
            print()
            statinfo = os.stat(filepath)
            print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
        extracted_dir_path = os.path.join(dest_directory, 'cifar-10-batches-bin')
        if not os.path.exists(extracted_dir_path):
            zipfile.ZipFile(filepath).extractall(dest_directory)
        return extracted_dir_path

    def __call__(self, sub_set_path, batch_size=32, buffle_size=1000, shape=[-1, 256, 256, 3]):
        self.shape = shape
        src_path = os.path.join(self.src_path, sub_set_path)
        filenames = list_files(src_path)
        dataset = self._get_dataset(filenames, batch_size, shape)
        # dataset = dataset.apply(tf.contrib.data.shuffle_and_repeat(buffer_size=buffle_size))
        dataset = dataset.prefetch(buffer_size=batch_size)
        return dataset

    def _get_dataset(self, filenames, batch_size, shape, augmentation=False):
        for f in filenames:
            if not tf.gfile.Exists(f):
                raise ValueError('Failed to find file: ' + f)

        # input format.
        height = shape[1]
        width = shape[2]
        depth = shape[3]

        dataset = tf.data.Dataset.from_tensor_slices(filenames)

        def transform(filename):
            image_string = tf.read_file(filename)
            image = tf.image.decode_png(image_string, channels=3)
            #image = tf.image.convert_image_dtype(image, dtype=tf.uint8)
            image = tf.cast(image, dtype=tf.float32)

            image = tf.image.resize_images(image, [height, width], method=tf.image.ResizeMethod.BILINEAR, align_corners=True)
            image = tf.reshape(image, [height, width, depth])
            # if augmentation:
            #
            #     # Image processing for training the network. Note the many random
            #     # distortions applied to the image.
            #
            #     # Randomly crop a [height, width] section of the image.
            #     distorted_image = tf.random_crop(image, [height, width, 3])
            #
            #     # Randomly flip the image horizontally.
            #     distorted_image = tf.image.random_flip_left_right(distorted_image)
            #
            #     # Because these operations are not commutative, consider randomizing
            #     # the order their operation.
            #     # NOTE: since per_image_standardization zeros the mean and makes
            #     # the stddev unit, this likely has no effect see tensorflow#1458.
            #     distorted_image = tf.image.random_brightness(distorted_image,
            #                                                  max_delta=63)
            #     distorted_image = tf.image.random_contrast(distorted_image,
            #                                                lower=0.2, upper=1.8)
            #     image = distorted_image

            #             image = tf.image.per_image_standardization(image)
            image = tfconvert(image)
            return image

        #         dataset = dataset.map(map_func=transform, num_parallel_calls=8)
        dataset = dataset.apply(tf.contrib.data.map_and_batch(map_func=transform, batch_size=batch_size, num_parallel_batches=8))

        return dataset

In [4]:
class DataGenerator:
    def __init__(self, dir, batch_size, shape):
        self.coco = CocoAnnotationData(dir)
        self.batch_size = batch_size        
        self.shape = shape

    def __call__(self, sub_set_path):
        dataset = self.coco(sub_set_path, self.batch_size, self.shape)
        iterator = dataset.make_one_shot_iterator()
        next_batch = iterator.get_next()        
        return next_batch


In [5]:
class Net:
    def __init__(self, shape):
        self.name = 'net'
        self.layer_index = 0
        self.x_shape = shape

    @property
    def next_layer_name(self):
        self.layer_index += 1
        return "{}_layer_{}".format(self.name, self.layer_index)

    @property
    def vars(self):
        return [var for var in tf.global_variables() if self.name in var.name]


class Discriminator(Net):
    def __init__(self, shape):
        super().__init__(shape)
        self.name = 'd_net'

    def __call__(self, x):
        with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE) as vs:
            layer = tf.reshape(tensor=x, shape=self.x_shape)
            layer = tf.layers.conv2d(inputs=layer, filters=64, kernel_size=[5, 5], strides=[2, 2], padding='same', activation=tf.nn.sigmoid, kernel_regularizer=None, name=self.next_layer_name)
#             layer = tf.layers.max_pooling2d(inputs=layer, pool_size=[2, 2], strides=[2, 2], padding='same', name=self.next_layer_name)
            layer = tf.layers.conv2d(inputs=layer, filters=32, kernel_size=[5, 5], strides=[2, 2], padding='same', activation=tf.nn.sigmoid, kernel_regularizer=None, name=self.next_layer_name)
#             layer = tf.layers.max_pooling2d(inputs=layer, pool_size=[2, 2], strides=[2, 2], padding='same', name=self.next_layer_name)
            layer = tf.layers.conv2d(inputs=layer, filters=16, kernel_size=[3, 3], strides=[2, 2], padding='same', activation=tf.nn.sigmoid, kernel_regularizer=None, name=self.next_layer_name)
            layer = tf.layers.conv2d(inputs=layer, filters=8, kernel_size=[3, 3], strides=[2, 2], padding='same', activation=tf.nn.sigmoid, kernel_regularizer=None, name=self.next_layer_name)
            layer = tf.layers.conv2d(inputs=layer, filters=1, kernel_size=[3, 3], strides=[2, 2], padding='same', activation=tf.nn.sigmoid, kernel_regularizer=None, name=self.next_layer_name)
#             layer = tf.layers.flatten(inputs=layer, name=self.next_layer_name)
#             layer = tf.layers.dense(inputs=layer, activation=tf.nn.relu, units=10, name=self.next_layer_name)
        return layer


class Generator(Net):
    def __init__(self, shape, z_shape):
        super().__init__(shape)
        self.z_shape = z_shape        
        self.name = 'g_net'

    def __call__(self, z, batch_size):
        with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE) as vs:
            #z_shape=[batch_size, self.z_shape[1]]
            #x_shape=[batch_size, self.x_shape[1], self.x_shape[2], self.x_shape[3]]
            layer = tf.reshape(tensor=z, shape=self.z_shape)
            layer = tf.layers.dense(inputs=layer, activation=tf.nn.sigmoid, units=32 * 32 * 3, name=self.next_layer_name)
            layer = tf.reshape(tensor=layer, shape=[-1, 32, 32, 3])
            layer = tf.image.resize_nearest_neighbor(images=layer, size=[layer.shape[1] * 2, layer.shape[2] * 2])
            layer = tf.layers.conv2d_transpose(inputs=layer, filters=32, kernel_size=[3, 3], strides=[1, 1],
                                               padding='same', activation=tf.nn.sigmoid, name=self.next_layer_name)
            layer = tf.image.resize_nearest_neighbor(images=layer, size=[layer.shape[1] * 2, layer.shape[2] * 2])
            layer = tf.layers.conv2d_transpose(inputs=layer, filters=64, kernel_size=[3, 3], strides=[1, 1],
                                               padding='same', activation=tf.nn.sigmoid, name=self.next_layer_name)
            layer = tf.image.resize_nearest_neighbor(images=layer, size=[layer.shape[1] * 2, layer.shape[2] * 2])
            layer = tf.layers.conv2d_transpose(inputs=layer, filters=3, kernel_size=[3, 3], strides=[1, 1],
                                               padding='same', activation=tf.nn.sigmoid, name=self.next_layer_name)
            #             layer = tf.layers.flatten(inputs=layer, name='flatten_d')
            #             layer = tf.layers.dense(inputs=layer, activation=tf.nn.sigmoid, units=784, name='fc_d2')
            #layer = tf.reshape(tensor=layer, shape=self.x_shape)
            layer = tf.reshape(tensor=layer, shape=self.x_shape)
        return layer

In [6]:
tf.reset_default_graph()

X_SHAPE = [-1, 256, 256, 3]
Z_SHAPE = [-1, 100]
BATCH_SIZE = 16

netD = Discriminator(X_SHAPE)
netG = Generator(X_SHAPE, Z_SHAPE)
global_step = tf.train.get_or_create_global_step()

z = tf.placeholder(tf.float32, [None, Z_SHAPE[1]], name='z')

real_images = tf.placeholder(tf.float32, [None, X_SHAPE[1], X_SHAPE[2], X_SHAPE[3]], name='x')

# generated images
gen_images = netG(z, BATCH_SIZE)

# get the output from D on the real and fake data
errD_real = netD(real_images)
errD_fake = netD(gen_images)

# cost functions
errD = tf.reduce_mean(errD_real) - tf.reduce_mean(errD_fake)
errG = -tf.reduce_mean(errD_fake)

# gradient penalty
epsilon = tf.random_uniform([], 0.0, 1.0)
#sampling x_hat
x_hat = real_images*epsilon + (1-epsilon) * gen_images
d_hat = netD(x_hat)
gradients = tf.gradients(d_hat, x_hat)[0]
slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
lambda_weight = 10 # suggested in paper
gradient_penalty = lambda_weight * tf.reduce_mean((slopes - 1.0) ** 2)
errD += gradient_penalty

# tensorboard summaries
# tf.summary.scalar('d_loss', errD, collections=['train_loss'])
# tf.summary.scalar('g_loss', errG, collections=['train_loss'])
# merged_summary_op = tf.summary.merge_all(key='train_loss')

with tf.variable_scope('Optimizer', reuse=tf.AUTO_REUSE):
    # optimize G
    G_train_op = tf.train.AdamOptimizer(learning_rate=1e-5).minimize(errG, var_list=netG.vars, global_step=global_step)

    # optimize D
    D_train_op = tf.train.AdamOptimizer(learning_rate=1e-5).minimize(errD, var_list=netD.vars)

In [7]:

TENSORBOARD_PATH = '/tmp/wgan/summary'
restore_dir = '/tempssd/wgan/restore/'
max_steps = 200
test_frequency = 10

n_critic_per_step = 20
best_test_G_loss = sys.maxsize

datagenerator = DataGenerator(src_path, BATCH_SIZE, X_SHAPE)

next_batch = datagenerator('train2017')
next_test_batch = datagenerator('val2017')

from time import gmtime, strftime
SAVE_PATH = '/tempssd/wgan/save/'

timestring = strftime("%Y_%b_%d_%H_%M_%S", gmtime())
default_dir = os.path.join(SAVE_PATH, timestring)
summmary_path = os.path.join(default_dir, 'summary')
if not (os.path.isdir(summmary_path)):
    os.makedirs(summmary_path)

model_path = os.path.join(default_dir, 'model')
if not (os.path.isdir(model_path)):
    os.makedirs(model_path)
    
best_model_name = os.path.join(model_path, "step")

def add_value_summary(name, value, step, summary_writer):
    summary_time_epoch = tf.Summary()
    summary_time_epoch.value.add(tag=name, simple_value=value)
    summary_writer.add_summary(summary_time_epoch, global_step=step)


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    summary_writer = tf.summary.FileWriter(summmary_path, sess.graph)

    # restore path not empty
    if os.path.exists(os.path.dirname(restore_dir)):
        # gcloud_load()
        tf.train.Saver().restore(sess, restore_dir)

    for _ in range(max_steps):

        start = time.time()

        train_images = [] # to keep last training image to evaluate D loss later        
        # train the discriminator for n_critic_per_step runs
        for critic_itr in range(n_critic_per_step):
            train_images = sess.run(next_batch)
            batch_z = np.random.normal(-1.0, 1.0, size=[BATCH_SIZE, 100]).astype(np.float32)
            sess.run(D_train_op, feed_dict={real_images: train_images, z: batch_z})

        # now train the generator once! use normal distribution, not uniform!!
        train_images = sess.run(next_batch)
        batch_z = np.random.normal(-1.0, 1.0, size=[BATCH_SIZE, 100]).astype(np.float32)
        
        _, step = sess.run([G_train_op, global_step], feed_dict={z: batch_z})

        G_loss, D_loss = sess.run([errD, errG], feed_dict={real_images: train_images, z: batch_z})

        time_epoch = time.time() - start

        add_value_summary('G_loss', G_loss, step, summary_writer)
        add_value_summary('D_loss', D_loss, step, summary_writer)
        add_value_summary('time_epoch', time_epoch, step, summary_writer)

        logging.debug("step: {}, D_loss: {}, G_loss: {}, time: {}".format(step, D_loss, G_loss, time_epoch))

        if step % test_frequency == 0 or step + 1 == max_steps:
            test_images = sess.run(next_test_batch)
            batch_test_z = np.random.normal(-1.0, 1.0, size=[BATCH_SIZE, 100]).astype(np.float32)
            D_test_loss, G_test_loss = sess.run([errD, errG], feed_dict={real_images: test_images, z: batch_test_z})

            add_value_summary('test_G_loss', G_test_loss, step, summary_writer)
            add_value_summary('test_D_loss', D_test_loss, step, summary_writer)

            logging.debug("test step: {}, D_loss: {}, G_loss: {}".format(step, D_test_loss, G_test_loss))

            if best_test_G_loss > G_test_loss:
                best_test_G_loss = G_test_loss
                best_test_step = step
                tf.train.Saver().save(sess, save_path=best_model_name, global_step=step)

                batch_z = np.random.normal(-1.0, 1.0, size=[BATCH_SIZE, 100]).astype(np.float32)
                gen_imgs = sess.run(gen_images, feed_dict={z: batch_z})

                tf.summary.image('original', tfrevert(test_images), collections=['image'])
                tf.summary.image('reconstruct', tfrevert(gen_imgs), collections=['image'])
                merge_image = tf.summary.merge_all(key='image')
                merge_image_summary = sess.run(merge_image)

                summary_writer.add_summary(merge_image_summary)

    summary_writer.flush()
    
    if not (os.path.isdir(os.path.dirname(TENSORBOARD_PATH))):
        os.makedirs(os.path.dirname(TENSORBOARD_PATH))
    
    if os.path.exists(TENSORBOARD_PATH):
        os.unlink(TENSORBOARD_PATH)  
    
    os.symlink(summmary_path, TENSORBOARD_PATH)

print("model: {} \nsummary: {}".format(best_model_name, summmary_path))

>> Downloading stuffthingmaps_trainval2017.zip 100.0%
Successfully downloaded stuffthingmaps_trainval2017.zip 659008564 bytes.


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



['/tmp/dataset/annotations/val2017/000000496854.png', '/tmp/dataset/annotations/val2017/000000118594.png', '/tmp/dataset/annotations/val2017/000000530099.png', '/tmp/dataset/annotations/val2017/000000224051.png', '/tmp/dataset/annotations/val2017/000000419379.png', '/tmp/dataset/annotations/val2017/000000477227.png', '/tmp/dataset/annotations/val2017/000000534601.png', '/tmp/dataset/annotations/val2017/000000110282.png', '/tmp/dataset/annotations/val2017/000000076417.png', '/tmp/dataset/annotations/val2017/000000304545.png', '/tmp/dataset/annotations/val2017/000000277197.png', '/tmp/dataset/annotations/val2017/000000351559.png', '/tmp/dataset/annotations/val2017/000000154947.png', '/tmp/dataset/annotations/val2017/000000388215.png', '/tmp/dataset/annotations/val2017/000000178469.png', '/tmp/dataset/annotations/val2017/000000118367.png', '/tmp/dataset/annotations/val2017/000000125257.png', '/tmp/dataset/annotations/val2017/000000548555.png', '/tmp/dataset/annotations/val2017/00000025157

DEBUG:root:step: 0, D_loss: 10.025816917419434, G_loss: 0.6099579930305481, time: 12.800023794174194
DEBUG:root:test step: 0, D_loss: 10.025848388671875, G_loss: 0.6099569201469421
DEBUG:root:step: 1, D_loss: 10.002203941345215, G_loss: 0.6255995035171509, time: 5.0925774574279785
DEBUG:root:step: 2, D_loss: 9.978922843933105, G_loss: 0.6407955288887024, time: 4.9427406787872314
DEBUG:root:step: 3, D_loss: 9.956494331359863, G_loss: 0.6553412079811096, time: 5.078819513320923
DEBUG:root:step: 4, D_loss: 9.935044288635254, G_loss: 0.6690940856933594, time: 5.044639348983765
DEBUG:root:step: 5, D_loss: 9.914727210998535, G_loss: 0.6819822788238525, time: 5.061859369277954
DEBUG:root:step: 6, D_loss: 9.895535469055176, G_loss: 0.6939961910247803, time: 5.088647127151489
DEBUG:root:step: 7, D_loss: 9.877360343933105, G_loss: 0.7051717042922974, time: 5.1541712284088135
DEBUG:root:step: 8, D_loss: 9.860284805297852, G_loss: 0.715571403503418, time: 4.989055633544922
DEBUG:root:step: 9, D_lo

DEBUG:root:step: 76, D_loss: 8.309865951538086, G_loss: 0.920458972454071, time: 5.380645751953125
DEBUG:root:step: 77, D_loss: 8.234808921813965, G_loss: 0.9213584661483765, time: 5.127763986587524
DEBUG:root:step: 78, D_loss: 8.175457000732422, G_loss: 0.9222403764724731, time: 5.120782136917114
DEBUG:root:step: 79, D_loss: 8.11009407043457, G_loss: 0.923105001449585, time: 5.166773080825806
DEBUG:root:step: 80, D_loss: 8.032978057861328, G_loss: 0.9239529371261597, time: 5.198338270187378
DEBUG:root:test step: 80, D_loss: 8.031848907470703, G_loss: 0.9239528179168701
DEBUG:root:step: 81, D_loss: 7.961857795715332, G_loss: 0.9247846603393555, time: 5.11817479133606
DEBUG:root:step: 82, D_loss: 7.889099597930908, G_loss: 0.9256007075309753, time: 5.084054231643677
DEBUG:root:step: 83, D_loss: 7.815126419067383, G_loss: 0.9264013767242432, time: 5.084354639053345
DEBUG:root:step: 84, D_loss: 7.739950656890869, G_loss: 0.927187442779541, time: 5.10667085647583
DEBUG:root:step: 85, D_los

DEBUG:root:step: 151, D_loss: 2.172295570373535, G_loss: 0.9599541425704956, time: 5.076777458190918
DEBUG:root:step: 152, D_loss: 2.066765069961548, G_loss: 0.9602651000022888, time: 5.125593900680542
DEBUG:root:step: 153, D_loss: 1.9544568061828613, G_loss: 0.9605727195739746, time: 5.204438924789429
DEBUG:root:step: 154, D_loss: 1.844957709312439, G_loss: 0.9608769416809082, time: 5.186663389205933
DEBUG:root:step: 155, D_loss: 1.8294363021850586, G_loss: 0.9611777067184448, time: 5.181690216064453
DEBUG:root:step: 156, D_loss: 1.7835921049118042, G_loss: 0.9614753127098083, time: 5.083095550537109
DEBUG:root:step: 157, D_loss: 1.8584147691726685, G_loss: 0.9617695212364197, time: 5.093644142150879
DEBUG:root:step: 158, D_loss: 1.8201487064361572, G_loss: 0.9620605111122131, time: 5.197969436645508
DEBUG:root:step: 159, D_loss: 1.7986878156661987, G_loss: 0.962348461151123, time: 5.112685441970825
DEBUG:root:step: 160, D_loss: 1.707371711730957, G_loss: 0.9626331329345703, time: 5.1

model: /tempssd/wgan/save/2018_Jun_27_11_19_20/model/step 
summary: /tempssd/wgan/save/2018_Jun_27_11_19_20/summary
