In [1]:
import os

import numpy as np
from tqdm import trange
import tensorflow as tf

from utils import *
from ops import *
from statistic import Statistic

## Set the parameters of the network

In [2]:
hyperparams = {# network
    "model" : "pixel_cnn", # name of model [pixel_rnn, pixel_cnn]
    "batch_size" : 100, # size of a batch
    "hidden_dims" : 16, # dimesion of hidden states of LSTM or Conv layers
    "recurrent_length" : 7, # the length of LSTM or Conv layers
    "out_hidden_dims" : 32, # dimesion of hidden states of output Conv layers
    "out_recurrent_length" : 2, # the length of output Conv layers
    "use_residual" : False, # whether to use residual connections or not
    "use_dynamic_rnn" : False, # whether to use dynamic_rnn or not

    # training
    "max_epoch" : 100000, # # of step in an epoch
    "test_step" : 100, # # of step to test a model
    "save_step" : 1000, # # of step to save a model
    "learning_rate" : 1e-3, # learning rate
    "grad_clip" : 1, # value of gradient to be used for clipping
    "use_gpu" : True, # whether to use gpu for training

    # data
    "data" : "mnist", # name of dataset 
    "data_dir" : "MNIST-data", # name of data directory
    "sample_dir" : "samples", # name of sample directory

    # Debug
    "is_train" : True, # training or testing
    "display" : False, # whether to display the training results or not
    "random_seed" :  123 # random seed for python
}
p = dotdict(hyperparams)

In [3]:
if "random_seed" in p:
    tf.set_random_seed(p.random_seed)
    np.random.seed(p.random_seed)

## Prepare the dataset


In [4]:
# TODO add hyperparams to model saving
model_dir = setup_model_saving(p.model, p.data, hyperparams)
DATA_DIR = p.data_dir
SAMPLE_DIR = os.path.join(model_dir, p.sample_dir)

check_and_create_dir(DATA_DIR)
check_and_create_dir(SAMPLE_DIR)

# prepare dataset
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(DATA_DIR, one_hot=True)

next_train_batch = lambda x: mnist.train.next_batch(x)[0]
next_test_batch = lambda x: mnist.test.next_batch(x)[0]

height, width, channel = 28, 28, 1

train_step_per_epoch = mnist.train.num_examples / p.batch_size
test_step_per_epoch = mnist.test.num_examples / p.batch_size

Extracting MNIST-data/train-images-idx3-ubyte.gz
Extracting MNIST-data/train-labels-idx1-ubyte.gz
Extracting MNIST-data/t10k-images-idx3-ubyte.gz
Extracting MNIST-data/t10k-labels-idx1-ubyte.gz


## Setup network

In [5]:
def pixelRNN(height, width, channel, params):
    """
    Args
    height, width, channel - the dimensions of the input
    params -- the hyperparameters of the network
    """
    input_shape = [None, height, width, channel] if params.use_gpu else [None, channel, height, width]
    inputs = tf.placeholder(tf.float32, input_shape)
    
    # TODO remove scoping
    # input of main reccurent layers
    scope = "conv_inputs"
    conv_inputs = conv2d(inputs, params.hidden_dims, [7, 7], "A", scope=scope)
    
    # main reccurent layers
    last_hid = conv_inputs
    for idx in xrange(params.recurrent_length):
        scope = 'CONV%d' % idx
        last_hid = conv2d(last_hid, 3, [1, 1], "B", scope=scope)
        print("Building %s" % scope)

    # output reccurent layers
    for idx in xrange(params.out_recurrent_length):
        scope = 'CONV_OUT%d' % idx
        last_hid = tf.nn.relu(conv2d(last_hid, params.out_hidden_dims, [1, 1], "B", scope=scope))
        print("Building %s" % scope)

    conv2d_out_logits = conv2d(last_hid, 1, [1, 1], "B", scope='conv2d_out_logits')
    output = tf.nn.sigmoid(conv2d_out_logits)
    return inputs, output, conv2d_out_logits
inputs, output, conv2d_out_logits = pixelRNN(height, width, channel, p)

Building CONV0
Building CONV1
Building CONV2
Building CONV3
Building CONV4
Building CONV5
Building CONV6
Building CONV_OUT0
Building CONV_OUT1


In [6]:
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(conv2d_out_logits, inputs, name='loss'))

optimizer = tf.train.RMSPropOptimizer(p.learning_rate)
grads_and_vars = optimizer.compute_gradients(loss)

new_grads_and_vars = \
    [(tf.clip_by_value(gv[0], -p.grad_clip, p.grad_clip), gv[1]) for gv in grads_and_vars]
optim = optimizer.apply_gradients(new_grads_and_vars)
 
# show_all_variables()
print("Building %s finished!" % p.model)

Building pixel_cnn finished!


In [7]:
def predict(sess, images, inputs, output):
    return sess.run(output, {inputs: images})

def generate(sess, height, width, inputs, output):
    samples = np.zeros((100, height, width, 1), dtype='float32')
    # TODO add occlusions
    for i in xrange(self.height):
        for j in xrange(self.width):
            for k in xrange(self.channel):
                next_sample = binarize(predict(sess, samples, inputs, output))
                samples[:, i, j, k] = next_sample[:, i, j, k]

    return samples

## Training

In [8]:
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    stat = Statistic(sess, p.data, model_dir, tf.trainable_variables(), p.test_step)
    stat.load_model()
    print("Start training")
    
    initial_step = stat.get_t() if stat else 0
    iterator = trange(p.max_epoch, ncols=70, initial=initial_step)

    for epoch in iterator:
        # 1. train
        total_train_costs = []
        for idx in xrange(train_step_per_epoch):
            images = binarize(next_train_batch(p.batch_size)) \
                .reshape([p.batch_size, height, width, channel])

            _, cost = sess.run([optim, loss], feed_dict={ inputs: images })
            total_train_costs.append(cost)

        # 2. test
        total_test_costs = []
        for idx in xrange(test_step_per_epoch):
            images = binarize(next_test_batch(p.batch_size)) \
                .reshape([p.batch_size, height, width, channel])

            cost = sess.run(loss, feed_dict={ inputs : images })
            total_test_costs.append(cost)

        avg_train_cost, avg_test_cost = np.mean(total_train_costs), np.mean(total_test_costs)

        stat.on_step(avg_train_cost, avg_test_cost)

        # 3. generate samples
        samples = generate(ses, height, width, inputs, output)
        save_images(samples, height, width, 10, 10, 
            directory=SAMPLE_DIR, prefix="epoch_%s" % epoch)

        iterator.set_description("train l: %.3f, test l: %.3f" % (avg_train_cost, avg_test_cost))

Instructions for updating:
Please switch to tf.summary.FileWriter. The interface and behavior is the same; this is just a rename.


[01-28 21:13:16] From statistic.py:20 in __init__.: __init__ (from tensorflow.python.training.summary_io) is deprecated and will be removed after 2016-11-30.
Instructions for updating:
Please switch to tf.summary.FileWriter. The interface and behavior is the same; this is just a rename.


Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.


[01-28 21:13:17] From statistic.py:30 in __init__.: scalar_summary (from tensorflow.python.ops.logging_ops) is deprecated and will be removed after 2016-11-30.
Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.


Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.


[01-28 21:13:17] From statistic.py:30 in __init__.: scalar_summary (from tensorflow.python.ops.logging_ops) is deprecated and will be removed after 2016-11-30.
Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.


FailedPreconditionError: Attempting to use uninitialized value t/t
	 [[Node: t/AssignAdd = AssignAdd[T=DT_INT32, _class=["loc:@t/t"], use_locking=false, _device="/job:localhost/replica:0/task:0/cpu:0"](t/t, t/AssignAdd/value)]]

Caused by op u't/AssignAdd', defined at:
  File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/runpy.py", line 162, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/ipykernel/kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/tornado/ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-8-ce40f4ff9787>", line 4, in <module>
    stat = Statistic(sess, p.data, model_dir, tf.trainable_variables(), p.test_step)
  File "statistic.py", line 16, in __init__
    self.t_add_op = self.t_op.assign_add(1)
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 590, in assign_add
    return state_ops.assign_add(self._variable, delta, use_locking=use_locking)
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/tensorflow/python/ops/gen_state_ops.py", line 75, in assign_add
    use_locking=use_locking, name=name)
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 759, in apply_op
    op_def=op_def)
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2240, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/Users/philkuz/.virtualenvs/keras/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1128, in __init__
    self._traceback = _extract_stack()

FailedPreconditionError (see above for traceback): Attempting to use uninitialized value t/t
	 [[Node: t/AssignAdd = AssignAdd[T=DT_INT32, _class=["loc:@t/t"], use_locking=false, _device="/job:localhost/replica:0/task:0/cpu:0"](t/t, t/AssignAdd/value)]]


In [None]:
with tf.Session() as sess:
    samples = generate(sess, height, width, inputs, output)
    save_images(samples, height, width, 10, 10, directory=SAMPLE_DIR)