From 5ef9564478d8facf7ffcc82144c9652b058850a7 Mon Sep 17 00:00:00 2001
From: "Parag K. Mital" <parag@kadenze.com>
Date: Tue, 5 Sep 2017 18:19:52 -0700
Subject: [PATCH] updating vaegan and charrnn with tf 1.3.0 changes

---
 session-5/libs/charrnn.py | 326 +++++++++++++++++++++++--------
 session-5/libs/vaegan.py  | 397 ++++++++++++++++++++++----------------
 2 files changed, 483 insertions(+), 240 deletions(-)

diff --git a/session-5/libs/charrnn.py b/session-5/libs/charrnn.py
index 2d5d2d01..81be934a 100644
--- a/session-5/libs/charrnn.py
+++ b/session-5/libs/charrnn.py
@@ -1,10 +1,19 @@
-"""Creative Applications of Deep Learning w/ Tensorflow.
-Kadenze, Inc.
-Copyright Parag K. Mital, June 2016.
-TODO:
-argparse
-better sound example/model
-prime with text input
+"""Character-level Recurrent Neural Network.
+"""
+"""
+Copyright 2017 Parag K. Mital.  See also NOTICE.md.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
 """
 
 import tensorflow as tf
@@ -13,6 +22,7 @@
 import sys
 import collections
 import gzip
+from libs import utils
 
 
 def build_model(txt,
@@ -22,7 +32,30 @@ def build_model(txt,
                 n_cells=100,
                 gradient_clip=10.0,
                 learning_rate=0.001):
-
+    """Summary
+
+    Parameters
+    ----------
+    txt : TYPE
+        Description
+    batch_size : int, optional
+        Description
+    sequence_length : int, optional
+        Description
+    n_layers : int, optional
+        Description
+    n_cells : int, optional
+        Description
+    gradient_clip : float, optional
+        Description
+    learning_rate : float, optional
+        Description
+
+    Returns
+    -------
+    TYPE
+        Description
+    """
     vocab = list(set(txt))
     vocab.sort()
     n_chars = len(vocab)
@@ -46,9 +79,10 @@ def build_model(txt,
     with tf.variable_scope('rnn'):
         cells = tf.contrib.rnn.MultiRNNCell([
             tf.contrib.rnn.DropoutWrapper(
-                tf.contrib.rnn.BasicLSTMCell(num_units=n_cells, forget_bias=0.0, state_is_tuple=True),
-                output_keep_prob=keep_prob)
-            for _ in range(n_layers)])
+                tf.contrib.rnn.BasicLSTMCell(
+                    num_units=n_cells, forget_bias=0.0, state_is_tuple=True),
+                output_keep_prob=keep_prob) for _ in range(n_layers)
+        ])
         initial_state = cells.zero_state(tf.shape(X)[0], tf.float32)
         # returns a length sequence length list of outputs, one for each input
         outputs, final_state = tf.contrib.rnn.static_rnn(
@@ -64,18 +98,15 @@ def build_model(txt,
             shape=[n_cells, n_chars],
             initializer=tf.contrib.layers.xavier_initializer())
         b = tf.get_variable(
-            "b",
-            shape=[n_chars],
-            initializer=tf.constant_initializer())
+            "b", shape=[n_chars], initializer=tf.constant_initializer())
         logits = tf.matmul(outputs_flat, W) + b
         probs = tf.nn.softmax(logits)
         Y_pred = tf.argmax(probs, 1)
 
     with tf.variable_scope('loss'):
-        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
-            [logits],
-            [tf.reshape(tf.concat(axis=1, values=Y), [-1])],
-            [tf.ones([batch_size * sequence_length])])
+        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example([logits], [
+            tf.reshape(tf.concat(axis=1, values=Y), [-1])
+        ], [tf.ones([batch_size * sequence_length])])
         cost = tf.reduce_sum(loss) / batch_size
 
     with tf.name_scope('optimizer'):
@@ -86,29 +117,77 @@ def build_model(txt,
             gradients.append((tf.clip_by_value(grad, -clip, clip), var))
         updates = optimizer.apply_gradients(gradients)
 
-    model = {'X': X, 'Y': Y, 'logits': logits, 'probs': probs,
-             'Y_pred': Y_pred, 'keep_prob': keep_prob,
-             'cost': cost, 'updates': updates, 'initial_state': initial_state,
-             'final_state': final_state, 'decoder': decoder, 'encoder': encoder,
-             'vocab_size': n_chars}
+    model = {
+        'X': X,
+        'Y': Y,
+        'logits': logits,
+        'probs': probs,
+        'Y_pred': Y_pred,
+        'keep_prob': keep_prob,
+        'cost': cost,
+        'updates': updates,
+        'initial_state': initial_state,
+        'final_state': final_state,
+        'decoder': decoder,
+        'encoder': encoder,
+        'vocab_size': n_chars
+    }
     return model
 
 
-def train(txt, batch_size=100, sequence_length=150, n_cells=200, n_layers=3,
-          learning_rate=0.00001, max_iter=50000, gradient_clip=5.0,
-          ckpt_name="model.ckpt", keep_prob=1.0):
-
+def train(txt,
+          batch_size=100,
+          sequence_length=150,
+          n_cells=200,
+          n_layers=3,
+          learning_rate=0.00001,
+          max_iter=50000,
+          gradient_clip=5.0,
+          ckpt_name="model.ckpt",
+          keep_prob=1.0):
+    """train
+
+    Parameters
+    ----------
+    txt : TYPE
+        Description
+    batch_size : int, optional
+        Description
+    sequence_length : int, optional
+        Description
+    n_cells : int, optional
+        Description
+    n_layers : int, optional
+        Description
+    learning_rate : float, optional
+        Description
+    max_iter : int, optional
+        Description
+    gradient_clip : float, optional
+        Description
+    ckpt_name : str, optional
+        Description
+    keep_prob : float, optional
+        Description
+
+    Returns
+    -------
+    TYPE
+        Description
+    """
     g = tf.Graph()
     with tf.Session(graph=g) as sess:
-        model = build_model(txt=txt,
-                            batch_size=batch_size,
-                            sequence_length=sequence_length,
-                            n_layers=n_layers,
-                            n_cells=n_cells,
-                            gradient_clip=gradient_clip,
-                            learning_rate=learning_rate)
-
-        init_op = tf.global_variables_initializer()
+        model = build_model(
+            txt=txt,
+            batch_size=batch_size,
+            sequence_length=sequence_length,
+            n_layers=n_layers,
+            n_cells=n_cells,
+            gradient_clip=gradient_clip,
+            learning_rate=learning_rate)
+
+        init_op = tf.group(tf.global_variables_initializer(),
+                           tf.local_variables_initializer())
         saver = tf.train.Saver()
         sess.run(init_op)
         if os.path.exists(ckpt_name + '.index') or os.path.exists(ckpt_name):
@@ -122,32 +201,41 @@ def train(txt, batch_size=100, sequence_length=150, n_cells=200, n_layers=3,
         while it_i < max_iter:
             Xs, Ys = [], []
             for batch_i in range(batch_size):
-                Xs.append([model['encoder'][ch]
-                           for ch in txt[cursor:cursor + sequence_length]])
-                Ys.append([model['encoder'][ch]
-                           for ch in txt[cursor + 1:
-                                         cursor + sequence_length + 1]])
+                Xs.append([
+                    model['encoder'][ch]
+                    for ch in txt[cursor:cursor + sequence_length]
+                ])
+                Ys.append([
+                    model['encoder'][ch]
+                    for ch in txt[cursor + 1:cursor + sequence_length + 1]
+                ])
                 cursor += sequence_length
                 if (cursor + 1) >= len(txt) - sequence_length - 1:
                     cursor = np.random.randint(0, high=sequence_length)
 
-            feed_dict = {model['X']: Xs,
-                         model['Y']: Ys,
-                         model['keep_prob']: keep_prob}
-            out = sess.run([model['cost'], model['updates']],
-                           feed_dict=feed_dict)
+            feed_dict = {
+                model['X']: Xs,
+                model['Y']: Ys,
+                model['keep_prob']: keep_prob
+            }
+            out = sess.run(
+                [model['cost'], model['updates']], feed_dict=feed_dict)
             avg_cost += out[0]
 
             if (it_i + 1) % print_step == 0:
-                p = sess.run(model['probs'], feed_dict={
-                    model['X']: np.array(Xs[-1])[np.newaxis],
-                    model['keep_prob']: 1.0})
-                print(p.shape, 'min:', np.min(p), 'max:', np.max(p),
-                      'mean:', np.mean(p), 'std:', np.std(p))
+                p = sess.run(
+                    model['probs'],
+                    feed_dict={
+                        model['X']: np.array(Xs[-1])[np.newaxis],
+                        model['keep_prob']: 1.0
+                    })
+                print(p.shape, 'min:',
+                      np.min(p), 'max:',
+                      np.max(p), 'mean:', np.mean(p), 'std:', np.std(p))
                 if isinstance(txt[0], str):
                     # Print original string
-                    print('original:', "".join(
-                        [model['decoder'][ch] for ch in Xs[-1]]))
+                    print('original:',
+                          "".join([model['decoder'][ch] for ch in Xs[-1]]))
 
                     # Print max guess
                     amax = []
@@ -176,43 +264,91 @@ def train(txt, batch_size=100, sequence_length=150, n_cells=200, n_layers=3,
         return model
 
 
-def infer(txt, ckpt_name, n_iterations, n_cells=200, n_layers=3,
-          learning_rate=0.001, max_iter=5000, gradient_clip=10.0,
-          init_value=[0], keep_prob=1.0, sampling='prob', temperature=1.0):
-
+def infer(txt,
+          ckpt_name,
+          n_iterations,
+          n_cells=200,
+          n_layers=3,
+          learning_rate=0.001,
+          max_iter=5000,
+          gradient_clip=10.0,
+          init_value=[0],
+          keep_prob=1.0,
+          sampling='prob',
+          temperature=1.0):
+    """infer
+
+    Parameters
+    ----------
+    txt : TYPE
+        Description
+    ckpt_name : TYPE
+        Description
+    n_iterations : TYPE
+        Description
+    n_cells : int, optional
+        Description
+    n_layers : int, optional
+        Description
+    learning_rate : float, optional
+        Description
+    max_iter : int, optional
+        Description
+    gradient_clip : float, optional
+        Description
+    init_value : list, optional
+        Description
+    keep_prob : float, optional
+        Description
+    sampling : str, optional
+        Description
+    temperature : float, optional
+        Description
+
+    Returns
+    -------
+    TYPE
+        Description
+    """
     g = tf.Graph()
     with tf.Session(graph=g) as sess:
         sequence_length = len(init_value)
-        model = build_model(txt=txt,
-                            batch_size=1,
-                            sequence_length=sequence_length,
-                            n_layers=n_layers,
-                            n_cells=n_cells,
-                            gradient_clip=gradient_clip,
-                            learning_rate=learning_rate)
-
-        init_op = tf.global_variables_initializer()
+        model = build_model(
+            txt=txt,
+            batch_size=1,
+            sequence_length=sequence_length,
+            n_layers=n_layers,
+            n_cells=n_cells,
+            gradient_clip=gradient_clip,
+            learning_rate=learning_rate)
+
+        init_op = tf.group(tf.global_variables_initializer(),
+                           tf.local_variables_initializer())
         saver = tf.train.Saver()
         sess.run(init_op)
-        if os.path.exists(ckpt_name + '.index') or os.path.exists(ckpt_name):
+        if os.path.exists(ckpt_name + '.index') or os.path.exists(ckpt_name):
             saver.restore(sess, ckpt_name)
             print("Model restored.")
 
         state = []
         synth = [init_value]
         for s_i in model['final_state']:
-            state += sess.run([s_i.c, s_i.h], feed_dict={
-                model['X']: [synth[-1]], model['keep_prob']: keep_prob})
+            state += sess.run(
+                [s_i.c, s_i.h],
+                feed_dict={
+                    model['X']: [synth[-1]],
+                    model['keep_prob']: keep_prob
+                })
 
         for i in range(n_iterations):
             # print('iteration: {}/{}'.format(i, n_iterations), end='\r')
-            feed_dict = {model['X']: [synth[-1]],
-                         model['keep_prob']: keep_prob}
+            feed_dict = {model['X']: [synth[-1]], model['keep_prob']: keep_prob}
             state_updates = []
             for state_i in range(n_layers):
                 feed_dict[model['initial_state'][state_i].c] = \
                     state[state_i * 2]
-                feed_dict[model['initial_state'][state_i].h] = state[state_i * 2 + 1]
+                feed_dict[model['initial_state'][state_i].h] = state[state_i * 2
+                                                                     + 1]
                 state_updates.append(model['final_state'][state_i].c)
                 state_updates.append(model['final_state'][state_i].h)
             p = sess.run(model['probs'], feed_dict=feed_dict)[0]
@@ -225,8 +361,9 @@ def infer(txt, ckpt_name, n_iterations, n_cells=200, n_layers=3,
                 p = np.random.multinomial(1, p.ravel())
                 p = np.argmax(p)
             # Get the current state
-            state = [sess.run(s_i, feed_dict=feed_dict)
-                     for s_i in state_updates]
+            state = [
+                sess.run(s_i, feed_dict=feed_dict) for s_i in state_updates
+            ]
             synth.append([p])
             print(model['decoder'][p], end='')
             sys.stdout.flush()
@@ -238,27 +375,60 @@ def infer(txt, ckpt_name, n_iterations, n_cells=200, n_layers=3,
 
 
 def test_alice(max_iter=5):
+    """Download 'alice.txt.gz' and train a small 2-layer, 20-cell model on it.
+
+    Parameters
+    ----------
+    max_iter : int, optional
+        Passed through to `train` as its `max_iter` argument.
+
+    Returns
+    -------
+    dict
+        The value returned by `train` (its `model` dictionary).
+    """
+    utils.download('https://s3.amazonaws.com/cadl/models/alice.txt.gz')
     with gzip.open('alice.txt.gz', 'rb') as fp:
         txt = fp.read().decode('utf-8')
-    train(txt, n_layers=2, n_cells=20, max_iter=max_iter)
+    return train(txt, n_layers=2, n_cells=20, max_iter=max_iter)
 
 
 def test_trump(max_iter=100):
+    """Download the 'trump' checkpoint and text, then print `infer` output.
+
+    Parameters
+    ----------
+    max_iter : int, optional
+        Passed to `infer` as `n_iterations`.
+    """
+    utils.download(
+        'https://s3.amazonaws.com/cadl/models/trump.ckpt.data-00000-of-00001')
+    utils.download('https://s3.amazonaws.com/cadl/models/trump.ckpt.meta')
+    utils.download('https://s3.amazonaws.com/cadl/models/trump.ckpt.index')
+    utils.download('https://s3.amazonaws.com/cadl/models/trump.txt')
     with open('trump.txt', 'r') as fp:
         txt = fp.read()
-    # train(txt, ckpt_name='trump.ckpt', max_iter=max_iter)
+    # Disabled: train(txt, ckpt_name='./trump.ckpt', max_iter=max_iter)
     print(infer(txt, ckpt_name='./trump.ckpt', n_iterations=max_iter))
 
 
 def test_wtc():
+    """Train on int8-quantized 'wtc.wav' audio, then write 'wtc-synth.wav'.
+    """
     from scipy.io.wavfile import write, read
     rate, aud = read('wtc.wav')
     txt = np.int8(np.round(aud / 16384.0 * 128.0))
     txt = np.squeeze(txt).tolist()
     # try with more than 100 iterations, e.g. 50k - 200k
     train(txt, sequence_length=250, n_layers=3, n_cells=512, max_iter=100)
-    synthesis = infer(txt, './model.ckpt', 8000 * 30, n_layers=3,
-                      n_cells=150, keep_prob=1.0, sampling='prob')
+    synthesis = infer(
+        txt,
+        './model.ckpt',
+        8000 * 30,
+        n_layers=3,
+        n_cells=512,
+        keep_prob=1.0,
+        sampling='prob')
     snd = np.int16(np.array(synthesis) / 128.0 * 16384.0)
     write('wtc-synth.wav', 8000, snd)
 
diff --git a/session-5/libs/vaegan.py b/session-5/libs/vaegan.py
index ca88b1cc..7434a140 100644
--- a/session-5/libs/vaegan.py
+++ b/session-5/libs/vaegan.py
@@ -1,19 +1,37 @@
+
 """Convolutional/Variational autoencoder, including demonstration of
 training such a network on MNIST, CelebNet and the film, "Sita Sings The Blues"
 using an image pipeline.
+"""
+"""
+Copyright 2017 Parag K. Mital.  See also NOTICE.md.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
 
-Parag K. Mital, Jan 2016
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
 """
 import tensorflow as tf
 import numpy as np
 import os
 from libs.dataset_utils import create_input_pipeline
 from libs.datasets import CELEB
-from libs.utils import *
+from libs import utils
 
 
-def encoder(x, n_hidden=None, dimensions=[], filter_sizes=[],
-            convolutional=False, activation=tf.nn.relu,
+def encoder(x,
+            n_hidden=None,
+            dimensions=[],
+            filter_sizes=[],
+            convolutional=False,
+            activation=tf.nn.relu,
             output_activation=tf.nn.sigmoid):
     """Summary
 
@@ -40,11 +58,9 @@ def encoder(x, n_hidden=None, dimensions=[], filter_sizes=[],
         Description
     """
     if convolutional:
-        x_tensor = to_tensor(x)
+        x_tensor = utils.to_tensor(x)
     else:
-        x_tensor = tf.reshape(
-            tensor=x,
-            shape=[-1, dimensions[0]])
+        x_tensor = tf.reshape(tensor=x, shape=[-1, dimensions[0]])
         dimensions = dimensions[1:]
     current_input = x_tensor
 
@@ -55,16 +71,14 @@ def encoder(x, n_hidden=None, dimensions=[], filter_sizes=[],
         with tf.variable_scope(str(layer_i)):
             shapes.append(current_input.get_shape().as_list())
             if convolutional:
-                h, W = conv2d(
+                h, W = utils.conv2d(
                     x=current_input,
                     n_output=n_output,
                     k_h=filter_sizes[layer_i],
                     k_w=filter_sizes[layer_i],
                     padding='SAME')
             else:
-                h, W = linear(
-                    x=current_input,
-                    n_output=n_output)
+                h, W = utils.linear(x=current_input, n_output=n_output)
             h = activation(h)
             Ws.append(W)
             hs.append(h)
@@ -74,11 +88,11 @@ def encoder(x, n_hidden=None, dimensions=[], filter_sizes=[],
     shapes.append(h.get_shape().as_list())
 
     with tf.variable_scope('flatten'):
-        flattened = flatten(current_input)
+        flattened = utils.flatten(current_input)
 
     with tf.variable_scope('hidden'):
         if n_hidden:
-            h, W = linear(flattened, n_hidden, name='linear')
+            h, W = utils.linear(flattened, n_hidden, name='linear')
             h = activation(h)
         else:
             h = flattened
@@ -86,9 +100,13 @@ def encoder(x, n_hidden=None, dimensions=[], filter_sizes=[],
     return {'z': h, 'Ws': Ws, 'hs': hs, 'shapes': shapes}
 
 
-def decoder(z, shapes, n_hidden=None,
-            dimensions=[], filter_sizes=[],
-            convolutional=False, activation=tf.nn.relu,
+def decoder(z,
+            shapes,
+            n_hidden=None,
+            dimensions=[],
+            filter_sizes=[],
+            convolutional=False,
+            activation=tf.nn.relu,
             output_activation=tf.nn.relu):
     """Summary
 
@@ -118,7 +136,7 @@ def decoder(z, shapes, n_hidden=None,
     """
     with tf.variable_scope('hidden/1'):
         if n_hidden:
-            h = linear(z, n_hidden, name='linear')[0]
+            h = utils.linear(z, n_hidden, name='linear')[0]
             h = activation(h)
         else:
             h = z
@@ -126,12 +144,13 @@ def decoder(z, shapes, n_hidden=None,
     with tf.variable_scope('hidden/2'):
         dims = shapes[0]
         size = dims[1] * dims[2] * dims[3] if convolutional else dims[1]
-        h = linear(h, size, name='linear')[0]
+        h = utils.linear(h, size, name='linear')[0]
         current_input = activation(h)
         if convolutional:
             current_input = tf.reshape(
                 current_input,
-                tf.stack([tf.shape(current_input)[0], dims[1], dims[2], dims[3]]))
+                tf.stack(
+                    [tf.shape(current_input)[0], dims[1], dims[2], dims[3]]))
 
     Ws = []
     hs = []
@@ -139,16 +158,16 @@ def decoder(z, shapes, n_hidden=None,
         with tf.variable_scope('decoder/{}'.format(layer_i)):
             if convolutional:
                 shape = shapes[layer_i + 1]
-                h, W = deconv2d(x=current_input,
-                                n_output_h=shape[1],
-                                n_output_w=shape[2],
-                                n_output_ch=shape[3],
-                                n_input_ch=shapes[layer_i][3],
-                                k_h=filter_sizes[layer_i],
-                                k_w=filter_sizes[layer_i])
+                h, W = utils.deconv2d(
+                    x=current_input,
+                    n_output_h=shape[1],
+                    n_output_w=shape[2],
+                    n_output_ch=shape[3],
+                    n_input_ch=shapes[layer_i][3],
+                    k_h=filter_sizes[layer_i],
+                    k_w=filter_sizes[layer_i])
             else:
-                h, W = linear(x=current_input,
-                              n_output=n_output)
+                h, W = utils.linear(x=current_input, n_output=n_output)
             if (layer_i + 1) < len(dimensions):
                 h = activation(h)
             else:
@@ -176,8 +195,8 @@ def variational_bayes(h, n_code):
     name : TYPE
         Description
     """
-    z_mu = tf.nn.tanh(linear(h, n_code, name='mu')[0])
-    z_log_sigma = 0.5 * tf.nn.tanh(linear(h, n_code, name='log_sigma')[0])
+    z_mu = tf.nn.tanh(utils.linear(h, n_code, name='mu')[0])
+    z_log_sigma = 0.5 * tf.nn.tanh(utils.linear(h, n_code, name='log_sigma')[0])
 
     # Sample from noise distribution p(eps) ~ N(0, 1)
     epsilon = tf.random_normal(tf.stack([tf.shape(h)[0], n_code]))
@@ -187,13 +206,13 @@ def variational_bayes(h, n_code):
     # -log(p(z)/q(z|x)), bits by coding.
     # variational bound coding costs kl(p(z|x)||q(z|x))
     # d_kl(q(z|x)||p(z))
-    loss_z = -0.5 * tf.reduce_sum(
-        1.0 + 2.0 * z_log_sigma - tf.square(z_mu) - tf.exp(2.0 * z_log_sigma),
-        1)
+    loss_z = -0.5 * tf.reduce_sum(1.0 + 2.0 * z_log_sigma - tf.square(z_mu) -
+                                  tf.exp(2.0 * z_log_sigma), 1)
     return z, z_mu, z_log_sigma, loss_z
 
 
-def discriminator(x, convolutional=True,
+def discriminator(x,
+                  convolutional=True,
                   filter_sizes=[5, 5, 5, 5],
                   activation=tf.nn.relu,
                   n_filters=[100, 100, 100, 100]):
@@ -207,6 +226,8 @@ def discriminator(x, convolutional=True,
         Description
     filter_sizes : list, optional
         Description
+    activation : TYPE, optional
+        Description
     n_filters : list, optional
         Description
 
@@ -215,19 +236,24 @@ def discriminator(x, convolutional=True,
     name : TYPE
         Description
     """
-    encoding = encoder(x=x,
-                       convolutional=convolutional,
-                       dimensions=n_filters,
-                       filter_sizes=filter_sizes,
-                       activation=activation)
+    encoding = encoder(
+        x=x,
+        convolutional=convolutional,
+        dimensions=n_filters,
+        filter_sizes=filter_sizes,
+        activation=activation)
 
     # flatten, then linear to 1 value
-    res = flatten(encoding['z'], name='flatten')
+    res = utils.flatten(encoding['z'], name='flatten')
     if res.get_shape().as_list()[-1] > 1:
-        res = linear(res, 1)[0]
+        res = utils.linear(res, 1)[0]
 
-    return {'logits': res, 'probs': tf.nn.sigmoid(res),
-            'Ws': encoding['Ws'], 'hs': encoding['hs']}
+    return {
+        'logits': res,
+        'probs': tf.nn.sigmoid(res),
+        'Ws': encoding['Ws'],
+        'hs': encoding['hs']
+    }
 
 
 def VAE(input_shape=[None, 784],
@@ -268,12 +294,13 @@ def VAE(input_shape=[None, 784],
     x = tf.placeholder(tf.float32, input_shape, 'x')
 
     with tf.variable_scope('encoder'):
-        encoding = encoder(x=x,
-                           n_hidden=n_hidden,
-                           convolutional=convolutional,
-                           dimensions=n_filters,
-                           filter_sizes=filter_sizes,
-                           activation=activation)
+        encoding = encoder(
+            x=x,
+            n_hidden=n_hidden,
+            convolutional=convolutional,
+            dimensions=n_filters,
+            filter_sizes=filter_sizes,
+            activation=activation)
 
     if variational:
         with tf.variable_scope('variational'):
@@ -290,23 +317,30 @@ def VAE(input_shape=[None, 784],
     n_filters += [input_shape[-1]]
 
     with tf.variable_scope('generator'):
-        decoding = decoder(z=z,
-                           shapes=shapes,
-                           n_hidden=n_hidden,
-                           dimensions=n_filters,
-                           filter_sizes=filter_sizes,
-                           convolutional=convolutional,
-                           activation=activation)
+        decoding = decoder(
+            z=z,
+            shapes=shapes,
+            n_hidden=n_hidden,
+            dimensions=n_filters,
+            filter_sizes=filter_sizes,
+            convolutional=convolutional,
+            activation=activation)
 
     x_tilde = decoding['x_tilde']
-    x_flat = flatten(x)
-    x_tilde_flat = flatten(x_tilde)
+    x_flat = utils.flatten(x)
+    x_tilde_flat = utils.flatten(x_tilde)
 
     # -log(p(x|z))
     loss_x = tf.reduce_sum(tf.squared_difference(x_flat, x_tilde_flat), 1)
-    return {'loss_x': loss_x, 'loss_z': loss_z, 'x': x, 'z': z,
-            'Ws': encoding['Ws'], 'hs': decoding['hs'],
-            'x_tilde': x_tilde}
+    return {
+        'loss_x': loss_x,
+        'loss_z': loss_z,
+        'x': x,
+        'z': z,
+        'Ws': encoding['Ws'],
+        'hs': decoding['hs'],
+        'x_tilde': x_tilde
+    }
 
 
 def VAEGAN(input_shape=[None, 784],
@@ -348,12 +382,13 @@ def VAEGAN(input_shape=[None, 784],
     z_samp = tf.placeholder(tf.float32, [None, n_code], 'z_samp')
 
     with tf.variable_scope('encoder'):
-        encoding = encoder(x=x,
-                           n_hidden=n_hidden,
-                           convolutional=convolutional,
-                           dimensions=n_filters,
-                           filter_sizes=filter_sizes,
-                           activation=activation)
+        encoding = encoder(
+            x=x,
+            n_hidden=n_hidden,
+            convolutional=convolutional,
+            dimensions=n_filters,
+            filter_sizes=filter_sizes,
+            activation=activation)
 
         with tf.variable_scope('variational'):
             z, z_mu, z_log_sigma, loss_z = variational_bayes(
@@ -366,40 +401,45 @@ def VAEGAN(input_shape=[None, 784],
     n_filters_decoder += [input_shape[-1]]
 
     with tf.variable_scope('generator'):
-        decoding_actual = decoder(z=z,
-                                  shapes=shapes,
-                                  n_hidden=n_hidden,
-                                  convolutional=convolutional,
-                                  dimensions=n_filters_decoder,
-                                  filter_sizes=filter_sizes,
-                                  activation=activation)
+        decoding_actual = decoder(
+            z=z,
+            shapes=shapes,
+            n_hidden=n_hidden,
+            convolutional=convolutional,
+            dimensions=n_filters_decoder,
+            filter_sizes=filter_sizes,
+            activation=activation)
 
     with tf.variable_scope('generator', reuse=True):
-        decoding_sampled = decoder(z=z_samp,
-                                   shapes=shapes,
-                                   n_hidden=n_hidden,
-                                   convolutional=convolutional,
-                                   dimensions=n_filters_decoder,
-                                   filter_sizes=filter_sizes,
-                                   activation=activation)
+        decoding_sampled = decoder(
+            z=z_samp,
+            shapes=shapes,
+            n_hidden=n_hidden,
+            convolutional=convolutional,
+            dimensions=n_filters_decoder,
+            filter_sizes=filter_sizes,
+            activation=activation)
 
     with tf.variable_scope('discriminator'):
-        D_real = discriminator(x,
-                               filter_sizes=filter_sizes,
-                               n_filters=n_filters,
-                               activation=activation)
+        D_real = discriminator(
+            x,
+            filter_sizes=filter_sizes,
+            n_filters=n_filters,
+            activation=activation)
 
     with tf.variable_scope('discriminator', reuse=True):
-        D_fake = discriminator(decoding_actual['x_tilde'],
-                               filter_sizes=filter_sizes,
-                               n_filters=n_filters,
-                               activation=activation)
+        D_fake = discriminator(
+            decoding_actual['x_tilde'],
+            filter_sizes=filter_sizes,
+            n_filters=n_filters,
+            activation=activation)
 
     with tf.variable_scope('discriminator', reuse=True):
-        D_samp = discriminator(decoding_sampled['x_tilde'],
-                               filter_sizes=filter_sizes,
-                               n_filters=n_filters,
-                               activation=activation)
+        D_samp = discriminator(
+            decoding_sampled['x_tilde'],
+            filter_sizes=filter_sizes,
+            n_filters=n_filters,
+            activation=activation)
 
     with tf.variable_scope('loss'):
         # Weights influence of content/style of decoder
@@ -408,9 +448,8 @@ def VAEGAN(input_shape=[None, 784],
         # Discriminator_l Log Likelihood Loss
         loss_D_llike = 0
         for h_fake, h_real in zip(D_fake['hs'][3:], D_real['hs'][3:]):
-            loss_D_llike += tf.reduce_sum(
-                0.5 * tf.squared_difference(
-                    flatten(h_fake), flatten(h_real)), 1)
+            loss_D_llike += tf.reduce_sum(0.5 * tf.squared_difference(
+                utils.flatten(h_fake), utils.flatten(h_real)), 1)
 
         # GAN Loss
         eps = 1e-12
@@ -424,12 +463,22 @@ def VAEGAN(input_shape=[None, 784],
         loss_gen = tf.reduce_mean(gamma * loss_D_llike - loss_GAN)
         loss_dis = -tf.reduce_mean(loss_GAN)
 
-    return {'x': x, 'z': z, 'x_tilde': decoding_actual['x_tilde'],
-            'z_samp': z_samp, 'x_tilde_samp': decoding_sampled['x_tilde'],
-            'loss_real': loss_real, 'loss_fake': loss_fake, 'loss_samp': loss_samp,
-            'loss_GAN': loss_GAN, 'loss_D_llike': loss_D_llike,
-            'loss_enc': loss_enc, 'loss_gen': loss_gen, 'loss_dis': loss_dis,
-            'gamma': gamma}
+    return {
+        'x': x,
+        'z': z,
+        'x_tilde': decoding_actual['x_tilde'],
+        'z_samp': z_samp,
+        'x_tilde_samp': decoding_sampled['x_tilde'],
+        'loss_real': loss_real,
+        'loss_fake': loss_fake,
+        'loss_samp': loss_samp,
+        'loss_GAN': loss_GAN,
+        'loss_D_llike': loss_D_llike,
+        'loss_enc': loss_enc,
+        'loss_gen': loss_gen,
+        'loss_dis': loss_dis,
+        'gamma': gamma
+    }
 
 
 def train_vaegan(files,
@@ -485,20 +534,21 @@ def train_vaegan(files,
     ckpt_name : str, optional
         Description
 
-    Returns
-    -------
+    No Longer Returned
+    ------------------
     name : TYPE
         Description
     """
 
-    ae = VAEGAN(input_shape=[None] + crop_shape,
-                convolutional=convolutional,
-                variational=variational,
-                n_filters=n_filters,
-                n_hidden=n_hidden,
-                n_code=n_code,
-                filter_sizes=filter_sizes,
-                activation=activation)
+    ae = VAEGAN(
+        input_shape=[None] + crop_shape,
+        convolutional=convolutional,
+        variational=variational,
+        n_filters=n_filters,
+        n_hidden=n_hidden,
+        n_code=n_code,
+        filter_sizes=filter_sizes,
+        activation=activation)
 
     batch = create_input_pipeline(
         files=files,
@@ -509,29 +559,34 @@ def train_vaegan(files,
         shape=input_shape)
 
     zs = np.random.randn(4, n_code).astype(np.float32)
-    zs = make_latent_manifold(zs, n_examples)
+    zs = utils.make_latent_manifold(zs, n_examples)
 
-    opt_enc = tf.train.AdamOptimizer(
-        learning_rate=learning_rate).minimize(
+    opt_enc = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
         ae['loss_enc'],
-        var_list=[var_i for var_i in tf.trainable_variables()
-                  if var_i.name.startswith('encoder')])
+        var_list=[
+            var_i for var_i in tf.trainable_variables()
+            if var_i.name.startswith('encoder')
+        ])
 
-    opt_gen = tf.train.AdamOptimizer(
-        learning_rate=learning_rate).minimize(
+    opt_gen = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
         ae['loss_gen'],
-        var_list=[var_i for var_i in tf.trainable_variables()
-                  if var_i.name.startswith('generator')])
+        var_list=[
+            var_i for var_i in tf.trainable_variables()
+            if var_i.name.startswith('generator')
+        ])
 
-    opt_dis = tf.train.AdamOptimizer(
-        learning_rate=learning_rate).minimize(
+    opt_dis = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
         ae['loss_dis'],
-        var_list=[var_i for var_i in tf.trainable_variables()
-                  if var_i.name.startswith('discriminator')])
+        var_list=[
+            var_i for var_i in tf.trainable_variables()
+            if var_i.name.startswith('discriminator')
+        ])
 
     sess = tf.Session()
     saver = tf.train.Saver()
-    sess.run(tf.global_variables_initializer())
+    init_op = tf.group(tf.global_variables_initializer(),
+                       tf.local_variables_initializer())
+    sess.run(init_op)
     coord = tf.train.Coordinator()
     tf.get_default_graph().finalize()
     threads = tf.train.start_queue_runners(sess=sess, coord=coord)
@@ -549,7 +604,7 @@ def train_vaegan(files,
 
     n_files = len(files)
     test_xs = sess.run(batch) / 255.0
-    montage(test_xs, 'test_xs.png')
+    utils.montage(test_xs, 'test_xs.png')
     try:
         while not coord.should_stop() and epoch_i < n_epochs:
             if batch_i % (n_files // batch_size) == 0:
@@ -560,11 +615,10 @@ def train_vaegan(files,
             batch_i += 1
             batch_xs = sess.run(batch) / 255.0
             batch_zs = np.random.randn(batch_size, n_code).astype(np.float32)
-            real_cost, fake_cost, _ = sess.run([
-                ae['loss_real'], ae['loss_fake'], opt_enc],
-                feed_dict={
-                    ae['x']: batch_xs,
-                    ae['gamma']: 0.5})
+            real_cost, fake_cost, _ = sess.run(
+                [ae['loss_real'], ae['loss_fake'], opt_enc],
+                feed_dict={ae['x']: batch_xs,
+                           ae['gamma']: 0.5})
             real_cost = -np.mean(real_cost)
             fake_cost = -np.mean(fake_cost)
             print('real:', real_cost, '/ fake:', fake_cost)
@@ -585,42 +639,48 @@ def train_vaegan(files,
                 dis_update = True
 
             if gen_update:
-                sess.run(opt_gen, feed_dict={
-                    ae['x']: batch_xs,
-                    ae['z_samp']: batch_zs,
-                    ae['gamma']: 0.5})
+                sess.run(
+                    opt_gen,
+                    feed_dict={
+                        ae['x']: batch_xs,
+                        ae['z_samp']: batch_zs,
+                        ae['gamma']: 0.5
+                    })
             if dis_update:
-                sess.run(opt_dis, feed_dict={
-                    ae['x']: batch_xs,
-                    ae['z_samp']: batch_zs,
-                    ae['gamma']: 0.5})
+                sess.run(
+                    opt_dis,
+                    feed_dict={
+                        ae['x']: batch_xs,
+                        ae['z_samp']: batch_zs,
+                        ae['gamma']: 0.5
+                    })
 
             if batch_i % 50 == 0:
 
                 # Plot example reconstructions from latent layer
-                recon = sess.run(
-                    ae['x_tilde'], feed_dict={
-                        ae['z']: zs})
+                recon = sess.run(ae['x_tilde'], feed_dict={ae['z']: zs})
                 print('recon:', recon.min(), recon.max())
                 recon = np.clip(recon / recon.max(), 0, 1)
-                montage(recon.reshape([-1] + crop_shape),
-                        'imgs/manifold_%08d.png' % t_i)
+                utils.montage(
+                    recon.reshape([-1] + crop_shape),
+                    'imgs/manifold_%08d.png' % t_i)
 
                 # Plot example reconstructions
-                recon = sess.run(
-                    ae['x_tilde'], feed_dict={
-                        ae['x']: test_xs})
+                recon = sess.run(ae['x_tilde'], feed_dict={ae['x']: test_xs})
                 print('recon:', recon.min(), recon.max())
                 recon = np.clip(recon / recon.max(), 0, 1)
-                montage(recon.reshape([-1] + crop_shape),
-                        'imgs/reconstruction_%08d.png' % t_i)
+                utils.montage(
+                    recon.reshape([-1] + crop_shape),
+                    'imgs/reconstruction_%08d.png' % t_i)
                 t_i += 1
 
             if batch_i % 100 == 0:
                 # Save the variables to disk.
-                save_path = saver.save(sess, ckpt_name,
-                                       global_step=batch_i,
-                                       write_meta_graph=False)
+                save_path = saver.save(
+                    sess,
+                    ckpt_name,
+                    global_step=batch_i,
+                    write_meta_graph=False)
                 print("Model saved in file: %s" % save_path)
     except tf.errors.OutOfRangeError:
         print('Done training -- epoch limit reached')
@@ -636,19 +696,26 @@ def train_vaegan(files,
     sess.close()
 
 
-def test_celeb(n_epochs=100, crop_shape=[100, 100, 3],
-               n_filters=[100, 100, 100, 100], filter_sizes=[3, 3, 3, 3]):
+def test_celeb(n_epochs=100,
+               filter_sizes=[3, 3, 3, 3],
+               n_filters=[100, 100, 100, 100],
+               crop_shape=[100, 100, 3]):
     """Summary
 
-    Returns
-    -------
+    Parameters
+    ----------
+    n_epochs : int, optional
+        Description
+
+    No Longer Returned
+    ------------------
     name : TYPE
         Description
     """
     files = CELEB()
     train_vaegan(
         files=files,
-        batch_size=100,
+        batch_size=64,
         n_epochs=n_epochs,
         crop_shape=crop_shape,
         crop_factor=0.8,
@@ -666,13 +733,19 @@ def test_celeb(n_epochs=100, crop_shape=[100, 100, 3],
 def test_sita(n_epochs=100):
     """Summary
 
-    Returns
-    -------
+    Parameters
+    ----------
+    n_epochs : int, optional
+        Description
+
+    No Longer Returned
+    ------------------
     name : TYPE
         Description
     """
     if not os.path.exists('sita'):
-        os.system('wget http://ossguy.com/sita/Sita_Sings_the_Blues_640x360_XviD.avi')
+        os.system(
+            'wget http://ossguy.com/sita/Sita_Sings_the_Blues_640x360_XviD.avi')
         os.mkdir('sita')
         os.system('ffmpeg -i Sita_Sings_the_Blues_640x360_XviD.avi -r 60 -f' +
                   ' image2 -s 160x90 sita/sita-%08d.jpg')