From 5ef9564478d8facf7ffcc82144c9652b058850a7 Mon Sep 17 00:00:00 2001 From: "Parag K. Mital" Date: Tue, 5 Sep 2017 18:19:52 -0700 Subject: [PATCH] updating vaegan and charrnn with tf 1.3.0 changes --- session-5/libs/charrnn.py | 326 +++++++++++++++++++++++-------- session-5/libs/vaegan.py | 397 ++++++++++++++++++++++---------------- 2 files changed, 483 insertions(+), 240 deletions(-) diff --git a/session-5/libs/charrnn.py b/session-5/libs/charrnn.py index 2d5d2d01..81be934a 100644 --- a/session-5/libs/charrnn.py +++ b/session-5/libs/charrnn.py @@ -1,10 +1,19 @@ -"""Creative Applications of Deep Learning w/ Tensorflow. -Kadenze, Inc. -Copyright Parag K. Mital, June 2016. -TODO: -argparse -better sound example/model -prime with text input +"""Character-level Recurrent Neural Network. +""" +""" +Copyright 2017 Parag K. Mital. See also NOTICE.md. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. """ import tensorflow as tf @@ -13,6 +22,7 @@ import sys import collections import gzip +from libs import utils def build_model(txt, @@ -22,7 +32,30 @@ def build_model(txt, n_cells=100, gradient_clip=10.0, learning_rate=0.001): - + """Summary + + Parameters + ---------- + txt : TYPE + Description + batch_size : int, optional + Description + sequence_length : int, optional + Description + n_layers : int, optional + Description + n_cells : int, optional + Description + gradient_clip : float, optional + Description + learning_rate : float, optional + Description + + Returns + ------- + TYPE + Description + """ vocab = list(set(txt)) vocab.sort() n_chars = len(vocab) @@ -46,9 +79,10 @@ def build_model(txt, with tf.variable_scope('rnn'): cells = tf.contrib.rnn.MultiRNNCell([ tf.contrib.rnn.DropoutWrapper( - tf.contrib.rnn.BasicLSTMCell(num_units=n_cells, forget_bias=0.0, state_is_tuple=True), - output_keep_prob=keep_prob) - for _ in range(n_layers)]) + tf.contrib.rnn.BasicLSTMCell( + num_units=n_cells, forget_bias=0.0, state_is_tuple=True), + output_keep_prob=keep_prob) for _ in range(n_layers) + ]) initial_state = cells.zero_state(tf.shape(X)[0], tf.float32) # returns a length sequence length list of outputs, one for each input outputs, final_state = tf.contrib.rnn.static_rnn( @@ -64,18 +98,15 @@ def build_model(txt, shape=[n_cells, n_chars], initializer=tf.contrib.layers.xavier_initializer()) b = tf.get_variable( - "b", - shape=[n_chars], - initializer=tf.constant_initializer()) + "b", shape=[n_chars], initializer=tf.constant_initializer()) logits = tf.matmul(outputs_flat, W) + b probs = tf.nn.softmax(logits) Y_pred = tf.argmax(probs, 1) with tf.variable_scope('loss'): - loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example( - [logits], - [tf.reshape(tf.concat(axis=1, values=Y), [-1])], - [tf.ones([batch_size * sequence_length])]) + loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example([logits], [ + tf.reshape(tf.concat(axis=1, values=Y), [-1]) + ], [tf.ones([batch_size * sequence_length])]) cost = tf.reduce_sum(loss) / batch_size with tf.name_scope('optimizer'): @@ -86,29 +117,77 @@ def build_model(txt, gradients.append((tf.clip_by_value(grad, -clip, clip), var)) updates = optimizer.apply_gradients(gradients) - model = {'X': X, 'Y': Y, 'logits': logits, 'probs': probs, - 'Y_pred': Y_pred, 'keep_prob': keep_prob, - 'cost': cost, 'updates': updates, 'initial_state': initial_state, - 'final_state': final_state, 'decoder': decoder, 'encoder': encoder, - 'vocab_size': n_chars} + model = { + 'X': X, + 'Y': Y, + 'logits': logits, + 'probs': probs, + 'Y_pred': Y_pred, + 'keep_prob': keep_prob, + 'cost': cost, + 'updates': updates, + 'initial_state': initial_state, + 'final_state': final_state, + 'decoder': decoder, + 'encoder': encoder, + 'vocab_size': n_chars + } return model -def train(txt, batch_size=100, sequence_length=150, n_cells=200, n_layers=3, - learning_rate=0.00001, max_iter=50000, gradient_clip=5.0, - ckpt_name="model.ckpt", keep_prob=1.0): - +def train(txt, + batch_size=100, + sequence_length=150, + n_cells=200, + n_layers=3, + learning_rate=0.00001, + max_iter=50000, + gradient_clip=5.0, + ckpt_name="model.ckpt", + keep_prob=1.0): + """train + + Parameters + ---------- + txt : TYPE + Description + batch_size : int, optional + Description + sequence_length : int, optional + Description + n_cells : int, optional + Description + n_layers : int, optional + Description + learning_rate : float, optional + Description + max_iter : int, optional + Description + gradient_clip : float, optional + Description + ckpt_name : str, optional + Description + keep_prob : float, optional + Description + + Returns + ------- + TYPE + Description + """ g = tf.Graph() with tf.Session(graph=g) as sess: - model = build_model(txt=txt, - batch_size=batch_size, - sequence_length=sequence_length, - n_layers=n_layers, - n_cells=n_cells, - gradient_clip=gradient_clip, - learning_rate=learning_rate) - - init_op = tf.global_variables_initializer() + model = build_model( + txt=txt, + batch_size=batch_size, + sequence_length=sequence_length, + n_layers=n_layers, + n_cells=n_cells, + gradient_clip=gradient_clip, + learning_rate=learning_rate) + + init_op = tf.group(tf.global_variables_initializer(), + tf.local_variables_initializer()) saver = tf.train.Saver() sess.run(init_op) if os.path.exists(ckpt_name + '.index') or os.path.exists(ckpt_name): @@ -122,32 +201,41 @@ def train(txt, batch_size=100, sequence_length=150, n_cells=200, n_layers=3, while it_i < max_iter: Xs, Ys = [], [] for batch_i in range(batch_size): - Xs.append([model['encoder'][ch] - for ch in txt[cursor:cursor + sequence_length]]) - Ys.append([model['encoder'][ch] - for ch in txt[cursor + 1: - cursor + sequence_length + 1]]) + Xs.append([ + model['encoder'][ch] + for ch in txt[cursor:cursor + sequence_length] + ]) + Ys.append([ + model['encoder'][ch] + for ch in txt[cursor + 1:cursor + sequence_length + 1] + ]) cursor += sequence_length if (cursor + 1) >= len(txt) - sequence_length - 1: cursor = np.random.randint(0, high=sequence_length) - feed_dict = {model['X']: Xs, - model['Y']: Ys, - model['keep_prob']: keep_prob} - out = sess.run([model['cost'], model['updates']], - feed_dict=feed_dict) + feed_dict = { + model['X']: Xs, + model['Y']: Ys, + model['keep_prob']: keep_prob + } + out = sess.run( + [model['cost'], model['updates']], feed_dict=feed_dict) avg_cost += out[0] if (it_i + 1) % print_step == 0: - p = sess.run(model['probs'], feed_dict={ - model['X']: np.array(Xs[-1])[np.newaxis], - model['keep_prob']: 1.0}) - print(p.shape, 'min:', np.min(p), 'max:', np.max(p), - 'mean:', np.mean(p), 'std:', np.std(p)) + p = sess.run( + model['probs'], + feed_dict={ + model['X']: np.array(Xs[-1])[np.newaxis], + model['keep_prob']: 1.0 + }) + print(p.shape, 'min:', + np.min(p), 'max:', + np.max(p), 'mean:', np.mean(p), 'std:', np.std(p)) if isinstance(txt[0], str): # Print original string - print('original:', "".join( - [model['decoder'][ch] for ch in Xs[-1]])) + print('original:', + "".join([model['decoder'][ch] for ch in Xs[-1]])) # Print max guess amax = [] @@ -176,43 +264,91 @@ def train(txt, batch_size=100, sequence_length=150, n_cells=200, n_layers=3, return model -def infer(txt, ckpt_name, n_iterations, n_cells=200, n_layers=3, - learning_rate=0.001, max_iter=5000, gradient_clip=10.0, - init_value=[0], keep_prob=1.0, sampling='prob', temperature=1.0): - +def infer(txt, + ckpt_name, + n_iterations, + n_cells=200, + n_layers=3, + learning_rate=0.001, + max_iter=5000, + gradient_clip=10.0, + init_value=[0], + keep_prob=1.0, + sampling='prob', + temperature=1.0): + """infer + + Parameters + ---------- + txt : TYPE + Description + ckpt_name : TYPE + Description + n_iterations : TYPE + Description + n_cells : int, optional + Description + n_layers : int, optional + Description + learning_rate : float, optional + Description + max_iter : int, optional + Description + gradient_clip : float, optional + Description + init_value : list, optional + Description + keep_prob : float, optional + Description + sampling : str, optional + Description + temperature : float, optional + Description + + Returns + ------- + TYPE + Description + """ g = tf.Graph() with tf.Session(graph=g) as sess: sequence_length = len(init_value) - model = build_model(txt=txt, - batch_size=1, - sequence_length=sequence_length, - n_layers=n_layers, - n_cells=n_cells, - gradient_clip=gradient_clip, - learning_rate=learning_rate) - - init_op = tf.global_variables_initializer() + model = build_model( + txt=txt, + batch_size=1, + sequence_length=sequence_length, + n_layers=n_layers, + n_cells=n_cells, + gradient_clip=gradient_clip, + learning_rate=learning_rate) + + init_op = tf.group(tf.global_variables_initializer(), + tf.local_variables_initializer()) saver = tf.train.Saver() sess.run(init_op) - if os.path.exists(ckpt_name + '.index') or os.path.exists(ckpt_name): + if os.path.exists(ckpt_name): saver.restore(sess, ckpt_name) print("Model restored.") state = [] synth = [init_value] for s_i in model['final_state']: - state += sess.run([s_i.c, s_i.h], feed_dict={ - model['X']: [synth[-1]], model['keep_prob']: keep_prob}) + state += sess.run( + [s_i.c, s_i.h], + feed_dict={ + model['X']: [synth[-1]], + model['keep_prob']: keep_prob + }) for i in range(n_iterations): # print('iteration: {}/{}'.format(i, n_iterations), end='\r') - feed_dict = {model['X']: [synth[-1]], - model['keep_prob']: keep_prob} + feed_dict = {model['X']: [synth[-1]], model['keep_prob']: keep_prob} state_updates = [] for state_i in range(n_layers): feed_dict[model['initial_state'][state_i].c] = \ state[state_i * 2] - feed_dict[model['initial_state'][state_i].h] = state[state_i * 2 + 1] + feed_dict[model['initial_state'][state_i].h] = state[state_i * 2 + + 1] state_updates.append(model['final_state'][state_i].c) state_updates.append(model['final_state'][state_i].h) p = sess.run(model['probs'], feed_dict=feed_dict)[0] @@ -225,8 +361,9 @@ def infer(txt, ckpt_name, n_iterations, n_cells=200, n_layers=3, p = np.random.multinomial(1, p.ravel()) p = np.argmax(p) # Get the current state - state = [sess.run(s_i, feed_dict=feed_dict) - for s_i in state_updates] + state = [ + sess.run(s_i, feed_dict=feed_dict) for s_i in state_updates + ] synth.append([p]) print(model['decoder'][p], end='') sys.stdout.flush() @@ -238,27 +375,60 @@ def infer(txt, ckpt_name, n_iterations, n_cells=200, n_layers=3, def test_alice(max_iter=5): + """Summary + + Parameters + ---------- + max_iter : int, optional + Description + + Returns + ------- + TYPE + Description + """ + utils.download('https://s3.amazonaws.com/cadl/models/alice.txt.gz') with gzip.open('alice.txt.gz', 'rb') as fp: txt = fp.read().decode('utf-8') - train(txt, n_layers=2, n_cells=20, max_iter=max_iter) + return train(txt, n_layers=2, n_cells=20, max_iter=max_iter) def test_trump(max_iter=100): + """Summary + + Parameters + ---------- + max_iter : int, optional + Description + """ + utils.download( + 'https://s3.amazonaws.com/cadl/models/trump.ckpt.data-00000-of-00001') + utils.download('https://s3.amazonaws.com/cadl/models/trump.ckpt.meta') + utils.download('https://s3.amazonaws.com/cadl/models/trump.ckpt.index') + utils.download('https://s3.amazonaws.com/cadl/models/trump.txt') with open('trump.txt', 'r') as fp: txt = fp.read() - # train(txt, ckpt_name='trump.ckpt', max_iter=max_iter) + #train(txt, ckpt_name='trump', max_iter=max_iter) print(infer(txt, ckpt_name='./trump.ckpt', n_iterations=max_iter)) def test_wtc(): + """Summary + """ from scipy.io.wavfile import write, read rate, aud = read('wtc.wav') txt = np.int8(np.round(aud / 16384.0 * 128.0)) txt = np.squeeze(txt).tolist() # try with more than 100 iterations, e.g. 50k - 200k train(txt, sequence_length=250, n_layers=3, n_cells=512, max_iter=100) - synthesis = infer(txt, './model.ckpt', 8000 * 30, n_layers=3, - n_cells=150, keep_prob=1.0, sampling='prob') + synthesis = infer( + txt, + './model.ckpt', + 8000 * 30, + n_layers=3, + n_cells=150, + keep_prob=1.0, + sampling='prob') snd = np.int16(np.array(synthesis) / 128.0 * 16384.0) write('wtc-synth.wav', 8000, snd) diff --git a/session-5/libs/vaegan.py b/session-5/libs/vaegan.py index ca88b1cc..7434a140 100644 --- a/session-5/libs/vaegan.py +++ b/session-5/libs/vaegan.py @@ -1,19 +1,37 @@ + """Convolutional/Variational autoencoder, including demonstration of training such a network on MNIST, CelebNet and the film, "Sita Sings The Blues" using an image pipeline. +""" +""" +Copyright 2017 Parag K. Mital. See also NOTICE.md. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at -Parag K. Mital, Jan 2016 + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. """ import tensorflow as tf import numpy as np import os from libs.dataset_utils import create_input_pipeline from libs.datasets import CELEB -from libs.utils import * +from libs import utils -def encoder(x, n_hidden=None, dimensions=[], filter_sizes=[], - convolutional=False, activation=tf.nn.relu, +def encoder(x, + n_hidden=None, + dimensions=[], + filter_sizes=[], + convolutional=False, + activation=tf.nn.relu, output_activation=tf.nn.sigmoid): """Summary @@ -40,11 +58,9 @@ def encoder(x, n_hidden=None, dimensions=[], filter_sizes=[], Description """ if convolutional: - x_tensor = to_tensor(x) + x_tensor = utils.to_tensor(x) else: - x_tensor = tf.reshape( - tensor=x, - shape=[-1, dimensions[0]]) + x_tensor = tf.reshape(tensor=x, shape=[-1, dimensions[0]]) dimensions = dimensions[1:] current_input = x_tensor @@ -55,16 +71,14 @@ def encoder(x, n_hidden=None, dimensions=[], filter_sizes=[], with tf.variable_scope(str(layer_i)): shapes.append(current_input.get_shape().as_list()) if convolutional: - h, W = conv2d( + h, W = utils.conv2d( x=current_input, n_output=n_output, k_h=filter_sizes[layer_i], k_w=filter_sizes[layer_i], padding='SAME') else: - h, W = linear( - x=current_input, - n_output=n_output) + h, W = utils.linear(x=current_input, n_output=n_output) h = activation(h) Ws.append(W) hs.append(h) @@ -74,11 +88,11 @@ def encoder(x, n_hidden=None, dimensions=[], filter_sizes=[], shapes.append(h.get_shape().as_list()) with tf.variable_scope('flatten'): - flattened = flatten(current_input) + flattened = utils.flatten(current_input) with tf.variable_scope('hidden'): if n_hidden: - h, W = linear(flattened, n_hidden, name='linear') + h, W = utils.linear(flattened, n_hidden, name='linear') h = activation(h) else: h = flattened @@ -86,9 +100,13 @@ def encoder(x, n_hidden=None, dimensions=[], filter_sizes=[], return {'z': h, 'Ws': Ws, 'hs': hs, 'shapes': shapes} -def decoder(z, shapes, n_hidden=None, - dimensions=[], filter_sizes=[], - convolutional=False, activation=tf.nn.relu, +def decoder(z, + shapes, + n_hidden=None, + dimensions=[], + filter_sizes=[], + convolutional=False, + activation=tf.nn.relu, output_activation=tf.nn.relu): """Summary @@ -118,7 +136,7 @@ def decoder(z, shapes, n_hidden=None, """ with tf.variable_scope('hidden/1'): if n_hidden: - h = linear(z, n_hidden, name='linear')[0] + h = utils.linear(z, n_hidden, name='linear')[0] h = activation(h) else: h = z @@ -126,12 +144,13 @@ def decoder(z, shapes, n_hidden=None, with tf.variable_scope('hidden/2'): dims = shapes[0] size = dims[1] * dims[2] * dims[3] if convolutional else dims[1] - h = linear(h, size, name='linear')[0] + h = utils.linear(h, size, name='linear')[0] current_input = activation(h) if convolutional: current_input = tf.reshape( current_input, - tf.stack([tf.shape(current_input)[0], dims[1], dims[2], dims[3]])) + tf.stack( + [tf.shape(current_input)[0], dims[1], dims[2], dims[3]])) Ws = [] hs = [] @@ -139,16 +158,16 @@ def decoder(z, shapes, n_hidden=None, with tf.variable_scope('decoder/{}'.format(layer_i)): if convolutional: shape = shapes[layer_i + 1] - h, W = deconv2d(x=current_input, - n_output_h=shape[1], - n_output_w=shape[2], - n_output_ch=shape[3], - n_input_ch=shapes[layer_i][3], - k_h=filter_sizes[layer_i], - k_w=filter_sizes[layer_i]) + h, W = utils.deconv2d( + x=current_input, + n_output_h=shape[1], + n_output_w=shape[2], + n_output_ch=shape[3], + n_input_ch=shapes[layer_i][3], + k_h=filter_sizes[layer_i], + k_w=filter_sizes[layer_i]) else: - h, W = linear(x=current_input, - n_output=n_output) + h, W = utils.linear(x=current_input, n_output=n_output) if (layer_i + 1) < len(dimensions): h = activation(h) else: @@ -176,8 +195,8 @@ def variational_bayes(h, n_code): name : TYPE Description """ - z_mu = tf.nn.tanh(linear(h, n_code, name='mu')[0]) - z_log_sigma = 0.5 * tf.nn.tanh(linear(h, n_code, name='log_sigma')[0]) + z_mu = tf.nn.tanh(utils.linear(h, n_code, name='mu')[0]) + z_log_sigma = 0.5 * tf.nn.tanh(utils.linear(h, n_code, name='log_sigma')[0]) # Sample from noise distribution p(eps) ~ N(0, 1) epsilon = tf.random_normal(tf.stack([tf.shape(h)[0], n_code])) @@ -187,13 +206,13 @@ def variational_bayes(h, n_code): # -log(p(z)/q(z|x)), bits by coding. # variational bound coding costs kl(p(z|x)||q(z|x)) # d_kl(q(z|x)||p(z)) - loss_z = -0.5 * tf.reduce_sum( - 1.0 + 2.0 * z_log_sigma - tf.square(z_mu) - tf.exp(2.0 * z_log_sigma), - 1) + loss_z = -0.5 * tf.reduce_sum(1.0 + 2.0 * z_log_sigma - tf.square(z_mu) - + tf.exp(2.0 * z_log_sigma), 1) return z, z_mu, z_log_sigma, loss_z -def discriminator(x, convolutional=True, +def discriminator(x, + convolutional=True, filter_sizes=[5, 5, 5, 5], activation=tf.nn.relu, n_filters=[100, 100, 100, 100]): @@ -207,6 +226,8 @@ def discriminator(x, convolutional=True, Description filter_sizes : list, optional Description + activation : TYPE, optional + Description n_filters : list, optional Description @@ -215,19 +236,24 @@ def discriminator(x, convolutional=True, name : TYPE Description """ - encoding = encoder(x=x, - convolutional=convolutional, - dimensions=n_filters, - filter_sizes=filter_sizes, - activation=activation) + encoding = encoder( + x=x, + convolutional=convolutional, + dimensions=n_filters, + filter_sizes=filter_sizes, + activation=activation) # flatten, then linear to 1 value - res = flatten(encoding['z'], name='flatten') + res = utils.flatten(encoding['z'], name='flatten') if res.get_shape().as_list()[-1] > 1: - res = linear(res, 1)[0] + res = utils.linear(res, 1)[0] - return {'logits': res, 'probs': tf.nn.sigmoid(res), - 'Ws': encoding['Ws'], 'hs': encoding['hs']} + return { + 'logits': res, + 'probs': tf.nn.sigmoid(res), + 'Ws': encoding['Ws'], + 'hs': encoding['hs'] + } def VAE(input_shape=[None, 784], @@ -268,12 +294,13 @@ def VAE(input_shape=[None, 784], x = tf.placeholder(tf.float32, input_shape, 'x') with tf.variable_scope('encoder'): - encoding = encoder(x=x, - n_hidden=n_hidden, - convolutional=convolutional, - dimensions=n_filters, - filter_sizes=filter_sizes, - activation=activation) + encoding = encoder( + x=x, + n_hidden=n_hidden, + convolutional=convolutional, + dimensions=n_filters, + filter_sizes=filter_sizes, + activation=activation) if variational: with tf.variable_scope('variational'): @@ -290,23 +317,30 @@ def VAE(input_shape=[None, 784], n_filters += [input_shape[-1]] with tf.variable_scope('generator'): - decoding = decoder(z=z, - shapes=shapes, - n_hidden=n_hidden, - dimensions=n_filters, - filter_sizes=filter_sizes, - convolutional=convolutional, - activation=activation) + decoding = decoder( + z=z, + shapes=shapes, + n_hidden=n_hidden, + dimensions=n_filters, + filter_sizes=filter_sizes, + convolutional=convolutional, + activation=activation) x_tilde = decoding['x_tilde'] - x_flat = flatten(x) - x_tilde_flat = flatten(x_tilde) + x_flat = utils.flatten(x) + x_tilde_flat = utils.flatten(x_tilde) # -log(p(x|z)) loss_x = tf.reduce_sum(tf.squared_difference(x_flat, x_tilde_flat), 1) - return {'loss_x': loss_x, 'loss_z': loss_z, 'x': x, 'z': z, - 'Ws': encoding['Ws'], 'hs': decoding['hs'], - 'x_tilde': x_tilde} + return { + 'loss_x': loss_x, + 'loss_z': loss_z, + 'x': x, + 'z': z, + 'Ws': encoding['Ws'], + 'hs': decoding['hs'], + 'x_tilde': x_tilde + } def VAEGAN(input_shape=[None, 784], @@ -348,12 +382,13 @@ def VAEGAN(input_shape=[None, 784], z_samp = tf.placeholder(tf.float32, [None, n_code], 'z_samp') with tf.variable_scope('encoder'): - encoding = encoder(x=x, - n_hidden=n_hidden, - convolutional=convolutional, - dimensions=n_filters, - filter_sizes=filter_sizes, - activation=activation) + encoding = encoder( + x=x, + n_hidden=n_hidden, + convolutional=convolutional, + dimensions=n_filters, + filter_sizes=filter_sizes, + activation=activation) with tf.variable_scope('variational'): z, z_mu, z_log_sigma, loss_z = variational_bayes( @@ -366,40 +401,45 @@ def VAEGAN(input_shape=[None, 784], n_filters_decoder += [input_shape[-1]] with tf.variable_scope('generator'): - decoding_actual = decoder(z=z, - shapes=shapes, - n_hidden=n_hidden, - convolutional=convolutional, - dimensions=n_filters_decoder, - filter_sizes=filter_sizes, - activation=activation) + decoding_actual = decoder( + z=z, + shapes=shapes, + n_hidden=n_hidden, + convolutional=convolutional, + dimensions=n_filters_decoder, + filter_sizes=filter_sizes, + activation=activation) with tf.variable_scope('generator', reuse=True): - decoding_sampled = decoder(z=z_samp, - shapes=shapes, - n_hidden=n_hidden, - convolutional=convolutional, - dimensions=n_filters_decoder, - filter_sizes=filter_sizes, - activation=activation) + decoding_sampled = decoder( + z=z_samp, + shapes=shapes, + n_hidden=n_hidden, + convolutional=convolutional, + dimensions=n_filters_decoder, + filter_sizes=filter_sizes, + activation=activation) with tf.variable_scope('discriminator'): - D_real = discriminator(x, - filter_sizes=filter_sizes, - n_filters=n_filters, - activation=activation) + D_real = discriminator( + x, + filter_sizes=filter_sizes, + n_filters=n_filters, + activation=activation) with tf.variable_scope('discriminator', reuse=True): - D_fake = discriminator(decoding_actual['x_tilde'], - filter_sizes=filter_sizes, - n_filters=n_filters, - activation=activation) + D_fake = discriminator( + decoding_actual['x_tilde'], + filter_sizes=filter_sizes, + n_filters=n_filters, + activation=activation) with tf.variable_scope('discriminator', reuse=True): - D_samp = discriminator(decoding_sampled['x_tilde'], - filter_sizes=filter_sizes, - n_filters=n_filters, - activation=activation) + D_samp = discriminator( + decoding_sampled['x_tilde'], + filter_sizes=filter_sizes, + n_filters=n_filters, + activation=activation) with tf.variable_scope('loss'): # Weights influence of content/style of decoder @@ -408,9 +448,8 @@ def VAEGAN(input_shape=[None, 784], # Discriminator_l Log Likelihood Loss loss_D_llike = 0 for h_fake, h_real in zip(D_fake['hs'][3:], D_real['hs'][3:]): - loss_D_llike += tf.reduce_sum( - 0.5 * tf.squared_difference( - flatten(h_fake), flatten(h_real)), 1) + loss_D_llike += tf.reduce_sum(0.5 * tf.squared_difference( + utils.flatten(h_fake), utils.flatten(h_real)), 1) # GAN Loss eps = 1e-12 @@ -424,12 +463,22 @@ def VAEGAN(input_shape=[None, 784], loss_gen = tf.reduce_mean(gamma * loss_D_llike - loss_GAN) loss_dis = -tf.reduce_mean(loss_GAN) - return {'x': x, 'z': z, 'x_tilde': decoding_actual['x_tilde'], - 'z_samp': z_samp, 'x_tilde_samp': decoding_sampled['x_tilde'], - 'loss_real': loss_real, 'loss_fake': loss_fake, 'loss_samp': loss_samp, - 'loss_GAN': loss_GAN, 'loss_D_llike': loss_D_llike, - 'loss_enc': loss_enc, 'loss_gen': loss_gen, 'loss_dis': loss_dis, - 'gamma': gamma} + return { + 'x': x, + 'z': z, + 'x_tilde': decoding_actual['x_tilde'], + 'z_samp': z_samp, + 'x_tilde_samp': decoding_sampled['x_tilde'], + 'loss_real': loss_real, + 'loss_fake': loss_fake, + 'loss_samp': loss_samp, + 'loss_GAN': loss_GAN, + 'loss_D_llike': loss_D_llike, + 'loss_enc': loss_enc, + 'loss_gen': loss_gen, + 'loss_dis': loss_dis, + 'gamma': gamma + } def train_vaegan(files, @@ -485,20 +534,21 @@ def train_vaegan(files, ckpt_name : str, optional Description - Returns - ------- + No Longer Returned + ------------------ name : TYPE Description """ - ae = VAEGAN(input_shape=[None] + crop_shape, - convolutional=convolutional, - variational=variational, - n_filters=n_filters, - n_hidden=n_hidden, - n_code=n_code, - filter_sizes=filter_sizes, - activation=activation) + ae = VAEGAN( + input_shape=[None] + crop_shape, + convolutional=convolutional, + variational=variational, + n_filters=n_filters, + n_hidden=n_hidden, + n_code=n_code, + filter_sizes=filter_sizes, + activation=activation) batch = create_input_pipeline( files=files, @@ -509,29 +559,34 @@ def train_vaegan(files, shape=input_shape) zs = np.random.randn(4, n_code).astype(np.float32) - zs = make_latent_manifold(zs, n_examples) + zs = utils.make_latent_manifold(zs, n_examples) - opt_enc = tf.train.AdamOptimizer( - learning_rate=learning_rate).minimize( + opt_enc = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize( ae['loss_enc'], - var_list=[var_i for var_i in tf.trainable_variables() - if var_i.name.startswith('encoder')]) + var_list=[ + var_i for var_i in tf.trainable_variables() + if var_i.name.startswith('encoder') + ]) - opt_gen = tf.train.AdamOptimizer( - learning_rate=learning_rate).minimize( + opt_gen = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize( ae['loss_gen'], - var_list=[var_i for var_i in tf.trainable_variables() - if var_i.name.startswith('generator')]) + var_list=[ + var_i for var_i in tf.trainable_variables() + if var_i.name.startswith('generator') + ]) - opt_dis = tf.train.AdamOptimizer( - learning_rate=learning_rate).minimize( + opt_dis = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize( ae['loss_dis'], - var_list=[var_i for var_i in tf.trainable_variables() - if var_i.name.startswith('discriminator')]) + var_list=[ + var_i for var_i in tf.trainable_variables() + if var_i.name.startswith('discriminator') + ]) sess = tf.Session() saver = tf.train.Saver() - sess.run(tf.global_variables_initializer()) + init_op = tf.group(tf.global_variables_initializer(), + tf.local_variables_initializer()) + sess.run(init_op) coord = tf.train.Coordinator() tf.get_default_graph().finalize() threads = tf.train.start_queue_runners(sess=sess, coord=coord) @@ -549,7 +604,7 @@ def train_vaegan(files, n_files = len(files) test_xs = sess.run(batch) / 255.0 - montage(test_xs, 'test_xs.png') + utils.montage(test_xs, 'test_xs.png') try: while not coord.should_stop() and epoch_i < n_epochs: if batch_i % (n_files // batch_size) == 0: @@ -560,11 +615,10 @@ def train_vaegan(files, batch_i += 1 batch_xs = sess.run(batch) / 255.0 batch_zs = np.random.randn(batch_size, n_code).astype(np.float32) - real_cost, fake_cost, _ = sess.run([ - ae['loss_real'], ae['loss_fake'], opt_enc], - feed_dict={ - ae['x']: batch_xs, - ae['gamma']: 0.5}) + real_cost, fake_cost, _ = sess.run( + [ae['loss_real'], ae['loss_fake'], opt_enc], + feed_dict={ae['x']: batch_xs, + ae['gamma']: 0.5}) real_cost = -np.mean(real_cost) fake_cost = -np.mean(fake_cost) print('real:', real_cost, '/ fake:', fake_cost) @@ -585,42 +639,48 @@ def train_vaegan(files, dis_update = True if gen_update: - sess.run(opt_gen, feed_dict={ - ae['x']: batch_xs, - ae['z_samp']: batch_zs, - ae['gamma']: 0.5}) + sess.run( + opt_gen, + feed_dict={ + ae['x']: batch_xs, + ae['z_samp']: batch_zs, + ae['gamma']: 0.5 + }) if dis_update: - sess.run(opt_dis, feed_dict={ - ae['x']: batch_xs, - ae['z_samp']: batch_zs, - ae['gamma']: 0.5}) + sess.run( + opt_dis, + feed_dict={ + ae['x']: batch_xs, + ae['z_samp']: batch_zs, + ae['gamma']: 0.5 + }) if batch_i % 50 == 0: # Plot example reconstructions from latent layer - recon = sess.run( - ae['x_tilde'], feed_dict={ - ae['z']: zs}) + recon = sess.run(ae['x_tilde'], feed_dict={ae['z']: zs}) print('recon:', recon.min(), recon.max()) recon = np.clip(recon / recon.max(), 0, 1) - montage(recon.reshape([-1] + crop_shape), - 'imgs/manifold_%08d.png' % t_i) + utils.montage( + recon.reshape([-1] + crop_shape), + 'imgs/manifold_%08d.png' % t_i) # Plot example reconstructions - recon = sess.run( - ae['x_tilde'], feed_dict={ - ae['x']: test_xs}) + recon = sess.run(ae['x_tilde'], feed_dict={ae['x']: test_xs}) print('recon:', recon.min(), recon.max()) recon = np.clip(recon / recon.max(), 0, 1) - montage(recon.reshape([-1] + crop_shape), - 'imgs/reconstruction_%08d.png' % t_i) + utils.montage( + recon.reshape([-1] + crop_shape), + 'imgs/reconstruction_%08d.png' % t_i) t_i += 1 if batch_i % 100 == 0: # Save the variables to disk. - save_path = saver.save(sess, ckpt_name, - global_step=batch_i, - write_meta_graph=False) + save_path = saver.save( + sess, + ckpt_name, + global_step=batch_i, + write_meta_graph=False) print("Model saved in file: %s" % save_path) except tf.errors.OutOfRangeError: print('Done training -- epoch limit reached') @@ -636,19 +696,26 @@ def train_vaegan(files, sess.close() -def test_celeb(n_epochs=100, crop_shape=[100, 100, 3], - n_filters=[100, 100, 100, 100], filter_sizes=[3, 3, 3, 3]): +def test_celeb(n_epochs=100, + filter_sizes=[3, 3, 3, 3], + n_filters=[100, 100, 100, 100], + crop_shape=[100, 100, 3]): """Summary - Returns - ------- + Parameters + ---------- + n_epochs : int, optional + Description + + No Longer Returned + ------------------ name : TYPE Description """ files = CELEB() train_vaegan( files=files, - batch_size=100, + batch_size=64, n_epochs=n_epochs, crop_shape=crop_shape, crop_factor=0.8, @@ -666,13 +733,19 @@ def test_celeb(n_epochs=100, crop_shape=[100, 100, 3], def test_sita(n_epochs=100): """Summary - Returns - ------- + Parameters + ---------- + n_epochs : int, optional + Description + + No Longer Returned + ------------------ name : TYPE Description """ if not os.path.exists('sita'): - os.system('wget http://ossguy.com/sita/Sita_Sings_the_Blues_640x360_XviD.avi') + os.system( + 'wget http://ossguy.com/sita/Sita_Sings_the_Blues_640x360_XviD.avi') os.mkdir('sita') os.system('ffmpeg -i Sita_Sings_the_Blues_640x360_XviD.avi -r 60 -f' + ' image2 -s 160x90 sita/sita-%08d.jpg')