In [1]:
import time
import numpy as np
import tensorflow as tf
from tensorflow.contrib.slim import fully_connected as fc
import matplotlib.pyplot as plt 
%matplotlib inline

In [2]:
import os, time, itertools, pickle, random, glob, imageio
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

import matplotlib.pyplot as plt
import math

In [3]:
def dataset_load_album(datapath, labelpath, imgsize):
    """
    datapath 'jpeg' file of album image
    labelpath 'png' file of spectrogram 
    """
    datalist = os.listdir(datapath)
    datasize = len(datalist)
    
    data_ = np.zeros((datasize, imgsize, imgsize, 3))
    label_ = np.zeros((datasize, imgsize, imgsize, 3))
    nonelist = []
    for i, fname in enumerate(datalist):
        if glob.glob(os.path.join(labelpath, fname[:-4] + '*')):
        #f os.path.isfile(os.path.join(labelpath, fname[:-4] + 'png')):
            #print('there is file')
            img_d = Image.open(os.path.join(datapath, fname)
                              ).resize((imgsize, imgsize))
            img_l = Image.open(os.path.join(labelpath, fname[:-4]+'png')
                              ).convert('RGB').resize((imgsize, imgsize))
            data_[i] = np.asarray(img_d)
            label_[i] = np.asarray(img_l)
            img_d.close()
            img_l.close()
            
            
        else:
            nonelist.append(i)
            
    data = np.delete(data_, nonelist, 0)
    label = np.delete(label_, nonelist, 0)
    return data, label

In [4]:
# Load albumdata

img_size = 128

datapath = os.path.join('input', 'img')
audiopath = os.path.join('audio','spec')

samples, labels = dataset_load_album(
    datapath, audiopath, img_size)

samples = samples/255
labels = labels/255

input_dim = img_size * img_size * 3
num_sample = samples.shape[0]

In [21]:
class VariantionalAutoencoder(object):

    def __init__(self, sess, input_dim =input_dim, learning_rate=1e-4, batch_size=64, n_z=16):
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.n_z = n_z
        self.input_dim = input_dim

        self.sess = sess
        self.build()
        



    # Build the netowrk and the loss functions
    def build(self):
        
        self.x = tf.placeholder(
            name='x', dtype=tf.float32, shape=[None, self.input_dim])

        # Encode
        # x -> z_mean, z_sigma -> z
        f1 = fc(self.x, 2048, scope='enc_fc1', activation_fn=tf.nn.relu)
        f2 = fc(f1, 1024, scope='enc_fc2', activation_fn=tf.nn.relu)
        f3 = fc(f2, 512, scope='enc_fc3', activation_fn=tf.nn.relu)
        f4 = fc(f3, 256, scope='enc_fc4', activation_fn=tf.nn.relu)
        f5 = fc(f4, 128, scope='enc_fc5', activation_fn=tf.nn.relu)
        f6 = fc(f5, 64, scope='enc_fc6', activation_fn=tf.nn.relu)
        self.z_mu = fc(f6, self.n_z, scope='enc_fc7_mu', 
                       activation_fn=None)
        self.z_log_sigma_sq = fc(f6, self.n_z, scope='enc_fc7_sigma', 
                                 activation_fn=None)
        eps = tf.random_normal(
            shape=tf.shape(self.z_log_sigma_sq),
            mean=0, stddev=1, dtype=tf.float32)
        self.z = self.z_mu + tf.sqrt(tf.exp(self.z_log_sigma_sq)) * eps

        # Decode
        # z -> x_hat
        g1 = fc(self.z, 64, scope='dec_fc1', activation_fn=tf.nn.relu)
        g2 = fc(g1, 128, scope='dec_fc2', activation_fn=tf.nn.relu)
        g3 = fc(g2, 256, scope='dec_fc3', activation_fn=tf.nn.relu)
        g4 = fc(g3, 1024, scope='dec_fc4', activation_fn=tf.nn.relu)
        g5 = fc(g4, 1024, scope='dec_fc5', activation_fn=tf.nn.relu)
        g6 = fc(g5, 2048, scope='dec_fc6', activation_fn=tf.nn.relu)
        self.x_hat = fc(g6, self.input_dim, scope='dec_fc7', 
                        activation_fn=tf.sigmoid)

        # Loss
        # Reconstruction loss
        # Minimize the cross-entropy loss
        # H(x, x_hat) = -\Sigma x*log(x_hat) + (1-x)*log(1-x_hat)
        epsilon = 1e-10
        recon_loss = -tf.reduce_sum(
            self.x * tf.log(epsilon+self.x_hat) + 
            (1-self.x) * tf.log(epsilon+1-self.x_hat), 
            axis=1
        )
        self.recon_loss = tf.reduce_mean(recon_loss)

        # Latent loss
        # KL divergence: measure the difference between two distributions
        # Here we measure the divergence between 
        # the latent distribution and N(0, 1)
        latent_loss = -0.5 * tf.reduce_sum(
            1 + self.z_log_sigma_sq - tf.square(self.z_mu) - 
            tf.exp(self.z_log_sigma_sq), axis=1)
        self.latent_loss = tf.reduce_mean(latent_loss)

        self.total_loss = self.recon_loss + self.latent_loss
        self.train_op = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(self.total_loss)
        
        self.losses = {
            'recon_loss': self.recon_loss,
            'latent_loss': self.latent_loss,
            'total_loss': self.total_loss,
        }        
        return

    # Execute the forward and the backward pass
    def run_single_step(self, x):
        _, losses = self.sess.run(
            [self.train_op, self.losses],
            feed_dict={self.x: x}
        )
        return losses

    # x -> x_hat
    def reconstructor(self, x, sess):
        x_hat = self.sess.run(self.x_hat, feed_dict={self.x: x})
        return x_hat

    # z -> x
    def generator(self, z):
        x_hat = self.sess.run(self.x_hat, feed_dict={self.z: z})
        return x_hat
    
    # x -> z
    def transformer(self, x):
        z = self.sess.run(self.z, feed_dict={self.x: x})
        return z

In [22]:
def trainer_album(model_object, sess, sample, input_dim =input_dim, learning_rate=1e-4, 
            batch_size=16, num_epoch=100, n_z=16, log_step=5,
                 num_sample = num_sample):
    sess.run(tf.global_variables_initializer())
    model = model_object(sess,
        learning_rate=learning_rate, batch_size=batch_size, n_z=n_z,
    input_dim =input_dim)
    
    
    step = 0

    for epoch in range(num_epoch):
        start_time = time.time()
        for iter in range(num_sample // batch_size):
            step += 1
            # Get a batch
            batch = sample[iter * batch_size : (iter + 1) * batch_size]
            # Execute the forward and backward pass 
            # Report computed losses
            #print('batch',batch)
            losses = model.run_single_step(batch)
        end_time = time.time()
        
        if epoch % log_step == 0:
            log_str = '[Epoch {}] '.format(epoch)
            for k, v in losses.items():
                log_str += '{}: {:.3f}  '.format(k, v)
            log_str += '({:.3f} sec/epoch)'.format(end_time - start_time)
            print(log_str)
            
    print('Done!')
    return model

In [23]:
tf.reset_default_graph()

with tf.Session(
    config=tf.ConfigProto(
        allow_soft_placement=True,log_device_placement=True)) as sess:
    with tf.device('/cpu:0'):
    #with tf.device('/GPU:0'): 
        sess.run(tf.global_variables_initializer())    
        model_2d_vae = trainer_album(VariantionalAutoencoder, sess,
                                 samples.reshape(-1,input_dim),
                                 input_dim =input_dim,
                                 batch_size=32, n_z=2, num_sample=samples.shape[0])
        saver = tf.train.Saver()
        saver.save(sess, "model.ckpt")

FailedPreconditionError: Attempting to use uninitialized value enc_fc7_sigma/biases
	 [[node enc_fc7_sigma/biases/read (defined at <ipython-input-21-7875f09ee181>:32) ]]

Caused by op 'enc_fc7_sigma/biases/read', defined at:
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\ipykernel\kernelapp.py", line 505, in start
    self.io_loop.start()
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tornado\platform\asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\asyncio\base_events.py", line 539, in run_forever
    self._run_once()
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\asyncio\base_events.py", line 1775, in _run_once
    handle._run()
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tornado\ioloop.py", line 758, in _run_callback
    ret = callback()
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tornado\stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tornado\gen.py", line 1233, in inner
    self.run()
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tornado\gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\ipykernel\kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\ipykernel\kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\ipykernel\kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\ipykernel\ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\IPython\core\interactiveshell.py", line 2819, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\IPython\core\interactiveshell.py", line 2845, in _run_cell
    return runner(coro)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\IPython\core\async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\IPython\core\interactiveshell.py", line 3020, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\IPython\core\interactiveshell.py", line 3185, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\IPython\core\interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-23-4f600888d6f5>", line 12, in <module>
    batch_size=32, n_z=2, num_sample=samples.shape[0])
  File "<ipython-input-22-3ec0aba4e80e>", line 7, in trainer_album
    input_dim =input_dim)
  File "<ipython-input-21-7875f09ee181>", line 10, in __init__
    self.build()
  File "<ipython-input-21-7875f09ee181>", line 32, in build
    activation_fn=None)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\contrib\framework\python\ops\arg_scope.py", line 182, in func_with_args
    return func(*args, **current_args)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\contrib\layers\python\layers\layers.py", line 1855, in fully_connected
    outputs = layer.apply(inputs)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 1227, in apply
    return self.__call__(inputs, *args, **kwargs)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\layers\base.py", line 530, in __call__
    outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 538, in __call__
    self._maybe_build(inputs)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 1603, in _maybe_build
    self.build(input_shapes)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\keras\layers\core.py", line 958, in build
    trainable=True)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\layers\base.py", line 435, in add_weight
    getter=vs.get_variable)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 349, in add_weight
    aggregation=aggregation)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\training\checkpointable\base.py", line 607, in _add_variable_with_custom_getter
    **kwargs_for_getter)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 1479, in get_variable
    aggregation=aggregation)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 1220, in get_variable
    aggregation=aggregation)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 530, in get_variable
    return custom_getter(**custom_getter_kwargs)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\contrib\layers\python\layers\layers.py", line 1750, in layer_variable_getter
    return _model_variable_getter(getter, *args, **kwargs)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\contrib\layers\python\layers\layers.py", line 1741, in _model_variable_getter
    aggregation=aggregation)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\contrib\framework\python\ops\arg_scope.py", line 182, in func_with_args
    return func(*args, **current_args)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\contrib\framework\python\ops\variables.py", line 350, in model_variable
    aggregation=aggregation)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\contrib\framework\python\ops\arg_scope.py", line 182, in func_with_args
    return func(*args, **current_args)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\contrib\framework\python\ops\variables.py", line 277, in variable
    aggregation=aggregation)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 499, in _true_getter
    aggregation=aggregation)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 911, in _get_single_variable
    aggregation=aggregation)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\ops\variables.py", line 213, in __call__
    return cls._variable_v1_call(*args, **kwargs)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\ops\variables.py", line 176, in _variable_v1_call
    aggregation=aggregation)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\ops\variables.py", line 155, in <lambda>
    previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 2495, in default_variable_creator
    expected_shape=expected_shape, import_scope=import_scope)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\ops\variables.py", line 217, in __call__
    return super(VariableMetaclass, cls).__call__(*args, **kwargs)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\ops\variables.py", line 1395, in __init__
    constraint=constraint)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\ops\variables.py", line 1557, in _init_from_args
    self._snapshot = array_ops.identity(self._variable, name="read")
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\util\dispatch.py", line 180, in wrapper
    return target(*args, **kwargs)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\ops\array_ops.py", line 81, in identity
    ret = gen_array_ops.identity(input, name=name)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 3889, in identity
    "Identity", input=input, name=name)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\util\deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\framework\ops.py", line 3300, in create_op
    op_def=op_def)
  File "C:\Users\Takato\Anaconda3\envs\cs599pro\lib\site-packages\tensorflow\python\framework\ops.py", line 1801, in __init__
    self._traceback = tf_stack.extract_stack()

FailedPreconditionError (see above for traceback): Attempting to use uninitialized value enc_fc7_sigma/biases
	 [[node enc_fc7_sigma/biases/read (defined at <ipython-input-21-7875f09ee181>:32) ]]


In [None]:
# Test the trained model: generation
# Sample noise vectors from N(0, 1)
z = np.random.normal(size=[model_2d_vae.batch_size, model_2d_vae.n_z])
x_generated = model_2d_vae.generator(z)

n = np.sqrt(model_2d_vae.batch_size).astype(np.int32)
I_generated = np.empty((img_size*n, img_size*n, 3))
#print(x_generated.shape)
for i in range(n):
    #print(i)
    I_generated[i*img_size:(i+1)*img_size, i*img_size:(i+1)*img_size, :
               ] = x_generated[i].reshape(img_size, img_size, 3)

plt.figure(figsize=(8, 8))
plt.imshow(I_generated)

In [None]:
test_transformation(model_2d_vae, mnist)

In [None]:
# Test the trained model: uniformly samlpe in the latent space
n = 20
x = np.linspace(-2, 2, n)
y = np.linspace(-2, 2, n)

I_latent = np.empty((h*n, w*n))
for i, yi in enumerate(x):
    for j, xi in enumerate(y):
        z = np.array([[xi, yi]]*model_2d_vae.batch_size)
        x_hat = model_2d_vae.generator(z)
        I_latent[(n-i-1)*h:(n-i)*h, j*w:(j+1)*w] = x_hat[0].reshape(h, w)

plt.figure(figsize=(8, 8))        
plt.imshow(I_latent, cmap="gray")

In [None]:
samples.shape

In [None]:
samples.reshape(-1,32*32*3)[0]

In [None]:
x_generated[0].reshape(32,32,3)