In [18]:
import os
from tqdm import tqdm
from scipy import misc
import cv2
directory = '/home/dlrc/datasets/training_data/legos'
def img_paths(directory):
    return [os.path.join(root, f) for root, _, files in os.walk(directory)
            for f in files if f != "target.png"]

paths = img_paths(directory)
images = []
for img_path in tqdm(paths):
  img = misc.imread(img_path, mode='RGB')
  img = cv2.resize(img, (32,32))
  images.append(img)
images = np.asarray(images)


100%|██████████| 13600/13600 [00:05<00:00, 2557.57it/s]


In [19]:
images.shape

(13600, 32, 32, 3)

In [26]:
#!/usr/bin/env python
"""Variational auto-encoder for MNIST data.
References
----------
http://edwardlib.org/tutorials/decoder
http://edwardlib.org/tutorials/inference-networks
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import edward as ed
import numpy as np
import os
import tensorflow as tf

from edward.models import Bernoulli, Normal
from edward.util import Progbar
from keras.layers import Dense, Conv2D, Flatten
from scipy.misc import imsave


def generator(array, batch_size):
  """Generate batch with respect to array's first axis."""
  start = 0  # pointer to where we are in iteration
  while True:
    stop = start + batch_size
    diff = stop - array.shape[0]
    if diff <= 0:
      batch = array[start:stop]
      start += batch_size
    else:
      batch = np.concatenate((array[start:], array[:diff]))
      start = diff
    batch = batch.astype(np.float32) / 255.0  # normalize pixel intensities
    yield batch


data_dir = "/tmp/data"
out_dir = "/tmp/out"
if not os.path.exists(out_dir):
  os.makedirs(out_dir)
M = 32  # batch size during training
d = 6  # latent dimension

# DATA. MNIST batches are fed at training time.
x_train = images[:-500]
x_test = images[-500:-1]

x_train_generator = generator(x_train, M)

print("Training data shape: ", x_train.shape)


# MODEL
# Define a subgraph of the full model, corresponding to a minibatch of
# size M.
z = Normal(loc=tf.zeros([M, d]), scale=tf.ones([M, d]))
hidden = Dense(256, activation='relu')(z.value())
x = Bernoulli(logits=Dense(32 * 32*3)(hidden))

# INFERENCE
# Define a subgraph of the variational model, corresponding to a
# minibatch of size M.
x_ph = tf.placeholder(tf.float32, [M, 32, 32,3])

conv1 = Conv2D(64, 3, strides=2)(x_ph)
flat1 = Flatten()(conv1)
hidden = Dense(256, activation='relu')(tf.cast(flat1, tf.float32))
qz = Normal(loc=Dense(d)(hidden),
            scale=Dense(d, activation='softplus')(hidden))

# Bind p(x, z) and q(z | x) to the same TensorFlow placeholder for x.
inference = ed.KLqp({z: qz}, data={x: flat1})
optimizer = tf.train.RMSPropOptimizer(0.01, epsilon=1.0)
inference.initialize(optimizer=optimizer)

tf.global_variables_initializer().run()

n_epoch = 100
n_iter_per_epoch = x_train.shape[0] // M
for epoch in range(1, n_epoch + 1):
  print("Epoch: {0}".format(epoch))
  avg_loss = 0.0

  pbar = Progbar(n_iter_per_epoch)
  for t in range(1, n_iter_per_epoch + 1):
    pbar.update(t)
    x_batch = next(x_train_generator)
    info_dict = inference.update(feed_dict={x_ph: x_batch})
    avg_loss += info_dict['loss']

  # Print a lower bound to the average marginal likelihood for an
  # image.
  avg_loss = avg_loss / n_iter_per_epoch
  avg_loss = avg_loss / M
  print("-log p(x) <= {:0.3f}".format(avg_loss))

  # Prior predictive check.
  images = x.eval()
  for m in range(M):
    imsave(os.path.join(out_dir, '%d.png') % m, images[m].reshape(32, 32))

Training data shape:  (13100, 32, 32, 3)


TypeError: Key-value pair in data does not have same dtype: <dtype: 'int32'>, <dtype: 'float32'>