In [1]:
import os
import sys
import tensorflow
import numpy as np

import tensorflow as tf

import matplotlib
matplotlib.use('TKAgg')
from matplotlib import pyplot as plt

from tensorflow.examples.tutorials.mnist import input_data

mnist_image_shape = [28, 28, 1]

def load_dataset():
    return input_data.read_data_sets('MNIST_data')

def get_next_batch(dataset, batch_size):
    # dataset should be mnist.(train/val/test)
    batch, _ = dataset.next_batch(batch_size)
    batch_shape = [batch_size] + mnist_image_shape
    return np.reshape(batch, batch_shape)

def visualize(_original, _reconstructions, num_visualize):
    vis_folder = './vis/'
    if not os.path.exists(vis_folder):
          os.makedirs(vis_folder)

    original = _original[:num_visualize]
    reconstructions = _reconstructions[:num_visualize]
    
    count = 1
    for (orig, rec) in zip(original, reconstructions):
        orig = np.reshape(orig, (mnist_image_shape[0],
                                 mnist_image_shape[1]))
        rec = np.reshape(rec, (mnist_image_shape[0],
                               mnist_image_shape[1]))
        f, ax = plt.subplots(1,2)
        ax[0].imshow(orig, cmap='gray')
        ax[1].imshow(rec, cmap='gray')
        plt.savefig(vis_folder + "test_%d.png" % count)
        count += 1


In [2]:
def get_deconv2d_output_dims(input_dims, filter_dims, stride_dims, padding):
    # Returns the height and width of the output of a deconvolution layer.
    batch_size, input_h, input_w, num_channels_in = input_dims
    filter_h, filter_w, num_channels_out  = filter_dims
    stride_h, stride_w = stride_dims

    # Compute the height in the output, based on the padding.
    if padding == 'SAME':
      out_h = input_h * stride_h
    elif padding == 'VALID':
      out_h = (input_h - 1) * stride_h + filter_h

    # Compute the width in the output, based on the padding.
    if padding == 'SAME':
      out_w = input_w * stride_w
    elif padding == 'VALID':
      out_w = (input_w - 1) * stride_w + filter_w

    return [batch_size, out_h, out_w, num_channels_out]

In [3]:
def conv(input, name, filter_dims, stride_dims, padding='SAME',
         non_linear_fn=tf.nn.relu):
    input_dims = input.get_shape().as_list()
    assert(len(input_dims) == 4) # batch_size, height, width, num_channels_in
    assert(len(filter_dims) == 3) # height, width and num_channels out
    assert(len(stride_dims) == 2) # stride height and width

    num_channels_in = input_dims[-1] #input image number/batch size
    filter_h, filter_w, num_channels_out = filter_dims
    stride_h, stride_w = stride_dims

    # Define a variable scope for the conv layer
    with tf.variable_scope(name) as scope:
        # Create filter weight variable
        #tf.Variable(tf.truncated_normal_initializer(
        #[filter_h, filter_w, num_channels_in, num_channels_out], mean=0.01, stddev=0.1))
        kernel = tf.get_variable(
            'conv_kernels', 
            [filter_h, filter_w, num_channels_in, num_channels_out],
            initializer=tf.truncated_normal_initializer(mean=0.01, stddev=0.1))
        # Create bias variable
        bias = tf.get_variable(
            'conv_bias',
            [num_channels_out],
            initializer = tf.zeros_initializer()
        )
        # Define the convolution flow graph
        conv = conv = tf.nn.conv2d(input, filter=kernel, strides=[1, stride_h, stride_w, 1], padding=padding)
        # Add bias to conv output
        conv = tf.nn.bias_add(conv, bias)
        # Apply non-linearity (if asked) and return output
        return non_linear_fn(conv, name=scope.name) if non_linear_fn else conv

def deconv(input, name, filter_dims, stride_dims, padding='SAME',
           non_linear_fn=tf.nn.relu):
    input_dims = input.get_shape().as_list()
    assert(len(input_dims) == 4) # batch_size, height, width, num_channels_in
    assert(len(filter_dims) == 3) # height, width and num_channels out
    assert(len(stride_dims) == 2) # stride height and width

    num_channels_in = input_dims[-1]
    filter_h, filter_w, num_channels_out = filter_dims
    stride_h, stride_w = stride_dims
    # Let's step into this function
    output_dims = get_deconv2d_output_dims(input_dims,
                                           filter_dims,
                                           stride_dims,
                                           padding)

    # Define a variable scope for the deconv layer
    with tf.variable_scope(name) as scope:
        # Create filter weight variable
        # Note that num_channels_out and in positions are flipped for deconv.
        kernel = tf.get_variable(
            'deconv_kernels', 
            [filter_h, filter_w, num_channels_out, num_channels_in],
            initializer=tf.truncated_normal_initializer(mean=0.01, stddev=0.1))
        # Create bias variable
        bias = tf.get_variable(
            'deconv_bias',
            [num_channels_out],
            initializer = tf.zeros_initializer()
        )
        # Define the deconv flow graph
        deconv = tf.nn.conv2d_transpose(input, kernel, output_dims, [1, stride_h, stride_w, 1], padding=padding)
        # Add bias to deconv output
        deconv = tf.nn.bias_add(deconv, bias)
        # Apply non-linearity (if asked) and return output
        return non_linear_fn(deconv, name=scope.name) if non_linear_fn else deconv

def max_pool(input, name, filter_dims, stride_dims, padding='SAME'):
    assert(len(filter_dims) == 2) # filter height and width
    assert(len(stride_dims) == 2) # stride height and width

    filter_h, filter_w = filter_dims
    stride_h, stride_w = stride_dims
    
    # Define the max pool flow graph and return output
    return tf.nn.max_pool(input, [1, filter_h, filter_w, 1], [1, stride_h, stride_w, 1], padding=padding)

def fc(input, name, out_dim, non_linear_fn=tf.nn.relu):
    assert(type(out_dim) == int)

    # Define a variable scope for the FC layer
    with tf.variable_scope(name) as scope:
        input_dims = input.get_shape().as_list()
        # the input to the fc layer should be flattened
        if len(input_dims) == 4:
            # for eg. the output of a conv layer
            batch_size, input_h, input_w, num_channels = input_dims
            # ignore the batch dimension
            in_dim = input_h * input_w * num_channels
            flat_input = tf.reshape(input, [batch_size, in_dim])
        else:
            in_dim = input_dims[-1]
            flat_input = input

        # Create weight variable
        weights = tf.get_variable('weight', [in_dim, out_dim],
                                 initializer=tf.truncated_normal_initializer())
        # Create bias variable
        bias = tf.get_variable(
            'fc_bias',
            [out_dim],
            initializer = tf.zeros_initializer()
        )
        # Define FC flow graph
        output = tf.nn.bias_add(tf.matmul(flat_input, weights), bias)
        # Apply non-linearity (if asked) and return output
        return non_linear_fn(output, name=scope.name) if non_linear_fn else output


In [4]:

def encoder(input): #28 x 28 x 1
    # Create a conv network with 3 conv layers and 1 FC layer
    # Conv 1: filter: [3, 3, 1], stride: [2, 2], relu
    conv1 = conv(input, 'conv1', [3, 3, 1], [2, 2])
    # Conv 2: filter: [3, 3, 8], stride: [2, 2], relu
    conv2 = conv(conv1, 'conv2', [3, 3, 8], [2, 2])
    # Conv 3: filter: [3, 3, 8], stride: [2, 2], relu
    conv3 = conv(conv2, 'conv3', [3, 3, 8], [2, 2])
    # FC: output_dim: 100, no non-linearity
    return fc(conv3, "fc", 100, non_linear_fn=None)

def decoder(input):
    # Create a deconv network with 1 FC layer and 3 deconv layers
    # FC: output dim: 128, relu
    fc_de = fc(input, "decode_fc", 128)
    # Reshape to [batch_size, 4, 4, 8]
    fc_de = tf.reshape(fc_de, [-1, 4, 4, 8] ) #4 x 4 x 8
    # Deconv 1: filter: [3, 3, 8], stride: [2, 2], relu
    deconv1 = deconv(fc_de, "deconv1", [3, 3, 8], [2, 2])
    # Deconv 2: filter: [8, 8, 1], stride: [2, 2], padding: valid, relu
    deconv2 = deconv(deconv1, "deconv2", [8, 8, 1], [2, 2], padding = 'VALID') #22 x 22 x 1
    # Deconv 3: filter: [7, 7, 1], stride: [1, 1], padding: valid, sigmoid
    deconv3 = deconv(deconv2, "deconv3", [7, 7, 1], [1, 1], padding = 'VALID', non_linear_fn = tf.sigmoid)
    return deconv3

def autoencoder(input_shape):
    # Define place holder with input shape
    x = tf.placeholder(tf.float32, input_shape)
    # Define variable scope for autoencoder
    with tf.variable_scope('autoencoder') as scope:
        # Pass input to encoder to obtain encoding
        encoding = encoder(x)
        # Pass encoding into decoder to obtain reconstructed image
        decoding = decoder(encoding)
        # Return input image (placeholder) and reconstructed image
        return x, decoding


In [5]:
batch_size = 100
batch_shape = (batch_size, 28, 28, 1)
num_visualize = 10

lr = 0.01
num_epochs = 50

def calculate_loss(original, reconstructed):
    return tf.div(tf.reduce_sum(tf.square(tf.subtract(reconstructed,
                                                 original))), 
                  tf.constant(float(batch_size)))

def train(dataset):
    input_image, reconstructed_image = autoencoder(batch_shape)
    loss = calculate_loss(input_image, reconstructed_image)
    optimizer = tf.train.GradientDescentOptimizer(lr).minimize(loss)

    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)

        dataset_size = len(dataset.train.images)
        print ("Dataset size:", dataset_size)
        num_iters = (num_epochs * dataset_size)//batch_size
        print ("Num iters:", num_iters)
        for step in range(num_iters):
            input_batch  = get_next_batch(dataset.train, batch_size)
            loss_val,  _ = session.run([loss, optimizer], 
                                       feed_dict={input_image: input_batch})
            if step % 1000 == 0:
                print ("Loss at step", step, ":", loss_val)

        test_batch = get_next_batch(dataset.test, batch_size)
        reconstruction = session.run(reconstructed_image,
                                     feed_dict={input_image: test_batch})
        visualize(test_batch, reconstruction, num_visualize)


dataset = load_dataset()
train(dataset)
    

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Dataset size: 55000
Num iters: 27500
Loss at step 0 : 180.951
Loss at step 1000 : 75.1098
Loss at step 2000 : 76.5848
Loss at step 3000 : 73.2039
Loss at step 4000 : 77.1216
Loss at step 5000 : 71.9121
Loss at step 6000 : 73.1769
Loss at step 7000 : 69.1595
Loss at step 8000 : 75.7361
Loss at step 9000 : 77.3723
Loss at step 10000 : 72.8158
Loss at step 11000 : 72.4551
Loss at step 12000 : 75.6894
Loss at step 13000 : 74.042
Loss at step 14000 : 73.343
Loss at step 15000 : 73.55
Loss at step 16000 : 71.3511
Loss at step 17000 : 72.8096
Loss at step 18000 : 75.4073
Loss at step 19000 : 75.2738
Loss at step 20000 : 79.7986
Loss at step 21000 : 73.9973
Loss at step 22000 : 73.4759
Loss at step 23000 : 74.1126
Loss at step 24000 : 76.2016
Loss at step 25000 : 74.1117
Loss at step 26000 : 78.4489
Los

TclError: no display name and no $DISPLAY environment variable