### 1. utils:

In [1]:
import os
import sys
import tensorflow
import numpy as np

import matplotlib
matplotlib.use('TKAgg')
from matplotlib import pyplot as plt

from tensorflow.examples.tutorials.mnist import input_data

# Shape of a single MNIST image as [height, width, channels]. get_next_batch()
# prepends the batch size to it, and visualize() uses the first two entries
# as the 2-D image size.
mnist_image_shape = [28, 28, 1]

def load_dataset():
    """Load MNIST through the TensorFlow tutorial reader.

    Returns:
        Whatever ``input_data.read_data_sets`` produces for the
        ``./data/MNIST_data`` directory. ``train()`` in this notebook reads
        its ``train`` and ``test`` attributes.
    """
    data_dir = './data/MNIST_data'
    mnist = input_data.read_data_sets(data_dir)
    return mnist

def get_next_batch(dataset, batch_size):
    """Fetch the next batch from one dataset split, reshaped to image form.

    Args:
        dataset: A single split (mnist.train / val / test) exposing
            ``next_batch(batch_size)``, which returns a 2-tuple whose first
            element is the image data.
        batch_size: Number of images to request.

    Returns:
        The images reshaped to ``[batch_size] + mnist_image_shape``. The
        second element of the tuple (presumably the labels) is discarded.
    """
    images, _unused = dataset.next_batch(batch_size)
    target_shape = [batch_size] + mnist_image_shape
    reshaped = np.reshape(images, target_shape)
    return reshaped

def visualize(_original, _reconstructions, num_visualize):
    """Save side-by-side original/reconstruction figures as PNG files.

    The first ``num_visualize`` entries of both inputs are paired up; each
    pair is drawn in a 1x2 grayscale figure (original on the left,
    reconstruction on the right) and written to ``./vis/test_<k>.png`` with
    ``k`` counting from 1.

    Args:
        _original: Sequence/array of input images; every element must be
            reshapeable to (mnist_image_shape[0], mnist_image_shape[1]).
        _reconstructions: Matching sequence/array of reconstructed images.
        num_visualize: Maximum number of pairs to save.
    """
    vis_folder = './vis/'
    # exist_ok replaces the exists()/makedirs() pair and its check-then-create race.
    os.makedirs(vis_folder, exist_ok=True)

    image_hw = (mnist_image_shape[0], mnist_image_shape[1])
    pairs = zip(_original[:num_visualize], _reconstructions[:num_visualize])

    for count, (orig, rec) in enumerate(pairs, start=1):
        fig, (ax_orig, ax_rec) = plt.subplots(1, 2)
        ax_orig.imshow(np.reshape(orig, image_hw), cmap='gray')
        ax_rec.imshow(np.reshape(rec, image_hw), cmap='gray')
        fig.savefig(vis_folder + "test_%d.png" % count)
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this, each loop iteration leaks one figure.
        plt.close(fig)

### 2. layer_utils:

In [2]:
import tensorflow as tf

def get_deconv2d_output_dims(input_dims, filter_dims, stride_dims, padding):
    """Compute the full output shape of a 2-D deconvolution layer.

    Args:
        input_dims: [batch_size, height, width, num_channels_in] of the input.
        filter_dims: [filter_height, filter_width, num_channels_out].
        stride_dims: [stride_height, stride_width].
        padding: Either 'SAME' or 'VALID'.

    Returns:
        [batch_size, out_height, out_width, num_channels_out], where
        'SAME' gives ``input * stride`` per spatial dimension and
        'VALID' gives ``(input - 1) * stride + filter``.

    Raises:
        ValueError: If ``padding`` is neither 'SAME' nor 'VALID'.
    """
    # Unpack all four entries so a wrongly-sized input_dims fails here.
    batch_size, input_h, input_w, _num_channels_in = input_dims
    filter_h, filter_w, num_channels_out = filter_dims
    stride_h, stride_w = stride_dims

    if padding == 'SAME':
        out_h = input_h * stride_h
        out_w = input_w * stride_w
    elif padding == 'VALID':
        out_h = (input_h - 1) * stride_h + filter_h
        out_w = (input_w - 1) * stride_w + filter_w
    else:
        # Previously this fell through and crashed with UnboundLocalError.
        raise ValueError(
            "padding must be 'SAME' or 'VALID', got %r" % (padding,))

    return [batch_size, out_h, out_w, num_channels_out]


### 3. layers:

In [3]:
#import tensorflow as tf

#from layer_utils import get_deconv2d_output_dims

def conv(my_input, name, filter_dims, stride_dims, padding='SAME',
         non_linear_fn=tf.nn.relu):
    """Build a 2-D convolution layer: conv2d + bias, then an optional activation.

    Args:
        my_input: 4-D tensor [batch_size, height, width, num_channels_in].
        name: Variable scope name for the layer's variables.
        filter_dims: [filter_height, filter_width, num_channels_out].
        stride_dims: [stride_height, stride_width].
        padding: Padding mode forwarded to ``tf.nn.conv2d``.
        non_linear_fn: Callable applied to the biased output, or None to
            return the biased output unchanged.

    Returns:
        The output tensor of the layer.
    """
    input_dims = my_input.get_shape().as_list()
    assert len(input_dims) == 4  # batch_size, height, width, num_channels_in
    assert len(filter_dims) == 3  # height, width and num_channels out
    assert len(stride_dims) == 2  # stride height and width

    num_channels_in = input_dims[-1]
    filter_h, filter_w, num_channels_out = filter_dims
    stride_h, stride_w = stride_dims

    # Define a variable scope for the conv layer
    with tf.variable_scope(name) as scope:
        # Filter weights, laid out as [height, width, in_channels, out_channels]
        kernel = tf.get_variable(
            name='filter',
            shape=[filter_h, filter_w, num_channels_in, num_channels_out],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.01))
        # One bias per output channel, initialised to zero
        biases = tf.get_variable(
            name='biases',
            shape=[num_channels_out],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.0))
        # Convolution followed by bias addition
        conv_out = tf.nn.conv2d(input=my_input, filter=kernel,
                                strides=[1, stride_h, stride_w, 1],
                                padding=padding)
        pre_activation = tf.add(conv_out, biases, name=scope.name)
        # Apply non-linearity only if one was requested
        if non_linear_fn is None:
            return pre_activation
        return non_linear_fn(pre_activation)
    
def deconv(my_input, name, filter_dims, stride_dims, padding='SAME',
           non_linear_fn=tf.nn.relu):
    """Build a 2-D deconvolution layer: conv2d_transpose + bias, then an optional activation.

    Args:
        my_input: 4-D tensor [batch_size, height, width, num_channels_in].
        name: Variable scope name for the layer's variables.
        filter_dims: [filter_height, filter_width, num_channels_out].
        stride_dims: [stride_height, stride_width].
        padding: Padding mode; used both to compute the output shape via
            ``get_deconv2d_output_dims`` and by ``tf.nn.conv2d_transpose``.
        non_linear_fn: Callable applied to the biased output, or None to
            return the biased output unchanged.

    Returns:
        The output tensor of the layer.
    """
    input_dims = my_input.get_shape().as_list()
    assert len(input_dims) == 4  # batch_size, height, width, num_channels_in
    assert len(filter_dims) == 3  # height, width and num_channels out
    assert len(stride_dims) == 2  # stride height and width

    num_channels_in = input_dims[-1]
    filter_h, filter_w, num_channels_out = filter_dims
    stride_h, stride_w = stride_dims
    # conv2d_transpose needs the output shape spelled out explicitly
    output_dims = get_deconv2d_output_dims(input_dims,
                                           filter_dims,
                                           stride_dims,
                                           padding)

    # Define a variable scope for the deconv layer
    with tf.variable_scope(name) as scope:
        # Filter weights. Note that the num_channels_out and num_channels_in
        # positions are flipped relative to conv.
        kernel = tf.get_variable(
            name='filter',
            shape=[filter_h, filter_w, num_channels_out, num_channels_in],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.01))
        # One bias per output channel, initialised to zero
        biases = tf.get_variable(
            name='biases',
            shape=[num_channels_out],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.0))
        # Transposed convolution followed by bias addition
        deconv_out = tf.nn.conv2d_transpose(value=my_input, filter=kernel,
                                            strides=[1, stride_h, stride_w, 1],
                                            output_shape=output_dims,
                                            padding=padding)
        pre_activation = tf.add(deconv_out, biases, name=scope.name)
        # Apply non-linearity only if one was requested
        if non_linear_fn is None:
            return pre_activation
        return non_linear_fn(pre_activation)

def max_pool(my_input, name, filter_dims, stride_dims, padding='SAME'):
    """Build a max-pooling op over the two middle dimensions of ``my_input``.

    Args:
        my_input: Tensor passed as ``value`` to ``tf.nn.max_pool``.
        name: Name given to the pooling op.
        filter_dims: [window_height, window_width].
        stride_dims: [stride_height, stride_width].
        padding: Padding mode forwarded to ``tf.nn.max_pool``.

    Returns:
        The pooled tensor.
    """
    assert len(filter_dims) == 2  # filter height and width
    assert len(stride_dims) == 2  # stride height and width

    # Window and stride are 1 along the first and last dimensions.
    window = [1, filter_dims[0], filter_dims[1], 1]
    step = [1, stride_dims[0], stride_dims[1], 1]

    return tf.nn.max_pool(value=my_input, ksize=window, strides=step,
                          padding=padding, name=name)
def fc(my_input, name, out_dim, non_linear_fn=tf.nn.relu):
    """Build a fully connected layer: matmul + bias, then an optional activation.

    A 4-D input (e.g. the output of a conv layer) is flattened to
    [batch, height * width * channels] first; any other input is used as is
    and its last dimension is taken as the input width.

    Args:
        my_input: Input tensor.
        name: Variable scope name for the layer's variables.
        out_dim: Number of output units (int).
        non_linear_fn: Callable applied to the biased output, or None to
            return the biased output unchanged.

    Returns:
        The output tensor of the layer.
    """
    assert isinstance(out_dim, int)

    # Define a variable scope for the FC layer
    with tf.variable_scope(name) as scope:
        input_dims = my_input.get_shape().as_list()
        # the input to the fc layer should be flattened
        if len(input_dims) == 4:
            # for eg. the output of a conv layer
            _, input_h, input_w, num_channels = input_dims
            # ignore the batch dimension
            in_dim = input_h * input_w * num_channels
            # -1 lets TensorFlow infer the batch dimension, so this also
            # works when the static batch size is unknown (None).
            flat_input = tf.reshape(my_input, [-1, in_dim])
        else:
            in_dim = input_dims[-1]
            flat_input = my_input

        # Create weight variable
        weight = tf.get_variable(
            name='weight',
            shape=[in_dim, out_dim],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.01))
        # Create bias variable
        bias = tf.get_variable(
            name='bias',
            shape=[out_dim],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.0))
        # Define FC flow graph
        pre_activation = tf.add(tf.matmul(flat_input, weight), bias,
                                name=scope.name)
        # Apply non-linearity only if one was requested. The result is not
        # bound to a local called `fc`, which would shadow this function.
        if non_linear_fn is None:
            return pre_activation
        return non_linear_fn(pre_activation)

### 4. autoencoder:

> **Note: the original structure never converged, so a more complex decoder is used here.**

In [4]:
#import tensorflow as tf

#from layers import *

def encoder(my_input):
    """Encode an image batch with three strided conv layers and one FC layer.

    Layers, in order:
        conv1: filter [3, 3, 32], stride [2, 2], SAME padding, relu
        conv2: filter [3, 3, 32], stride [2, 2], SAME padding, relu
        conv3: filter [3, 3, 16], stride [2, 2], SAME padding, relu
        fc_encoder: 128 outputs, no non-linearity

    Returns:
        The output of the ``fc_encoder`` layer.
    """
    # (scope name, number of output channels) for each conv layer
    conv_specs = [('conv1', 32), ('conv2', 32), ('conv3', 16)]

    features = my_input
    for layer_name, num_filters in conv_specs:
        features = conv(features, layer_name, [3, 3, num_filters], [2, 2],
                        padding='SAME', non_linear_fn=tf.nn.relu)

    # fc() flattens the 4-D conv output itself before the matmul.
    return fc(features, 'fc_encoder', 128, non_linear_fn=None)
def decoder(my_input):
    """Decode an encoding with one FC layer followed by three deconv layers.

    Layers, in order:
        fc_decoder: 144 outputs (= 3 * 3 * 16), relu
        reshape to [-1, 3, 3, 16]
        deconv1: filter [3, 3, 32], stride [2, 2], VALID padding, relu
        deconv2: filter [3, 3, 32], stride [2, 2], SAME padding, relu
        deconv3: filter [3, 3, 1],  stride [2, 2], SAME padding, sigmoid

    Per get_deconv2d_output_dims, the spatial size goes 3 -> 7 -> 14 -> 28.

    Returns:
        The output of the ``deconv3`` layer.
    """
    hidden = fc(my_input, 'fc_decoder', 144, non_linear_fn=tf.nn.relu)
    feature_map = tf.reshape(hidden, [-1, 3, 3, 16])

    # (scope name, output channels, padding, activation) for each deconv layer
    deconv_specs = [
        ('deconv1', 32, 'VALID', tf.nn.relu),
        ('deconv2', 32, 'SAME', tf.nn.relu),
        ('deconv3', 1, 'SAME', tf.nn.sigmoid),
    ]
    for layer_name, num_filters, pad_mode, activation in deconv_specs:
        feature_map = deconv(feature_map, layer_name, [3, 3, num_filters],
                             [2, 2], padding=pad_mode,
                             non_linear_fn=activation)
    return feature_map
    
def autoencoder(input_shape):
    """Build the autoencoder graph: placeholder -> encoder -> decoder.

    Args:
        input_shape: Shape of the float32 input placeholder.

    Returns:
        A pair ``(input_placeholder, reconstructed_image)``.
    """
    # The placeholder is created outside the 'autoencoder' variable scope.
    image_in = tf.placeholder(tf.float32, input_shape, name='imag_in')

    # All encoder/decoder variables live under the 'autoencoder' scope.
    with tf.variable_scope('autoencoder'):
        code = encoder(image_in)
        reconstruction = decoder(code)

    return image_in, reconstruction


### 5. train:

In [5]:
#import tensorflow as tf

#from utils import *
#from autoencoder import *

# Number of images fed to the network per training step.
batch_size = 256
# Fixed placeholder shape handed to autoencoder(): (batch, height, width, channels).
batch_shape = (batch_size, 28, 28, 1)
# How many test image/reconstruction pairs visualize() is asked to save.
num_visualize = 10

# Learning rate passed to tf.train.AdamOptimizer in train().
lr = 0.001
# Number of passes over the training set; train() converts it to an iteration count.
num_epochs = 50

def calculate_loss(original, reconstructed):
    """Reconstruction loss: batch mean of the per-example sum of squared errors.

    The squared difference is summed over axes 1, 2 and 3 and then averaged
    over what remains (the batch axis).
    """
    squared_error = tf.square(tf.subtract(reconstructed, original))
    per_example_error = tf.reduce_sum(squared_error, axis=[1, 2, 3])
    return tf.reduce_mean(per_example_error)

def train(dataset):
    """Train the autoencoder on ``dataset.train`` and visualize one test batch.

    Builds the graph with the module-level ``batch_shape``, minimizes
    ``calculate_loss`` with Adam at learning rate ``lr`` for
    ``(num_epochs * dataset_size) // batch_size`` iterations, printing the
    loss every 1000 steps. Afterwards one batch from ``dataset.test`` is
    reconstructed and handed to ``visualize``.

    Args:
        dataset: Object with ``train`` and ``test`` splits, as returned by
            ``load_dataset``.
    """
    image_ph, reconstruction_op = autoencoder(batch_shape)
    loss_op = calculate_loss(image_ph, reconstruction_op)
    train_op = tf.train.AdamOptimizer(lr).minimize(loss_op)
    init_op = tf.global_variables_initializer()

    with tf.Session() as sess:
        # Create a summary writer for the graph under 'my_graph' and close it
        # right away; nothing else is logged through it.
        graph_writer = tf.summary.FileWriter('my_graph', sess.graph)
        graph_writer.close()
        sess.run(init_op)

        dataset_size = len(dataset.train.images)
        print("Dataset size:", dataset_size)
        num_iters = (num_epochs * dataset_size) // batch_size
        print("Num iters:", num_iters)

        for step in range(num_iters):
            train_batch = get_next_batch(dataset.train, batch_size)
            loss_val, _ = sess.run([loss_op, train_op],
                                   feed_dict={image_ph: train_batch})
            if step % 1000 == 0:
                print("Loss at step", step, ":", loss_val)

        # Reconstruct a single test batch and save a few examples to disk.
        test_batch = get_next_batch(dataset.test, batch_size)
        reconstruction = sess.run(reconstruction_op,
                                  feed_dict={image_ph: test_batch})
        visualize(test_batch, reconstruction, num_visualize)
        

In [6]:
# Start from an empty default graph; presumably so that re-running this cell
# does not collide with variables created by an earlier run.
tf.reset_default_graph()
# Load MNIST from ./data/MNIST_data.
dataset = load_dataset()
# Build the autoencoder, train it, and save sample reconstructions to ./vis/.
train(dataset)


Extracting ./data/MNIST_data\train-images-idx3-ubyte.gz
Extracting ./data/MNIST_data\train-labels-idx1-ubyte.gz
Extracting ./data/MNIST_data\t10k-images-idx3-ubyte.gz
Extracting ./data/MNIST_data\t10k-labels-idx1-ubyte.gz
Dataset size: 55000
Num iters: 10742
Loss at step 0 : 181.34
Loss at step 1000 : 19.1241
Loss at step 2000 : 11.0623
Loss at step 3000 : 8.3195
Loss at step 4000 : 8.27414
Loss at step 5000 : 6.92209
Loss at step 6000 : 6.52266
Loss at step 7000 : 5.86308
Loss at step 8000 : 5.50316
Loss at step 9000 : 5.55846
Loss at step 10000 : 4.8272
