In [None]:
%matplotlib inline
from __future__ import print_function
import numpy as np
import os
import tensorflow as tf
import sys
import matplotlib.pyplot as plt
from IPython.display import Image
from scipy import ndimage

In [None]:
# http://stackoverflow.com/questions/29772158/make-ipython-notebook-print-in-real-time
oldsysstdout = sys.stdout
class flushfile():
    """File-like wrapper that flushes the wrapped stream after every write.

    Any attribute not defined on the wrapper itself is looked up on the
    wrapped stream ``f``.
    """
    def __init__(self, f):
        self.f = f
    def __getattr__(self, name):
        # Only reached when normal lookup fails. Never try to resolve "f"
        # through the wrapped stream: on an instance whose __init__ has not
        # run, that would recurse without end.
        if name == "f":
            raise AttributeError(name)
        # getattr (instead of object.__getattribute__) honours the wrapped
        # object's own __getattr__, so delegation keeps working when this cell
        # is run twice and sys.stdout ends up wrapped more than once.
        return getattr(self.f, name)
    def write(self, x):
        """Write ``x`` to the wrapped stream and flush immediately."""
        self.f.write(x)
        self.f.flush()
    def flush(self):
        """Flush the wrapped stream."""
        self.f.flush()
sys.stdout = flushfile(sys.stdout)

# Enumerate Images
Image names are sequential, so every tenth image (any file whose name ends in `0.png`) is held out for the test set; all other images are used for training.

In [None]:
training = []
test = []

# Walk the capture tree. PNG files whose lower-cased name ends in "0.png" are
# held out for testing; every other PNG goes to the training list.
for root, dirs, files in os.walk('captures'):
    for name in files:
        path = os.path.join(root, name)
        low_name = name.lower()
        if not low_name.endswith(".png"):
            continue  # not an image capture
        held_out = low_name.endswith("0.png")
        (test if held_out else training).append(path)

print("Training:", len(training), "Test:", len(test))

In [None]:
training[:5]

In [None]:
test[:5]

# Image Processing
Each image file contains a color image (top half), and an encoded depth image (bottom half)
<img src="testing/IMG_2114.PNG">
* Note: The image may also contain orientation data. If so, it is encoded in the first two pixels of the depth image: when the first pixel is red, the second pixel holds the x, y, z, w quaternion components encoded in its r, g, b, a values.

In [None]:
def split(image):
    """Split the image data into the top and bottom half.

    Args:
        image: array whose first axis is the row axis (only ``shape[0]`` and
            row slicing are used).

    Returns:
        Tuple ``(top, bottom)`` of row slices of ``image``. With an odd number
        of rows the extra row ends up in ``bottom``.
    """
    # Floor division keeps the index an integer on both Python 2 and 3; a
    # float index makes the slice raise TypeError.
    split_height = image.shape[0] // 2
    return image[:split_height], image[split_height:]

In [None]:
def decode_depth(image):
    """Decode the depth values (and optional orientation) from an encoded image.

    12 bits of depth in millimeters is encoded with 6 bits in red and 3 bits in each of green and blue.

    Args:
        image: array indexed as ``image[row, column, channel]`` with red, green
            and blue in channels 0, 1 and 2. If pixel (0, 0) equals
            ``[255, 0, 0, 255]``, pixel (0, 1) is read as the orientation and
            both pixels are then overwritten **in place** with ``[0, 0, 0, 255]``.

    Returns:
        Tuple ``(depth, orientation)``. ``depth`` is a float array with NaN
        wherever the red channel is 0. ``orientation`` is a 4-element list,
        ``[1, 0, 0, 0]`` when the image carries no orientation marker.
    """
    BYTE_MAX = 255
    CHANNEL_MAX = 8.0
    MAX_RED_VALUE = BYTE_MAX - CHANNEL_MAX
    CHANNELS_MAX = CHANNEL_MAX * CHANNEL_MAX
    orientation = [1, 0, 0, 0] # default orientation if not present in image.

    if np.array_equal(image[0, 0], [BYTE_MAX, 0, 0, BYTE_MAX]):
        # Orientation quaternion is present: map each byte from [0, 255] to [-1, 1].
        pixel = image[0, 1]
        orientation = [((2.0 * pixel[c]) / BYTE_MAX) - 1 for c in range(len(orientation))]

        # Clear out the pixels so they don't get interpreted as depth.
        image[0, 0] = [0, 0, 0, BYTE_MAX]
        image[0, 1] = [0, 0, 0, BYTE_MAX]

    # Widen before subtracting: on unsigned 8-bit input, green - red or
    # blue - red would wrap around whenever the result is negative.
    red = image[:, :, 0].astype(np.float64)
    green = image[:, :, 1].astype(np.float64)
    blue = image[:, :, 2].astype(np.float64)

    depth = ((MAX_RED_VALUE - red) * CHANNELS_MAX) + ((green - red) * CHANNEL_MAX) + (blue - red)

    # Zero in the red channel indicates the sensor provided no data.
    depth[red == 0] = np.nan
    return depth, orientation

# Fill NaNs with localized stat values using mipmaps
This combines the mipmap approach from http://stackoverflow.com/questions/14549696/mipmap-of-image-in-numpy

with the NaN-aware averaging from http://stackoverflow.com/questions/5480694/numpy-calculate-averages-with-nans-removed

In [None]:
def mipmap_imputer(image, strategy=np.mean, scales=None):
    """Fill NaN entries of a 2-D array using progressively coarser mipmaps.

    The image is reduced level by level; each level ignores NaNs when
    reducing. The levels are then expanded back from coarsest to finest, and
    every NaN in a finer level is replaced by the value of the coarser cell
    covering it.

    Args:
        image: 2-D float array, possibly containing NaNs. It is not modified.
        strategy: reduction called as ``strategy(masked_array, axis=...)``; the
            result must provide ``.filled`` (as NumPy masked arrays do).
        scales: list of ``(y, x)`` reduction factors, one per mipmap level.
            Each level's shape must be divisible by its factors. The default
            factors multiply to 480 rows by 640 columns.

    Returns:
        A copy of ``image`` with NaNs replaced wherever a coarser level had a
        value. Entries stay NaN only if every level covering them is NaN.
    """
    scales = scales if scales else [(5,5), (3,2), (2,2), (2,2), (2,2), (2,2), (2,2), (1,2)]
    mipmaps = []
    mipmap = image
    for y, x in scales:
        mipmap = mipmap.copy()
        size = mipmap.shape
        # Floor division: reshape needs integer dimensions (true division
        # yields floats on Python 3). A shape that is not divisible still
        # fails, with a ValueError from reshape.
        reshaped = mipmap.reshape(size[0] // y, y, size[1] // x, x)
        masked = np.ma.masked_array(reshaped, np.isnan(reshaped))
        # Reduce within each (y, x) tile, skipping masked (NaN) entries;
        # tiles that are entirely NaN come back as NaN.
        mipmap = strategy(strategy(masked, axis=3), axis=1).filled(np.nan)
        mipmaps.append(mipmap)

    # Walk from the coarsest level back to the full-resolution image.
    for index, mipmap in reversed(list(enumerate(mipmaps))):
        y, x = scales[index]
        expanded = mipmap
        if x > 1:
            expanded = np.repeat(expanded, x, axis=1)
        if y > 1:
            expanded = np.repeat(expanded, y, axis=0)
        target = mipmaps[index - 1] if index > 0 else image.copy()

        nans = np.isnan(target)
        target[nans] = expanded[nans]
    return target

# Node/Layer Types:
* Matrix
 * Dimensions (height, width, depth)
* Relu
* Dropout
 * Fraction
* Conv
 * Dimensions (height, width, channels)
 * Stride (height, width)
 * Padding Type (same, valid)
* Pool
 * Type (max, avg)
 * Size (height, width)
 * Stride (height, width) 

In [None]:
from six.moves import cPickle as pickle
pickle_file = '../ud730/notMNIST.pickle'

# NOTE(review): pickle.load can execute arbitrary code from the file it reads;
# only point this at a pickle you produced yourself or otherwise trust.
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)

# Pull the six arrays out of the loaded dictionary.
train_dataset, train_labels = save['train_dataset'], save['train_labels']
valid_dataset, valid_labels = save['valid_dataset'], save['valid_labels']
test_dataset, test_labels = save['test_dataset'], save['test_labels']
del save  # hint to help gc free up memory

print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

In [None]:
# Shared description of the data set; the reformatted arrays are added to it later.
datasets = dict(image_size=28, label_count=10, channel_count=1)
datasets["total_image_size"] = datasets["image_size"] ** 2

def reformat(dataset, labels, name):
    """Reshape images to (N, size, size, channels) float32 and one-hot encode labels.

    Args:
        dataset: array that can be reshaped to
            ``(-1, image_size, image_size, channel_count)`` using the sizes in
            the module-level ``datasets`` dict.
        labels: 1-D array of integer class indices.
        name: label used in the printed shape summary.

    Returns:
        Tuple ``(images, one_hot_labels)``, both float32.
    """
    side = datasets["image_size"]
    images = dataset.reshape((-1, side, side, datasets["channel_count"])).astype(np.float32)
    # Map 2 to [0.0, 1.0, 0.0 ...], 3 to [0.0, 0.0, 1.0 ...]
    class_ids = np.arange(datasets["label_count"])
    one_hot = (labels[:, None] == class_ids).astype(np.float32)
    print(name + " set", images.shape, one_hot.shape)
    return images, one_hot
# Reformat each split and store it (plus its labels) in the shared dict.
for split_key, split_images, split_labels, split_title in (
    ("train", train_dataset, train_labels, "Training"),
    ("valid", valid_dataset, valid_labels, "Validation"),
    ("test", test_dataset, test_labels, "Test"),
):
    datasets[split_key], datasets[split_key + "_labels"] = reformat(split_images, split_labels, split_title)

print(datasets.keys())

In [None]:
def accuracy(predictions, labels):
    """Return the percentage of rows where the arg-max of the prediction matches the arg-max of the label."""
    predicted_classes = np.argmax(predictions, 1)
    true_classes = np.argmax(labels, 1)
    hits = np.sum(predicted_classes == true_classes)
    return 100.0 * hits / predictions.shape[0]

In [None]:
def run_graph(graph_info, data, step_count, report_every=50):
    """Train a graph on minibatches and print progress and final test accuracy.

    Args:
        graph_info: dict with the keys "graph", "batch_size", "optimizer",
            "loss", "predictions", "train", "labels", "valid" and "test".
        data: dict with the arrays "train", "train_labels", "valid_labels"
            and "test_labels".
        step_count: index of the last training step; ``step_count + 1`` steps run.
        report_every: print minibatch loss/accuracy and validation accuracy
            every this many steps.
    """
    with tf.Session(graph=graph_info["graph"]) as session:
        tf.initialize_all_variables().run()
        print("Initialized")
        batch_size = graph_info["batch_size"]
        # Number of distinct batch start positions. When the training set is no
        # larger than one batch, always start at 0 rather than taking a modulo
        # by zero (or by a negative number).
        offset_span = data["train_labels"].shape[0] - batch_size
        # The nodes to evaluate are the same on every step.
        targets = [graph_info["optimizer"], graph_info["loss"], graph_info["predictions"]]
        # range works on both Python 2 and 3 (xrange exists only on Python 2).
        for step in range(step_count + 1):
            # Pick an offset within the training data, which has been randomized.
            # Note: we could use better randomization across epochs.
            offset = (step * batch_size) % offset_span if offset_span > 0 else 0
            # Generate a minibatch.
            batch_data = data["train"][offset:(offset + batch_size), :, :, :]
            batch_labels = data["train_labels"][offset:(offset + batch_size), :]
            # Prepare a dictionary telling the session where to feed the minibatch.
            # The key of the dictionary is the placeholder node of the graph to be fed,
            # and the value is the numpy array to feed to it.
            feed_dict = {graph_info["train"] : batch_data, graph_info["labels"] : batch_labels}
            _, l, predictions = session.run(targets, feed_dict=feed_dict)
            if (step % report_every == 0):
                print("Minibatch loss at step", step, ":", l)
                print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
                print("Validation accuracy: %.1f%%" % accuracy(graph_info["valid"].eval(), data["valid_labels"]))
        print("Test accuracy: %.1f%%" % accuracy(graph_info["test"].eval(), data["test_labels"]))

In [None]:
# Parameter setup functions

def no_parameters(options):
    """Parameter setup for layers without variables: ignores ``options`` and returns an empty tuple."""
    return tuple()

def setup_matrix(options):
    """Create the weight variable, and optionally the bias variable, for a layer.

    Reads "init" and "size" from ``options``, and "bias_init" when
    ``options["bias"]`` is truthy. The bias initializer receives only the last
    entry of the size, as a one-element slice.

    Returns:
        ``(weights, bias)`` when a bias is requested, otherwise ``(weights,)``.
    """
    make_weights = options["init"]
    shape = options["size"]
    weights = tf.Variable(make_weights(shape))
    if not options["bias"]:
        return (weights,)
    make_bias = options["bias_init"]
    return (weights, tf.Variable(make_bias(shape[-1:])))

In [None]:
def same_output_shape(input_shape, options):
    """Output shape for layers that keep their input's shape: returns ``input_shape`` unchanged."""
    output_shape = input_shape
    return output_shape

def matrix_output_shape(input_shape, options):
    """Output shape of a matrix layer: (input rows, second entry of ``options["size"]``)."""
    rows = int(input_shape[0])
    columns = options["size"][1]
    return (rows, columns)

def flatten_output_shape(input_shape, options):
    """Collapse dimensions 1-3 of a 4-D shape into one: (d0, d1 * d2 * d3)."""
    batch = int(input_shape[0])
    flattened = int(input_shape[1] * input_shape[2] * input_shape[3])
    return (batch, flattened)

def unflatten_output_shape(input_shape, options):
    """Expand a 2-D shape to 4-D using the (height, width) in ``options["size"]``.

    The last dimension is the flat length divided by ``height * width``,
    truncated to an int.
    """
    height, width = options["size"][0], options["size"][1]
    channels = int(input_shape[1] / (height * width))
    return (int(input_shape[0]), height, width, channels)

def conv_output_shape(input_shape, options):
    """Compute the output shape of a strided window operation.

    Args:
        input_shape: 4-element shape, indexed as (batch, height, width, depth).
        options: dict with "size" (window height and width, optionally
            followed by input and output channels), "stride" (height, width)
            and "padding" ("SAME" or anything else, treated as "VALID").

    Returns:
        Tuple (batch, out_height, out_width, out_depth). The depth comes from
        ``size[3]`` when "size" has more than two entries, otherwise from the input.
    """
    size = options["size"]
    same_padding = options["padding"] == "SAME"
    stride = options["stride"]

    if len(size) > 2 and input_shape[3] != size[2]:
        print("Matrix size incompatible!")

    height = size[0]
    width = size[1]
    out_depth = size[3] if len(size) > 2 else int(input_shape[3])

    input_height = int(input_shape[1])
    input_width = int(input_shape[2])

    if not same_padding:
        # Without padding the window has to fit entirely inside the input, so
        # there are (input - window + 1) start positions, not (input - window).
        input_height -= height - 1
        input_width -= width - 1

    # Ceiling division by the stride, kept in integers (// rather than /).
    return (
        int(input_shape[0]),
        (input_height + stride[0] - 1) // stride[0],
        (input_width + stride[1] - 1) // stride[1],
        out_depth
    )

print(conv_output_shape([1, 25, 40, 1], {"size":(4, 5), "stride": (2, 2), "padding":"VALID"}))
print(conv_output_shape([1, 25, 40, 1], {"size":(4, 5), "stride": (2, 2), "padding":"SAME"}))
print(conv_output_shape([1, 25, 40, 1], {"size":(4, 5), "stride": (3, 3), "padding":"VALID"}))

In [None]:
# Node connection functions

def apply_matrix(input_node, train, parameters, options):
    """Multiply the input by ``parameters[0]`` and add ``parameters[1]`` when a second parameter exists."""
    product = tf.matmul(input_node, parameters[0])
    has_bias = len(parameters) > 1
    return product + parameters[1] if has_bias else product

def apply_relu(input_node, train, parameters, options):
    """Apply ``tf.nn.relu`` to the input node; the other arguments are unused."""
    activated = tf.nn.relu(input_node)
    return activated

def apply_dropout(input_node, train, parameters, options):
    """Apply dropout while training; pass the input through untouched otherwise.

    NOTE(review): ``options["dropout_rate"]`` is handed to ``tf.nn.dropout`` as
    its second positional argument. Depending on the TensorFlow version that
    argument is a keep probability rather than a drop rate; confirm which one
    is intended.
    """
    if not train:
        return input_node
    rate = options["dropout_rate"]
    return tf.nn.dropout(input_node, rate, seed=options["seed"])

def apply_conv(input_node, train, parameters, options):
    """Convolve the input with ``parameters[0]`` and add ``parameters[1]`` when ``options["bias"]`` is set.

    The strides passed to ``tf.nn.conv2d`` are ``[1, stride_h, stride_w, 1]``
    built from ``options["stride"]``; padding comes from ``options["padding"]``.
    """
    stride_h, stride_w = options["stride"][0], options["stride"][1]
    strides = [1, stride_h, stride_w, 1]
    convolved = tf.nn.conv2d(input_node, parameters[0], strides, padding=options["padding"])
    if not options["bias"]:
        return convolved
    return convolved + parameters[1]

def apply_pool(input_node, train, parameters, options):
    """Pool the input with max pooling when ``options["pool_type"]`` is "max", average pooling otherwise.

    Window and stride are expanded from the (height, width) pairs in
    ``options["size"]`` and ``options["stride"]`` to ``[1, h, w, 1]``.
    """
    pool_function = tf.nn.max_pool if options["pool_type"] == "max" else tf.nn.avg_pool
    stride_h, stride_w = options["stride"][0], options["stride"][1]
    window_h, window_w = options["size"][0], options["size"][1]
    return pool_function(
        input_node,
        [1, window_h, window_w, 1],
        [1, stride_h, stride_w, 1],
        padding=options["padding"],
    )

def apply_flatten(input_node, train, parameters, options):
    """Reshape the input node to the shape given by ``flatten_output_shape`` for its current shape."""
    flat_shape = flatten_output_shape(input_node.get_shape(), options)
    return tf.reshape(input_node, flat_shape)

def apply_unflatten(input_node, train, parameters, options):
    """Reshape the input node to the shape given by ``unflatten_output_shape`` for its current shape."""
    image_shape = unflatten_output_shape(input_node.get_shape(), options)
    return tf.reshape(input_node, image_shape)

In [None]:
def shape_test(shape, options):
    """Build a throwaway graph and print the static output shape of a convolution over ``shape``."""
    scratch_graph = tf.Graph()
    with scratch_graph.as_default():
        placeholder = tf.placeholder(tf.float32, shape=shape)
        conv_parameters = setup_matrix(options)
        print(apply_conv(placeholder, False, conv_parameters, options).get_shape())

default_init = lambda size: tf.truncated_normal(size, stddev=0.1)
# Same three stride/padding combinations as printed by conv_output_shape.
for demo_stride, demo_padding in (((2, 2), "VALID"), ((2, 2), "SAME"), ((3, 3), "VALID")):
    shape_test([1, 25, 40, 1], {"size":(4, 5, 1, 1), "stride": demo_stride, "padding": demo_padding, "bias":False, "init":default_init})

In [None]:
class Layer(object):
    """Bundle a layer's options with the callables that create its parameters and its graph node."""
    def __init__(self, options, parameter_setup, node_setup):
        self.options, self.parameter_setup, self.node_setup = options, parameter_setup, node_setup
        # Filled in by setup_parameters().
        self.parameters = None

    def setup_parameters(self):
        """Create the layer's parameters by calling ``parameter_setup(options)``."""
        build_parameters = self.parameter_setup
        self.parameters = build_parameters(self.options)

    def connect(self, input_node, train):
        """Return the node produced by ``node_setup(input_node, train, parameters, options)``."""
        return self.node_setup(input_node, train, self.parameters, self.options)

In [None]:
# Layer setup functions.

def create_matrix_layer(inputs, channels, init=lambda size: tf.truncated_normal(size, stddev=0.1)):
    """Create a matrix-multiply layer with a bias.

    Args:
        inputs: input size, either a single value or a tuple of dimensions.
        channels: output size; appended to ``inputs`` to form the matrix shape.
        init: callable mapping a shape to initial values. Used for the matrix
            and, with shape ``(channels,)``, for the bias.
    """
    leading_dims = inputs if isinstance(inputs, tuple) else (inputs,)

    def matrix_init(size):
        return init(size)

    def bias_init(size):
        # The requested size is ignored; the bias always has one entry per channel.
        return init((channels,))

    options = dict(size=leading_dims + (channels,), bias=True, init=matrix_init, bias_init=bias_init)
    return Layer(options, setup_matrix, apply_matrix)

def create_relu_layer():
    """Create a parameter-free layer whose node applies ``apply_relu``."""
    relu_options = {}
    return Layer(relu_options, no_parameters, apply_relu)

def create_dropout_layer(rate, seed):
    """Create a parameter-free dropout layer storing ``rate`` and ``seed`` in its options."""
    return Layer(dict(dropout_rate=rate, seed=seed), no_parameters, apply_dropout)

def create_conv_layer(patch_size, stride, in_channels, out_channels, bias=True, padding="SAME"):
    """Create a convolution layer.

    Args:
        patch_size: (height, width) tuple of the convolution window.
        stride: (height, width) stride pair.
        in_channels: number of input channels.
        out_channels: number of output channels.
        bias: whether a bias is created and added.
        padding: padding mode passed through to the convolution.

    Weights and bias are both initialized from a truncated normal with a
    standard deviation of 0.1.
    """
    def truncated_normal_init(size):
        return tf.truncated_normal(size, stddev=0.1)

    kernel_shape = patch_size + (in_channels, out_channels)
    options = dict(
        size=kernel_shape,
        bias=bias,
        init=truncated_normal_init,
        bias_init=truncated_normal_init,
        stride=stride,
        padding=padding,
    )
    return Layer(options, setup_matrix, apply_conv)

def create_pool_layer(strategy, patch_size, stride, channels, padding="SAME"):
    """Create a parameter-free pooling layer.

    Args:
        strategy: stored as "pool_type"; ``apply_pool`` treats "max" as max
            pooling and anything else as average pooling.
        patch_size: (height, width) of the pooling window.
        stride: (height, width) stride pair.
        channels: accepted but not used.
        padding: padding mode passed through to the pooling op.
    """
    pool_options = dict(pool_type=strategy, size=patch_size, stride=stride, padding=padding)
    return Layer(pool_options, no_parameters, apply_pool)

def create_flatten_layer():
    """Create a parameter-free layer whose node applies ``apply_flatten``."""
    return Layer({}, no_parameters, apply_flatten)

def create_unflatten_layer(size):
    """Create a parameter-free layer that reshapes flat input using the (height, width) in ``size``."""
    return Layer(dict(size=size), no_parameters, apply_unflatten)

In [None]:
class EvoLayer(object):
    """Base representation of an evolvable layer.

    Every method here is a neutral default: the output size equals the input
    size, the layer cannot mutate, and no concrete layer is produced.
    """
    def __init__(self, options):
        self.options = options

    def output_size(self, input_size):
        """Return ``input_size`` unchanged."""
        unchanged = input_size
        return unchanged

    def can_mutate(self):
        """Return False: this base layer has nothing to mutate."""
        return False

    def mutate(self):
        """Return this same instance."""
        mutated = self
        return mutated

    def make_layer(self):
        """Return None: the base class builds no layer."""
        return None

In [None]:
def convnet_two_layer(batch_size, patch_size, depth, hidden_size, data):
    """Build a graph with two strided convolutions followed by two matrix layers.

    Args:
        batch_size: number of examples per training minibatch.
        patch_size: side length of the square convolution window.
        depth: number of output channels of both convolutions.
        hidden_size: width of the hidden matrix layer.
        data: dict with "image_size", "label_count", "channel_count" and the
            "valid" and "test" arrays.

    Returns:
        Dict with the graph, batch size, the "train" and "labels" placeholders,
        the loss, the optimizer step, and softmax predictions for the training
        batch ("predictions"), validation data ("valid") and test data ("test").
    """
    image_size = data["image_size"]
    label_count = data["label_count"]
    channel_count = data["channel_count"]
    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        train = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, channel_count))
        labels= tf.placeholder(tf.float32, shape=(batch_size, label_count))
        valid = tf.constant(data["valid"])
        test  = tf.constant(data["test"])

        stride = 2

        # Both convolutions use create_conv_layer's default "SAME" padding, so
        # each one maps a side of n pixels to ceil(n / stride). Integer
        # arithmetic keeps the matrix size an int on Python 3 and stays correct
        # when image_size is not a multiple of stride squared.
        reduced_size = image_size
        for _ in range(2):
            reduced_size = -(-reduced_size // stride)
        flat_size = reduced_size * reduced_size * depth

        layers = [
            # The first convolution consumes the data's channels (previously hard-coded to 1).
            create_conv_layer((patch_size, patch_size), (stride, stride), channel_count, depth),
            create_relu_layer(),
            create_conv_layer((patch_size, patch_size), (stride, stride), depth, depth),
            create_relu_layer(),
            create_flatten_layer(),
            create_matrix_layer(flat_size, hidden_size),
            create_relu_layer(),
            create_matrix_layer(hidden_size, label_count)
        ]

        for layer in layers:
            layer.setup_parameters()

        def model(nodes, is_training):
            """Chain every layer onto the last node in ``nodes`` and return the final node."""
            for layer in layers:
                nodes.append(layer.connect(nodes[-1], is_training))
            return nodes[-1]

        logits = model([train], True)
        # Keyword arguments: newer TensorFlow releases reject positional use of this op.
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))

        info = {
            "graph": graph,
            "batch_size": batch_size,
            "train": train,
            "labels": labels,
            "loss": loss,
            "optimizer": tf.train.GradientDescentOptimizer(0.05).minimize(loss),

            # Predictions for the training, validation, and test data.
            "predictions": tf.nn.softmax(logits),
            "valid": tf.nn.softmax(model([valid], False)),
            "test":  tf.nn.softmax(model([test], False))
        }
    return info

In [None]:
# Build the two-convolution network and train it for 1000 steps.
graph_2conv = convnet_two_layer(
    batch_size=16,
    patch_size=5,
    depth=16,
    hidden_size=64,
    data=datasets,
)

run_graph(graph_2conv, datasets, step_count=1000)

In [None]:
# NOTE(review): convnet_optimize is not defined in any cell visible in this
# part of the notebook. Confirm it is defined or imported before this cell;
# otherwise Restart & Run All stops here with a NameError.
# NOTE(review): optimal_steps/4 is a float on Python 3 (true division);
# confirm convnet_optimize accepts a non-integer decay_steps.
optimal_steps = 200000

graph_connive = convnet_optimize(
    batch_size=16, patch_sizes=[5,14], strides=[2,7], depths=[16,128],
    hidden_sizes=[128,64],
    rate_alpha=0.02, decay_rate=0.9, decay_steps=optimal_steps/4,
    beta_loss=0.0005,
    dropout_rate=0.5,
    base_seed=45645,
    data=datasets)

# Train for the full step budget, reporting every 1000 steps.
run_graph(graph_connive, datasets, optimal_steps, report_every=1000)