In [None]:
%matplotlib inline
from __future__ import print_function
import gc
import os
import random
import sys
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from IPython.display import Image
from scipy import ndimage

import improc

In [None]:
# Re-import improc so edits made to the module on disk take effect without restarting the kernel.
reload(improc)

In [None]:
# http://stackoverflow.com/questions/29772158/make-ipython-notebook-print-in-real-time
# Keep a handle on the stream being wrapped so it can be restored by hand if needed.
oldsysstdout = sys.stdout
class flushfile(object):
    """File-like wrapper that flushes the wrapped stream after every write.

    Installed as ``sys.stdout`` so that printed progress shows up in the
    notebook immediately instead of when the buffer happens to fill.
    Any attribute not defined here is delegated to the wrapped stream.
    """
    def __init__(self, f):
        # f: the underlying file-like object (must provide write() and flush()).
        self.f = f
    def __getattr__(self, name):
        # __getattr__ only runs when normal lookup fails. If 'f' itself is
        # missing (e.g. instance created without __init__), bail out instead
        # of recursing forever through self.f.
        if name == "f":
            raise AttributeError(name)
        # Use getattr() rather than object.__getattribute__() so delegation
        # also works when the wrapped object is itself a delegating wrapper,
        # which happens whenever this cell is executed more than once.
        return getattr(self.f, name)
    def write(self, x):
        """Write x to the wrapped stream and flush it right away."""
        self.f.write(x)
        self.f.flush()
    def flush(self):
        """Flush the wrapped stream."""
        self.f.flush()
sys.stdout = flushfile(sys.stdout)

# Enumerate Images
Image names are sequential, so every tenth image (any file whose name ends in `0.png`) is held out in the `test` list, and all other PNG files go into the `training` list.

In [None]:
def split_image_paths(root_dir):
    """Collect every PNG under root_dir and split the paths into two lists.

    Files whose lower-cased name ends in "0.png" go to the test list; every
    other file ending in ".png" (case-insensitive) goes to the training list.

    Directories and file names are visited in sorted order. os.walk otherwise
    yields entries in whatever order the filesystem returns them, which makes
    the resulting lists (and anything sliced from them) differ between
    machines and runs.

    Args:
        root_dir: directory to walk recursively. A missing directory yields
            two empty lists.

    Returns:
        (training_paths, test_paths) as a tuple of lists of path strings.
    """
    training_paths = []
    test_paths = []
    for root, dirs, files in os.walk(root_dir):
        # Sorting in place controls the order in which os.walk descends.
        dirs.sort()
        for name in sorted(files):
            low_name = name.lower()
            if not low_name.endswith(".png"):
                continue
            path = os.path.join(root, name)
            if low_name.endswith("0.png"):
                test_paths.append(path)
            else:
                training_paths.append(path)
    return training_paths, test_paths

training, test = split_image_paths('captures')

print("Training:", len(training), "Test:", len(test))

In [None]:
# Show the first five training paths as a quick sanity check of the split.
training[:5]

In [None]:
# Show the first five test paths as a quick sanity check of the split.
test[:5]

# Image Processing
Each image file contains a color image (top half) and an encoded depth image (bottom half).
<img src="testing/IMG_2114.PNG">
* Note: The image may also contain orientation data. If so, it is encoded in the first two pixels of the depth image: when the first pixel of the depth image is red, the second pixel holds the x, y, z, w quaternion components in its r, g, b, a values.

The improc module contains functions for splitting the image, decoding the depth back into floating point millimeters, and for filling in gaps.

In [None]:
def prepare_image(image_path, strategy=np.mean):
    """Load one combined capture and return its color and depth components.

    The file is read as float32, split by improc into its color and depth
    parts, and the depth part is decoded.

    Args:
        image_path: path of the combined color/depth image file.
        strategy: function handed to improc.mipmap_imputer for filling gaps
            in the decoded depths (defaults to np.mean).

    Returns:
        A 4-tuple (color_image, depths, attitude, cleaned):
          color_image - first three channels divided by improc.BYTE_MAX
          depths      - decoded depths exactly as returned by improc.decode_depth
          attitude    - second value returned by improc.decode_depth
          cleaned     - gap-filled depths divided by improc.MAX_DEPTH
    """
    raw = ndimage.imread(image_path).astype(np.float32)
    rgb_part, depth_part = improc.split(raw)
    depths, attitude = improc.decode_depth(depth_part)

    # Drop the alpha channel, then scale into the normalized range.
    color_image = rgb_part[:, :, :3] / improc.BYTE_MAX

    # Fill holes first, then scale by the maximum depth.
    filled = improc.mipmap_imputer(depths, strategy)
    cleaned = filled / improc.MAX_DEPTH

    return (color_image, depths, attitude, cleaned)

In [None]:
# Run the full preparation on one sample capture and display element 0,
# the normalized color image, together with its shape.
prepared_image = prepare_image("testing/IMG_2114.PNG")
plt.imshow(prepared_image[0])
print(prepared_image[0].shape)

In [None]:
# Element 1 is the decoded depth image before gap filling; element 2 is the
# attitude value returned alongside it by the depth decoder.
plt.imshow(prepared_image[1])
print(prepared_image[1].shape)
print(prepared_image[2])

In [None]:
# Element 3 is the gap-filled, normalized depth image; also report its
# shape and dtype.
plt.imshow(prepared_image[3])
print(prepared_image[3].shape)
print(prepared_image[3].dtype)

In [None]:
def prepare_images(paths, height=480, width=640, channels=3):
    """Prepare a batch of captures as network inputs and flattened targets.

    Args:
        paths: sequence of image file paths; each is passed to prepare_image.
        height: expected image height in pixels.
        width: expected image width in pixels.
        channels: number of color channels kept per pixel.

    Returns:
        (inputs, targets) where inputs is a float32 array of shape
        (len(paths), height, width, channels) holding the color images and
        targets is a float32 array of shape (len(paths), height * width)
        holding the cleaned depth images flattened to one row each.
    """
    count = len(paths)
    pixel_count = height * width
    inputs = np.empty((count, height, width, channels), dtype=np.float32)
    targets = np.empty((count, pixel_count), dtype=np.float32)
    for row, image_path in enumerate(paths):
        color, _depths, _attitude, cleaned = prepare_image(image_path)
        inputs[row] = color
        targets[row] = cleaned.reshape(pixel_count)
    return (inputs, targets)

In [None]:
# Bundle the sample image/depth shapes and the two file lists into one dict.
data = dict(
    image_size=prepared_image[0].shape,
    depth_size=prepared_image[3].shape,
    training_files=training,
    test_files=test,
)

In [None]:
def setup_graph(
    batch_size,
    image_shape,
    target_shape,
    layer_stack,
    rate_alpha=0.05,
    decay_rate=1.0,
    decay_steps=1000
):
    """Build a TensorFlow graph that regresses targets from input images.

    Args:
        batch_size: number of examples per batch; fixed into every placeholder.
        image_shape: per-example input shape (tuple or list), without the
            batch dimension.
        target_shape: per-example target shape (tuple or list), without the
            batch dimension.
        layer_stack: object whose construct(input_shape) returns the layers
            to chain. Each layer must provide setup_parameters(),
            update_loss(loss) and connect(node, flag).
        rate_alpha: initial learning rate.
        decay_rate: factor for tf.train.exponential_decay.
        decay_steps: step interval for tf.train.exponential_decay.

    Returns:
        dict with keys "graph", "batch_size", "train", "targets", "loss",
        "optimizer", "predictions", "verify" and "verify_predictions".
    """
    graph = tf.Graph()
    with graph.as_default():
        # tuple() lets callers pass shapes as lists as well as tuples.
        input_shape = (batch_size,) + tuple(image_shape)
        output_shape = (batch_size,) + tuple(target_shape)
        train   = tf.placeholder(tf.float32, shape=input_shape)
        targets = tf.placeholder(tf.float32, shape=output_shape)
        verify  = tf.placeholder(tf.float32, shape=input_shape)

        layers = layer_stack.construct(input_shape)
        l2_loss = 0

        # Each layer creates its parameters once and folds its own term into
        # the running loss value; the same layers are reused for both paths.
        for layer in layers:
            layer.setup_parameters()
            l2_loss = layer.update_loss(l2_loss)

        def model(nodes, is_training):
            # Chain the layers, feeding each one the previous output.
            # NOTE(review): the flag presumably toggles training-only
            # behaviour inside the layers - confirm against their code.
            for layer in layers:
                nodes.append(layer.connect(nodes[-1], is_training))
            return nodes[-1]

        results = model([train], True)
        loss = tf.reduce_mean(tf.squared_difference(results, targets)) + l2_loss

        # The step counter is bookkeeping, not a model parameter, so keep it
        # out of the trainable-variables collection.
        global_step = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(rate_alpha, global_step, decay_steps, decay_rate)

        # Built in the same order as before: optimizer first, then the
        # verification path.
        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
        verify_predictions = model([verify], False)

        info = {
            "graph": graph,
            "batch_size": batch_size,
            "train": train,
            "targets": targets,
            "loss": loss,
            "optimizer": optimizer,

            # Predictions for training and verification (validation or test)
            "predictions": results,
            "verify": verify,
            "verify_predictions": verify_predictions
        }
    return info

In [None]:
def run_graph(graph_info, data, step_count, report_every=50, verbose=True, accuracy_minimum=None):
    """Train the graph with minibatches and report accuracy along the way.

    Args:
        graph_info: dict as returned by setup_graph.
        data: dict providing the arrays "train", "train_targets", "valid",
            "valid_labels", "test" and "test_labels".
        step_count: index of the last training step; steps 0..step_count run.
        report_every: validation accuracy is computed every this many steps.
        verbose: when true, also print minibatch loss/accuracy and the final
            test accuracy.
        accuracy_minimum: if set, stop early once a validation accuracy
            (after step 0) falls below this value.

    Returns:
        The most recent validation accuracy, or 0 if the loss became NaN.

    Raises:
        ValueError: if there are fewer training examples than batch_size.

    NOTE(review): accuracy() and batch_accuracy() are not defined in the part
    of the notebook visible here; they are assumed to be defined elsewhere.
    """
    with tf.Session(graph=graph_info["graph"]) as session:
        tf.initialize_all_variables().run()
        print("Initialized")
        batch_size = graph_info["batch_size"]

        # Size the offset from the same array the batches are sliced from.
        example_count = data["train_targets"].shape[0]
        if example_count < batch_size:
            raise ValueError(
                "Need at least batch_size (%d) training examples, got %d"
                % (batch_size, example_count))
        # Zero when the data holds exactly one batch; guarded below so the
        # modulo cannot divide by zero.
        offset_span = example_count - batch_size

        # Loop-invariant list of graph nodes to evaluate on every step.
        fetches = [graph_info["optimizer"], graph_info["loss"], graph_info["predictions"]]

        valid_accuracy = 0
        for step in xrange(step_count + 1):
            # Pick an offset within the training data, which has been randomized.
            offset = (step * batch_size) % offset_span if offset_span else 0
            # Generate a minibatch.
            batch_data = data["train"][offset:(offset + batch_size), :, :, :]
            batch_labels = data["train_targets"][offset:(offset + batch_size), :]
            # Prepare a dictionary telling the session where to feed the minibatch.
            # The key of the dictionary is the placeholder node of the graph to be fed,
            # and the value is the numpy array to feed to it.
            feed_dict = {graph_info["train"] : batch_data, graph_info["targets"] : batch_labels}
            _, l, predictions = session.run(fetches, feed_dict=feed_dict)
            if np.isnan(l):
                print("Error computing loss")
                return 0
            if (step % report_every == 0):
                if verbose:
                    print("Minibatch loss at step", step, ":", l)
                    print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
                valid_accuracy = batch_accuracy(session, graph_info, data["valid"], data["valid_labels"], batch_size)
                print("Validation accuracy: %.1f%%" % valid_accuracy)
                if accuracy_minimum and step > 0 and valid_accuracy < accuracy_minimum:
                    print("Early out.")
                    break
        if verbose:
            test_accuracy = batch_accuracy(session, graph_info, data["test"], data["test_labels"], batch_size)
            print("Test accuracy: %.1f%%" % test_accuracy)
        return valid_accuracy