In [None]:
%matplotlib inline
from __future__ import print_function
import gc
import math
import os
import random
import sys
import traceback
import sklearn.metrics
import skimage.color
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from IPython.display import Image
from scipy import ndimage

import improc
import convnet
import mutate
import convevo
import darwin

In [None]:
# Re-import the local project modules so that edits made to them on disk are picked up
# without restarting the kernel. `reload` is used here as a builtin (Python 2).
reload (improc)
reload (convnet)
reload (mutate)
reload (convevo)
reload (darwin)

In [None]:
# Make notebook prints appear in real time by flushing stdout after every write.
# Approach taken from:
# http://stackoverflow.com/questions/29772158/make-ipython-notebook-print-in-real-time
# Keep a reference to the stream that is current before sys.stdout is replaced below.
oldsysstdout = sys.stdout
class flushfile():
    """File-like proxy that flushes the wrapped stream after every write.

    Every attribute that is not defined on the proxy itself is forwarded to the
    wrapped object, so the proxy can stand in for it (e.g. as sys.stdout).

    Args:
        f: the underlying file-like object; it must provide write() and flush().
    """
    def __init__(self, f):
        self.f = f
    def __getattr__(self, name):
        # __getattr__ only runs when normal lookup on the proxy fails. If `f` itself is
        # missing (instance created without __init__), forwarding would recurse forever.
        if name == "f":
            raise AttributeError(name)
        # Use getattr() rather than object.__getattribute__(): the latter skips the
        # wrapped object's own __getattr__, which breaks when the wrapped stream is itself
        # a proxy (for example another flushfile after this cell has been run twice).
        return getattr(self.f, name)
    def write(self, x):
        """Write x to the wrapped stream and flush it immediately."""
        self.f.write(x)
        self.f.flush()
    def flush(self):
        """Flush the wrapped stream."""
        self.f.flush()
# Route all subsequent prints through the flushing proxy. Note that this wraps whatever
# sys.stdout currently is, so running the cell again nests one proxy inside another.
sys.stdout = flushfile(sys.stdout)

# Enumerate Images
Image names are sequential, so every tenth image (file names ending in `0.png`) is added to the test set; all other images go to the training set.

In [None]:
training = []
test = []

# os.walk yields directories and files in arbitrary (filesystem) order. Sort both so the
# resulting lists, and therefore any seeded shuffle of them, are the same on every run
# and every machine.
for root, dirs, files in os.walk('captures'):
    dirs.sort()  # in place, so os.walk also descends in sorted order
    for name in sorted(files):
        path = os.path.join(root, name)
        low_name = name.lower()
        # Find all the image files, split into test and training.
        if low_name.endswith(".png"):
            # A file name ending in 0 selects every tenth of the sequential names.
            if low_name.endswith("0.png"):
                test.append(path)
            else:
                training.append(path)

print("Training:", len(training), "Test:", len(test))
print(training[:2])
print(test[:2])

# Image Processing
Each image file contains a color image (top half), and an encoded depth image (bottom half)
<img src="testing/IMG_2114.PNG">
* Note: The image may also contain the orientation data. If so it is encoded in the first two pixels of the depth image. If the first pixel of the depth image is red, the second has the x, y, z, w quaternion components encoded in the r,g,b,a values.

The improc module contains functions for splitting the image, decoding the depth back into floating point millimeters, and for filling in gaps.

In [None]:
COLOR_CHANNELS = 3  # Number of leading channels kept from each color image.

def load_image(image_path):
    """Read one capture file and separate it into color, depth and attitude.

    Args:
        image_path: path of the combined color/depth image file.

    Returns:
        A tuple (color_image, depths, attitude). color_image holds the first
        COLOR_CHANNELS channels divided by improc.BYTE_MAX; depths and attitude are
        whatever improc.decode_depth returns for the depth part.
    """
    raw = ndimage.imread(image_path).astype(np.float32)
    color_part, depth_part = improc.split(raw)
    # Discard alpha and normalize.
    normalized_color = color_part[:, :, :COLOR_CHANNELS] / improc.BYTE_MAX
    depths, attitude = improc.decode_depth(depth_part)
    return normalized_color, depths, attitude

In [None]:
def compute_average_depth(paths=None):
    """Average the per-image mean depth over a set of capture files.

    Args:
        paths: iterable of image paths. Defaults to the module-level `training` list;
            pass the paths explicitly once that list has been deleted.

    Returns:
        The NaN-ignoring mean of the per-image NaN-ignoring mean depths.
    """
    if paths is None:
        paths = training
    depth_averages = []

    for path in paths:
        _, depth, _ = load_image(path)
        depth_averages.append(np.nanmean(depth))
        # Progress report every 1000 images.
        if len(depth_averages) % 1000 == 0:
            print("Image", len(depth_averages))
    return np.nanmean(depth_averages)

# Precomputed via compute_average_depth()
# NOTE(review): presumably in the same units as the decoded depths (the notebook text
# says millimeters) -- confirm against improc.decode_depth.
MEAN_DEPTH = np.float32(1688.97)

print(MEAN_DEPTH)

# Depth labels
We want more precision for nearby things, so the labels use progressively expanding buckets. If the smallest bucket has size s and each successive bucket is larger by a factor F, then:

improc.MAX_DEPTH == sF<sup>0</sup> + sF<sup>1</sup> + sF<sup>2</sup> + ... + sF<sup>label count - 1</sup>

So, plug into sum of geometric series formula:

improc.MAX_DEPTH == s * (1 - F<sup>label count</sup>) / (1 - F)

Since there are two unknowns we can choose either the factor or the bucket size. A factor of 1.8 resulted in buckets that seemed about right.

In [None]:
def size_for_factor(factor, buckets):
    """Return the width of the smallest depth bucket.

    The widths form a geometric series s, s*factor, s*factor**2, ... with `buckets`
    terms whose sum is improc.MAX_DEPTH; this solves that sum for s.

    Args:
        factor: ratio between the widths of successive buckets.
        buckets: number of buckets.

    Returns:
        The width s of the first (smallest) bucket.
    """
    if factor == 1:
        # The closed form below is 0/0 for a factor of one; the buckets are simply
        # all the same width.
        return improc.MAX_DEPTH / float(buckets)
    return improc.MAX_DEPTH * (1 - factor) / (1 - factor ** buckets)

def depth_label_boundaries(factor, buckets):
    """List the upper boundary of every depth bucket, nearest bucket first.

    Args:
        factor: ratio between the widths of successive buckets.
        buckets: number of buckets.

    Returns:
        A list of `buckets` cumulative boundaries; entry k is the sum of the widths
        of buckets 0..k.
    """
    width = size_for_factor(factor, buckets)
    upper_edge = 0
    edges = []
    for _ in range(buckets):
        upper_edge += width
        edges.append(upper_edge)
        width *= factor  # the next bucket is `factor` times wider
    return edges

# Number of depth labels (buckets).
DEPTH_LABEL_COUNT = 10
# Ratio between the widths of successive buckets.
DEPTH_BUCKET_SCALE_FACTOR = 1.8
# Cumulative upper boundary of each bucket, smallest first.
DEPTH_BOUNDARIES = depth_label_boundaries(DEPTH_BUCKET_SCALE_FACTOR, DEPTH_LABEL_COUNT)

def depth_label(depth, labels=None):
    """Mark the bucket that `depth` falls into.

    Args:
        depth: the depth value to classify.
        labels: optional indexable container to write into; when omitted a zeroed
            float32 array of length DEPTH_LABEL_COUNT is created.

    Returns:
        `labels` with a 1 stored at the index of the first boundary in
        DEPTH_BOUNDARIES that is greater than `depth`. If no boundary is greater,
        nothing is written.
    """
    one_hot = labels if labels is not None else np.zeros(shape=(DEPTH_LABEL_COUNT), dtype=np.float32)
    bucket = next(
        (index for index, upper in enumerate(DEPTH_BOUNDARIES) if depth < upper),
        None,
    )
    if bucket is not None:
        one_hot[bucket] = 1
    return one_hot

# Sanity check: show which bucket the mean depth falls into.
print("Mean depth label:", depth_label(MEAN_DEPTH))

In [None]:
# Divisors used to scale the LAB channels: L is divided by L_MAX / 2 and then shifted
# by -1, a and b are divided by AB_SCALE_MAX.
L_MAX = 100
AB_SCALE_MAX = 127

def prepare_images(paths, height, width, channels=COLOR_CHANNELS, strategy=np.mean):
    """Load images and turn them into network inputs and depth targets.

    Each image is center-cropped to (height, width). The color crop is converted to
    LAB and scaled; the depth map has its holes filled by improc.mipmap_imputer, is
    cropped the same way and normalized as 2 * (depth - MEAN_DEPTH) / improc.MAX_DEPTH.

    Args:
        paths: sequence of image file paths.
        height: crop height in pixels.
        width: crop width in pixels.
        channels: size of the last axis of the input array; the LAB crop is assigned
            into it, so it has to match the LAB image's channel count.
        strategy: reduction passed to improc.mipmap_imputer for filling holes.

    Returns:
        A tuple (inputs, targets): float32 arrays of shape
        (len(paths), height, width, channels) and (len(paths), height * width).
    """
    inputs = np.empty(shape=(len(paths), height, width, channels), dtype=np.float32)
    targets = np.empty(shape=(len(paths), height * width), dtype=np.float32)
    scales = None
    for i, path in enumerate(paths):
        pixels, depths, _ = load_image(path)
        image_shape = pixels.shape
        # Floor division keeps the offsets integral, so they stay valid slice indices
        # whether or not true division is in effect.
        h_offset = (image_shape[0] - height) // 2
        w_offset = (image_shape[1] - width) // 2
        if scales is None:
            # Computed once from the first image and reused for the rest.
            # NOTE(review): assumes every image has the same dimensions -- confirm.
            scales = improc.compute_scales(image_shape[0], image_shape[1])
        lab_image = skimage.color.rgb2lab(pixels[h_offset : h_offset + height, w_offset : w_offset + width])
        inputs[i] = (lab_image / [L_MAX / 2, AB_SCALE_MAX, AB_SCALE_MAX]) - [1, 0, 0]
        depths = improc.mipmap_imputer(depths, strategy, scales) # fill holes
        depths = depths[h_offset : h_offset + height, w_offset : w_offset + width]
        depths = 2 * (depths - MEAN_DEPTH) / improc.MAX_DEPTH
        targets[i] = depths.reshape(height * width)
    return (inputs, targets)

In [None]:
# Load one sample capture and show its color part.
example_image, example_depth, example_attitude = load_image("testing/IMG_2114.PNG")
plt.imshow(example_image)
print(example_image.shape, example_image.dtype)

def prepare_dummy(image_path):
    # NOTE(review): `prepared_image` is not assigned in this function and no definition
    # of it is visible in this notebook, so calling this presumably raises NameError.
    # It looks like an unfinished stub -- confirm whether it can be removed.
    return prepared_image

In [None]:
# Show the decoded depth map and the attitude value of the sample capture.
plt.imshow(example_depth)
print(example_depth.shape, example_depth.dtype)
print(example_attitude)

In [None]:
# Run the sample through prepare_images with a centered 100x100 crop. The array shown
# is the scaled LAB data passed straight to imshow, not an RGB image.
prepared_labs, prepared_depths = prepare_images(["testing/IMG_2114.PNG"], 100, 100)
plt.imshow(prepared_labs[0])
print(prepared_labs[0].shape, prepared_labs[0].dtype)

In [None]:
# Targets are stored flattened; restore the 100x100 crop shape for display and print
# the range of the normalized depth values.
plt.imshow(prepared_depths[0].reshape(100,100))
print(prepared_depths[0].shape, prepared_depths[0].dtype)
print(np.min(prepared_depths[0]), np.max(prepared_depths[0]))

# Data Management

In [None]:
# Frame dimensions used to describe the data set below.
image_height = 480
image_width = 640

# Description of the data set: input size, flattened target size, and the file lists
# as numpy arrays (setup_cross_validation indexes them with a permutation).
data_files = {
    "image_size": (image_height, image_width, COLOR_CHANNELS),
    "depth_size": image_height * image_width,
    "train_files": np.array(training),
    "test_files": np.array(test)
}

# The path lists now live in data_files.
# NOTE(review): because these names are deleted, re-running this cell, or calling
# anything that still reads `training`, raises NameError until the enumeration cell
# has been run again.
del training
del test

In [None]:
def setup_cross_validation(data, train_count, valid_count, test_count=None, chunk_size=None, seed=None):
    """Derive a shuffled training/validation split from a data description.

    Args:
        data: dict with at least "train_files" and "test_files" (numpy arrays of
            paths). It is shallow-copied, so the caller's dict is not modified.
        train_count: number of shuffled training paths to keep for training.
        valid_count: number of the following shuffled paths to use for validation.
        test_count: if given, keep only the first `test_count` test paths.
        chunk_size: if given, a shape whose first two entries are (height, width);
            it replaces "image_size", and "depth_size" becomes height * width.
        seed: if not None, seeds numpy's global random generator before shuffling
            so the split is repeatable.

    Returns:
        A new dict like `data` with "train_files" replaced, "valid_files" added, and
        the size and test entries adjusted as requested.
    """
    cross_data = data.copy()

    # Compare against None: a plain truth test would silently ignore seed=0 and leave
    # the shuffle unseeded.
    if seed is not None:
        np.random.seed(seed)

    if chunk_size:
        cross_data["image_size"] = chunk_size
        cross_data["depth_size"] = chunk_size[0] * chunk_size[1]

    paths = cross_data["train_files"]
    # Indexing with the permutation yields a new shuffled array; the source is untouched.
    shuffled = paths[np.random.permutation(paths.shape[0])]

    cross_data["train_files"] = shuffled[:train_count]
    cross_data["valid_files"] = shuffled[train_count:train_count + valid_count]

    if test_count is not None:
        cross_data["test_files"] = data["test_files"][:test_count]

    return cross_data

# Graph Setup

In [None]:
def setup_graph(
    batch_size,
    image_shape,
    target_shape,
    layer_stack,
    rate_alpha=0.05,
    decay_rate=1.0,
    decay_steps=1000
):
    """Build the TensorFlow graph for training and verifying a layer stack.

    Args:
        batch_size: number of examples per batch; it is baked into the placeholder shapes.
        image_shape: tuple giving the shape of one input, without the batch axis.
        target_shape: tuple giving the shape of one target, without the batch axis.
        layer_stack: object whose construct(input_shape) returns the layers to connect.
        rate_alpha: initial learning rate passed to tf.train.exponential_decay.
        decay_rate: decay rate passed to tf.train.exponential_decay.
        decay_steps: decay steps passed to tf.train.exponential_decay.

    Returns:
        A dict with the graph, the batch size, the "train"/"targets"/"verify"
        placeholders, the "loss" and "optimizer" ops, and the "predictions" and
        "verify_predictions" outputs.
    """
    graph = tf.Graph()
    with graph.as_default():
        input_shape = (batch_size,) + image_shape
        output_shape = (batch_size,) + target_shape
        # Separate placeholders for the training batch, its targets, and the data used
        # for verification (validation or test).
        train   = tf.placeholder(tf.float32, shape=input_shape)
        targets = tf.placeholder(tf.float32, shape=output_shape)
        verify  = tf.placeholder(tf.float32, shape=input_shape)

        layers = layer_stack.construct(input_shape)
        l2_loss = 0
        
        # Parameters are created once per layer here; both model() calls below reuse
        # the same layer objects.
        for layer in layers:
            layer.setup_parameters()
            l2_loss = layer.update_loss(l2_loss)
        
        # Chains the layers starting from nodes[-1]. Note that the `train` argument is a
        # flag handed to layer.connect and shadows the `train` placeholder above.
        # NOTE(review): presumably it switches training-only behavior -- confirm in convevo.
        def model(nodes, train):
            for layer in layers:
                nodes.append(layer.connect(nodes[-1], train))
            return nodes[-1]

        results = model([train], True)
        # Mean squared error against the targets plus the term accumulated by the layers.
        loss = tf.reduce_mean(tf.squared_difference(results, targets)) + l2_loss
        
        # Step counter incremented by the optimizer; it drives the learning rate decay.
        global_step = tf.Variable(0)
        learning_rate = tf.train.exponential_decay(rate_alpha, global_step, decay_steps, decay_rate)
        
        info = {
            "graph": graph,
            "batch_size": batch_size,
            "train": train,
            "targets": targets,
            "loss": loss,
            "optimizer": tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step),

            # Predictions for training and verification (validation or test)
            "predictions": results,
            "verify": verify,
            "verify_predictions": model([verify], False)
        }
    return info

# Graph Execution

In [None]:
def prediction_error(predictions, targets):
    """Return the mean squared error between `predictions` and `targets`.

    Both arguments are handed to sklearn.metrics.mean_squared_error in that order.
    """
    mean_squared_error = sklearn.metrics.mean_squared_error
    return mean_squared_error(predictions, targets)

In [None]:
def batch_error(session, graph_info, inputs, targets, batch_size):
    """Average the prediction error over all complete batches of `inputs`.

    Args:
        session: session used to run the "verify_predictions" output.
        graph_info: dict holding the "verify" placeholder and "verify_predictions".
        inputs: sequence of inputs; a trailing partial batch is ignored.
        targets: sequence of targets aligned with `inputs`.
        batch_size: number of examples fed per run.

    Returns:
        A tuple (error, prediction, target): the mean of the per-batch errors, plus
        the last prediction of the final batch and the target of that same example.

    Raises:
        ValueError: if `inputs` holds fewer than `batch_size` examples.
    """
    # Floor division: the count is used as a loop bound, so it must be an integer.
    batch_count = len(inputs) // batch_size
    if batch_count == 0:
        # Without at least one full batch there is nothing to evaluate or return.
        raise ValueError("batch_error needs at least batch_size (%d) inputs, got %d" % (batch_size, len(inputs)))
    total_error = 0
    for b in range(batch_count):
        offset = b * batch_size
        end = offset + batch_size
        batch_data = inputs[offset:end]
        batch_targets = targets[offset:end]
        predictions = session.run([graph_info["verify_predictions"]], feed_dict={graph_info["verify"]: batch_data})[0]
        total_error += prediction_error(predictions, batch_targets) / np.float32(batch_count)
    # Return the target belonging to the returned prediction, so the pair can be compared.
    return total_error, predictions[-1], batch_targets[-1]

In [None]:
def run_graph(graph_info, data, step_count, report_every=50, verbose=True, compute_test=False, error_maximum=None):
    """Train the graph on minibatches and report validation (and optionally test) error.

    Args:
        graph_info: dict produced by setup_graph.
        data: dict with "image_size", "train_files", "valid_files" and, when
            compute_test is set, "test_files".
        step_count: index of the last training step; steps 0..step_count are run.
        report_every: validation error is computed every this many steps.
        verbose: when true, the minibatch loss is printed at each report step.
        compute_test: when true, the test error is computed after training.
        error_maximum: if set, training stops at the first report step after step 0
            whose validation error is below this value.

    Returns:
        A tuple (score, results). score is max_error minus the last validation error,
        with the error capped at max_error. results holds the first prediction, target
        and input of the last minibatch, followed by batch_error's three values for the
        test set when compute_test is set. If the loss becomes NaN, (0, None) is returned.
    """
    with tf.Session(graph=graph_info["graph"]) as session:
        tf.initialize_all_variables().run()
        print("Initialized")
        batch_size = graph_info["batch_size"]
        height, width, channels = data["image_size"]
        max_error = 1
        valid_error = max_error
        # Validation images are prepared lazily at the first report and then reused.
        valid_data = None
        training_files = data["train_files"]
        for step in xrange(step_count + 1):
            # Pick an offset within the training data, which has been randomized.
            # NOTE(review): the modulus is zero when the number of training files
            # equals batch_size, which would raise ZeroDivisionError.
            offset = (step * batch_size) % (training_files.shape[0] - batch_size)
            # Generate a minibatch.
            batch_files = training_files[offset:(offset + batch_size)]
            batch_inputs, batch_targets = prepare_images(batch_files, height, width, channels)
            # Prepare a dictionary telling the session where to feed the minibatch.
            # The key of the dictionary is the placeholder node of the graph to be fed,
            # and the value is the numpy array to feed to it.
            targets = [graph_info["optimizer"], graph_info["loss"], graph_info["predictions"]]
            feed_dict = {graph_info["train"] : batch_inputs, graph_info["targets"] : batch_targets}
            _, l, predictions = session.run(targets, feed_dict=feed_dict)
            # A NaN loss aborts the run with a score of zero.
            if np.isnan(l):
                print("Error computing loss")
                return 0, None
            if (step % report_every == 0):
                if verbose:
                    print("Minibatch loss at step", step, ":", l)
                if valid_data is None:
                    valid_data = prepare_images(data["valid_files"], height, width, channels)
                valid_error, _, _ = batch_error(session, graph_info, valid_data[0], valid_data[1], batch_size)
                # Printed at every report step, independent of `verbose`.
                print("Validation error: %.3f" % valid_error)
                if error_maximum and step > 0 and valid_error < error_maximum:
                    print("Early out.")
                    break
        # Keep one example from the final minibatch for inspection.
        results = (predictions[0], batch_targets[0], batch_inputs[0])
        if compute_test:
            test_inputs, test_targets = prepare_images(data["test_files"], height, width, channels)
            test_results = batch_error(session, graph_info, test_inputs, test_targets, batch_size)
            print("Test error: %.3f" % test_results[0])
            # Tuple concatenation: appends (test error, prediction, target).
            results = results + test_results
        return max_error - min(valid_error, max_error), results

In [None]:
def create_stack(convolutions, flatten, hidden_sizes, output_size, init_mean, init_scale):
    """Assemble a convevo.LayerStack from a simple description.

    Args:
        convolutions: iterable of (operation, patch_size, stride, depth, padding, relu)
            tuples, one per image layer, in order.
        flatten: passed to convevo.LayerStack as its `flatten` argument.
        hidden_sizes: sizes of the hidden layers added after the image layers; each
            gets a bias and a relu.
        output_size: size of the final hidden layer (bias, no relu), or None to add
            no output layer.
        init_mean: mean of the "normal" initializer given to every layer.
        init_scale: scale of the "normal" initializer given to every layer.

    Returns:
        The populated convevo.LayerStack.
    """
    stack = convevo.LayerStack(flatten=flatten)
    # Each layer gets its own Initializer instance.
    default_init = lambda: convevo.Initializer("normal", mean=init_mean, scale=init_scale)

    for operation, patch_size, stride, depth, padding, relu in convolutions:
        # Honor the padding given in the layer description instead of forcing "SAME".
        stack.add_layer(convevo.ImageLayer(operation, patch_size, stride, depth, padding, default_init()), relu=relu)
    for hidden_size in hidden_sizes:
        stack.add_layer(convevo.HiddenLayer(hidden_size, bias=True, initializer=default_init()), relu=True)
    if output_size is not None:
        stack.add_layer(convevo.HiddenLayer(output_size, bias=True, initializer=default_init()), relu=False)

    return stack

In [None]:
# Prototype configuration: full-frame samples in small batches.
batch_size = 4
sample_height = 480
sample_width = 640
# 9800 training files, 200 validation files, first 1000 test files.
prototype_cross = setup_cross_validation(data_files, 9800, 200, 1000, (sample_height, sample_width, COLOR_CHANNELS))
# One tuple per image layer: (operation, patch_size, stride, depth, padding, relu).
# The last layer has depth 1, so the stack ends with one value per pixel.
conv_layers = [
    ("conv",       5, 1, 10, "SAME", False),
    ("conv",      10, 1, 20, "SAME", False),
    ("conv",      25, 1, 25, "SAME", False),
    ("conv_bias", 25, 1, 25, "SAME", False),
    ("conv_bias", 20, 1, 30, "SAME", True),
    ("conv_bias",  5, 1,  1, "SAME", False)
]
# Convolution-only stack: no hidden layers and no separate output layer.
prototype = create_stack(conv_layers, True, [], None, 0.0, 1.0)
# Fixed seed for the stack's reseed call.
prototype.reseed(random.Random(24601))
prototype_graph = setup_graph(batch_size, prototype_cross["image_size"], (prototype_cross["depth_size"],), prototype)

In [None]:
# Train the prototype: steps 0..4000, reporting validation error every 500 steps.
score, results = run_graph(prototype_graph, prototype_cross, 4000, 500, True)
print(score)

In [None]:
# results[0]: first prediction of the last training minibatch, reshaped to image size.
plt.imshow(results[0].reshape(sample_height,sample_width))
print(np.min(results[0]),np.max(results[0]))

In [None]:
# results[1]: the target that belongs to the prediction shown above.
plt.imshow(results[1].reshape(sample_height,sample_width))

In [None]:
# results[2]: the (scaled LAB) input that produced the prediction shown above.
plt.imshow(results[2])

In [None]:
# Prepare a single 400x400 crop of the first training image for the checks below.
test_width = 400
test_height = 400
# NOTE(review): prepare_images is declared as (paths, height, width, ...) but receives
# (test_width, test_height) here. Harmless while both are 400; the reshape calls in the
# following cells use the same order, so change them together if the values diverge.
images, depth = prepare_images(prototype_cross["train_files"][:1], test_width, test_height, COLOR_CHANNELS)

In [None]:
# Show the prepared input and the value range of each of its three channels.
plt.imshow(images[0])
print(np.min(images[0][:,:,0]),np.max(images[0][:,:,0]))
print(np.min(images[0][:,:,1]),np.max(images[0][:,:,1]))
print(np.min(images[0][:,:,2]),np.max(images[0][:,:,2]))

In [None]:
# Show the prepared depth target and its value range.
plt.imshow(depth[0].reshape(test_width, test_height))
# Both extremes are taken from the depth target (the maximum used to read the image).
print(np.min(depth[0]),np.max(depth[0]))

In [None]:
print(images.shape)
print(depth.shape)
# Baseline: error when the first (scaled L) channel of the input is used directly as
# the depth prediction.
prediction_error(images[:,:,:,0].reshape(1, test_width * test_height), depth)

In [None]:
# Baseline: error of an all-zero prediction. Targets are normalized around MEAN_DEPTH,
# so zero corresponds to predicting the mean depth everywhere.
prediction_error(depth, np.zeros(shape=(1, test_width * test_height)))