In [None]:
%matplotlib inline
from __future__ import print_function
import numpy as np
import os
import tensorflow as tf
import sys
import matplotlib.pyplot as plt
from IPython.display import Image
from scipy import ndimage

In [None]:
# http://stackoverflow.com/questions/29772158/make-ipython-notebook-print-in-real-time
class flushfile():
    """Proxy around a file-like object that flushes after every write.

    Installed over ``sys.stdout`` so that printed output shows up in the
    notebook immediately rather than when the stream's buffer fills.
    Any attribute not defined here is looked up on the wrapped stream.
    """

    # Marker used below to recognise an already-installed wrapper, even when
    # this cell is re-run and the class object itself has been redefined.
    _flushfile_wrapper = True

    def __init__(self, f):
        self.f = f

    def __getattr__(self, name):
        # Only called when normal lookup fails. If `f` itself is missing
        # (instance created without __init__), fail cleanly instead of
        # recursing through self.f -> __getattr__('f') -> self.f -> ...
        if name == "f":
            raise AttributeError(name)
        return getattr(self.f, name)

    def write(self, x):
        """Write `x` to the wrapped stream and flush straight away."""
        self.f.write(x)
        self.f.flush()

    def flush(self):
        """Flush the wrapped stream."""
        self.f.flush()


# Keep a handle on the real stream. If this cell is executed again, unwrap the
# wrapper installed by the previous run so wrappers never stack and
# `oldsysstdout` always refers to the underlying stream.
oldsysstdout = sys.stdout.f if getattr(sys.stdout, "_flushfile_wrapper", False) else sys.stdout
sys.stdout = flushfile(oldsysstdout)

# Enumerate Images
Image names are sequential, so add every tenth image to the validation set based on filename.

In [None]:
# Paths of the PNG captures found under 'captures', split by filename:
# names ending in "0.png" go to validation, every other PNG to training.
training = []
validation = []

for root, dirs, files in os.walk('captures'):
    # os.walk reports entries in arbitrary, filesystem-dependent order.
    # Sorting `dirs` in place fixes the order in which subdirectories are
    # visited, and sorting `files` fixes the order within each directory,
    # so both lists come out the same on every run and machine.
    dirs.sort()
    for name in sorted(files):
        path = os.path.join(root, name)
        low_name = name.lower()
        # Find all the image files, split into validation and training.
        if low_name.endswith(".png"):
            if low_name.endswith("0.png"):
                validation.append(path)
            else:
                training.append(path)

print("Training:", len(training), "Validation:", len(validation))

In [None]:
# Show the first five training image paths as a quick check of the split.
training[:5]

In [None]:
# Show the first five validation image paths as a quick check of the split.
validation[:5]

# Image Processing
Each image file contains a color image (top half) and an encoded depth image (bottom half).
<img src="captures/testing/IMG_2114.PNG">
* Note: The image may also contain orientation data. If so, it is encoded in the first two pixels of the depth image: if the first pixel is opaque red, the second pixel holds the x, y, z, w quaternion components encoded in its r, g, b, a values.

In [None]:
def split(image):
    """Split the image data into the top and bottom half.

    Args:
        image: array whose first axis is the row (height) axis.

    Returns:
        (top, bottom): slices of `image` along the first axis. When the height
        is odd, the extra row ends up in `bottom`.
    """
    # Floor division keeps the index an int on both Python 2 and Python 3;
    # plain `/` gives a float on Python 3, which is not a valid slice index.
    split_height = image.shape[0] // 2
    return image[:split_height], image[split_height:]

In [None]:
def decode_depth(image):
    """Decode an encoded depth image into a float depth map plus orientation.

    12 bits of depth in millimeters is encoded with 6 bits in red and 3 bits in each of green and blue.

    If pixel [0, 0] equals [255, 0, 0, 255], pixel [0, 1] is read as an
    orientation quaternion whose four channel values are mapped from 0..255
    to -1..1. Both header pixels are then treated as "no data" in the depth.

    The input array is never modified; all arithmetic is done on a float copy,
    which also keeps `green - red` / `blue - red` from wrapping around when
    the input has an unsigned integer dtype.

    Args:
        image: array indexed as [row, column, channel] with red, green and
            blue in channels 0, 1 and 2 (four channels when the orientation
            header is present).

    Returns:
        (depth, orientation): `depth` is a float array of shape
        `image.shape[:2]`, NaN wherever the red channel is zero;
        `orientation` is a list of four numbers.
    """
    BYTE_MAX = 255
    CHANNEL_MAX = 8.0
    MAX_RED_VALUE = BYTE_MAX - CHANNEL_MAX
    CHANNELS_MAX = CHANNEL_MAX * CHANNEL_MAX
    # Default orientation if not present in image.
    # NOTE(review): the notebook text describes the component order as
    # x, y, z, w, for which identity would be [0, 0, 0, 1] -- confirm.
    orientation = [1, 0, 0, 0]

    has_orientation = np.array_equal(image[0, 0], [BYTE_MAX, 0, 0, BYTE_MAX])
    if has_orientation:
        # Orientation quaternion is present.
        pixel = image[0, 1]
        for c in range(len(orientation)):
            orientation[c] = ((2.0 * pixel[c]) / BYTE_MAX) - 1

    # Private float copy of the colour channels (np.array always copies).
    channels = np.array(image[:, :, :3], dtype=np.float64)
    if has_orientation:
        # Clear out the header pixels so they don't get interpreted as depth.
        channels[0, :2] = 0

    red = channels[:, :, 0]
    green = channels[:, :, 1]
    blue = channels[:, :, 2]

    depth = ((MAX_RED_VALUE - red) * CHANNELS_MAX) + ((green - red) * CHANNEL_MAX) + (blue - red)

    # Zero in the red channel indicates the sensor provided no data.
    depth[red == 0] = np.nan
    return depth, orientation

# Fill NaNs with localized stat values using mipmaps
This combines the mipmap construction from: http://stackoverflow.com/questions/14549696/mipmap-of-image-in-numpy

with the NaN-aware averaging from: http://stackoverflow.com/questions/5480694/numpy-calculate-averages-with-nans-removed

In [None]:
def mipmap_imputer(image, strategy=np.mean, scales=None):
    """Fill NaNs in `image` with statistics of progressively larger neighbourhoods.

    A chain of downsampled "mipmaps" is built by reducing y-by-x blocks with
    `strategy` while ignoring NaNs. The chain is then walked from coarsest to
    finest, and every NaN at a level is replaced by the value of the block
    that covers it one level coarser. A NaN therefore takes its value from the
    smallest enclosing block that contained any valid data.

    Args:
        image: array with a two-element shape, NaN marking missing values.
            It is not modified.
        strategy: reduction called as `strategy(masked_array, axis=...)` and
            expected to return a masked array (e.g. `np.mean`).
        scales: sequence of (y, x) block sizes applied in order, each to the
            output of the previous one. Every level's dimensions must be
            divisible by its block size. Defaults to a chain whose factors
            multiply to 480 rows by 640 columns.

    Returns:
        A copy of `image` with NaNs replaced wherever some level of the chain
        had valid data for that location.

    Raises:
        ValueError: if a level's shape is not divisible by its (y, x) scale.
    """
    scales = scales if scales else [(5,5), (3,2), (2,2), (2,2), (2,2), (2,2), (2,2), (1,2)]

    # Downsampling pass: mipmaps[i] is the result of applying scales[0..i].
    mipmaps = []
    mipmap = image
    for y, x in scales:
        # Work on a copy so a strategy that reorders data in place cannot
        # touch the caller's image or an already-stored level.
        mipmap = mipmap.copy()
        size = mipmap.shape
        if size[0] % y or size[1] % x:
            raise ValueError(
                "mipmap of shape %r is not divisible by scale %r" % (size, (y, x)))
        # Floor division keeps the dimensions ints on Python 3 as well.
        reshaped = mipmap.reshape(size[0] // y, y, size[1] // x, x)
        masked = np.ma.masked_array(reshaped, np.isnan(reshaped))
        # Reduce within each block; blocks that are entirely NaN stay NaN.
        mipmap = strategy(strategy(masked, axis=3), axis=1).filled(np.nan)
        mipmaps.append(mipmap)

    # Upsampling pass: coarsest level first, so gaps filled at one level are
    # available when filling the next finer one.
    for index, mipmap in reversed(list(enumerate(mipmaps))):
        y, x = scales[index]
        expanded = mipmap
        if x > 1:
            expanded = np.repeat(expanded, x, axis=1)
        if y > 1:
            expanded = np.repeat(expanded, y, axis=0)
        # The finest level fills a copy of the input; other levels fill the
        # next finer mipmap in place (those arrays are private to this call).
        target = mipmaps[index - 1] if index > 0 else image.copy()

        nans = np.isnan(target)
        target[nans] = expanded[nans]
    return target