# Depth Prediction from RGB and Infrared Input

This model predicts a depth image from an RGB image and an infrared image of the same resolution.



## Import the necessary modules

In [12]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
from keras.utils import Sequence # for data generator class
from keras.models import Model
from keras.layers import Input, Conv2D, Activation, BatchNormalization, Dropout, concatenate, Conv2DTranspose
from keras.utils import plot_model

## Training data situation
Training data (as well as test data) will lie in directories with the following structure:

<pre>
data
|-- train
    |-- Color
        |-- 1.jpg
        |-- 2.jpg
        ...
        |-- n.jpg
    |-- Infrared
        |-- 1.png
        |-- 2.png
        ...
        |-- n.png
    |-- Depth
        |-- 1.png
        |-- 2.png
        ...
        |-- n.png
|-- test
    |-- Color
    |-- Infrared
    |-- Depth
</pre>

## The Data Generator
Because there are many training and test images, it is reasonable to use a data loader that reads the data batch-wise. Because the default Keras data loader (`ImageDataGenerator`) does not work with two inputs, we need to write our own. It is based on the tutorial at https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly.

In [13]:
class DataGenerator(Sequence):
    """Batch-wise loader of (color, infrared) inputs and depth targets.

    Assumes that examples in the provided folder are named from 1 to n, with n
    being the number of images, i.e. ``Color/<i>.jpg``, ``Infrared/<i>.png``
    and ``Depth/<i>.png`` below ``path_to_data_set``.
    """

    def __init__(self, path_to_data_set='data/train', batch_size=32, image_size=(480,640), shuffle=True):
        """Store the configuration and build the first index ordering.

        Args:
            path_to_data_set: Folder containing the ``Color``, ``Infrared``
                and ``Depth`` sub-folders.
            batch_size: Number of samples per batch.
            image_size: ``(height, width)`` of every image.
            shuffle: Whether to reshuffle the sample order after each epoch.

        Raises:
            TypeError: If ``image_size`` is not a ``(height, width)`` pair.
        """
        super().__init__()
        # Fail early with a clear message instead of at the first batch:
        # e.g. ``(480.640)`` is a float, not a tuple.
        try:
            _height, _width = image_size
        except (TypeError, ValueError):
            raise TypeError(
                "image_size must be a (height, width) pair, got %r" % (image_size,)
            ) from None
        self.path_to_data = path_to_data_set
        self.batch_size = batch_size
        self.image_size = image_size
        self.shuffle = shuffle
        self.training_size = self.__get_training_data_size(self.path_to_data)
        self.on_epoch_end()

    def __get_training_data_size(self, path_to_data):
        """Return the number of files in ``<path_to_data>/Color`` (0 if the folder is missing)."""
        path_color = os.path.join(path_to_data, 'Color')
        if not os.path.isdir(path_color):
            return 0
        return sum(
            1 for entry in os.listdir(path_color)
            if os.path.isfile(os.path.join(path_color, entry))
        )

    def __len__(self):
        """Number of full batches per epoch (a trailing partial batch is dropped)."""
        return self.training_size // self.batch_size

    def on_epoch_end(self):
        """Update indices (and their ordering) after each epoch."""
        # image names start with 1, np.arange(n, m) returns values from n to (m-1)
        self.indices = np.arange(1, self.training_size + 1)
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __read_image(self, sub_folder, file_name, flags):
        """Read one image and raise a descriptive error if OpenCV cannot load it.

        ``cv2.imread`` signals failure by returning ``None`` instead of
        raising, which would otherwise surface later as an unrelated error.
        """
        path = os.path.join(self.path_to_data, sub_folder, file_name)
        img = cv2.imread(path, flags)
        if img is None:
            raise OSError("Could not read image (missing or not decodable): " + path)
        return img

    def __data_generation(self, list_images):
        """Load the samples named in ``list_images`` into batch arrays.

        Returns:
            ``X1`` (n, H, W, 3) uint8 RGB images, ``X2`` (n, H, W, 1) uint16
            infrared images and ``y`` (n, H, W, 1) uint16 depth images, where
            n is ``len(list_images)``.
        """
        n_samples = len(list_images)
        X1 = np.empty((n_samples, *self.image_size, 3), dtype=np.uint8)   # color images
        # Trailing channel axis so the arrays match the 4-D (H, W, 1) network
        # input and output.
        X2 = np.empty((n_samples, *self.image_size, 1), dtype=np.uint16)  # ir images
        y = np.empty((n_samples, *self.image_size, 1), dtype=np.uint16)   # depth images

        for idx, name in enumerate(list_images):
            color = self.__read_image('Color', str(name) + ".jpg", cv2.IMREAD_COLOR)
            # OpenCV loads color images as BGR; convert to RGB.
            X1[idx] = cv2.cvtColor(color, cv2.COLOR_BGR2RGB)

            X2[idx, ..., 0] = self.__read_image('Infrared', str(name) + ".png", cv2.IMREAD_ANYDEPTH)
            y[idx, ..., 0] = self.__read_image('Depth', str(name) + ".png", cv2.IMREAD_ANYDEPTH)

        return X1, X2, y

    def __getitem__(self, index):
        """Generate one batch of data.

        Returns:
            ``([X1, X2], y)`` where X1 contains 8-bit RGB images, X2 16-bit
            infrared images and y the corresponding 16-bit depth images.
        """
        # Indices (= image names) belonging to batch number `index`
        indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]

        X1, X2, y = self.__data_generation(indices)

        return [X1, X2], y
    
    
            

## Initialize the data generators

In [14]:
# Generator that feeds the training samples to the model.
training_generator = DataGenerator(
    path_to_data_set='data/train',
    batch_size=64,
    image_size=(480,640),
    shuffle=True
    )

# Generator for the validation samples; same batch and image size as above.
# image_size must be the tuple (480,640) -- "(480.640)" would be the float 480.64.
validation_generator = DataGenerator(
    path_to_data_set='data/validation',
    batch_size=64,
    image_size=(480,640),
    shuffle=True
    )

## The actual model
This section defines the network architecture of the neural network. It consists of different parts:
- input layers
- fusion layer
- VGG16-like encoder network (configuration D) (see https://arxiv.org/pdf/1409.1556.pdf)
- mirrored decoder network
- output

In [15]:
# Color branch: 3-channel input wrapped in an identity model
input_color = Input(shape=(480,640,3), name="Color_Input")
x = Model(inputs=input_color, outputs=input_color)

# Infrared branch: 1-channel input wrapped in an identity model
input_ir = Input(shape=(480,640,1), name="Infrared_Input")
y = Model(inputs=input_ir, outputs=input_ir)

# Fusion layer: concatenate both branches
combined = concatenate([x.output, y.output], name="Concatenate")

# Every hidden layer is a 3x3, same-padded, ReLU-activated (transposed)
# convolution, so the network is described as a plan of
# (layer class, filters, strides, name) and built in one loop.
layer_plan = [
    # VGG16 style encoder (configuration D); the last convolution of each
    # group uses stride 2 to downsample.
    (Conv2D, 64, (1,1), "Conv3-64_1"),
    (Conv2D, 64, (2,2), "Conv3-64"),

    (Conv2D, 128, (1,1), "Conv3-128_1"),
    (Conv2D, 128, (2,2), "Conv3-128"),

    (Conv2D, 256, (1,1), "Conv3-256_1"),
    (Conv2D, 256, (1,1), "Conv3-256_2"),
    (Conv2D, 256, (2,2), "Conv3-256_3"),

    (Conv2D, 512, (1,1), "Conv3-512_1"),
    (Conv2D, 512, (1,1), "Conv3-512_2"),
    (Conv2D, 512, (2,2), "Conv3-512_3"),

    (Conv2D, 512, (1,1), "Conv3-512_4"),
    (Conv2D, 512, (1,1), "Conv3-512_5"),
    (Conv2D, 512, (2,2), "Conv3-512_6"),

    # Decoder; each group ends with a stride-2 transposed convolution that
    # upsamples again.
    (Conv2D, 512, (1,1), "Conv3-512_7"),
    (Conv2D, 512, (1,1), "Conv3-512_8"),
    (Conv2DTranspose, 512, (2,2), "DeConv3-512_1"),

    (Conv2D, 512, (1,1), "Conv3-512_9"),
    (Conv2D, 512, (1,1), "Conv3-512_10"),
    (Conv2DTranspose, 512, (2,2), "DeConv3-512_2"),

    (Conv2D, 512, (1,1), "Conv3-512_11"),
    (Conv2D, 512, (1,1), "Conv3-512_12"),
    (Conv2DTranspose, 512, (2,2), "DeConv3-512_3"),

    (Conv2D, 256, (1,1), "Conv3-256_4"),
    (Conv2D, 256, (1,1), "Conv3-256_5"),
    (Conv2DTranspose, 256, (2,2), "DeConv3-256"),

    (Conv2D, 128, (1,1), "Conv3-128_2"),
    (Conv2DTranspose, 128, (2,2), "DeConv3-128"),

    (Conv2D, 64, (1,1), "Conv3-64_2"),
]

z = combined
for layer_type, n_filters, step, layer_name in layer_plan:
    z = layer_type(
        n_filters,
        kernel_size=(3,3),
        strides=step,
        padding="same",
        activation="relu",
        name=layer_name,
    )(z)

# Output layer: one channel, no activation
z = Conv2D(1, kernel_size=(3,3), padding="same", name="Conv3-1")(z)

model = Model(inputs=[x.input, y.input], outputs=z)

# Optimize the mean absolute error; additionally report MAE and MSE.
model.compile(optimizer="adam", loss="mae", metrics=['mae', 'mse'])



# TODO: MaxPool and UpPool replaced with __extra__ conv layer? or simply add strides to last conv layer?
# TODO: add BatchNormalization layers


## Visualize the network

In [6]:
# plot_model writes the diagram to disk, so make sure the target folder exists.
os.makedirs('Images', exist_ok=True)
plot_model(model, to_file=os.path.join('Images','model.png'), show_shapes=True)
# Print a layer-by-layer overview of the network.
model.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Color_Input (InputLayer)        (None, 480, 640, 3)  0                                            
__________________________________________________________________________________________________
Infrared_Input (InputLayer)     (None, 480, 640, 1)  0                                            
__________________________________________________________________________________________________
Concatenate (Concatenate)       (None, 480, 640, 4)  0           Color_Input[0][0]                
                                                                 Infrared_Input[0][0]             
__________________________________________________________________________________________________
Conv3-64_1 (Conv2D)             (None, 480, 640, 64) 2368        Concatenate[0][0]                
__________

## Train the Model

In [None]:
# Train on batches from the training generator and validate on the validation
# generator; batch loading is spread over 6 workers using multiprocessing.
fit_options = dict(
    generator=training_generator,
    validation_data=validation_generator,
    use_multiprocessing=True,
    workers=6,
)
model.fit_generator(**fit_options)