In [1]:
from keras import backend as K
import numpy as np

def softmax2d(x):
    """Softmax taken jointly over axes 1 and 2 of a 4D tensor.

    Axes 1 and 2 of `x` are merged into a single axis, a numerically
    stabilised softmax (the per-slice maximum is subtracted before
    exponentiating) is applied along that merged axis separately for every
    index of axis 3, and the result is reshaped back to the layout of `x`.

    Returns a tensor with the same axis sizes as `x` in which the values
    over axes 1 and 2 sum to 1 for each index of axis 3.
    """
    # Todo: figure out why the shapes have to be rebuilt from plain ints
    # instead of passing x.shape directly; using it directly causes a keras problem
    rows, cols, channels = (int(x.shape[axis]) for axis in (1, 2, 3))

    # Merge the two spatial axes so one reduction covers every position.
    flat = K.reshape(x, (-1, 1, rows * cols, channels))
    shifted = flat - K.max(flat, axis=-2, keepdims=True)
    exps = K.exp(shifted)
    probs = exps / K.sum(exps, axis=-2, keepdims=True)
    return K.reshape(probs, (-1, rows, cols, channels))


# returns the x,y coordinates, scaled to the range 0..1 (both ends included), of the expected maximum location for each channel
# input should pass through softmax2d before being passed to this function
def softargmax(x):
    """Expected (soft-argmax) coordinates for each channel of a spatial map.

    `x` is treated as a 4D tensor whose axes 1 and 2 are the spatial axes.
    It should already have passed through `softmax2d`, so that the values
    over those two axes sum to 1 for every index of axis 3.

    Positions along each spatial axis are scaled linearly so the first index
    maps to 0 and the last index maps to 1. The weighted sum of those
    positions is taken with `x` as the weights.

    Returns a tensor whose last axis holds two values: first the expected
    position along axis 2, then the expected position along axis 1. With a
    (batch, rows, cols, channels) input the result is presumably
    (batch, channels, 2) -- TODO confirm against the backend's `K.dot`.
    """
    rows = int(x.shape[1])
    cols = int(x.shape[2])

    # max(n - 1, 1) keeps a size-1 axis from producing 0/0 = NaN; for n > 1
    # the divisor is unchanged.
    row_positions = np.arange(rows, dtype='float64') / max(rows - 1, 1)
    col_positions = np.arange(cols, dtype='float64') / max(cols - 1, 1)

    # These vectors are fixed, so build them as constants rather than backend
    # variables: variables created inside a Lambda layer are not tracked by
    # the layer.
    xc = K.constant(row_positions)
    x1 = K.constant(np.ones([rows]))
    yc = K.constant(col_positions)
    y1 = K.constant(np.ones([cols]))

    # Expected position along axis 2: weight by position over axis 2, then
    # sum out axis 1 with the ones vector.
    xx = K.dot(yc, x)
    xx = K.dot(x1, xx)

    # Expected position along axis 1: sum out axis 2 with the ones vector,
    # then weight by position over axis 1.
    xy = K.dot(y1, x)
    xy = K.dot(xc, xy)

    return K.stack([xx, xy], axis=-1)

Using TensorFlow backend.


In [13]:
from keras.layers import Dense,Input,BatchNormalization,Dropout,Conv2D,AvgPool2D,Flatten,Lambda,Concatenate
from keras.models import Model
import keras
from keras.applications.vgg16 import VGG16

# Keyword arguments shared by the Conv2D layers built below.
commonkwargs = dict(activation='relu', padding='valid')

# Image geometry and the number of classifier categories.
height, width = 160, 320
image_size = [width, height]
classes = 2

# Input layers: two image inputs and a 2-vector for the combined model, plus
# the image input of the shared feature sub-model.
image_shape = (height, width, 3)
img_robot_input = Input(name='image_robot_input', shape=image_shape)
img_classifier_input = Input(name='image_classifier_input', shape=image_shape)
pos_input = Input(name='position_model_input', shape=(2,))
img_input = Input(name='image_input', shape=image_shape)

# Conv net and attention layers.
# Each entry is (filters, strides, layer name); every one of these
# convolutions is followed by a BatchNormalization layer.
conv_specs = [
    (64, (2, 2), 'block1_conv1'),
    (64, (1, 1), 'block1_conv2'),
    (32, (1, 1), 'block2_conv1'),
    (32, (1, 1), 'block2_conv2'),
]

x = img_input
for bn_index, (filters, strides, conv_name) in enumerate(conv_specs, start=1):
    x = Conv2D(filters, (3, 3), strides=strides, name=conv_name, **commonkwargs)(x)
    x = BatchNormalization(name='batch_norm%d' % bn_index)(x)

# The final convolution has no batch norm; its 16 channels each go through a
# spatial softmax and are reduced to a pair of expected coordinates.
x = Conv2D(16, (3, 3), strides=(1, 1), name='block3_conv1', **commonkwargs)(x)
x = Lambda(softmax2d, name='image_softmax2d')(x)
x = Dropout(.25)(x)
x = Lambda(softargmax, name="expected_feature_location")(x)
feature_layer = Flatten(name='flattend_feature')(x)
sub_model = Model(img_input, feature_layer, name='feature_model')

# Initialise the first convolution from the ImageNet-trained VGG16 layer of
# the same name.
vgg_first_conv = VGG16(weights='imagenet', include_top=False).get_layer('block1_conv1')
sub_model.get_layer('block1_conv1').set_weights(vgg_first_conv.get_weights())

# Classifier head: shared features -> 16 -> 8 -> softmax over `classes`.
classifier_features = sub_model(img_classifier_input)
classifier_dense1 = Dense(16, name='classifier_dense1', activation='relu')(classifier_features)
classifier_dense2 = Dense(8, name='classifier_dense2', activation='relu')(classifier_dense1)
classifier_output = Dense(classes, name='classifier_output', activation='softmax')(classifier_dense2)

# Robot head: shared features concatenated with the position input
# -> 16 -> 8 -> a single linear output.
robot_features = keras.layers.concatenate([sub_model(img_robot_input), pos_input])
robot_dense1 = Dense(16, name='robot_dense1', activation='relu')(robot_features)
robot_dense2 = Dense(8, name='robot_dense2', activation='relu')(robot_dense1)
robot_output = Dense(1, name='robot_output', activation='linear')(robot_dense2)

# Combined model: takes the robot image, the position vector and the
# classifier image, and produces the robot output and the classifier output.
model = Model([img_robot_input, pos_input, img_classifier_input], [robot_output, classifier_output])

# The learning rate must be set on the optimizer object. `compile` does not
# treat an `lr` keyword as a learning rate (unknown keyword arguments are
# forwarded to the backend function), so it is configured on Adam directly.
# NOTE(review): 'accuracy' is also reported for the linear 'robot_output';
# it is probably only meaningful for 'classifier_output' -- confirm.
model.compile(optimizer=keras.optimizers.Adam(lr=3e-4),
              loss={'robot_output': 'mse', 'classifier_output': 'categorical_crossentropy'},
              loss_weights={'robot_output': 1, 'classifier_output': 0.5},
              metrics=['accuracy'])
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
image_robot_input (InputLayer)   (None, 160, 320, 3)   0                                            
____________________________________________________________________________________________________
feature_model (Model)            (None, 32)            71824       image_classifier_input[0][0]     
                                                                   image_robot_input[0][0]          
____________________________________________________________________________________________________
position_model_input (InputLayer (None, 2)             0                                            
____________________________________________________________________________________________________
image_classifier_input (InputLay (None, 160, 320, 3)   0                                   

In [15]:
import pydot, graphviz
from keras.utils import plot_model
# Write a diagram of the combined model's layer graph to model.png.
# The pydot / graphviz imports above are presumably an availability check
# for the plotting dependencies -- they are not referenced directly here.
plot_model(model, to_file='model.png')