In [3]:
import keras

# Two-output model over inputs of shape (200, 4) per sample:
#   output 1 - sigmoid applied to a single dense unit
#   output 2 - the 'grads' merge of (inp, dense)
input_shape = (200, 4)
inp = keras.layers.Input(shape=input_shape)

# Feature path: 1-D convolution -> ReLU -> global average pooling.
conv = keras.layers.Convolution1D(
    nb_filter=15,
    filter_length=15,
    subsample_length=1)(inp)
relu_post_conv = keras.layers.Activation("relu")(conv)
gap = keras.layers.pooling.GlobalAveragePooling1D()(relu_post_conv)

# `dense` (the pre-sigmoid value) is kept as its own tensor because both the
# sigmoid output and the gradient output below are built from it.
dense = keras.layers.Dense(1)(gap)
sigmoid_out = keras.layers.Activation("sigmoid")(dense)

# NOTE(review): mode='grads' does not look like a stock Keras merge mode --
# presumably this needs a patched Keras build; confirm. The sanity-check cell
# further down compares this output against T.grad(K.sum(dense), inp).
grad_layer = keras.layers.core.Merge(mode='grads')([inp, dense])

# One loss per output, in the same order as the `output` list.
model = keras.models.Model(
    input=inp,
    output=[sigmoid_out, grad_layer])
model.compile(
    optimizer="adam",
    loss=["binary_crossentropy", "mse"])

In [5]:
import numpy as np

num_samples = 1000

# Draw all synthetic data from a locally seeded generator so that this cell
# produces the same arrays on every run (the global NumPy RNG is left untouched).
rng = np.random.RandomState(1234)

# Random inputs, random 0/1 labels, and random gradient targets shaped like X.
X = rng.random_sample([num_samples] + list(input_shape))
Y_labels = (rng.random_sample([num_samples, 1]) > 0.5) * 1.0
Y_grads = rng.random_sample([num_samples] + list(input_shape))

# Make a mutually exclusive set of samples for the actual labels and the grads:
# each sample gets weight 1.0 on exactly one of the two outputs and 0.0 on the other.
sample_weights_labels = (rng.random_sample([num_samples]) > 0.5) * 1.0
sample_weights_grads = 1.0 - sample_weights_labels

# Targets and sample weights are listed in the same order as the model's outputs.
# No epoch count is passed; the recorded output below shows 10 epochs.
model.fit(
    x=X,
    y=[Y_labels, Y_grads],
    sample_weight=[sample_weights_labels, sample_weights_grads])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f64f9fdf690>

In [8]:
# Do a sanity check to make sure that the model's second output really is the
# gradient of `dense` with respect to the input.
import theano
from theano import tensor as T
from keras import backend as K

# The model returns [sigmoid_out, grad_layer]; index 1 is the gradient output.
model_predicted_grads = model.predict(X)[1]

# Reference gradients computed directly with Theano. `dense` is summed so that
# T.grad receives a scalar cost to differentiate with respect to `inp`.
actual_grad_func = theano.function([inp], T.grad(K.sum(dense), inp), allow_input_downcast=True)
actual_grads = actual_grad_func(X)

max_abs_diff = np.max(np.abs(model_predicted_grads - actual_grads))
print(max_abs_diff)

# Fail loudly instead of relying on the reader to eyeball the printed number.
np.testing.assert_allclose(model_predicted_grads, actual_grads, rtol=0, atol=1e-6)

0.0
