In [38]:
from keras.layers import Dense, Input, LSTM
from keras.models import Model
from keras.layers.wrappers import TimeDistributed, Bidirectional
from tensorflow.keras import backend as K
from stochastic_rnn_keras import StochasticLSTM
import numpy as np
import tensorflow as tf
from keras.callbacks import Callback

In [2]:
# Load the TensorBoard notebook extension.
# NOTE(review): nothing in the visible cells uses TensorBoard afterwards — confirm it is still needed.
%load_ext tensorboard

In [3]:
class ModelPrintDropout(Callback):
    """Print every layer's dropout and recurrent-dropout value after each epoch.

    For each layer that exposes a `dropout` / `recurrent_dropout` attribute the
    callback prints the layer's `p` / `p_r` attribute. When the configured rate
    is exactly 1.0 the value is converted with `.numpy()` first.
    NOTE(review): 1.0 presumably is StochasticLSTM's marker for a learned
    dropout probability — confirm against stochastic_rnn_keras.

    Layers that expose a rate but carry no `p` / `p_r` (e.g. the stock Keras
    recurrent layers) are reported with their configured rate instead of
    raising AttributeError.
    """

    # (configured-rate attribute, attribute holding the probability to report)
    _RATE_ATTRS = (('dropout', 'p'), ('recurrent_dropout', 'p_r'))

    def on_epoch_end(self, epoch, logs=None):
        """Print one line per dropout kind for every layer of the attached model."""
        for layer in self.model.layers:
            for rate_attr, prob_attr in self._RATE_ATTRS:
                if not hasattr(layer, rate_attr):
                    continue
                rate = getattr(layer, rate_attr)
                # Fall back to the configured rate when the layer has no
                # dedicated probability attribute.
                value = getattr(layer, prob_attr, rate)
                if rate == 1.0 and hasattr(value, 'numpy'):
                    value = value.numpy()
                print(layer.name, rate_attr + ' =', value)


print_dropout = ModelPrintDropout()

# Build the base model and add an extra output head

In [4]:
# Base model: variable-length sequences of 10 features -> StochasticLSTM(200)
# -> 60-unit ReLU encoder -> 5-unit tanh 'mean' head.
# Both Dense layers use L2 regularization on kernel and bias.
l2_kwargs = dict(kernel_regularizer='l2', bias_regularizer='l2')

inputs = Input(shape=(None, 10), name='input')
rnn = StochasticLSTM(200, dropout=1.0, recurrent_dropout=1.0)(inputs)
comp = Dense(60, activation='relu', name='encoder', **l2_kwargs)(rnn)
outputs = Dense(5, activation='tanh', name='mean', **l2_kwargs)(comp)

model = Model(inputs=inputs, outputs=outputs)

In [5]:
# Show the layer stack and parameter counts of the single-output base model.
model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           (None, None, 10)          0         
_________________________________________________________________
stochastic_lstm_1 (Stochasti (None, 200)               168802    
_________________________________________________________________
encoder (Dense)              (None, 60)                12060     
_________________________________________________________________
mean (Dense)                 (None, 5)                 305       
Total params: 181,167
Trainable params: 181,167
Non-trainable params: 0
_________________________________________________________________


In [6]:
# Configure the base model for training: Adam optimizer, mean-squared-error loss.
model.compile(optimizer='adam', loss='mse')

In [7]:
# Attach a second 5-unit head ('log_var') to the encoder output and expose it
# next to the existing 'mean' head. model2 reuses the layers of `model`, so the
# two models share their weights.
# NOTE(review): the tanh activation bounds this head to [-1, 1]; it is later
# used as a log-variance — confirm that this bound is intended.
encoder_output = model.get_layer('encoder').output
outputs2 = Dense(
    5,
    activation='tanh',
    name='log_var',
    kernel_regularizer='l2',
    bias_regularizer='l2',
)(encoder_output)
model2 = Model(inputs=model.input, outputs=[model.output, outputs2])

In [8]:
# Summarize the two-headed model; the "Connected to" column shows which layer feeds each head.
model2.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              (None, None, 10)     0                                            
__________________________________________________________________________________________________
stochastic_lstm_1 (StochasticLS (None, 200)          168802      input[0][0]                      
__________________________________________________________________________________________________
encoder (Dense)                 (None, 60)           12060       stochastic_lstm_1[0][0]          
__________________________________________________________________________________________________
mean (Dense)                    (None, 5)            305         encoder[0][0]                    
____________________________________________________________________________________________

In [9]:
# Seed NumPy's global RNG so the synthetic data drawn in this notebook is the
# same on every Restart & Run All. This only fixes the NumPy draws; it does not
# seed the Keras/TensorFlow random generators.
np.random.seed(0)

# Small probe batch: 3 sequences of 20 time steps with 10 features each.
v = np.random.randn(3, 20, 10).astype('float32')

In [10]:
# Synthetic training set: 100 sequences of 20 time steps with 10 features.
X = np.random.randn(100, 20, 10).astype('float32')

# Target: sin(x) + cos(x) of the first 5 features (the 10x5 eye matrix selects
# them), averaged over the time axis -> shape (100, 5).
first_five = np.eye(10, 5)
y = ((np.sin(X) + np.cos(X)) @ first_five).mean(axis=1)

In [11]:
# Forward pass of the base model on the probe batch `v`, before any training.
model.predict(v)

array([[ 0.10506961,  0.08045112, -0.07071632,  0.00119417, -0.03733012],
       [-0.03830958,  0.00890239,  0.01948961,  0.00721035,  0.00987304],
       [ 0.03608369,  0.16203481,  0.00865258,  0.02871969, -0.0263401 ]],
      dtype=float32)

In [12]:
# Same probe batch through the two-headed model: returns [mean, log_var].
# The mean head is built from the same layers as `model`.
model2.predict(v)

[array([[ 0.10506961,  0.08045112, -0.07071632,  0.00119417, -0.03733012],
        [-0.03830958,  0.00890239,  0.01948961,  0.00721035,  0.00987304],
        [ 0.03608369,  0.16203481,  0.00865258,  0.02871969, -0.0263401 ]],
       dtype=float32),
 array([[-0.03209821,  0.05664485, -0.03966241, -0.01880117, -0.03987407],
        [ 0.01455253, -0.06375345,  0.05477182, -0.01301498, -0.08147104],
        [-0.00689477, -0.04546592,  0.08746171, -0.04841   , -0.06112893]],
       dtype=float32)]

In [14]:
# Fit the base model for 5 epochs; the callback prints the dropout values of
# every layer that has them at the end of each epoch.
model.fit(x=X, y=y, epochs=5, callbacks=[print_dropout])

Epoch 1/5
stochastic_lstm_1 dropout = 0.42584634
stochastic_lstm_1 recurrent_dropout = 0.5724356
Epoch 2/5
stochastic_lstm_1 dropout = 0.42202845
stochastic_lstm_1 recurrent_dropout = 0.56844944
Epoch 3/5
stochastic_lstm_1 dropout = 0.41823038
stochastic_lstm_1 recurrent_dropout = 0.5644913
Epoch 4/5
stochastic_lstm_1 dropout = 0.41445062
stochastic_lstm_1 recurrent_dropout = 0.5605382
Epoch 5/5
stochastic_lstm_1 dropout = 0.41072127
stochastic_lstm_1 recurrent_dropout = 0.5566091


<keras.callbacks.callbacks.History at 0x64314a290>

# Loss function with 2 outputs

In [15]:
# Training wrapper around model2: the targets enter as a second input so the
# loss can combine both heads, and the loss is attached with add_loss instead
# of being passed to compile().
target_input = Input(shape=(5,))
mean_pred, log_var_pred = model2.output

# 0.5 * mean(exp(-log_var) * (mean - target)^2 + log_var)
squared_error = (mean_pred - target_input) ** 2
two_head_loss = 0.5 * K.mean(K.exp(-log_var_pred) * squared_error + log_var_pred)

model3 = Model(inputs=[model2.input, target_input], outputs=model2.output)
model3.add_loss(two_head_loss)
model3.compile(optimizer='adam')

  'be expecting any data to be passed to {0}.'.format(name))
  'be expecting any data to be passed to {0}.'.format(name))


In [16]:
# Probe batch again, after `model` was fitted: model2 is built from the same
# layers, so its mean head now reflects that training.
model2.predict(v)

[array([[0.6688282 , 0.59050834, 0.5649838 , 0.6878786 , 0.6550612 ],
        [0.70221424, 0.5707112 , 0.7092871 , 0.67028546, 0.63442016],
        [0.7387352 , 0.67431253, 0.7245059 , 0.6961546 , 0.7112113 ]],
       dtype=float32),
 array([[-0.15367752,  0.17192887, -0.33005428, -0.3917558 , -0.21335055],
        [-0.11235338,  0.01655272, -0.12579389, -0.2817375 , -0.3511674 ],
        [-0.17450507,  0.13017642, -0.15992749, -0.2954721 , -0.34417424]],
       dtype=float32)]

In [17]:
# Train both heads through model3. The targets `y` are passed as the second
# input (no `y=` argument) because the loss was attached with add_loss.
model3.fit(x=[X, y], epochs=4)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.callbacks.History at 0x6444c2d50>

In [18]:
# Mean-head predictions of model3 for the first 3 training sequences.
model3.predict([X, y])[0][:3]

array([[0.70690054, 0.6415864 , 0.85384536, 0.74387   , 0.731544  ],
       [0.636916  , 0.62277   , 0.8279238 , 0.6629631 , 0.6979196 ],
       [0.717683  , 0.65856576, 0.8336605 , 0.67078227, 0.7173283 ]],
      dtype=float32)

In [19]:
# Same rows through model2 (no target input needed), for comparison with the
# model3 predictions.
model2.predict(X)[0][:3]

array([[0.70690054, 0.6415864 , 0.85384536, 0.74387   , 0.731544  ],
       [0.636916  , 0.62277   , 0.8279238 , 0.6629631 , 0.6979196 ],
       [0.717683  , 0.65856576, 0.8336605 , 0.67078227, 0.7173283 ]],
      dtype=float32)

# Sample from model several times

In [20]:
from keras.layers.wrappers import TimeDistributed
from keras.layers import Lambda

In [21]:
def get_mc_model(model, sample:int=10):
    """Wrap `model` so that one forward pass evaluates it `sample` times per input.

    The input is repeated `sample` times along a new axis 1, and each output of
    `model` is applied to every repeat through its own `TimeDistributed`
    wrapper. Every returned output therefore gains a sample axis at position 1.

    Args:
        model: Keras model with a single input tensor.
        sample: Number of repeats of each input; must be at least 1.

    Returns:
        A `Model` with the same input as `model` and one output per entry of
        `model.outputs`.

    Raises:
        ValueError: If `sample` is smaller than 1.
    """
    if sample < 1:
        raise ValueError('sample must be a positive integer, got {!r}'.format(sample))

    def repeat_along_sample_axis(x):
        # One multiple per axis of the expanded tensor; only the new axis 1 is
        # repeated, so the rank of the input is not hard-coded.
        multiples = [1, sample] + [1] * (K.ndim(x) - 1)
        return K.tile(K.expand_dims(x, axis=1), multiples)

    td_input = Lambda(repeat_along_sample_axis, name='expand_input')(model.input)

    # NOTE(review): every output gets its own sub-model and TimeDistributed
    # wrapper, so the layers in front of the heads are evaluated once per
    # output. If dropout is random at inference, sample i of one output may
    # not come from the same mask as sample i of another — confirm.
    sampled_outputs = [
        TimeDistributed(Model(inputs=model.input, outputs=output))(td_input)
        for output in model.outputs
    ]
    return Model(inputs=model.input, outputs=sampled_outputs)

In [22]:
# Monte-Carlo wrapper around the two-headed model, using the default of 10 samples.
model4 = get_mc_model(model2)

In [23]:
# Inspect the wrapper: each output has shape (None, 10, 5), i.e. a sample axis
# of size 10. The total parameter count is lower than the sum of the two
# TimeDistributed rows, so the wrappers share weights.
model4.summary()

Model: "model_6"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              (None, None, 10)     0                                            
__________________________________________________________________________________________________
expand_input (Lambda)           (None, 10, None, 10) 0           input[0][0]                      
__________________________________________________________________________________________________
time_distributed_1 (TimeDistrib (None, 10, 5)        181167      expand_input[0][0]               
__________________________________________________________________________________________________
time_distributed_2 (TimeDistrib (None, 10, 5)        181167      expand_input[0][0]               
Total params: 181,472
Trainable params: 181,472
Non-trainable params: 0
____________________

In [24]:
%%timeit
model2.predict(v)
model2.predict(v)
model2.predict(v)
model2.predict(v)

51.1 ms ± 13.6 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [25]:
%%timeit
# One predict call on the MC wrapper, which returns all 10 samples per input.
model4.predict(v)

59.3 ms ± 10.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [26]:
# Sanity check: sample 0 of the MC model's first output matches model2's first output.
# NOTE(review): this passing suggests predict() is deterministic here — confirm
# that the samples along axis 1 actually differ before treating them as MC draws.
np.allclose(model2.predict(v)[0], model4.predict(v)[0][:,0,:])

True

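- Using TimeDistributed is faster per sample (10 samples in one call took ~59 ms, versus ~51 ms for 4 sequential `predict` calls), at the cost of a more complex model
- The samples lie along axis=1 of each output, so aggregate them (e.g. mean or variance) over axis=1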