In [9]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'
import timeit
from datetime import datetime
from tensorflow import keras
from tensorflow.keras import layers
import array

# Load the `tensorboard` IPython extension via a line magic.
# NOTE(review): per the recorded cell output, re-running this cell in the same
# kernel prints a notice suggesting `%reload_ext tensorboard` instead.
%load_ext tensorboard

# Print the version string of the TensorFlow package imported as `tf`.
print("Using TensorFlow version %s" % tf.__version__)

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
Using TensorFlow version 2.3.0


In [10]:
# Padding: bring variable-length integer sequences to one common length.
raw_inputs = [
    [711, 632, 71],
    [73, 8, 3215, 55, 927],
    [83, 91, 1, 645, 1253, 927],
]

# pad_sequences fills with 0 unless the "value" parameter says otherwise.
# Padding can go at the front ("pre") or at the back ("post"); "post" is the
# recommended choice with RNN layers so that the CuDNN implementation of
# those layers can be used.
padded_inputs = tf.keras.preprocessing.sequence.pad_sequences(
    sequences=raw_inputs,
    padding="post",
)
print(padded_inputs)

[[ 711  632   71    0    0    0]
 [  73    8 3215   55  927    0]
 [  83   91    1  645 1253  927]]


In [12]:
# Three ways to introduce a mask:
#   1. a keras.layers.Masking layer,
#   2. a keras.layers.Embedding layer created with mask_zero=True,
#   3. a `mask` argument passed directly to RNN layers.

# --- Embedding with mask_zero=True ---
embedding = layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True)
masked_output = embedding(padded_inputs)
print(masked_output)
# The mask attached to the output: timesteps whose entry is False are ignored.
print(masked_output._keras_mask)

# --- Masking layer ---
masking_layer = layers.Masking()
# Fake an embedding lookup: grow the 2D ids to 3D by repeating each id
# 10 times along a new last axis, then convert to float32.
expanded_inputs = tf.expand_dims(padded_inputs, axis=-1)
tiled_inputs = tf.tile(expanded_inputs, [1, 1, 10])
unmasked_embedding = tf.cast(tiled_inputs, tf.float32)

masked_embedding = masking_layer(unmasked_embedding)
print(masked_embedding._keras_mask)

tf.Tensor(
[[[ 1.8644333e-03 -4.2547870e-02  3.1563725e-02  4.9641617e-03
   -5.5298917e-03 -4.7941137e-02  3.1341378e-02  1.6163591e-02
   -2.0095600e-02 -3.2767653e-03  9.6793175e-03  4.6932686e-02
    1.7245412e-03  2.6263747e-02 -3.7171770e-02  2.3374334e-03]
  [ 1.2230683e-02 -2.4099840e-02 -3.5484720e-02 -1.6741179e-02
    1.1204578e-02  6.8305843e-03 -3.5956550e-02 -4.1473173e-02
    4.4587795e-02  2.3508668e-03 -2.0592213e-03 -4.9225748e-02
    1.0493267e-02 -2.8892606e-04  1.7629776e-02 -2.5625562e-02]
  [-3.0196786e-02 -1.5295614e-02  2.7550314e-02  1.6206790e-02
   -3.5814047e-02 -1.8701386e-02 -1.8178247e-02 -3.6727883e-02
   -2.4148751e-02  1.1653304e-02 -2.4332022e-02  2.6472118e-02
    2.9094789e-02 -2.6530469e-02 -4.3845393e-02 -1.9853700e-02]
  [ 4.7012497e-02 -5.4558143e-03 -9.2979558e-03 -3.6697555e-02
   -4.3919802e-02  1.7334830e-02  2.9253129e-02 -1.9433126e-03
   -1.5680861e-02  2.3198929e-02 -1.0682009e-02  4.6916034e-02
    4.2791020e-02 -6.0172193e-03  3.79665

In [4]:
# Mask propagation with the Sequential and the Functional API.

# Sequential API: the Embedding layer produces the mask, the LSTM follows it.
model = keras.Sequential(
    [
        layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True),
        layers.LSTM(32),
    ]
)

# Functional API: the same two-layer stack wired explicitly.
# Note that this rebinds `model`, replacing the Sequential model above.
inputs = keras.Input(shape=(None,), dtype="int32")
x = layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True)(inputs)
outputs = layers.LSTM(32)(x)

model = keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Passing mask tensors directly to a layer
# Example using a subclassed keras.layers.Layer:
# pass the output of a mask-producing layer (e.g. layers.Embedding or layers.Masking) to a mask-consuming layer (e.g. LSTM).
# A mask-producing layer exposes a compute_mask(inputs, previous_mask) method, while a mask-consuming layer accepts a `mask` argument in its __call__ method.

class MyLayer(keras.layers.Layer):
    """Embed integer ids, then run an LSTM with the embedding's mask passed by hand."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Index 0 is treated as padding by the embedding (mask_zero=True).
        self.embedding = layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True)
        self.lstm = layers.LSTM(32)

    def call(self, inputs):
        """Return the LSTM output for `inputs`, skipping the masked timesteps."""
        embedded = self.embedding(inputs)
        # A hand-built mask would do just as well: any boolean tensor of
        # shape (batch_size, timesteps) is accepted.
        padding_mask = self.embedding.compute_mask(inputs)
        # The LSTM ignores the timesteps the mask marks as padding.
        return self.lstm(embedded, mask=padding_mask)


layer = MyLayer()
# Random integer ids in [0, 100), shaped (32, 10).
x = (np.random.random((32, 10)) * 100).astype("int32")
layer(x)




In [25]:
# This is a custom layer that splits its input into two halves along the time dimension. Because of that, the corresponding incoming mask must also be split into two.
# That is what compute_mask does below (this method generates the layer's output mask).

class TemporalSplit(keras.layers.Layer):
    """Cut the input into two equal pieces along the time axis (axis 1)."""

    def call(self, inputs):
        # The input is expected to be 3D (with a 2D mask); tf.split returns
        # the two sub-tensors taken along axis 1.
        return tf.split(inputs, num_or_size_splits=2, axis=1)

    def compute_mask(self, inputs, mask=None):
        # When a mask is present it is split the same way as the input;
        # without one there is nothing to propagate.
        return None if mask is None else tf.split(mask, num_or_size_splits=2, axis=1)

# Show the tensors involved before splitting.
print("padded_inputs: ", padded_inputs)
print(
    "tf.expand_dims(padded_inputs, axis=-1): ",
    tf.expand_dims(padded_inputs, axis=-1).shape.as_list(),
)
print("unmasked_embedding ", unmasked_embedding)
print("masked_embedding: ", masked_embedding)

# Split the masked tensor and print the mask carried by each half.
first_half, second_half = TemporalSplit()(masked_embedding)
for half in (first_half, second_half):
    print(half._keras_mask)

padded_inputs:  [[ 711  632   71    0    0    0]
 [  73    8 3215   55  927    0]
 [  83   91    1  645 1253  927]]
tf.expand_dims(padded_inputs, axis=-1):  [3, 6, 1]
unmasked_embedding  tf.Tensor(
[[[7.110e+02 7.110e+02 7.110e+02 7.110e+02 7.110e+02 7.110e+02 7.110e+02
   7.110e+02 7.110e+02 7.110e+02]
  [6.320e+02 6.320e+02 6.320e+02 6.320e+02 6.320e+02 6.320e+02 6.320e+02
   6.320e+02 6.320e+02 6.320e+02]
  [7.100e+01 7.100e+01 7.100e+01 7.100e+01 7.100e+01 7.100e+01 7.100e+01
   7.100e+01 7.100e+01 7.100e+01]
  [0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
   0.000e+00 0.000e+00 0.000e+00]
  [0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
   0.000e+00 0.000e+00 0.000e+00]
  [0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
   0.000e+00 0.000e+00 0.000e+00]]

 [[7.300e+01 7.300e+01 7.300e+01 7.300e+01 7.300e+01 7.300e+01 7.300e+01
   7.300e+01 7.300e+01 7.300e+01]
  [8.000e+00 8.000e+00 8.000e+00 8.000e+00 8.000e+0

In [29]:
# Another example: a custom embedding layer that generates its own mask from the input values via compute_mask().
class CustomEmbedding(keras.layers.Layer):
    """Embedding lookup layer that can optionally emit a mask hiding zero ids."""

    def __init__(self, input_dim, output_dim, mask_zero=False, **kwargs):
        super().__init__(**kwargs)
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.mask_zero = mask_zero

    def build(self, input_shape):
        # One row per id, one column per embedding dimension.
        table_shape = (self.input_dim, self.output_dim)
        self.embeddings = self.add_weight(
            shape=table_shape,
            initializer="random_normal",
            dtype="float32",
        )

    def call(self, inputs):
        # Gather the embedding row for every id in `inputs`.
        return tf.nn.embedding_lookup(params=self.embeddings, ids=inputs)

    def compute_mask(self, inputs, mask=None):
        # With mask_zero enabled, every position whose id is non-zero is True;
        # otherwise no mask is produced at all.
        if self.mask_zero:
            return tf.not_equal(inputs, 0)
        return None


layer = CustomEmbedding(10, 32, mask_zero=True)
# Random integer ids in [0, 9), shaped (3, 10).
x = (np.random.random((3, 10)) * 9).astype("int32")
print(x)

# Embedded values for the ids.
y = layer(x)
print(y)

# Boolean mask: False wherever the id is 0.
mask = layer.compute_mask(x)
print(mask)

[[3 4 0 3 1 5 6 2 2 5]
 [3 7 0 2 0 2 6 6 8 7]
 [8 7 0 0 6 1 8 2 4 7]]
tf.Tensor(
[[[ 4.13454995e-02  2.81021427e-02  2.26016715e-02  6.42389283e-02
   -3.23462002e-02 -4.42980342e-02 -2.27454957e-02 -2.91165058e-02
    5.44538982e-02  8.35291576e-05  6.42639622e-02  3.31923887e-02
    3.04439403e-02  5.26229516e-02  1.24696814e-01  9.28672627e-02
   -4.00146917e-02 -3.49870659e-02 -2.29416844e-02  1.63964462e-02
    4.85911779e-02  3.69289778e-02 -3.32369166e-03 -2.82045156e-02
   -1.81165636e-02  2.23863740e-02  7.40339831e-02  6.03724131e-03
   -6.53247461e-02 -7.41132721e-02  5.03686629e-03  2.99828202e-02]
  [ 4.62367274e-02 -2.31569330e-03  1.26677454e-02 -6.43004775e-02
   -1.84597075e-02 -3.14814299e-02  2.62522455e-02 -6.01142757e-02
   -2.25735269e-02  8.46739411e-02  3.78728360e-02 -4.62870039e-02
    7.70708686e-03  4.48298175e-03  1.32348416e-02 -1.82713345e-02
   -9.80684757e-02 -4.85746516e-03 -2.42117178e-02  1.23369507e-01
    1.90173667e-02 -8.59398842e-02 -3.05462535e

In [30]:
# by default any custom layer would destroy the current mask. In order to force it not to:
# you need to set self.supports_masking=True in the constructor.

class MyActivation(keras.layers.Layer):
    """ReLU activation layer that lets an incoming mask pass through."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Declare the layer safe for mask propagation; without this flag a
        # custom layer destroys the current mask.
        self.supports_masking = True

    def call(self, inputs):
        activated = tf.nn.relu(inputs)
        return activated
    
    
inputs = keras.Input(shape=(None,), dtype="int32")
x = layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True)(inputs)
# MyActivation forwards the mask produced by the Embedding layer.
x = MyActivation()(x)
print("Mask found:", x._keras_mask)
# The LSTM therefore still receives the mask.
outputs = layers.LSTM(32)(x)

model = keras.Model(inputs=inputs, outputs=outputs)

Mask found: Tensor("embedding_6/NotEqual:0", shape=(None, None), dtype=bool)


In [32]:
# writing layers that do masking:
class TemporalSoftmax(keras.layers.Layer):
    """Softmax across the time axis (axis 1) that ignores masked timesteps.

    Expects `inputs` shaped (batch, timesteps, features) and, when given,
    a boolean `mask` shaped (batch, timesteps). For every batch entry and
    feature, the unmasked timesteps receive weights that sum to 1 and the
    masked timesteps receive exactly 0.
    """

    def call(self, inputs, mask=None):
        """Return the masked softmax of `inputs` over the time axis.

        Args:
            inputs: float tensor, (batch, timesteps, features).
            mask: optional boolean tensor, (batch, timesteps); False marks
                timesteps to exclude. When None, every timestep takes part.

        Returns:
            Tensor with the same shape as `inputs`.
        """
        if mask is None:
            # No mask was propagated to this layer: plain softmax over time.
            return tf.nn.softmax(inputs, axis=1)

        # (batch, timesteps) -> (batch, timesteps, 1) so the mask broadcasts
        # over the feature axis; match the input dtype so the product is valid.
        broadcast_float_mask = tf.expand_dims(tf.cast(mask, inputs.dtype), -1)
        # Masked timesteps contribute exactly 0 to numerator and denominator.
        inputs_exp = tf.exp(inputs) * broadcast_float_mask
        # Normalise over the time axis (axis 1), not the feature axis.
        inputs_sum = tf.reduce_sum(inputs_exp, axis=1, keepdims=True)
        # A sequence with every timestep masked has a zero denominator;
        # return 0 there instead of NaN.
        return tf.math.divide_no_nan(inputs_exp, inputs_sum)


# Build a small model: Embedding (creates the mask) -> Dense(1) -> TemporalSoftmax.
inputs = keras.Input(shape=(None,), dtype="int32")
x = layers.Embedding(input_dim=10, output_dim=32, mask_zero=True)(inputs)
x = layers.Dense(1)(x)
outputs = TemporalSoftmax()(x)

model = keras.Model(inputs, outputs)
# The model has a single input, so only the integer ids are passed. A second
# positional argument would be bound to the `training` parameter of the
# model's call rather than being treated as data.
y = model(np.random.randint(0, 10, size=(32, 100)))