In [1]:
import numpy as np

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Layer as KerasLayer
from tensorflow.keras.layers import InputLayer, GRU, LSTM, Bidirectional

In [2]:
class Prototype(KerasLayer):
    def __init__(self, k, **kwargs):
        """
        Parameters
        ----------
        k : int
            Number of prototype vectors to create.
        """
        super(Prototype, self).__init__(**kwargs)
        self.k = k

    def build(self, input_shape):
        print(f'Called `build` with input_shape: {input_shape}')

        # what initializer should we use?
        self.prototypes = self.add_weight(
            name='prototypes',
            shape=(1, self.k, input_shape[-1]),
            initializer='random_normal',
            trainable=True
        )

    def call(self, x):
        # add reg losses here?

        # L2 distances from prototypes
        x = tf.expand_dims(x, -2)
        d2 = tf.norm(x - self.prototypes, ord=2, axis=-1)

        # return exponentially squashed
        return tf.exp(-d2)

In [3]:
def rnn(input_shape=(None, 6),
        layer_type='lstm',
        layer_args={},
        layers=[32,64],
        dropout_rate=None):
    """
    Recurrent NN Encoder constructor function.
    One layer of `layer_type` will be created for each int in `layers`.
    All except the final recurrent layer will return sequences.
    """
    num_layers = len(layers)
    assert num_layers > 0, 'Must have at least one layer'

    layer_fn = GRU if 'gru' in layer_type.lower() else LSTM

    # Construct model
    model = Sequential([InputLayer(input_shape=input_shape)])

    for i, layer_units in enumerate(layers):
        return_seq = False if (i == (num_layers - 1)) else True

        next_layer = layer_fn(layer_units, return_sequences=return_seq, name=layer_type+str(i), **layer_args)

        model.add(Bidirectional(next_layer))

    return model

In [4]:
enc_model = rnn()
enc_model.input_shape, enc_model.output_shape

((None, None, 6), (None, 128))

In [5]:
enc_model.add(Prototype(16))
enc_model.input_shape, enc_model.output_shape

Called `build` with input_shape: (None, 128)
`x` is now: Tensor("prototype/ExpandDims:0", shape=(None, 1, 128), dtype=float32)


((None, None, 6), (None, 16))

In [6]:
enc_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional (Bidirectional (None, None, 64)          9984      
_________________________________________________________________
bidirectional_1 (Bidirection (None, 128)               66048     
_________________________________________________________________
prototype (Prototype)        (None, 16)                2048      
Total params: 78,080
Trainable params: 78,080
Non-trainable params: 0
_________________________________________________________________


In [7]:
x = tf.convert_to_tensor(np.random.rand(10,8,6))

In [8]:
enc_model(x)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

`x` is now: [[[-0.00283582  0.01728691 -0.0740431  ...  0.07896393 -0.02567625
   -0.06429633]]

 [[ 0.00809463  0.03288684 -0.07838279 ...  0.05920749 -0.02930295
   -0.05421176]]

 [[-0.03309038  0.02737395 -0.0384291  ...  0.07808799 -0.00481567
   -0.07680376]]

 ...

 [[-0.0286273   0.03764081 -0.04810949 ...  0.06464016  0.00786803
   -0.07525987]]

 [[-0.03038388  0.02889684 -0.05141699 ...  0.06042515 -0.00281157
   -0.06354368]]

 [[-0.01513464  0.01721616 -0.06005955 ...  0.08614102 -0.02160347
   -0.07351992]]]


<tf.Tensor: id=3312, shape=(10, 16), dtype=float32, numpy=
array([[0.47671127, 0.47191462, 0.45663172, 0.48426074, 0.4807453 ,
        0.4670902 , 0.48034397, 0.45819017, 0.47455812, 0.4826773 ,
        0.5079952 , 0.47092006, 0.46108443, 0.43865827, 0.428171  ,
        0.4758821 ],
       [0.49069038, 0.4959423 , 0.47992632, 0.50275016, 0.49121913,
        0.48237574, 0.49310982, 0.47888592, 0.4844642 , 0.49402687,
        0.51913023, 0.48373684, 0.48320827, 0.45336345, 0.44548222,
        0.4957009 ],
       [0.47904727, 0.47409672, 0.46658087, 0.49456388, 0.48433197,
        0.46448818, 0.50406414, 0.45822912, 0.485006  , 0.4970835 ,
        0.5234908 , 0.4791789 , 0.47352928, 0.4408275 , 0.43995106,
        0.48136073],
       [0.48616424, 0.49252707, 0.4771477 , 0.50048476, 0.48837665,
        0.47548854, 0.5087809 , 0.47938898, 0.47887725, 0.49530175,
        0.5285961 , 0.48416895, 0.48002526, 0.44420543, 0.4444706 ,
        0.49788713],
       [0.49068144, 0.49738473, 0.4816463

## Diversity regularization

In [12]:
x = tf.convert_to_tensor([[1.,1.], [1.,2.], [2.,2.]])

r = tf.expand_dims(tf.reduce_sum(x*x, 1), -1)

D = r - 2 * tf.matmul(x, x, transpose_b=True) + tf.transpose(r)

Rd = tf.nn.relu(tf.sqrt(D) - 1.)

tf.reduce_sum(Rd) / 2.

<tf.Tensor: id=104, shape=(), dtype=float32, numpy=0.41421354>

In [11]:
x = tf.convert_to_tensor([[1.,1.], [1.,2.], [2.,2.]])

r = tf.expand_dims(tf.reduce_sum(x*x, 1), -1)

D = r - 2 * tf.matmul(x, x, transpose_b=True) + tf.transpose(r)



<tf.Tensor: id=83, shape=(3, 3), dtype=float32, numpy=
array([[-0., -1., -2.],
       [-1., -0., -1.],
       [-2., -1., -0.]], dtype=float32)>

In [7]:
x*x

<tf.Tensor: id=47, shape=(3, 2), dtype=float32, numpy=
array([[1., 1.],
       [1., 4.],
       [4., 4.]], dtype=float32)>

In [18]:
x is x

True

In [9]:
def make2D(t):
    """Make a Tensor `t` 2D, raise ValueError if impossible."""
    ndim = tf.rank(t).numpy()
    if ndim == 2:
        return t
    elif ndim == 1:
        return tf.expand_dims(t, 0)
    else:
        t = tf.squeeze(t)
        if tf.rank(t).numpy() != 2:
            raise ValueError(f'Tensor cant be made 2D: {t}')
        else:
            return t


def distance_matrix(a, b):
    """Return the distance matrix between rows of `a` and `b`

    They must both be squeezable or expand_dims-able to 2D,
    and have compatible shapes (same number of columns).
    """
    a_was_b = a is b

    a = make2D(a)
    rA = tf.expand_dims(tf.reduce_sum(a * a, -1), -1)

    if a_was_b:
        b, rB = a, rA
    else:
        b = make2D(b)
        rB = tf.expand_dims(tf.reduce_sum(b * b, -1), -1)

    D = rA - 2 * tf.matmul(a, b, transpose_b=True) + tf.transpose(rB)

    return tf.sqrt(D)

In [10]:
y = tf.convert_to_tensor([[1.,2.], [2.,2.]])

d = distance_matrix(x, y)
d

<tf.Tensor: id=69, shape=(3, 2), dtype=float32, numpy=
array([[1.       , 1.4142135],
       [0.       , 1.       ],
       [1.       , 0.       ]], dtype=float32)>

In [19]:
tf.reduce_sum(tf.reduce_min(d, 1))

<tf.Tensor: id=130, shape=(), dtype=float32, numpy=1.0>

In [31]:
x0 = tf.expand_dims(x, 0)
y1 = tf.reshape(y, [2, 1, 2])
x0, y1

(<tf.Tensor: id=173, shape=(1, 3, 2), dtype=float32, numpy=
 array([[[1., 1.],
         [1., 2.],
         [2., 2.]]], dtype=float32)>,
 <tf.Tensor: id=175, shape=(2, 1, 2), dtype=float32, numpy=
 array([[[1., 2.]],
 
        [[2., 2.]]], dtype=float32)>)

In [32]:
tf.norm(x0 - y1, ord=2, axis=-1)

<tf.Tensor: id=181, shape=(2, 3), dtype=float32, numpy=
array([[1.       , 0.       , 1.       ],
       [1.4142135, 1.       , 0.       ]], dtype=float32)>

In [35]:
distance_matrix(x0, y1)

<tf.Tensor: id=207, shape=(3, 2), dtype=float32, numpy=
array([[1.       , 1.4142135],
       [0.       , 1.       ],
       [1.       , 0.       ]], dtype=float32)>