In [69]:
import tensorflow as tf
import numpy as np

## tf.keras.layers.RNN
```
tf.keras.layers.RNN(
    cell,
    return_sequences=False,
    return_state=False,
    go_backwards=False,
    stateful=False,
    unroll=False,
    zero_output_for_mask=False,
    **kwargs
)
```
* args
  - cell  
    A RNN cell instance or a list of RNN cell instances
    
  - 
* Input shape:
    3-D tensor with shape `(batch_size, timesteps, features)`.

* Output shape:
    - If `return_state`: a list of tensors. The first tensor is
    the output. The remaining tensors are the last states,
    each with shape `(batch_size, state_size)`, where `state_size` could
    be a high dimension tensor shape.
    - If `return_sequences`: 3D tensor with shape
    `(batch_size, timesteps, output_size)`.

In [136]:
class MinimalRNNCell(tf.keras.layers.Layer):
    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        # self.units = units
        self.state_size = units

    def build(self, input_shape):
        print("input_shape={}".format(input_shape))
        self.kernel = self.add_weight(shape=(input_shape[-1], self.state_size),
                                        initializer='uniform',
                                        name='kernel')
        self.recurrent_kernel = self.add_weight(
            shape=(self.state_size, self.state_size),
            initializer='uniform',
            name='recurrent_kernel')
        self.built = True

    def call(self, inputs, states):
        prev_output = states[0]
        h = tf.keras.ops.matmul(inputs, self.kernel)
        output = h + tf.keras.ops.matmul(prev_output, self.recurrent_kernel)
        return output, [output]

In [137]:
cell = MinimalRNNCell(32)
x = tf.keras.Input(shape=(None,5))
layer = tf.keras.layers.RNN(cell)
y = layer(x)
model = tf.keras.Model(inputs=x,outputs=y)
model.compile(optimizer="adam", loss="mse")
model.summary()

input_shape=(None, 5)


In [138]:
data = tf.random.normal((3,2,5))
print(data.shape)
output = model.predict(x=data)
output.shape

(3, 2, 5)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 601ms/step


(3, 32)

In [85]:
weights, bias = layer.get_weights()
tf.print(weights.shape)
tf.print(bias.shape)

(5, 32)
(32, 32)


In [109]:
cells = [MinimalRNNCell(32), MinimalRNNCell(64)]
x = tf.keras.Input((None, 5))
layer = tf.keras.layers.RNN(cells)
y = layer(x)
model = tf.keras.Model(inputs=x, outputs=y)

input_shape=(None, 5)
input_shape=(None, 32)


In [None]:
data = tf.random.normal((3,2,5))
print(data.shape)
output = model.predict(x=data)
output.shape

## tf.keras.layers.LSTMCell
```
    tf.keras.layers.LSTMCell(
        units,
        activation='tanh',
        recurrent_activation='sigmoid',
        use_bias=True,
        kernel_initializer='glorot_uniform',
        recurrent_initializer='orthogonal',
        bias_initializer='zeros',
        unit_forget_bias=True,
        kernel_regularizer=None,
        recurrent_regularizer=None,
        bias_regularizer=None,
        kernel_constraint=None,
        recurrent_constraint=None,
        bias_constraint=None,
        dropout=0.0,
        recurrent_dropout=0.0,
        seed=None,
        **kwargs
    )
    Call arguments:
        inputs: A 2D tensor, with shape `(batch, features)`.
        states: A 2D tensor with shape `(batch, units)`, which is the state
            from the previous time step.
        training: Python boolean indicating whether the layer should behave in
            training mode or in inference mode. Only relevant when `dropout` or
            `recurrent_dropout` is used.
    权重
        kernel shape =(input_dim, self.units * 4)
        recurrent_kernel shape=(self.units, self.units * 4)
        bias shape =(self.units * 4,)
```

In [60]:
x = np.random.random((2,10,8))
rnn = tf.keras.layers.RNN(tf.keras.layers.LSTMCell(3))
rnn(x)

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[ 0.552522  , -0.38236696, -0.2097214 ],
       [ 0.57467663, -0.38092893, -0.23813564]], dtype=float32)>

In [68]:
rnn2 = tf.keras.layers.RNN(tf.keras.layers.LSTMCell(3), return_sequences=False, return_state=True)
final_output,state_h,state_c = rnn2(x)

In [112]:
tf.print(final_output)
tf.print(state_h)
tf.print(state_c)

[[0.3621279 -0.396188468 -0.0788854212]
 [0.438182026 -0.407655537 0.00722320285]]
[[0.3621279 -0.396188468 -0.0788854212]
 [0.438182026 -0.407655537 0.00722320285]]
[[1.13227391 -0.673898697 -0.130918488]
 [1.05494821 -0.825320661 0.0146492561]]


In [118]:
# kernel shape =(input_dim, self.units * 4)
# recurrent_kernel shape=(self.units, self.units * 4)
# bias shape =(self.units * 4,)
kernel,recurrent_kernel,bias = rnn.get_weights()
print(kernel.shape)
print(recurrent_kernel.shape)
print(bias.shape)

(8, 12)
(3, 12)
(12,)


## tf.keras.layers.LSTM
LSTM继承了tf.keras.layers.RNN


In [127]:
inputs = np.random.random((2, 3, 4))
lstm = tf.keras.layers.LSTM(5)
output = lstm(inputs)
print(output.shape)

lstm = tf.keras.layers.LSTM(5, return_sequences=True, return_state=True)
whole_seq_output, final_memory_state, final_carry_state = lstm(inputs)
print(whole_seq_output.shape)
print(final_memory_state.shape)
print(final_carry_state.shape)


(2, 5)
(2, 3, 5)
(2, 5)
(2, 5)


In [128]:
kernel_lstm, recurrent_kernel_lstm, bias_lstm = lstm.get_weights()
print(kernel_lstm.shape)
print(recurrent_kernel_lstm.shape)
print(bias_lstm.shape)

(4, 20)
(5, 20)
(20,)


## tf.keras.layers.GRUCell
```
    def __init__(
        self,
        units,
        activation="tanh",
        recurrent_activation="sigmoid",
        use_bias=True,
        kernel_initializer="glorot_uniform",
        recurrent_initializer="orthogonal",
        bias_initializer="zeros",
        kernel_regularizer=None,
        recurrent_regularizer=None,
        bias_regularizer=None,
        kernel_constraint=None,
        recurrent_constraint=None,
        bias_constraint=None,
        dropout=0.0,
        recurrent_dropout=0.0,
        reset_after=True,
        seed=None,
        **kwargs,
    )
    Call arguments:
        inputs: A 2D tensor, with shape `(batch, features)`.
        states: A 2D tensor with shape `(batch, units)`, which is the state
            from the previous time step.
        training: Python boolean indicating whether the layer should behave in
            training mode or in inference mode. Only relevant when `dropout` or
            `recurrent_dropout` is used.

```

In [121]:
inputs = np.random.random((2,3,4))
gru = tf.keras.layers.GRUCell(5)
rnn1 = tf.keras.layers.RNN(gru)
rnn1(inputs)

<tf.Tensor: shape=(2, 5), dtype=float32, numpy=
array([[-0.14473268,  0.28370684, -0.14832309,  0.12082352, -0.22530709],
       [-0.31083363,  0.26357356, -0.38767987, -0.28236735,  0.14880402]],
      dtype=float32)>

In [122]:
rnn2 = tf.keras.layers.RNN(gru,return_sequences=True,return_state=True)

In [123]:
rnn2(inputs)

(<tf.Tensor: shape=(2, 3, 5), dtype=float32, numpy=
 array([[[-0.16979632,  0.17237757, -0.18382923, -0.20845969,
           0.01840987],
         [-0.14016089,  0.1289713 , -0.1722104 , -0.03224351,
           0.03292613],
         [-0.14473268,  0.28370684, -0.14832309,  0.12082352,
          -0.22530709]],
 
        [[-0.12160186,  0.16266301, -0.17113881, -0.15289839,
           0.00720164],
         [-0.22395606,  0.30100536, -0.3007007 , -0.22977392,
           0.02138322],
         [-0.31083363,  0.26357356, -0.38767987, -0.28236735,
           0.14880402]]], dtype=float32)>,
 <tf.Tensor: shape=(2, 5), dtype=float32, numpy=
 array([[-0.14473268,  0.28370684, -0.14832309,  0.12082352, -0.22530709],
        [-0.31083363,  0.26357356, -0.38767987, -0.28236735,  0.14880402]],
       dtype=float32)>)

In [126]:
kernel_gru, recurrent_kernel_gru, bias_gru = rnn2.get_weights()
print(kernel_gru.shape)
print(recurrent_kernel_gru.shape)
# separate biases for input and recurrent kernels
print(bias_gru.shape)

(4, 15)
(5, 15)
(2, 15)


## tf.keras.layers.GRU
继承了tf.keras.layers.RNN

In [135]:
inputs = np.random.random((2, 10, 8))
gru1 = tf.keras.layers.GRU(4)
output = gru1(inputs)
output.shape

gru2 = tf.keras.layers.GRU(4, return_sequences=True, return_state=False)
whole_sequence_output = gru2(inputs)
print(whole_sequence_output.shape)
#print(final_state.shape)


(2, 10, 4)


In [133]:
whole_sequence_output

<tf.Tensor: shape=(2, 4), dtype=float32, numpy=
array([[ 0.42748457,  0.55501944,  0.2937019 , -0.06797385],
       [-0.03309484,  0.47437924,  0.29864812, -0.5828115 ]],
      dtype=float32)>

In [134]:
final_state

<tf.Tensor: shape=(2, 4), dtype=float32, numpy=
array([[ 0.42748457,  0.55501944,  0.2937019 , -0.06797385],
       [-0.03309484,  0.47437924,  0.29864812, -0.5828115 ]],
      dtype=float32)>

## tf.keras.layers.Bidirectional
```
tf.keras.layers.Bidirectional(
    layer,
    merge_mode='concat',
    weights=None,
    backward_layer=None,
    **kwargs
)

def call(
    self,
    sequences,
    initial_state=None,
    mask=None,
    training=None,
)

Args:
    layer: `keras.layers.RNN` instance, such as
        `keras.layers.LSTM` or `keras.layers.GRU`.
        It could also be a `keras.layers.Layer` instance
        that meets the following criteria:
        1. Be a sequence-processing layer (accepts 3D+ inputs).
        2. Have a `go_backwards`, `return_sequences` and `return_state`
        attribute (with the same semantics as for the `RNN` class).
        3. Have an `input_spec` attribute.
        4. Implement serialization via `get_config()` and `from_config()`.
        Note that the recommended way to create new RNN layers is to write a
        custom RNN cell and use it with `keras.layers.RNN`, instead of
        subclassing `keras.layers.Layer` directly.
        When `return_sequences` is `True`, the output of the masked
        timestep will be zero regardless of the layer's original
        `zero_output_for_mask` value.
    merge_mode: Mode by which outputs of the forward and backward RNNs
        will be combined. One of `{"sum", "mul", "concat", "ave", None}`.
        If `None`, the outputs will not be combined,
        they will be returned as a list. Defaults to `"concat"`.
    backward_layer: Optional `keras.layers.RNN`,
        or `keras.layers.Layer` instance to be used to handle
        backwards input processing.
        If `backward_layer` is not provided, the layer instance passed
        as the `layer` argument will be used to generate the backward layer
        automatically.
        Note that the provided `backward_layer` layer should have properties
        matching those of the `layer` argument, in particular
        it should have the same values for `stateful`, `return_states`,
        `return_sequences`, etc. In addition, `backward_layer`
        and `layer` should have different `go_backwards` argument values.
        A `ValueError` will be raised if these requirements are not met.

Call arguments:
    The call arguments for this layer are the same as those of the
    wrapped RNN layer. Beware that when passing the `initial_state`
    argument during the call of this layer, the first half in the
    list of elements in the `initial_state` list will be passed to
    the forward RNN call and the last half in the list of elements
    will be passed to the backward RNN call.

Note: instantiating a `Bidirectional` layer from an existing RNN layer
instance will not reuse the weights state of the RNN layer instance -- the
`Bidirectional` layer will have freshly initialized weights.

```

In [151]:
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(5,10)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(6,return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(5,return_sequences=False)),
    tf.keras.layers.Dense(4, activation="softmax")
])
model.compile(optimizer="rmsprop", loss="Dice")
model.summary()
x=np.random.random((2,5,10))
model.predict(x)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


array([[0.28301755, 0.26528645, 0.21479882, 0.23689722],
       [0.27503747, 0.26902932, 0.2097223 , 0.2462109 ]], dtype=float32)

In [172]:
forward_layer = tf.keras.layers.LSTM(5,return_sequences=True)
backward_layer = tf.keras.layers.LSTM(5, return_sequences=True, go_backwards=True)
model = tf.keras.Sequential([
    tf.keras.layers.Input((None,5)),
    tf.keras.layers.Bidirectional(forward_layer, backward_layer=backward_layer),
    tf.keras.layers.Dense(4, activation='softmax')
])
model.compile(optimizer="adam", loss="mse")
model.summary()

In [176]:
try:
    x1 = np.random.random((3,5))
    x2 = np.random.random((3,5))
    x = [x1,x2]
    print(x)
    x = np.stack([x1,x2], axis=0)
    print(x.shape)
    model.predict(x)
except ValueError as e:
    print("error", e)

[array([[0.65419205, 0.76972482, 0.51887192, 0.00993751, 0.01756697],
       [0.03755188, 0.92278993, 0.80396395, 0.05055183, 0.08868134],
       [0.78110225, 0.95311067, 0.43463611, 0.40639844, 0.77907658]]), array([[0.66915554, 0.82877664, 0.55386778, 0.22147299, 0.86445864],
       [0.97277951, 0.84327059, 0.81434158, 0.1252304 , 0.55577839],
       [0.3836112 , 0.16558977, 0.28735023, 0.83252969, 0.51942957]])]
(2, 3, 5)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
