In [48]:
import tensorflow as tf
import numpy as np

## tf.keras.layers.Embedding
tf.keras.layers.Embedding(
    input_dim,
    output_dim,
    embeddings_initializer='uniform',
    embeddings_regularizer=None,
    embeddings_constraint=None,
    mask_zero=False,
    lora_rank=None,
    **kwargs
)

Args:

        input_dim: Integer. Size of the vocabulary,
            i.e. maximum integer index + 1.

        output_dim: Integer. Dimension of the dense embedding.

        embeddings_initializer: Initializer for the `embeddings`
            matrix (see `keras.initializers`).

        embeddings_regularizer: Regularizer function applied to
            the `embeddings` matrix (see `keras.regularizers`).

        embeddings_constraint: Constraint function applied to
            the `embeddings` matrix (see `keras.constraints`).

        mask_zero: Boolean, whether or not the input value 0 is a special
            "padding" value that should be masked out.
            This is useful when using recurrent layers which
            may take variable length input. If this is `True`,
            then all subsequent layers in the model need
            to support masking or an exception will be raised.
            If mask_zero is set to True, as a consequence,
            index 0 cannot be used in the vocabulary (input_dim should
            equal size of vocabulary + 1).
            
        lora_rank: Optional integer. If set, the layer's forward pass
            will implement LoRA (Low-Rank Adaptation)
            with the provided rank. LoRA sets the layer's embeddings
            matrix to non-trainable and replaces it with a delta over the
            original matrix, obtained via multiplying two lower-rank
            trainable matrices. This can be useful to reduce the
            computation cost of fine-tuning large embedding layers.
            You can also enable LoRA on an existing
            `Embedding` layer by calling `layer.enable_lora(rank)`.

Input shape:

    2D tensor with shape: `(batch_size, input_length)`.

Output shape:

    3D tensor with shape: `(batch_size, input_length, output_dim)`.

In [49]:
# One-layer model: a 1000-entry vocabulary embedded into 32-dimensional vectors.
model = tf.keras.Sequential(
    [tf.keras.layers.Embedding(input_dim=1000, output_dim=32)]
)
model.compile(optimizer="rmsprop", loss="mse")

# Batch of 2 sequences, each holding 10 token ids drawn from [0, 1000).
x = np.random.randint(0, 1000, size=(2, 10))
y = model.predict(x)
# Every token id is replaced by its vector: (batch, sequence, output_dim).
print(y.shape)

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 153ms/step
(2, 10, 32)


In [50]:
# Inspect the container type of the `model.predict` result stored in `y`.
print(type(y))

<class 'numpy.ndarray'>


## tf.keras.layers.Dense
    tf.keras.layers.Dense(
        units,
        activation=None,
        use_bias=True,
        kernel_initializer='glorot_uniform',
        bias_initializer='zeros',
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        lora_rank=None,
        **kwargs
    )
    `Dense` implements the operation:
        `output = activation(dot(input, kernel) + bias)`
        where `activation` is the element-wise activation function
        passed as the `activation` argument, `kernel` is a weights matrix
        created by the layer, and `bias` is a bias vector created by the layer
        (only applicable if `use_bias` is `True`).

        Note: If the input to the layer has a rank greater than 2, `Dense`
        computes the dot product between the `inputs` and the `kernel` along the
        last axis of the `inputs` and axis 0 of the `kernel` (using `tf.tensordot`).
        For example, if input has dimensions `(batch_size, d0, d1)`, then we create
        a `kernel` with shape `(d1, units)`, and the `kernel` operates along axis 2
        of the `input`, on every sub-tensor of shape `(1, 1, d1)` (there are
        `batch_size * d0` such sub-tensors). The output in this case will have
        shape `(batch_size, d0, units)`.

    Args:
        units: Positive integer, dimensionality of the output space.
        activation: Activation function to use.
            If you don't specify anything, no activation is applied
            (i.e. "linear" activation: `a(x) = x`).
        use_bias: Boolean, whether the layer uses a bias vector.
        kernel_initializer: Initializer for the `kernel` weights matrix.
        bias_initializer: Initializer for the bias vector.
        kernel_regularizer: Regularizer function applied to
            the `kernel` weights matrix.
        bias_regularizer: Regularizer function applied to the bias vector.
        activity_regularizer: Regularizer function applied to
            the output of the layer (its "activation").
        kernel_constraint: Constraint function applied to
            the `kernel` weights matrix.
        bias_constraint: Constraint function applied to the bias vector.
        lora_rank: Optional integer. If set, the layer's forward pass
            will implement LoRA (Low-Rank Adaptation)
            with the provided rank. LoRA sets the layer's kernel
            to non-trainable and replaces it with a delta over the
            original kernel, obtained via multiplying two lower-rank
            trainable matrices. This can be useful to reduce the
            computation cost of fine-tuning large dense layers.
            You can also enable LoRA on an existing
            `Dense` layer by calling `layer.enable_lora(rank)`.

    Input shape:
        N-D tensor with shape: `(batch_size, ..., input_dim)`.
        The most common situation would be
        a 2D input with shape `(batch_size, input_dim)`.

    Output shape:
        N-D tensor with shape: `(batch_size, ..., units)`.
        For instance, for a 2D input with shape `(batch_size, input_dim)`,
        the output would have shape `(batch_size, units)`.

In [51]:
# Rank-3 input of shape (2, 3, 2): Dense acts on the last axis only.
x = np.arange(12).reshape((2, 3, 2))
dense = tf.keras.layers.Dense(units=4)
y = dense(x)

# get_weights() returns the kernel first, then the bias.
weights, bias = dense.get_weights()
for param in (weights, bias):
    tf.print(param)
y

array([[-0.64172983, -0.03486204,  0.34107637, -0.66875577],
       [ 0.28248692,  0.64448667, -0.7345898 ,  0.55690575]],
      dtype=float32)
array([0., 0., 0., 0.], dtype=float32)


<tf.Tensor: shape=(2, 3, 4), dtype=float32, numpy=
array([[[ 0.28248692,  0.64448667, -0.7345898 ,  0.55690575],
        [-0.43599892,  1.8637359 , -1.5216167 ,  0.3332057 ],
        [-1.1544847 ,  3.0829852 , -2.3086436 ,  0.10950565]],

       [[-1.8729706 ,  4.3022346 , -3.0956707 , -0.11419439],
        [-2.5914564 ,  5.5214834 , -3.882697  , -0.33789444],
        [-3.3099422 ,  6.740733  , -4.6697245 , -0.5615945 ]]],
      dtype=float32)>

In [52]:
# Reproduce the Dense output by hand with NumPy: input · kernel + bias.
np.matmul(x, weights) + bias

array([[[ 0.28248692,  0.64448667, -0.73458982,  0.55690575],
        [-0.43599892,  1.86373591, -1.5216167 ,  0.3332057 ],
        [-1.15448475,  3.08298516, -2.30864358,  0.10950565]],

       [[-1.87297058,  4.30223441, -3.09567046, -0.11419439],
        [-2.59145641,  5.52148366, -3.88269734, -0.33789444],
        [-3.30994225,  6.74073291, -4.66972423, -0.56159449]]])

In [53]:
# Same affine map again, this time with TensorFlow ops on float32 tensors.
x_tensor = tf.convert_to_tensor(x, dtype=tf.float32)
kernel_tensor = tf.convert_to_tensor(weights, dtype=tf.float32)
bias_tensor = tf.convert_to_tensor(bias, dtype=tf.float32)
tf.matmul(x_tensor, kernel_tensor) + bias_tensor

<tf.Tensor: shape=(2, 3, 4), dtype=float32, numpy=
array([[[ 0.28248692,  0.64448667, -0.7345898 ,  0.55690575],
        [-0.43599892,  1.8637359 , -1.5216167 ,  0.3332057 ],
        [-1.1544847 ,  3.0829852 , -2.3086436 ,  0.10950565]],

       [[-1.8729706 ,  4.3022346 , -3.0956707 , -0.11419439],
        [-2.5914564 ,  5.5214834 , -3.882697  , -0.33789444],
        [-3.3099422 ,  6.740733  , -4.6697245 , -0.5615945 ]]],
      dtype=float32)>

## tf.keras.layers.Activation
Inherits From: Layer, Operation

tf.keras.layers.Activation(
    activation, **kwargs
)

Args

activation	Activation function. It could be a callable, or the name of an activation from the keras.activations namespace.

**kwargs	Base layer keyword arguments, such as name and dtype.

In [54]:
# An activation can be selected by its name in `keras.activations`.
layer = tf.keras.layers.Activation(activation="relu")
layer

<Activation name=activation, built=False>

In [55]:
# Sample covering negative, zero and positive inputs.
x = tf.constant(
    [-3.0, -1.0, 0.0, 2.0],
    dtype=tf.float32,
)
layer(x)

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([0., 0., 0., 2.], dtype=float32)>

In [56]:
# The activation may also be passed as a callable instead of a name.
layer = tf.keras.layers.Activation(activation=tf.keras.activations.elu)
layer(x)

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([-0.95021296, -0.63212055,  0.        ,  2.        ], dtype=float32)>

In [57]:
# Hand-written counterpart of the ELU layer: exp(x) - 1 where x is negative,
# the input itself everywhere else.
is_negative = x < 0
tf.where(is_negative, tf.math.exp(x) - 1, x)

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([-0.95021296, -0.63212055,  0.        ,  2.        ], dtype=float32)>

## tf.keras.layers.Dropout

tf.keras.layers.Dropout(
    rate, noise_shape=None, seed=None, **kwargs
)

    The `Dropout` layer randomly sets input units to 0 with a frequency of
    `rate` at each step during training time, which helps prevent overfitting.
    Inputs not set to 0 are scaled up by `1 / (1 - rate)` such that the sum over
    all inputs is unchanged.

    Note that the `Dropout` layer only applies when `training` is set to `True`
    in `call()`, such that no values are dropped during inference.
    When using `model.fit`, `training` will be appropriately set to `True`
    automatically. In other contexts, you can set the argument explicitly
    to `True` when calling the layer.

    (This is in contrast to setting `trainable=False` for a `Dropout` layer.
    `trainable` does not affect the layer's behavior, as `Dropout` does
    not have any variables/weights that can be frozen during training.)

    Args:
        rate: Float between 0 and 1. Fraction of the input units to drop.
        noise_shape: 1D integer tensor representing the shape of the
            binary dropout mask that will be multiplied with the input.
            For instance, if your inputs have shape
            `(batch_size, timesteps, features)` and
            you want the dropout mask to be the same for all timesteps,
            you can use `noise_shape=(batch_size, 1, features)`.
        seed: A Python integer to use as random seed.

    Call arguments:
        inputs: Input tensor (of any rank).
        training: Python boolean indicating whether the layer should behave in
            training mode (adding dropout) or in inference mode (doing nothing).

In [58]:
batch_size = 2
time_step = 3
num_feature = 5

# NOTE: `input` shadows the Python builtin; the name is kept because the
# following cell reads it.
input = np.random.random(size=(batch_size, time_step, num_feature))

# dp1 uses the default mask shape (no noise_shape given).
dp1 = tf.keras.layers.Dropout(rate=0.2)
# dp2 sets the time-step entry of noise_shape to 1, so one mask is shared
# by all time steps of a sample.
dp2 = tf.keras.layers.Dropout(rate=0.2, noise_shape=[None, 1, num_feature])

# Dropout only drops values when training=True is passed to the call.
y1, y2 = dp1(input, training=True), dp2(input, training=True)

In [59]:
# Show each sample next to its two dropout results.
for i in range(batch_size):
    print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>", i)
    for tag, tensor in (("input", input), ("y1", y1), ("y2", y2)):
        print(tag)
        print(tensor[i])

>>>>>>>>>>>>>>>>>>>>>>>>>>>> 0
input
[[0.96275613 0.32299979 0.25035313 0.35506693 0.84324772]
 [0.80321076 0.06097186 0.06952449 0.32996631 0.71498446]
 [0.80810178 0.39748406 0.40057398 0.46095772 0.59467154]]
y1
tf.Tensor(
[[1.2034452  0.4037497  0.         0.44383365 1.0540596 ]
 [1.0040134  0.07621483 0.0869056  0.41245788 0.8937306 ]
 [1.0101272  0.49685508 0.50071746 0.57619715 0.7433394 ]], shape=(3, 5), dtype=float32)
y2
tf.Tensor(
[[1.2034452  0.4037497  0.3129414  0.44383365 0.        ]
 [1.0040134  0.07621483 0.0869056  0.41245788 0.        ]
 [1.0101272  0.49685508 0.50071746 0.57619715 0.        ]], shape=(3, 5), dtype=float32)
>>>>>>>>>>>>>>>>>>>>>>>>>>>> 1
input
[[0.01848825 0.13996816 0.06732699 0.60518559 0.92230018]
 [0.88649941 0.3624613  0.17512831 0.32605931 0.63917347]
 [0.21375652 0.88997229 0.5282882  0.68848123 0.40353503]]
y1
tf.Tensor(
[[0.02311032 0.         0.         0.         1.1528752 ]
 [1.1081243  0.45307663 0.2189104  0.40757415 0.7989668 ]
 [0.2671

## tf.keras.layers.AlphaDropout
AlphaDropout是一个更加强大的Dropout，它强大在两个地方：

1. 均值和方差不变 （普通的Dropout在Dropout之后，可能这一层激活值的分布就发生变化了，但AlphaDropout不会）

2. 归一化性质也不变（因为均值和方差不变，所以归一化性质也不变，有了这个性质之后，这个Dropout就可以和批归一化、selu在一块使用，因为它不会导致分布发生变化）


## tf.keras.layers.SpatialDropout2D

tf.keras.layers.SpatialDropout2D(
    rate, data_format=None, seed=None, name=None, dtype=None
)

This version performs the same function as Dropout, however, it drops
    entire 2D feature maps instead of individual elements. If adjacent pixels
    within feature maps are strongly correlated (as is normally the case in
    early convolution layers) then regular dropout will not regularize the
    activations and will otherwise just result in an effective learning rate
    decrease. In this case, `SpatialDropout2D` will help promote independence
    between feature maps and should be used instead.

    Args:
        rate: Float between 0 and 1. Fraction of the input units to drop.
        data_format: `"channels_first"` or `"channels_last"`.
            In `"channels_first"` mode, the channels dimension (the depth)
            is at index 1; in `"channels_last"` mode it is at index 3.
            It defaults to the `image_data_format` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be `"channels_last"`.

    Call arguments:
        inputs: A 4D tensor.
        training: Python boolean indicating whether the layer
            should behave in training mode (applying dropout)
            or in inference mode (pass-through).

    Input shape:
        4D tensor with shape: `(samples, channels, rows, cols)` if
            data_format='channels_first'
        or 4D tensor with shape: `(samples, rows, cols, channels)` if
            data_format='channels_last'.

    Output shape: Same as input.

    Reference:

    - [Tompson et al., 2014](https://arxiv.org/abs/1411.4280)

In [60]:
# 4D input as SpatialDropout2D requires: shape (1, 2, 2, 3).
x = tf.constant(np.random.random((1, 2, 2, 3)), dtype=tf.float32)
sdropout = tf.keras.layers.SpatialDropout2D(rate=0.4)
# training=True switches the layer from pass-through to applying dropout.
y = sdropout(x, training=True)
tf.print(y)

[[[[0.616599381 1.60801017 1.32021952]
   [0.570951164 0.672822714 1.04828417]]

  [[0.0512940101 1.58575416 0.90588516]
   [1.32246041 0.097947225 0.280347347]]]]


## tf.keras.layers.SpatialDropout1D
Input shape

3D tensor with shape: (samples, timesteps, channels)

## tf.keras.layers.SpatialDropout3D
Input shape

5D tensor with shape: (samples, channels, dim1, dim2, dim3) if data_format='channels_first' or 5D tensor with shape: (samples, dim1, dim2, dim3, channels) if data_format='channels_last'.

## tf.keras.layers.Flatten

* tf.keras.layers.Flatten(
    data_format=None, **kwargs
)

- Flattens the input. Does not affect the batch size.

    Note: If inputs are shaped `(batch,)` without a feature axis, then
    flattening adds an extra channel dimension and output shape is `(batch, 1)`.

    Args:
    
        data_format: A string, one of `"channels_last"` (default) or
            `"channels_first"`. The ordering of the dimensions in the inputs.
            `"channels_last"` corresponds to inputs with shape
            `(batch, ..., channels)` while `"channels_first"` corresponds to
            inputs with shape `(batch, channels, ...)`.
            When unspecified, uses `image_data_format` value found in your Keras
            config file at `~/.keras/keras.json` (if exists). Defaults to
            `"channels_last"`.

In [61]:
# Symbolic input whose per-sample shape is (10, 64).
x = tf.keras.Input(shape=(10, 64))
flatten = tf.keras.layers.Flatten()
# Every axis except the batch axis collapses into one: 10 * 64 = 640.
y = flatten(x)
y

<KerasTensor shape=(None, 640), dtype=float32, sparse=None, name=keras_tensor_9>

## tf.keras.layers.Reshape
* tf.keras.layers.Reshape(
    target_shape, **kwargs
)

Layer that reshapes inputs into the given shape.

* Args:
        target_shape: Target shape. Tuple of integers, does not include the
            samples dimension (batch size).

* Input shape:
        Arbitrary, although all dimensions in the input shape must be
        known/fixed. Use the keyword argument `input_shape` (tuple of integers,
        does not include the samples/batch size axis) when using this layer as
        the first layer in a model.

* Output shape:
    `(batch_size, *target_shape)`

In [62]:
# Two samples with 12 features each.
x = tf.constant(np.random.random((2, 12)), dtype=tf.float32)
# target_shape excludes the batch axis: 12 features become a 3 x 4 grid.
reshape_layer = tf.keras.layers.Reshape(target_shape=(3, 4))
y = reshape_layer(x)
y

<tf.Tensor: shape=(2, 3, 4), dtype=float32, numpy=
array([[[0.6452224 , 0.06654943, 0.9695012 , 0.44724554],
        [0.2576106 , 0.6819731 , 0.8684523 , 0.6802403 ],
        [0.20798987, 0.6424835 , 0.6363279 , 0.2933764 ]],

       [[0.9978972 , 0.761238  , 0.3668906 , 0.11984321],
        [0.5717654 , 0.02247417, 0.47233447, 0.7455886 ],
        [0.8301009 , 0.90731865, 0.07383701, 0.16560638]]], dtype=float32)>

In [63]:
# -1 asks the layer to infer that axis: 12 / (2 * 2) = 3.
inferring_reshape = tf.keras.layers.Reshape(target_shape=(-1, 2, 2))
y = inferring_reshape(x)
y.shape

TensorShape([2, 3, 2, 2])

In [64]:
# The same inference on a symbolic input: 12 / (2 * 3) = 2.
x = tf.keras.Input(shape=(12,))
symbolic_reshape = tf.keras.layers.Reshape(target_shape=(-1, 2, 3))
y = symbolic_reshape(x)
y.shape

(None, 2, 2, 3)

## tf.keras.layers.Concatenate
    It takes as input a list of tensors, all of the same shape except
    for the concatenation axis, and returns a single tensor that is the
    concatenation of all inputs.

* tf.keras.layers.Concatenate(axis=-1, **kwargs)
* Args:
        axis: Axis along which to concatenate.
        **kwargs: Standard layer keyword arguments.

* Returns:
        A tensor, the concatenation of the inputs alongside axis `axis`.



In [65]:
# Inputs agree on every axis except axis 1 (sizes 2 and 1).
x1 = np.arange(20).reshape((2, 2, 5))
x2 = np.arange(20, 30).reshape((2, 1, 5))
concat_axis1 = tf.keras.layers.Concatenate(axis=1)
# Joining along axis 1 gives 2 + 1 = 3 entries on that axis.
y = concat_axis1([x1, x2])
y.shape

TensorShape([2, 3, 5])

In [66]:
# Two independent Dense(8) branches, each fed a (5, 2) input.
x1 = tf.keras.layers.Dense(units=8)(np.arange(10).reshape((5, 2)))
x2 = tf.keras.layers.Dense(units=8)(np.arange(10, 20).reshape((5, 2)))
for branch in (x1, x2):
    tf.print(branch.shape)

# Default axis=-1: the feature axes are joined, 8 + 8 = 16.
y = tf.keras.layers.Concatenate()([x1, x2])
tf.print(y.shape)

TensorShape([5, 8])
TensorShape([5, 8])
TensorShape([5, 16])


## tf.keras.layers.Add
tf.keras.layers.Add(
    **kwargs
)
    
    It takes as input a list of tensors, all of the same shape,
    and returns a single tensor (also of the same shape).


In [70]:
input_shape = (2, 3, 4)
# Two random tensors of identical shape.
x1 = np.random.random(input_shape)
x2 = np.random.rand(*input_shape)

# Add takes a list of same-shaped tensors and returns their sum.
add_layer = tf.keras.layers.Add()
y = add_layer([x1, x2])
y

<tf.Tensor: shape=(2, 3, 4), dtype=float32, numpy=
array([[[0.48554325, 1.227601  , 0.8126565 , 0.53688073],
        [0.68666613, 1.6902375 , 1.0352478 , 0.57138455],
        [1.0321999 , 0.501016  , 0.9576036 , 1.2120421 ]],

       [[0.7342508 , 1.4534317 , 0.71955824, 1.7794999 ],
        [1.6020393 , 1.3596292 , 0.6303549 , 0.39443696],
        [0.53170365, 1.138582  , 1.7356129 , 0.96787584]]], dtype=float32)>

In [71]:
# NumPy reference for the Add layer: plain element-wise sum.
np.add(x1, x2)

array([[[0.48554325, 1.22760105, 0.8126565 , 0.53688073],
        [0.68666611, 1.69023744, 1.03524777, 0.57138452],
        [1.03219991, 0.50101602, 0.95760354, 1.21204215]],

       [[0.73425074, 1.45343176, 0.71955827, 1.77949984],
        [1.60203927, 1.35962916, 0.63035487, 0.39443696],
        [0.53170364, 1.13858196, 1.73561293, 0.96787586]]])

## tf.keras.layers.Subtract
tf.keras.layers.Subtract(
    **kwargs
)

    Performs elementwise subtraction.
    It takes as input a list of exactly two tensors, both of the
    same shape, and returns a single tensor (inputs[0] - inputs[1])
    of the same shape.



In [72]:
# Subtract expects exactly two inputs and returns inputs[0] - inputs[1].
subtract_layer = tf.keras.layers.Subtract()
subtract_layer([x1, x2])

<tf.Tensor: shape=(2, 3, 4), dtype=float32, numpy=
array([[[-0.05820923,  0.54523873, -0.33016336,  0.4021411 ],
        [-0.19446963,  0.1893099 ,  0.76850337, -0.49797586],
        [-0.6312529 , -0.3815776 ,  0.46261907,  0.01131099]],

       [[-0.7241807 ,  0.44210625, -0.38760757, -0.01166928],
        [-0.0849604 , -0.07201624, -0.31166935,  0.09301943],
        [ 0.01433945,  0.09968823,  0.18223101, -0.8487245 ]]],
      dtype=float32)>

## tf.keras.layers.Maximum

tf.keras.layers.Maximum(
    **kwargs
)

It takes as input a list of tensors, all of the same shape, and returns a single tensor (also of the same shape).

In [73]:
# NumPy reference for the Subtract layer: element-wise difference.
np.subtract(x1, x2)

array([[[-0.05820921,  0.54523875, -0.33016335,  0.40214111],
        [-0.19446962,  0.18930986,  0.76850337, -0.49797586],
        [-0.63125289, -0.38157764,  0.46261907,  0.01131104]],

       [[-0.72418069,  0.44210626, -0.38760759, -0.01166928],
        [-0.0849604 , -0.07201623, -0.31166938,  0.09301944],
        [ 0.01433943,  0.09968821,  0.18223097, -0.84872448]]])

In [74]:
input_shape = (2, 3, 4)
# Draw two random tensors of the same shape, x1 first and then x2.
x1, x2 = (np.random.rand(*input_shape) for _ in range(2))

# Maximum keeps, position by position, the larger of its inputs.
y = tf.keras.layers.Maximum()([x1, x2])
for tensor in (x1, x2, y):
    tf.print(tensor)

array([[[0.81355413, 0.82931134, 0.22891   , 0.15438937],
        [0.46969721, 0.18594595, 0.37698281, 0.99312283],
        [0.14015528, 0.33212982, 0.46019887, 0.93527131]],

       [[0.4736411 , 0.13906825, 0.5870708 , 0.31000703],
        [0.69630795, 0.51727792, 0.75252039, 0.4710736 ],
        [0.68655642, 0.22620015, 0.85941165, 0.5590729 ]]])
array([[[0.41427353, 0.4100347 , 0.03177998, 0.85128087],
        [0.74203412, 0.62904349, 0.44725248, 0.74641753],
        [0.39199419, 0.40299547, 0.65917749, 0.2838739 ]],

       [[0.59430464, 0.94850065, 0.11541002, 0.06381379],
        [0.15556203, 0.77173534, 0.01443398, 0.15971388],
        [0.52261042, 0.54968582, 0.71589447, 0.8129841 ]]])
[[[0.813554108 0.829311311 0.22891 0.851280868]
  [0.742034137 0.629043519 0.447252482 0.993122816]
  [0.391994178 0.402995467 0.659177482 0.935271323]]

 [[0.594304621 0.948500633 0.587070823 0.310007036]
  [0.696307957 0.771735311 0.752520382 0.471073598]
  [0.686556399 0.549685836 0.859411657

In [75]:
# Reference for the Maximum layer: take x1 where it is larger, x2 otherwise.
x1_is_larger = np.greater(x1, x2)
tf.where(x1_is_larger, x1, x2)

<tf.Tensor: shape=(2, 3, 4), dtype=float64, numpy=
array([[[0.81355413, 0.82931134, 0.22891   , 0.85128087],
        [0.74203412, 0.62904349, 0.44725248, 0.99312283],
        [0.39199419, 0.40299547, 0.65917749, 0.93527131]],

       [[0.59430464, 0.94850065, 0.5870708 , 0.31000703],
        [0.69630795, 0.77173534, 0.75252039, 0.4710736 ],
        [0.68655642, 0.54968582, 0.85941165, 0.8129841 ]]])>

## tf.keras.layers.Maximum