In [1]:
import random

import tensorflow as tf
import tensorflow.keras.backend as K

from tensorflow.keras.layers import Dense, Permute, Softmax, Activation, Add

In [2]:
# Seed TensorFlow's and Python's random number generators with the same fixed
# value so the random values drawn in later cells are repeatable.
tf.random.set_seed(42)
random.seed(42)

# Show which TensorFlow version produced the recorded outputs.
print(tf.__version__)

2.6.0


In [3]:
# Two random 7 x 3 float32 matrices (presumably toy LSTM hidden/cell states for
# 7 samples with 3 units each -- the names suggest this, nothing enforces it).
# hidden_state is sampled before cell_state, so the random stream is consumed
# in a fixed order.
hidden_state = tf.constant(
    [[random.random() for _col in range(3)] for _row in range(7)],
    dtype=tf.float32,
)
cell_state = tf.constant(
    [[random.random() for _col in range(3)] for _row in range(7)],
    dtype=tf.float32,
)

# Joining the two matrices along the last axis gives a single (7, 6) matrix.
print(tf.concat([hidden_state, cell_state], axis=-1).shape)

(7, 6)


In [4]:
n = 4

# Insert a new middle axis into the concatenated (7, 6) state matrix and tile
# it n times along that axis: (7, 6) -> (7, 1, 6) -> (7, n, 6).
# Every one of the n slices along axis 1 is an identical copy.
hs = tf.tile(
    tf.expand_dims(tf.concat([hidden_state, cell_state], axis=-1), axis=1),
    [1, n, 1],
)
print(hs.shape)

(7, 4, 6)


- tf.keras.layers.Dense: on a 3-D input it transforms only the last axis, so `(7, 4, d)` becomes `(7, 4, T)`.

In [5]:
T = 6

# A fresh Dense layer with T output units, applied to the 3-D tensor hs.
# Only the last axis is transformed; the recorded shape stays (7, 4, 6) here
# because T happens to equal the size of hs's last axis.
print(Dense(units=T)(hs).shape)

(7, 4, 6)


In [6]:
# One random 4-vector. It is reused (not re-sampled) for every row below, so
# the nested lists hold 5 x 7 references to the same four numbers.
ele01 = [random.random() for _ in range(4)]
ele02 = [ele01] * 5
ele03 = [ele02] * 7

# X has shape (7, 5, 4); every X[i, j, :] equals ele01.
X = tf.constant(ele03, dtype=tf.float32)
print(X.shape)
print()

# Permute swaps axes 1 and 2 (axis 0 is left alone): (7, 5, 4) -> (7, 4, 5).
X_tr = Permute(dims=(2, 1))(X)
print(X_tr.shape)

(7, 5, 4)

(7, 4, 5)


In [7]:
# Swap axes 1 and 2 of X, then map the last axis (size 5) to T units with a
# newly created Dense layer: (7, 5, 4) -> (7, 4, 5) -> (7, 4, T).
ux = Dense(units=T)(Permute(dims=(2, 1))(X))
print(ux.shape)

(7, 4, 6)


In [8]:
# Show the first vector of each tensor (index 0 on axes 0 and 1) so the
# combined results in later cells can be checked by eye.
print(hs[0, 0])
print(ux[0, 0])

tf.Tensor([0.6394268  0.02501076 0.27502933 0.69813937 0.34025052 0.1554795 ], shape=(6,), dtype=float32)
tf.Tensor([0.2069508  0.25804803 0.08218247 0.03953016 0.08098594 0.36927783], shape=(6,), dtype=float32)


### Combining `hs` and `ux` with tf.keras.layers.Add

- tf.math.tanh

- tf.keras.layers.Activation

In [9]:
# Element-wise sum of the two equally shaped (7, 4, 6) tensors via the Keras
# Add layer.
temp_add = Add()([hs, ux])
print(temp_add.shape)

# First vector of the sum; it should equal hs[0, 0] + ux[0, 0].
print(temp_add[0, 0])

(7, 4, 6)
tf.Tensor([0.8463776  0.2830588  0.3572118  0.7376695  0.42123646 0.5247573 ], shape=(6,), dtype=float32)


In [10]:
# Variant 1: apply tanh with the low-level math op. The shape is unchanged.
tanh_math_add = tf.math.tanh(x=temp_add)
print(tanh_math_add.shape)

(7, 4, 6)


In [11]:
# Variant 2: apply tanh through a Keras Activation layer. The shape is unchanged.
tanh_act_add = Activation('tanh')(temp_add)
print(tanh_act_add.shape)

(7, 4, 6)


In [12]:
# Element-wise gap between the two tanh variants.
diff_tanh_add = tanh_math_add - tanh_act_add

# Report the total *absolute* difference. Summing the signed differences would
# let positive and negative errors cancel, so unequal tensors could still
# print 0.0. tf.reduce_sum also reduces over all axes in one op, instead of
# iterating over tensor slices with Python's built-in sum().
print(tf.reduce_sum(tf.abs(diff_tanh_add)))

tf.Tensor(0.0, shape=(), dtype=float32)


In [13]:
# Collapse the last axis to a single score per position with a new one-unit
# Dense layer: (7, 4, 6) -> (7, 4, 1).
e_add = Dense(units=1)(tanh_act_add)

# Drop the singleton axis for display, then show the full shape.
print(e_add[..., 0])
print(e_add.shape)

tf.Tensor(
[[-0.2160963  -0.22383213 -0.19618845 -0.21672496]
 [-0.5211074  -0.52407795 -0.5133451  -0.5213541 ]
 [-0.1739349  -0.17831427 -0.16177064 -0.1742926 ]
 [-0.6122389  -0.60170215 -0.6350926  -0.6114142 ]
 [-0.73237145 -0.71866274 -0.76282454 -0.73129416]
 [-0.5157484  -0.5086944  -0.53409374 -0.5151692 ]
 [-0.46531782 -0.47022793 -0.45225328 -0.46572426]], shape=(7, 4), dtype=float32)
(7, 4, 1)


In [14]:
# Softmax normalises over the last axis (axis=-1 is the layer default). Here
# that axis has length 1, so each score is normalised against itself and
# every output is exactly 1 -- not a useful attention distribution.
attn_add = Softmax(axis=-1)(e_add)
print(attn_add[..., 0])
print(attn_add.shape)

tf.Tensor(
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]], shape=(7, 4), dtype=float32)
(7, 4, 1)


In [15]:
# Move the 4 scores onto the last axis first, (7, 4, 1) -> (7, 1, 4), so that
# softmax normalises across them; each row now sums to 1.
attn_add = Softmax(axis=-1)(Permute(dims=(2, 1))(e_add))
print(attn_add[:, 0])
print(attn_add.shape)

tf.Tensor(
[[0.24926639 0.24734554 0.25427845 0.24910972]
 [0.2497141  0.24897341 0.25166    0.2496525 ]
 [0.24953145 0.24844104 0.25258535 0.2494422 ]
 [0.25070053 0.25335607 0.24503607 0.2509074 ]
 [0.25094816 0.25441203 0.24342121 0.25121865]
 [0.25065923 0.25243363 0.24610272 0.25080445]
 [0.24951059 0.24828847 0.2527917  0.24940917]], shape=(7, 4), dtype=float32)
(7, 1, 4)


### Combining `hs` and `ux` with tf.concat

- tf.math.tanh

- tf.keras.layers.Activation

In [16]:
# Join hs and ux along the last axis instead of adding them:
# (7, 4, 6) + (7, 4, 6) -> (7, 4, 12).
temp_concat = tf.concat(values=[hs, ux], axis=-1)
print(temp_concat.shape)

# First vector: the 6 values of hs[0, 0] followed by the 6 values of ux[0, 0].
print(temp_concat[0, 0])

(7, 4, 12)
tf.Tensor(
[0.6394268  0.02501076 0.27502933 0.69813937 0.34025052 0.1554795
 0.2069508  0.25804803 0.08218247 0.03953016 0.08098594 0.36927783], shape=(12,), dtype=float32)


In [17]:
# Variant 1: apply tanh with the low-level math op. The shape is unchanged.
tanh_math_concat = tf.math.tanh(x=temp_concat)
print(tanh_math_concat.shape)

(7, 4, 12)


In [18]:
# Variant 2: apply tanh through a Keras Activation layer. The shape is unchanged.
tanh_act_concat = Activation('tanh')(temp_concat)
print(tanh_act_concat.shape)

(7, 4, 12)


In [19]:
# Element-wise gap between the two tanh variants.
diff_tanh = tanh_math_concat - tanh_act_concat

# Report the total *absolute* difference. Summing the signed differences would
# let positive and negative errors cancel, so unequal tensors could still
# print 0.0. tf.reduce_sum also reduces over all axes in one op, instead of
# iterating over tensor slices with Python's built-in sum().
print(tf.reduce_sum(tf.abs(diff_tanh)))

tf.Tensor(0.0, shape=(), dtype=float32)


In [20]:
# Collapse the 12 concatenated features to a single score per position with a
# new one-unit Dense layer: (7, 4, 12) -> (7, 4, 1).
e_act = Dense(units=1)(tanh_act_concat)

# Drop the singleton axis for display, then show the full shape.
print(e_act[..., 0])
print(e_act.shape)

tf.Tensor(
[[ 0.07945507  0.08792291  0.0620885   0.08009797]
 [-0.13703339 -0.12856555 -0.15439996 -0.1363905 ]
 [ 0.02160849  0.03007633  0.00424191  0.02225139]
 [-0.23938704 -0.2309192  -0.25675362 -0.23874414]
 [-0.15980005 -0.15133223 -0.17716664 -0.15915717]
 [-0.0825969  -0.07412907 -0.09996349 -0.08195402]
 [ 0.09326386  0.1017317   0.07589729  0.09390676]], shape=(7, 4), dtype=float32)
(7, 4, 1)


In [21]:
# Softmax normalises over the last axis (axis=-1 is the layer default). Here
# that axis has length 1, so each score is normalised against itself and
# every output is exactly 1 -- not a useful attention distribution.
attn_input_act = Softmax(axis=-1)(e_act)
print(attn_input_act[..., 0])
print(attn_input_act.shape)

tf.Tensor(
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]], shape=(7, 4), dtype=float32)
(7, 4, 1)


In [22]:
# Move the 4 scores onto the last axis first, (7, 4, 1) -> (7, 1, 4), so that
# softmax normalises across them.
# NOTE: all 7 rows come out identical. hs repeats the same vector along axis 1
# and X repeats one vector for every sample, so after the linear Dense(1) over
# the concatenated features the scores of different samples differ only by a
# per-sample constant, which softmax cancels.
attn_input_act = Softmax(axis=-1)(Permute(dims=(2, 1))(e_act))
print(attn_input_act[:, 0])
print(attn_input_act.shape)

tf.Tensor(
[[0.2505054  0.25263563 0.24619251 0.25066647]
 [0.2505054  0.25263563 0.24619251 0.25066647]
 [0.2505054  0.25263563 0.24619251 0.25066647]
 [0.2505054  0.25263563 0.24619251 0.25066647]
 [0.2505054  0.25263563 0.24619251 0.25066647]
 [0.2505054  0.25263563 0.24619251 0.25066647]
 [0.2505054  0.25263563 0.24619251 0.25066647]], shape=(7, 4), dtype=float32)
(7, 1, 4)
