In [50]:
import tensorflow as tf

# Embedding layer that treats token id 0 as padding (mask_zero=True).
embedding = tf.keras.layers.Embedding(10, 4, mask_zero=True)

# Two padded input sequences; the zeros are padding.
x = tf.constant([
    [1, 2, 0],
    [4, 0, 0],
])

# Look up the embedding vector for every position.
y = embedding(x)

# Boolean mask the layer derives from the inputs: False where the id is 0.
mask = embedding.compute_mask(x)

print("Embedding output:\n", y)
print("Mask:\n", mask)


Embedding output:
 tf.Tensor(
[[[ 0.00588411  0.03575898  0.02621868 -0.00163871]
  [ 0.03707338 -0.0178108  -0.02469549  0.0007063 ]
  [-0.03425833  0.04134751 -0.02473924 -0.03756804]]

 [[ 0.04286656 -0.00566039  0.02738074  0.04398335]
  [-0.03425833  0.04134751 -0.02473924 -0.03756804]
  [-0.03425833  0.04134751 -0.02473924 -0.03756804]]], shape=(2, 3, 4), dtype=float32)
Mask:
 tf.Tensor(
[[ True  True False]
 [ True False False]], shape=(2, 3), dtype=bool)


In [3]:
import numpy as np

In [2]:
# Random integer batch: 32 rows of 10 values drawn from [0, 1000).
input_array = np.random.randint(low=0, high=1000, size=(32, 10))

In [3]:
# Inspect the dimensions of the random sample array.
input_array.shape

(32, 10)

In [None]:
class Encoder(tf.keras.layers.Layer):
  """Embeds token ids and runs a bidirectional GRU over the embeddings.

  Args:
    text_processor: Object exposing `vocabulary_size()`; it is also called on
      text tensors in `convert_input`, and its result must support
      `.to_tensor()` (presumably a ragged-output text vectorizer — confirm
      against the caller).
    units: Width of the embedding vectors and of the GRU output.
  """

  def __init__(self, text_processor, units):
    super().__init__()
    self.text_processor = text_processor
    self.vocab_size = text_processor.vocabulary_size()
    self.units = units

    # Token ids -> vectors; id 0 is masked as padding.
    # Output: (batch_size, sequence_length, units)
    self.embedding = tf.keras.layers.Embedding(
        self.vocab_size, units, mask_zero=True)

    # GRU that returns the output at every time step (not the final state).
    recurrent_layer = tf.keras.layers.GRU(
        units,
        return_sequences=True,
        recurrent_initializer='glorot_uniform')

    # Run the GRU in both directions and sum the two outputs, so the result
    # stays (batch_size, sequence_length, units).
    self.rnn = tf.keras.layers.Bidirectional(
        layer=recurrent_layer, merge_mode='sum')

  def call(self, x):
    """Encode a batch of token ids.

    Args:
      x: Token ids, checked against the 'batch s' shape pattern.

    Returns:
      The encoded sequence, checked against 'batch s units'.
    """
    shape_checker = ShapeChecker()
    shape_checker(x, 'batch s')

    # 1. Look up the embedding vector for each token.
    embedded = self.embedding(x)
    shape_checker(embedded, 'batch s units')

    # 2. Process the sequence of embeddings with the bidirectional GRU.
    encoded = self.rnn(embedded)
    shape_checker(encoded, 'batch s units')

    # 3. Return the new sequence of vectors.
    return encoded

  def convert_input(self, texts):
    """Tokenize raw text with `text_processor` and encode it.

    A scalar (rank-0) input is promoted to a batch of one before processing.
    """
    texts = tf.convert_to_tensor(texts)
    if len(texts.shape) == 0:
      # Single string: add a leading batch axis.
      texts = texts[tf.newaxis]
    token_ids = self.text_processor(texts).to_tensor()
    return self(token_ids)

In [1]:
import keras

In [32]:
inputs = np.random.random((32, 10, 8))

# Default GRU: only the output of the last time step is returned.
gru = keras.layers.GRU(4)
output = gru(inputs)
# A bare `output.shape` in the middle of a cell is never displayed, so check
# the shape explicitly instead of leaving a statement with no effect.
assert tuple(output.shape) == (32, 4)

# return_sequences=True yields the output at every time step;
# return_state=True additionally returns the final hidden state.
gru = keras.layers.GRU(4, return_sequences=True, return_state=True)
whole_sequence_output, final_state = gru(inputs)
print(whole_sequence_output.shape)

print(final_state.shape)

(32, 10, 4)
(32, 4)


In [33]:
# Display the final-state tensor (one vector per batch item).
final_state

<tf.Tensor: shape=(32, 4), dtype=float32, numpy=
array([[-2.50189692e-01, -1.42646313e-01, -2.04883680e-01,
         2.17535898e-01],
       [-5.22055998e-02,  3.63209128e-01, -1.01059161e-01,
        -2.96064526e-01],
       [-2.86225230e-01, -4.47082594e-02, -3.88183057e-01,
         1.52176365e-01],
       [-1.98580295e-01, -1.53331250e-01, -3.66241395e-01,
        -5.94613180e-02],
       [-7.92350471e-02,  1.17571697e-01, -1.73683777e-01,
        -7.91808888e-02],
       [-3.83977324e-01,  3.76549006e-01, -6.63885325e-02,
        -1.36582270e-01],
       [-1.15106180e-01,  8.73840451e-02, -3.21828425e-01,
        -2.57591426e-01],
       [-3.01236719e-01, -3.44546884e-02, -2.71671891e-01,
         4.44086455e-02],
       [ 2.60451213e-02,  1.97499618e-01, -2.33257830e-01,
        -2.57448584e-01],
       [-4.18493971e-02, -5.08257188e-02, -2.87322909e-01,
         1.80913582e-01],
       [-3.65964115e-01,  1.50516391e-01, -3.20583999e-01,
        -1.31729782e-01],
       [-3.09436

In [35]:
# For sample 0, compare the sequence output at the last time step (index 9 of
# 10) element-wise with the final state.
whole_sequence_output.numpy()[0, 9, :] == final_state.numpy()[0]

array([ True,  True,  True,  True])

In [37]:
# Bidirectional GRU whose forward and backward sequence outputs are summed.
# return_state=True on the inner GRU makes the wrapper return states as well.
a = keras.layers.Bidirectional(
    layer=keras.layers.GRU(
        4,
        return_sequences=True,
        return_state=True,
        recurrent_initializer='glorot_uniform',
    ),
    merge_mode='sum',
)

In [None]:
# First returned value is the sequence output; the starred target collects
# every remaining returned tensor into a list.
whole_sequence_output, *final_state = a(inputs)

In [39]:
# Shape of the sequence output returned by the bidirectional layer.
whole_sequence_output.shape

TensorShape([32, 10, 4])

In [40]:
# Number of state tensors captured by the starred unpacking.
len(final_state)

2

In [42]:
# Shape of the first returned state tensor.
final_state[0].shape

TensorShape([32, 4])

In [43]:
# Shape of the second returned state tensor.
final_state[1].shape

TensorShape([32, 4])

In [44]:
# First GRU: run over the inputs and keep its final state.
gru1 = keras.layers.GRU(4, return_state=True, return_sequences=True)
whole_sequence_output1, final_state1 = gru1(inputs)

# Second GRU: started from the first GRU's final state.
gru2 = keras.layers.GRU(4, return_state=True, return_sequences=True)
whole_sequence_output2, final_state2 = gru2(
    inputs,
    initial_state=final_state1,
)


In [45]:
# First bidirectional GRU; the two directions' outputs are summed.
bi_gru1 = keras.layers.Bidirectional(
    layer=keras.layers.GRU(4, return_state=True, return_sequences=True),
    merge_mode='sum',
)
whole_sequence_output1, final_state_fwd1, final_state_bwd1 = bi_gru1(inputs)

# Second bidirectional GRU (uses the states of the previous one).
bi_gru2 = keras.layers.Bidirectional(
    layer=keras.layers.GRU(4, return_state=True, return_sequences=True),
    merge_mode='sum',
)

# Pass the previous layer's two final states in as the initial states.
whole_sequence_output2, final_state_fwd2, final_state_bwd2 = bi_gru2(
    inputs, initial_state=[final_state_fwd1, final_state_bwd1]
)


In [47]:
# Show the installed Keras version.
keras.__version__

'3.6.0'

In [48]:
import tensorflow as tf

In [49]:
# Show the installed TensorFlow version.
tf.__version__

'2.17.0'

In [55]:
import tensorflow as tf

# Inputs
query = tf.random.normal((64, 10, 512))  # (batch_size, query_length, d_model)
key = tf.random.normal((64, 15, 512))    # (batch_size, key_length, d_model)
value = tf.random.normal((64, 15, 512))  # (batch_size, value_length, d_model)

# Multi-head attention; also request the attention scores.
mha = tf.keras.layers.MultiHeadAttention(key_dim=64, num_heads=8)
output, attention_scores = mha(
    query=query,
    key=key,
    value=value,
    return_attention_scores=True,
)

print("Output shape:", output.shape)  # (64, 10, 512)
print("Attention scores shape:", attention_scores.shape)  # (64, 8, 10, 15)


Output shape: (64, 10, 512)
Attention scores shape: (64, 8, 10, 15)
