In [1]:
import tensorflow as tf

In [2]:
# Toy hyper-parameters, kept tiny so every tensor shape printed below is easy to read.
batch_size = 3       # sentences per batch
max_length = 6       # tokens (timesteps) per sentence
vocab_size = 100     # number of distinct token ids
embedding_dim = 3    # width of each token embedding
gru_units = 10       # GRU hidden size, shared by encoder and decoder
dense_units = 5      # width of the attention projection layers

In [3]:
# Encoder input batch of token ids, one row per sentence.
# These ids are fixed data, not trainable state, so they are stored in an
# immutable tf.constant rather than a tf.Variable.
inputs = tf.constant([
    [1, 3, 4, 10, 54, 2],   # eg: <start> this is a bag <end>
    [1, 30, 4, 10, 76, 2],  # eg: <start> he is a boy <end>
    [1, 3, 4, 10, 34, 2],   # eg: <start> this is a bird <end>
])

In [4]:
# Show the raw token ids as a plain NumPy array.
token_ids = inputs.numpy()
print(f"Inputs: \n{token_ids}")

Inputs: 
[[ 1  3  4 10 54  2]
 [ 1 30  4 10 76  2]
 [ 1  3  4 10 34  2]]


In [5]:
# The token-id batch is laid out as (batch_size, max_length).
input_shape = inputs.shape
print(f"Input Shape: {input_shape}")

Input Shape: (3, 6)


In [6]:
# Encoder embedding: looks up an embedding_dim-wide vector for every token id.
enc_embedding = tf.keras.layers.Embedding(
    input_dim=vocab_size,
    output_dim=embedding_dim,
)
x = enc_embedding(inputs)

In [7]:
# Dump the embedded batch: one embedding_dim-wide row per token.
print(
    f"Embeddings: \n{x}"
)

Embeddings: 
[[[-0.00891199  0.02697823 -0.02869316]
  [-0.01130106  0.01705582 -0.01922449]
  [ 0.0285736   0.01118445 -0.04263396]
  [ 0.00814123  0.02765713 -0.04640173]
  [-0.0217955   0.04247181  0.02000103]
  [ 0.04501773  0.04759785 -0.04773393]]

 [[-0.00891199  0.02697823 -0.02869316]
  [-0.02386779  0.00910946 -0.00812159]
  [ 0.0285736   0.01118445 -0.04263396]
  [ 0.00814123  0.02765713 -0.04640173]
  [-0.00733377 -0.00835412 -0.01835717]
  [ 0.04501773  0.04759785 -0.04773393]]

 [[-0.00891199  0.02697823 -0.02869316]
  [-0.01130106  0.01705582 -0.01922449]
  [ 0.0285736   0.01118445 -0.04263396]
  [ 0.00814123  0.02765713 -0.04640173]
  [ 0.01415043  0.03779215  0.00729457]
  [ 0.04501773  0.04759785 -0.04773393]]]


In [8]:
# After the lookup every token id has become a vector, giving
# (batch_size, max_length, embedding_dim).
embedding_shape = x.shape
print(f"Embedding Shape: {embedding_shape}")

Embedding Shape: (3, 6, 3)


In [9]:
# Encoder recurrent layer.
enc_gru = tf.keras.layers.GRU(
    units=gru_units,
    recurrent_initializer='glorot_uniform',
    return_state=True,       # also hand back the final hidden state
    return_sequences=True,   # emit an output for every timestep, not just the last
)

In [10]:
# Run the encoder over the embedded batch.
#   enc_output: (batch_size, max_length, gru_units) - one vector per timestep
#   enc_hidden: (batch_size, gru_units)             - the state after the last timestep
enc_output, enc_hidden = enc_gru(x)

In [11]:
# Report both encoder results: the per-timestep outputs (used later as the
# attention values) and the final state.
enc_output_shape = enc_output.shape
enc_hidden_shape = enc_hidden.shape
print(f"Output of Encoder or Values: {enc_output_shape}")
print(f"State of Encoder: {enc_hidden_shape}")

Output of Encoder or Values: (3, 6, 10)
State of Encoder: (3, 10)


In [12]:
# Attention "values" are simply the per-timestep encoder outputs.
values = enc_output

In [13]:
# The encoder's final state serves as the attention query:
# (batch_size, gru_units).
query = enc_hidden
query_shape = query.shape
print(f"Query Shape: {query_shape}")

Query Shape: (3, 10)


In [14]:
# Insert a length-1 time axis so the query can broadcast against the
# per-timestep values: (batch_size, gru_units) -> (batch_size, 1, gru_units).
query_with_time_axis = tf.expand_dims(query, axis=1)
print(
    f"Query with Time Axis Shape: {query_with_time_axis.shape}"
)

Query with Time Axis Shape: (3, 1, 10)


In [15]:
# Attention layers: two projections into a shared dense_units-wide space,
# plus V, which reduces that space to a single score per timestep.
query_layer = tf.keras.layers.Dense(units=dense_units)
value_layer = tf.keras.layers.Dense(units=dense_units)
V = tf.keras.layers.Dense(units=1)

In [16]:
# Project the query into the attention space:
# (batch_size, 1, gru_units) -> (batch_size, 1, dense_units).
intermediate_query = query_layer(query_with_time_axis)
intermediate_query_shape = intermediate_query.shape
print(f"Intermediate Query Shape: {intermediate_query_shape}")

Intermediate Query Shape: (3, 1, 5)


In [17]:
# Project every encoder timestep into the attention space:
# (batch_size, max_length, gru_units) -> (batch_size, max_length, dense_units).
intermediate_values = value_layer(values)
intermediate_values_shape = intermediate_values.shape
print(f"Intermediate Values Shape: {intermediate_values_shape}")

Intermediate Values Shape: (3, 6, 5)


In [18]:
# Additive step before V is applied. The (batch_size, 1, dense_units) query
# broadcasts over the time axis of the projected values, so the result is
# (batch_size, max_length, dense_units).
summed_projections = tf.add(intermediate_query, intermediate_values)
without_v = tf.math.tanh(summed_projections)
print(f"Without V layer: {without_v.shape}")

Without V layer: (3, 6, 5)


In [19]:
# Score every encoder timestep: V collapses the dense_units axis to a single
# value, so score has shape (batch_size, max_length, 1).
# without_v already holds tanh(query_layer(query) + value_layer(values)), so it
# is reused here instead of running both Dense projections a second time.
score = V(without_v)
print(f"Score Shape: {score.shape}")

Score Shape: (3, 6, 1)


In [20]:
# Normalise the scores along axis 1 (the max_length axis) so the weights of
# each sentence sum to 1 across its timesteps.
attention_weights = tf.nn.softmax(logits=score, axis=1)
print(
    f"Attention Weights Shape: {attention_weights.shape}"
)

Attention Weights Shape: (3, 6, 1)


In [21]:
# Weight each encoder output by its attention weight (the trailing size-1 axis
# of the weights broadcasts over gru_units), then sum over the time axis to
# get one (batch_size, gru_units) context vector per sentence.
weighted_values = attention_weights * values
context_vector = tf.reduce_sum(weighted_values, axis=1)
print(f"Context Vector Shape: {context_vector.shape}")

Context Vector Shape: (3, 10)


In [22]:
# Decoder-side embedding table, same sizes as the encoder's but a separate layer.
dec_embedding = tf.keras.layers.Embedding(
    input_dim=vocab_size,
    output_dim=embedding_dim,
)

In [23]:
# Decoder recurrent layer, configured exactly like the encoder GRU.
dec_gru = tf.keras.layers.GRU(
    units=gru_units,
    recurrent_initializer='glorot_uniform',
    return_state=True,       # also hand back the final hidden state
    return_sequences=True,   # emit an output for every timestep
)


In [24]:
# Output projection of the decoder: one value per vocabulary entry.
fc = tf.keras.layers.Dense(units=vocab_size)

In [25]:
 # Per-element cross-entropy on raw logits. reduction='none' keeps one loss
 # value per target so loss_function can mask padding before reducing.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True,
    reduction='none',
)

In [26]:
# Loss Function
def loss_function(real, pred):
    """Masked sparse categorical cross-entropy, averaged over real tokens only.

    Targets equal to 0 are treated as padding: their loss is zeroed out and
    they are excluded from the denominator. Dividing by the number of
    non-padding targets (instead of taking a plain mean over every position)
    keeps the loss from shrinking as the amount of padding in a batch grows.

    Args:
        real: Integer tensor of target token ids; id 0 marks padding.
        pred: Logits for those targets, passed to ``loss_object`` together
            with ``real``.

    Returns:
        Scalar tensor: the summed unmasked loss divided by the number of
        non-padding targets, or 0 when every target is padding.
    """
    per_token_loss = loss_object(real, pred)
    # 1.0 where the target is a real token, 0.0 where it is padding.
    mask = tf.cast(tf.math.not_equal(real, 0), dtype=per_token_loss.dtype)
    per_token_loss *= mask
    # divide_no_nan returns 0 instead of NaN for an all-padding batch.
    return tf.math.divide_no_nan(
        tf.reduce_sum(per_token_loss), tf.reduce_sum(mask)
    )

In [27]:
# Decoder Inputs
# Target-side batch of token ids, one row per sentence. These ids are fixed
# data, not trainable state, so they are stored in an immutable tf.constant
# rather than a tf.Variable.
decoder_inputs = tf.constant([
    [1, 4, 2],   # eg. <start> adfk <end>
    [1, 40, 2],  # eg. <start> rtrtt <end>
    [1, 80, 2],  # eg. <start> jadskf <end>
])


In [28]:
# Walk through the decoder one timestep at a time, printing the shape of every
# intermediate tensor and the masked loss for that step.

# The context vector is identical for every step here, so its time axis is
# added once, outside the loop: (batch_size, hidden_size) -> (batch_size, 1, hidden_size).
context_with_time_axis = tf.expand_dims(context_vector, 1)

# The number of steps comes from the data instead of a hard-coded 3.
for i in range(decoder_inputs.shape[1]):
    print("\n\n\n")
    # Column i of the batch, with a time axis: (batch_size, 1)
    single_dec_input = tf.expand_dims(decoder_inputs[:, i], 1)
    print(f"Decoder Input Shape {single_dec_input.shape}")
    # embedded shape == (batch_size, 1, embedding_dim)
    dec_embedded = dec_embedding(single_dec_input)
    print(f"Decoder Embedding Shape: {dec_embedded.shape}")
    # GRU input shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
    dec_gru_input = tf.concat([context_with_time_axis, dec_embedded], axis=-1)
    print(f"After Context Vector Concatenation Shape: {dec_gru_input.shape}")
    # passing the concatenated vector to the GRU
    # NOTE(review): no initial_state is passed, so `state` from the previous
    # step is never fed back in - confirm this is intended for the demo.
    output, state = dec_gru(dec_gru_input)
    print(f"Decoder GRU Output Shape: {output.shape}")
    # output shape == (batch_size * 1, hidden_size)
    output = tf.reshape(output, (-1, output.shape[2]))
    print(f"Decoder GRU Output Shape Reshape: {output.shape}")
    # logits shape == (batch_size, vocab)
    y = fc(output)
    print(f"Fully Connected Output: {y.shape}")
    # NOTE(review): the target is the same token that was just fed in as
    # input; teacher forcing would normally score against the *next* token
    # (decoder_inputs[:, i + 1]) - confirm before reusing this as training code.
    loss = loss_function(real=decoder_inputs[:, i], pred=y)
    print(loss)






Decoder Input Shape (3, 1)
Decoder Embedding Shape: (3, 1, 3)
After Context Vector Concatenation Shape: (3, 1, 13)
Decoder GRU Output Shape: (3, 1, 10)
Decoder GRU Output Shape Reshape: (3, 10)
Fully Connected Output: (3, 100)
tf.Tensor(4.6061683, shape=(), dtype=float32)




Decoder Input Shape (3, 1)
Decoder Embedding Shape: (3, 1, 3)
After Context Vector Concatenation Shape: (3, 1, 13)
Decoder GRU Output Shape: (3, 1, 10)
Decoder GRU Output Shape Reshape: (3, 10)
Fully Connected Output: (3, 100)
tf.Tensor(4.6030946, shape=(), dtype=float32)




Decoder Input Shape (3, 1)
Decoder Embedding Shape: (3, 1, 3)
After Context Vector Concatenation Shape: (3, 1, 13)
Decoder GRU Output Shape: (3, 1, 10)
Decoder GRU Output Shape Reshape: (3, 10)
Fully Connected Output: (3, 100)
tf.Tensor(4.6090603, shape=(), dtype=float32)
