In [4]:
import tensorflow as tf
from tensorflow import convert_to_tensor, string
from tensorflow.keras.layers import TextVectorization, Embedding, Layer
from tensorflow.data import Dataset
import numpy as np
import matplotlib.pyplot as plt

In [21]:
# Toy corpus: two short sentences, one string per row.
sentences = [["I am a robot"], ["you too robot"]]

# Vectorizer settings: upper bound on vocabulary size and the fixed number
# of token ids emitted per sentence.
vocab_size = 10
output_sequence_length = 5

# Build the vectorizer and learn its vocabulary from the corpus.
sentence_data = Dataset.from_tensor_slices(sentences)
vectorize_layer = TextVectorization(
    max_tokens=vocab_size,
    output_sequence_length=output_sequence_length,
)
vectorize_layer.adapt(sentence_data)

# Turn each sentence into a row of integer token ids; sentences shorter than
# output_sequence_length are right-padded with 0 (see the printed tensor).
word_tensors = convert_to_tensor(sentences, dtype=tf.string)
vectorized_words = vectorize_layer(word_tensors)

print(vectorize_layer.get_vocabulary())
print(vectorized_words)

['', '[UNK]', 'robot', 'you', 'too', 'i', 'am', 'a']
tf.Tensor(
[[5 6 7 2 0]
 [3 4 2 0 0]], shape=(2, 5), dtype=int64)


In [22]:
# Width of every embedding vector produced below.
output_length = 6

# Lookup table with vocab_size rows, each a vector of length output_length.
# Weights are randomly initialised, so the printed values differ between runs.
word_embedding_layer = Embedding(input_dim=vocab_size, output_dim=output_length)

# Replace every token id with its embedding vector.
embedded_words = word_embedding_layer(vectorized_words)
print(embedded_words)

tf.Tensor(
[[[-0.00743676  0.00580027 -0.03863354 -0.01150661 -0.00587779
    0.01994712]
  [-0.02479414  0.03879354  0.00335553  0.04718759  0.00511967
   -0.03202271]
  [-0.00978108 -0.00172647 -0.02296423  0.01573963 -0.04328272
    0.01426512]
  [-0.00612276  0.00890882  0.03838393 -0.01087879  0.04461842
    0.04233933]
  [ 0.02857304 -0.03869771  0.03484816  0.00633516 -0.01976827
    0.04625731]]

 [[ 0.0309825   0.02079538 -0.00659797  0.04120417 -0.04497439
    0.0410295 ]
  [-0.01375675 -0.03537474 -0.01551646  0.03660912 -0.03467355
    0.01793889]
  [-0.00612276  0.00890882  0.03838393 -0.01087879  0.04461842
    0.04233933]
  [ 0.02857304 -0.03869771  0.03484816  0.00633516 -0.01976827
    0.04625731]
  [ 0.02857304 -0.03869771  0.03484816  0.00633516 -0.01976827
    0.04625731]]], shape=(2, 5, 6), dtype=float32)


In [26]:
# One embedding row per sequence position, with the same width as the word
# embeddings so the two can be added element-wise.
position_embedding_layer = Embedding(
    input_dim=output_sequence_length,
    output_dim=output_length,
)

# Position ids 0 .. output_sequence_length - 1.
position_indices = tf.range(output_sequence_length)
print(position_indices)

# Look up the embedding vector for each position id.
embedded_indices = position_embedding_layer(position_indices)
print(embedded_indices)

tf.Tensor([0 1 2 3 4], shape=(5,), dtype=int32)
tf.Tensor(
[[ 0.00324764  0.00335218  0.01955496 -0.02756544  0.01382935 -0.03779992]
 [ 0.0163407  -0.01837867  0.01326715  0.04866434  0.0496294   0.04409998]
 [-0.03838681  0.01735834 -0.0096382  -0.01564624 -0.01338614 -0.03904953]
 [ 0.00388866  0.00323056  0.02113147  0.00269967 -0.00929653  0.007836  ]
 [-0.02654176  0.02961044 -0.0427573   0.02799765 -0.03256803 -0.00012803]], shape=(5, 6), dtype=float32)


In [27]:
# Combine word and position information: the position table is broadcast
# across the leading (sentence) axis and added to every sentence's embeddings.
final_output_embedding = tf.add(embedded_words, embedded_indices)
print("Final output: ", final_output_embedding)

Final output:  tf.Tensor(
[[[-4.1891225e-03  9.1524459e-03 -1.9078575e-02 -3.9072052e-02
    7.9515614e-03 -1.7852794e-02]
  [-8.4534381e-03  2.0414863e-02  1.6622674e-02  9.5851928e-02
    5.4749072e-02  1.2077261e-02]
  [-4.8167896e-02  1.5631866e-02 -3.2602429e-02  9.3389302e-05
   -5.6668866e-02 -2.4784405e-02]
  [-2.2341013e-03  1.2139380e-02  5.9515402e-02 -8.1791170e-03
    3.5321891e-02  5.0175324e-02]
  [ 2.0312816e-03 -9.0872645e-03 -7.9091415e-03  3.4332808e-02
   -5.2336298e-02  4.6129275e-02]]

 [[ 3.4230139e-02  2.4147555e-02  1.2956988e-02  1.3638722e-02
   -3.1145032e-02  3.2295845e-03]
  [ 2.5839470e-03 -5.3753413e-02 -2.2493117e-03  8.5273452e-02
    1.4955852e-02  6.2038861e-02]
  [-4.4509567e-02  2.6267156e-02  2.8745733e-02 -2.6525032e-02
    3.1232275e-02  3.2898001e-03]
  [ 3.2461695e-02 -3.5467148e-02  5.5979624e-02  9.0348348e-03
   -2.9064799e-02  5.4093301e-02]
  [ 2.0312816e-03 -9.0872645e-03 -7.9091415e-03  3.4332808e-02
   -5.2336298e-02  4.6129275e-02]]],