In [1]:
# import all the necessary libraries
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
sentence = ['the glass of milk',
     'the glass of juice',
     'the cup of tea',
    'I am a good boy',
     'I am a good developer',
     'understand the meaning of words',
     'your videos are good',]

In [3]:
print(sentence)

['the glass of milk', 'the glass of juice', 'the cup of tea', 'I am a good boy', 'I am a good developer', 'understand the meaning of words', 'your videos are good']


In [4]:
# Vocabulary size
voc_size = 10000

# One Hot Representation of Words

It provides the index position of all the words present in the dictionary

In [5]:
onehot_representation = [ one_hot(words,voc_size) for words in sentence]
print(onehot_representation)

[[1476, 432, 6521, 9579], [1476, 432, 6521, 3505], [1476, 8880, 6521, 1741], [4804, 9616, 9623, 946, 4779], [4804, 9616, 9623, 946, 4669], [3742, 1476, 1393, 6521, 4321], [9238, 9889, 2272, 946]]


# Word Embedding Representation

In [6]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [7]:
# To create an Embedding matrix from the onehot representation of the words
sent_length = 8
embedding_sequence = pad_sequences(onehot_representation, padding = 'pre', maxlen = sent_length)
print(embedding_sequence)

[[   0    0    0    0 1476  432 6521 9579]
 [   0    0    0    0 1476  432 6521 3505]
 [   0    0    0    0 1476 8880 6521 1741]
 [   0    0    0 4804 9616 9623  946 4779]
 [   0    0    0 4804 9616 9623  946 4669]
 [   0    0    0 3742 1476 1393 6521 4321]
 [   0    0    0    0 9238 9889 2272  946]]


In [8]:
dim = 10 

In [9]:
# pass the input into the embedding layer
model = Sequential()
# add the embedding layer
model.add(Embedding(voc_size, 10, input_length = sent_length))
# compile the model with 'adam' optimizer
model.compile('adam','mse')

In [10]:
# summary of the model
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             100000    
                                                                 
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Get the vectorized representation of the words
print(model.predict(embedding_sequence))

[[[-0.00670875  0.01133387  0.02488199 -0.01436546 -0.02233419
   -0.00317445  0.01617844  0.04520619  0.02794706  0.04464606]
  [-0.00670875  0.01133387  0.02488199 -0.01436546 -0.02233419
   -0.00317445  0.01617844  0.04520619  0.02794706  0.04464606]
  [-0.00670875  0.01133387  0.02488199 -0.01436546 -0.02233419
   -0.00317445  0.01617844  0.04520619  0.02794706  0.04464606]
  [-0.00670875  0.01133387  0.02488199 -0.01436546 -0.02233419
   -0.00317445  0.01617844  0.04520619  0.02794706  0.04464606]
  [-0.01057332 -0.04814214 -0.02897564 -0.03757374  0.0347049
   -0.0467235  -0.02903759  0.0253568  -0.01989452  0.01646716]
  [ 0.02886364  0.00994092  0.02528677  0.00210773 -0.02625689
   -0.00399375  0.00477371 -0.03638518  0.02842586 -0.01994228]
  [ 0.01544688  0.02574969 -0.01064975 -0.04521927  0.04926241
    0.03771789  0.02472155 -0.01034353 -0.04056023  0.01480242]
  [-0.04104323  0.01115636  0.01871915 -0.00247991  0.00847461
    0.01002359  0.02648214  0.03928815 -0.0072935

In [12]:
print(embedding_sequence[0])

[   0    0    0    0 1476  432 6521 9579]


In [13]:
# Get the vectorized representation of all the onehot values in form of dimensions
print(model.predict(embedding_sequence[0]))

[[-0.00670875  0.01133387  0.02488199 -0.01436546 -0.02233419 -0.00317445
   0.01617844  0.04520619  0.02794706  0.04464606]
 [-0.00670875  0.01133387  0.02488199 -0.01436546 -0.02233419 -0.00317445
   0.01617844  0.04520619  0.02794706  0.04464606]
 [-0.00670875  0.01133387  0.02488199 -0.01436546 -0.02233419 -0.00317445
   0.01617844  0.04520619  0.02794706  0.04464606]
 [-0.00670875  0.01133387  0.02488199 -0.01436546 -0.02233419 -0.00317445
   0.01617844  0.04520619  0.02794706  0.04464606]
 [-0.01057332 -0.04814214 -0.02897564 -0.03757374  0.0347049  -0.0467235
  -0.02903759  0.0253568  -0.01989452  0.01646716]
 [ 0.02886364  0.00994092  0.02528677  0.00210773 -0.02625689 -0.00399375
   0.00477371 -0.03638518  0.02842586 -0.01994228]
 [ 0.01544688  0.02574969 -0.01064975 -0.04521927  0.04926241  0.03771789
   0.02472155 -0.01034353 -0.04056023  0.01480242]
 [-0.04104323  0.01115636  0.01871915 -0.00247991  0.00847461  0.01002359
   0.02648214  0.03928815 -0.00729351 -0.00727483]]
