In [1]:
# import all the necessary libraries
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
# Toy corpus: seven short sentences used throughout the notebook.
sentence = [
    "the glass of milk",
    "the glass of juice",
    "the cup of tea",
    "I am a good boy",
    "I am a good developer",
    "understand the meaning of words",
    "your videos are good",
]

In [3]:
# Show the raw corpus before any encoding is applied.
print(sentence)

['the glass of milk', 'the glass of juice', 'the cup of tea', 'I am a good boy', 'I am a good developer', 'understand the meaning of words', 'your videos are good']


In [4]:
# Vocabulary size: passed to the word encoder and to the embedding layer below.
voc_size = 10_000

# One Hot Representation of Words

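`one_hot` converts each sentence into a list of integer indices, one per word, with every index falling inside a vocabulary of size `voc_size`. The index is obtained by hashing the word, so the result is not a true one-hot vector and two different words can occasionally receive the same index.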

In [5]:
# Encode every sentence as a list of integer word indices (one index per
# word), using a vocabulary of `voc_size` entries.
onehot_representation = [
    one_hot(text, voc_size)
    for text in sentence
]
print(onehot_representation)

[[9305, 7635, 2116, 6237], [9305, 7635, 2116, 141], [9305, 8929, 2116, 9321], [1236, 2247, 754, 7051, 3755], [1236, 2247, 754, 7051, 3583], [6046, 9305, 6482, 2116, 2732], [6484, 1875, 317, 7051]]


# Word Embedding Representation

In [7]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [8]:
# Left-pad ("pre") every encoded sentence with zeros so that all rows have the
# same length `sent_length`; the embedding layer below is declared with that
# fixed input length.
sent_length = 8
embedding_sequence = pad_sequences(
    onehot_representation,
    maxlen=sent_length,
    padding="pre",
)
print(embedding_sequence)

[[   0    0    0    0 9305 7635 2116 6237]
 [   0    0    0    0 9305 7635 2116  141]
 [   0    0    0    0 9305 8929 2116 9321]
 [   0    0    0 1236 2247  754 7051 3755]
 [   0    0    0 1236 2247  754 7051 3583]
 [   0    0    0 6046 9305 6482 2116 2732]
 [   0    0    0    0 6484 1875  317 7051]]


In [9]:
# Intended size of each word vector produced by the embedding layer.
dim = 10

In [10]:
# Build a minimal model made of a single Embedding layer that maps each of the
# `voc_size` possible word indices to a `dim`-dimensional vector.
model = Sequential()
# Use `dim` instead of repeating the literal 10, so the embedding size is
# configured in exactly one place. Arguments are passed by keyword to make
# their roles explicit.
model.add(Embedding(input_dim=voc_size, output_dim=dim, input_length=sent_length))
# Compile with the 'adam' optimizer and mean-squared-error loss. The visible
# cells only call `summary` and `predict`; no training is performed here.
model.compile(optimizer='adam', loss='mse')

In [11]:
# Print the model architecture: one row per layer with its output shape and
# parameter count (the embedding layer holds voc_size * 10 weights).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             100000    
                                                                 
Total params: 100000 (390.62 KB)
Trainable params: 100000 (390.62 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [12]:
# Run the whole padded batch through the embedding layer: every word index in
# every sentence is replaced by its embedding vector.
embedded_sentences = model.predict(embedding_sequence)
print(embedded_sentences)

[[[ 0.00010131 -0.02468785 -0.01095946 -0.01197703  0.04227567
   -0.03317936 -0.01047315  0.00454076  0.03931682  0.00625292]
  [ 0.00010131 -0.02468785 -0.01095946 -0.01197703  0.04227567
   -0.03317936 -0.01047315  0.00454076  0.03931682  0.00625292]
  [ 0.00010131 -0.02468785 -0.01095946 -0.01197703  0.04227567
   -0.03317936 -0.01047315  0.00454076  0.03931682  0.00625292]
  [ 0.00010131 -0.02468785 -0.01095946 -0.01197703  0.04227567
   -0.03317936 -0.01047315  0.00454076  0.03931682  0.00625292]
  [-0.00483926 -0.02220302  0.00719935 -0.04338865  0.03173994
   -0.02405405 -0.02635107  0.02894468 -0.00027215 -0.0014366 ]
  [-0.01873233 -0.00272293  0.00105198  0.00218949 -0.0288746
    0.02834352 -0.01328101  0.00554389  0.04593093  0.01437405]
  [-0.03749872 -0.00343376 -0.01019546 -0.00180776 -0.00381108
    0.0401533  -0.0130757  -0.00926391 -0.04307494 -0.00078475]
  [ 0.02767524 -0.01392082  0.04854545  0.03666205 -0.03882828
    0.04853204 -0.04986686 -0.02295629 -0.0480732

In [13]:
# Padded index sequence of the first sentence.
print(embedding_sequence[0])

[   0    0    0    0 9305 7635 2116 6237]


In [14]:
# Embedding vectors for the first sentence only: one vector per position,
# including the zero-padded positions.
# `predict` expects a batch, so slice with [:1] to keep the leading batch axis
# instead of passing the bare 1-D row `embedding_sequence[0]`, which would be
# interpreted as `sent_length` separate samples of length 1. The trailing [0]
# drops the batch axis again, so the printed array keeps its 2-D layout.
print(model.predict(embedding_sequence[:1])[0])

[[ 0.00010131 -0.02468785 -0.01095946 -0.01197703  0.04227567 -0.03317936
  -0.01047315  0.00454076  0.03931682  0.00625292]
 [ 0.00010131 -0.02468785 -0.01095946 -0.01197703  0.04227567 -0.03317936
  -0.01047315  0.00454076  0.03931682  0.00625292]
 [ 0.00010131 -0.02468785 -0.01095946 -0.01197703  0.04227567 -0.03317936
  -0.01047315  0.00454076  0.03931682  0.00625292]
 [ 0.00010131 -0.02468785 -0.01095946 -0.01197703  0.04227567 -0.03317936
  -0.01047315  0.00454076  0.03931682  0.00625292]
 [-0.00483926 -0.02220302  0.00719935 -0.04338865  0.03173994 -0.02405405
  -0.02635107  0.02894468 -0.00027215 -0.0014366 ]
 [-0.01873233 -0.00272293  0.00105198  0.00218949 -0.0288746   0.02834352
  -0.01328101  0.00554389  0.04593093  0.01437405]
 [-0.03749872 -0.00343376 -0.01019546 -0.00180776 -0.00381108  0.0401533
  -0.0130757  -0.00926391 -0.04307494 -0.00078475]
 [ 0.02767524 -0.01392082  0.04854545  0.03666205 -0.03882828  0.04853204
  -0.04986686 -0.02295629 -0.04807329  0.04352177]]
