In [1]:
from tensorflow.keras.preprocessing.text import one_hot




In [2]:
## Sentences
# Toy corpus of seven short sentences used by the cells below.
# The comma inside 'the glass of juice,' is part of the string itself.
sent = [
    'the glass of milk',
    'the glass of juice,',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [3]:
# Echo the corpus so its contents are visible in the notebook output.
sent

['the glass of milk',
 'the glass of juice,',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [4]:
## Define the vocabulary size
# Passed to one_hot() as the index range and to Embedding() as the number of rows.
voc_size = 10_000

In [5]:
## One Hot Representation
# Each sentence becomes a list with one integer index per word; a repeated
# word (e.g. 'the') receives the same index everywhere it occurs.
# NOTE(review): one_hot() appears to be hash-based, so the exact indices may
# differ between kernel sessions -- confirm before relying on the values shown.
one_hot_repr = [
    one_hot(sentence, voc_size)
    for sentence in sent
]
one_hot_repr

[[3514, 2952, 4393, 1537],
 [3514, 2952, 4393, 3982],
 [3514, 889, 4393, 7523],
 [6976, 4291, 9546, 6470, 6795],
 [6976, 4291, 9546, 6470, 871],
 [3784, 3514, 4382, 4393, 9439],
 [5404, 3651, 4874, 6470]]

In [7]:
## Word Embedding Representation
from tensorflow.keras.layers import Embedding
from tensorflow.keras.utils import pad_sequences
# The sentences have different lengths (some are 4 words, some are 5), so they
# must all be brought to the same size. Otherwise they cannot be used to train
# an RNN, because the words are fed in over a fixed number of time steps.
from tensorflow.keras.models import Sequential

In [8]:
import numpy as np

In [9]:
# Bring every encoded sentence to a common length of 8 entries by filling the
# missing positions with 0.
sent_length = 8
embedded_docs = pad_sequences(
    one_hot_repr,
    maxlen=sent_length,
    padding='pre',  # zeros go in front of the words; 'post' is the alternative
)
print(embedded_docs)

[[   0    0    0    0 3514 2952 4393 1537]
 [   0    0    0    0 3514 2952 4393 3982]
 [   0    0    0    0 3514  889 4393 7523]
 [   0    0    0 6976 4291 9546 6470 6795]
 [   0    0    0 6976 4291 9546 6470  871]
 [   0    0    0 3784 3514 4382 4393 9439]
 [   0    0    0    0 5404 3651 4874 6470]]


In [10]:
## feature representation
# Length of the vector each word index is mapped to; used as the second
# argument of the Embedding layer when the model is built.
dim = 10

In [11]:
# A model consisting of a single Embedding layer: voc_size rows of dim values,
# applied to inputs of sent_length word indices.
model = Sequential([
    Embedding(voc_size, dim, input_length=sent_length),
])
model.compile(optimizer='adam', loss='mse')





In [12]:
# Print the layer table; the parameter count should equal voc_size * dim.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             100000    
                                                                 
Total params: 100000 (390.62 KB)
Trainable params: 100000 (390.62 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [13]:
# Look up the embedding vector for every position of every padded sentence.
# No training call appears in the visible cells, so these are presumably the
# layer's initial weights. Positions holding the padding index 0 all map to the
# same vector, which is why the leading rows of each sentence are identical.
model.predict(embedded_docs)



array([[[-0.03614493,  0.01339461, -0.01274728, -0.04860941,
         -0.04568264,  0.0317408 ,  0.03064937,  0.04919375,
         -0.02864579,  0.0348432 ],
        [-0.03614493,  0.01339461, -0.01274728, -0.04860941,
         -0.04568264,  0.0317408 ,  0.03064937,  0.04919375,
         -0.02864579,  0.0348432 ],
        [-0.03614493,  0.01339461, -0.01274728, -0.04860941,
         -0.04568264,  0.0317408 ,  0.03064937,  0.04919375,
         -0.02864579,  0.0348432 ],
        [-0.03614493,  0.01339461, -0.01274728, -0.04860941,
         -0.04568264,  0.0317408 ,  0.03064937,  0.04919375,
         -0.02864579,  0.0348432 ],
        [-0.02472092,  0.00327254,  0.00050934, -0.04977829,
         -0.04789375, -0.02264346, -0.03701063,  0.00540442,
          0.03942127,  0.0298978 ],
        [-0.01520824, -0.02274861, -0.01817018,  0.01982382,
         -0.01605853, -0.00852628, -0.04518867, -0.01149194,
         -0.02264475, -0.03368431],
        [ 0.02537609, -0.01582148,  0.04677917, -0.0

In [14]:
# First padded sentence: leading zeros followed by its word indices.
embedded_docs[0]

array([   0,    0,    0,    0, 3514, 2952, 4393, 1537])

In [15]:
# Embed only the first sentence. predict() expects a leading batch axis, so a
# one-row slice of shape (1, sent_length) is passed rather than the bare 1-D
# row, which would be read as sent_length separate one-token samples.
# Indexing [0] unpacks the single result into a (sent_length, dim) matrix.
model.predict(embedded_docs[:1])[0]



array([[-0.03614493,  0.01339461, -0.01274728, -0.04860941, -0.04568264,
         0.0317408 ,  0.03064937,  0.04919375, -0.02864579,  0.0348432 ],
       [-0.03614493,  0.01339461, -0.01274728, -0.04860941, -0.04568264,
         0.0317408 ,  0.03064937,  0.04919375, -0.02864579,  0.0348432 ],
       [-0.03614493,  0.01339461, -0.01274728, -0.04860941, -0.04568264,
         0.0317408 ,  0.03064937,  0.04919375, -0.02864579,  0.0348432 ],
       [-0.03614493,  0.01339461, -0.01274728, -0.04860941, -0.04568264,
         0.0317408 ,  0.03064937,  0.04919375, -0.02864579,  0.0348432 ],
       [-0.02472092,  0.00327254,  0.00050934, -0.04977829, -0.04789375,
        -0.02264346, -0.03701063,  0.00540442,  0.03942127,  0.0298978 ],
       [-0.01520824, -0.02274861, -0.01817018,  0.01982382, -0.01605853,
        -0.00852628, -0.04518867, -0.01149194, -0.02264475, -0.03368431],
       [ 0.02537609, -0.01582148,  0.04677917, -0.0226696 ,  0.03651932,
         0.04520525, -0.0458485 , -0.04441224