In [1]:
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
# Toy corpus of seven short sentences that the following cells encode.
sent = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [3]:
# Echo the corpus to confirm its contents.
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [4]:
# Vocabulary size shared by the one_hot encoding and the Embedding layer.
voc_size = 10_000

## One Hot Representation

In [5]:
# Encode every sentence as a list of integer word indices (one per word),
# with voc_size passed as the size of the index space.
# NOTE(review): per the Keras docs one_hot is hash-based, so two different
# words may share an index and indices may differ between kernel sessions.
onehot_repr = [one_hot(sentence, voc_size) for sentence in sent]
print(onehot_repr)

[[5125, 7949, 9891, 3557], [5125, 7949, 9891, 1254], [5125, 3698, 9891, 2223], [9845, 4418, 3492, 3070, 8554], [9845, 4418, 3492, 3070, 2583], [7824, 5125, 758, 9891, 7258], [2529, 5216, 8219, 3070]]


## Word Embedding Representation

In [6]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [8]:
# The sentences differ in length, so each index list is left-padded with
# zeros ('pre') until every row holds exactly sent_length entries.
import numpy as np
sent_length = 8
embedded_docs = pad_sequences(onehot_repr, maxlen=sent_length, padding='pre')
print(embedded_docs)

[[   0    0    0    0 5125 7949 9891 3557]
 [   0    0    0    0 5125 7949 9891 1254]
 [   0    0    0    0 5125 3698 9891 2223]
 [   0    0    0 9845 4418 3492 3070 8554]
 [   0    0    0 9845 4418 3492 3070 2583]
 [   0    0    0 7824 5125  758 9891 7258]
 [   0    0    0    0 2529 5216 8219 3070]]


In [9]:
# Embedding size: every word index (0, 5125, ...) is to be represented
# by a vector of this many values.
dim = 10

In [10]:
# Minimal model made of a single Embedding layer: it maps each of the
# voc_size possible word indices to a dim-dimensional vector, for inputs
# of sent_length indices per sentence.
model = Sequential()
# Pass the `dim` constant instead of repeating the literal 10, so the
# embedding size is configured in exactly one place.
model.add(Embedding(voc_size, dim, input_length=sent_length))
# Compile with the 'adam' optimizer and 'mse' loss.
# NOTE(review): no fit() call appears in the visible cells, so predictions
# presumably reflect the layer's initial weights -- confirm.
model.compile('adam', 'mse')

In [11]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             100000    
                                                                 
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Run the padded index matrix through the model and print the resulting
# embedding vectors for every position of every sentence.
print(model.predict(embedded_docs))

[[[-0.0070396   0.01518467 -0.02733507  0.0434821  -0.04418119
    0.03300105  0.04641673 -0.03924444 -0.02542899  0.04065888]
  [-0.0070396   0.01518467 -0.02733507  0.0434821  -0.04418119
    0.03300105  0.04641673 -0.03924444 -0.02542899  0.04065888]
  [-0.0070396   0.01518467 -0.02733507  0.0434821  -0.04418119
    0.03300105  0.04641673 -0.03924444 -0.02542899  0.04065888]
  [-0.0070396   0.01518467 -0.02733507  0.0434821  -0.04418119
    0.03300105  0.04641673 -0.03924444 -0.02542899  0.04065888]
  [-0.04152884 -0.04226343  0.0030555   0.04605554  0.0234573
   -0.03590945  0.03600157 -0.03494368 -0.04868668 -0.04668662]
  [ 0.0418947   0.04768052 -0.01448528  0.03769627  0.01354904
    0.03181711  0.01394569  0.03454981  0.03169801  0.01520202]
  [ 0.02941816  0.04838451  0.01430439  0.04617889 -0.01274301
   -0.03524004  0.02816161  0.04040783 -0.02296486  0.02338127]
  [-0.0485844  -0.00498611  0.03708175  0.04445534  0.02660364
   -0.00306512  0.04341615 -0.04095807 -0.0200224

In [13]:
# Padded index sequence of the first sentence.
embedded_docs[0]

array([   0,    0,    0,    0, 5125, 7949, 9891, 3557], dtype=int32)

In [14]:
# Embedding vectors for the first sentence only: one row per padded position.
print(model.predict(embedded_docs)[0])

[[-0.0070396   0.01518467 -0.02733507  0.0434821  -0.04418119  0.03300105
   0.04641673 -0.03924444 -0.02542899  0.04065888]
 [-0.0070396   0.01518467 -0.02733507  0.0434821  -0.04418119  0.03300105
   0.04641673 -0.03924444 -0.02542899  0.04065888]
 [-0.0070396   0.01518467 -0.02733507  0.0434821  -0.04418119  0.03300105
   0.04641673 -0.03924444 -0.02542899  0.04065888]
 [-0.0070396   0.01518467 -0.02733507  0.0434821  -0.04418119  0.03300105
   0.04641673 -0.03924444 -0.02542899  0.04065888]
 [-0.04152884 -0.04226343  0.0030555   0.04605554  0.0234573  -0.03590945
   0.03600157 -0.03494368 -0.04868668 -0.04668662]
 [ 0.0418947   0.04768052 -0.01448528  0.03769627  0.01354904  0.03181711
   0.01394569  0.03454981  0.03169801  0.01520202]
 [ 0.02941816  0.04838451  0.01430439  0.04617889 -0.01274301 -0.03524004
   0.02816161  0.04040783 -0.02296486  0.02338127]
 [-0.0485844  -0.00498611  0.03708175  0.04445534  0.02660364 -0.00306512
   0.04341615 -0.04095807 -0.02002249 -0.02507227]]