### Word Embedding Techniques using Embedding Layer in Keras

In [1]:
### Libraries used: TensorFlow > 2.0 and Keras

In [2]:
# tensorflow >2.0
from tensorflow.keras.preprocessing.text import one_hot


In [3]:
# Toy corpus: seven short sentences that share a few words
# ('the', 'of', 'good', ...), used as input for the encoding cells.
sent = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]


In [4]:
# Display the corpus to confirm its contents.
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [5]:
# Vocabulary size: passed to one_hot() as the size of the index range and
# used as the first argument of the Embedding layer.
voc_size = 10_000


#### One Hot Representation

In [6]:
# Encode each sentence as a list of integer word indices, one index per word.
# NOTE(review): per the Keras docs one_hot is hash-based, so the exact indices
# may differ between kernel sessions and collisions are possible — confirm
# before relying on the specific values printed here.
onehot_repr = [one_hot(sentence, voc_size) for sentence in sent]
print(onehot_repr)


[[3679, 2622, 9771, 8424], [3679, 2622, 9771, 5848], [3679, 1820, 9771, 8439], [857, 4866, 4049, 2023, 3861], [857, 4866, 4049, 2023, 8917], [8745, 3679, 4718, 9771, 7494], [1475, 2097, 1292, 2023]]


### Word Embedding Representation

In [7]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential


In [8]:
import numpy as np

In [9]:
# Fixed length that every encoded sentence is padded to.
sent_length = 8

# padding='pre' places the filler zeros in front of each sequence, giving one
# row of sent_length indices per sentence.
embedded_docs = pad_sequences(onehot_repr, maxlen=sent_length, padding='pre')
print(embedded_docs)


[[   0    0    0    0 3679 2622 9771 8424]
 [   0    0    0    0 3679 2622 9771 5848]
 [   0    0    0    0 3679 1820 9771 8439]
 [   0    0    0  857 4866 4049 2023 3861]
 [   0    0    0  857 4866 4049 2023 8917]
 [   0    0    0 8745 3679 4718 9771 7494]
 [   0    0    0    0 1475 2097 1292 2023]]


In [10]:
# Length of each word vector (second argument of the Embedding layer).
dim = 10


In [11]:
# Single-layer model: an Embedding lookup that maps each of the sent_length
# integer indices in a row to a dim-sized vector.
model = Sequential([
    Embedding(input_dim=voc_size, output_dim=dim, input_length=sent_length),
])
# Adam optimizer with mean-squared-error loss.
model.compile(optimizer='adam', loss='mse')


In [12]:
# Print the layer stack with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             100000    
                                                                 
Total params: 100000 (390.62 KB)
Trainable params: 100000 (390.62 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [13]:
# Run every padded sentence through the embedding layer and print the
# resulting vectors (one dim-sized vector per position in each row).
# NOTE(review): no fit() call appears before this cell, so these are
# presumably the layer's initial weights — confirm.
print(model.predict(embedded_docs))

[[[-0.0055091  -0.02331215  0.00901954 -0.04068003 -0.0174238
    0.01374699 -0.02790443  0.02586663 -0.01286371  0.00271822]
  [-0.0055091  -0.02331215  0.00901954 -0.04068003 -0.0174238
    0.01374699 -0.02790443  0.02586663 -0.01286371  0.00271822]
  [-0.0055091  -0.02331215  0.00901954 -0.04068003 -0.0174238
    0.01374699 -0.02790443  0.02586663 -0.01286371  0.00271822]
  [-0.0055091  -0.02331215  0.00901954 -0.04068003 -0.0174238
    0.01374699 -0.02790443  0.02586663 -0.01286371  0.00271822]
  [ 0.03625422 -0.01301678  0.00297958 -0.00105896  0.01619231
    0.02919498  0.02268193  0.03664701 -0.00360162 -0.01114527]
  [-0.01138156 -0.03825836  0.00125705  0.01672653  0.04771587
    0.01555388  0.00626968 -0.01153215 -0.02780624 -0.0132091 ]
  [-0.03938477 -0.01883823 -0.01837931 -0.00062984  0.04774848
   -0.03978508  0.01576329 -0.0176021  -0.00240756 -0.01968031]
  [ 0.04164816  0.00338339 -0.04680823  0.02771819  0.03334801
   -0.04188005 -0.00035033 -0.04934472 -0.04529377 -

In [14]:
# Padded index row for the first sentence.
embedded_docs[0]

array([   0,    0,    0,    0, 3679, 2622, 9771, 8424])

In [15]:
# Embedding vectors for the first sentence only; each printed row corresponds
# to one position of embedded_docs[0].
print(model.predict(embedded_docs)[0])

[[-0.0055091  -0.02331215  0.00901954 -0.04068003 -0.0174238   0.01374699
  -0.02790443  0.02586663 -0.01286371  0.00271822]
 [-0.0055091  -0.02331215  0.00901954 -0.04068003 -0.0174238   0.01374699
  -0.02790443  0.02586663 -0.01286371  0.00271822]
 [-0.0055091  -0.02331215  0.00901954 -0.04068003 -0.0174238   0.01374699
  -0.02790443  0.02586663 -0.01286371  0.00271822]
 [-0.0055091  -0.02331215  0.00901954 -0.04068003 -0.0174238   0.01374699
  -0.02790443  0.02586663 -0.01286371  0.00271822]
 [ 0.03625422 -0.01301678  0.00297958 -0.00105896  0.01619231  0.02919498
   0.02268193  0.03664701 -0.00360162 -0.01114527]
 [-0.01138156 -0.03825836  0.00125705  0.01672653  0.04771587  0.01555388
   0.00626968 -0.01153215 -0.02780624 -0.0132091 ]
 [-0.03938477 -0.01883823 -0.01837931 -0.00062984  0.04774848 -0.03978508
   0.01576329 -0.0176021  -0.00240756 -0.01968031]
 [ 0.04164816  0.00338339 -0.04680823  0.02771819  0.03334801 -0.04188005
  -0.00035033 -0.04934472 -0.04529377 -0.00812439]]