### Word Embedding using the Embedding Layer in TensorFlow

In [1]:
#Imports
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
# Toy corpus: seven short sentences that get encoded and embedded below.
sentences = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [3]:
# Vocabulary size: passed to one_hot as the index range for hashed words and
# to the Embedding layer as the number of rows in its lookup table.
voc_size = 10_000

### One-Hot Representation of the Words in Each Sentence

In [4]:
# Encode each sentence as a list of integer word indices (one index per word).
# NOTE(review): per the Keras docs, one_hot hashes with Python's built-in
# hash by default, which is not stable across interpreter sessions, so the
# exact indices may differ from run to run.
onehot_repr = [one_hot(sentence, voc_size) for sentence in sentences]

In [5]:
# Show the integer index list produced for each sentence.
onehot_repr

[[6591, 68, 6109, 6515],
 [6591, 68, 6109, 5666],
 [6591, 2827, 6109, 9448],
 [7777, 8569, 5126, 735, 6541],
 [7777, 8569, 5126, 735, 7969],
 [3245, 6591, 8167, 6109, 8860],
 [894, 2044, 4586, 735]]

### Word Embedding Representation

In [6]:
# Imports
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
import numpy as np

Padding is applied so that every sentence has the same length.

In [7]:
# Fixed number of token positions per sentence after padding.
sent_length = 8
# padding='pre' inserts zeros at the front of sequences shorter than sent_length.
embedded_docs = pad_sequences(onehot_repr, maxlen=sent_length, padding='pre')

In [8]:
# Show the padded index matrix: one row per sentence, sent_length columns.
embedded_docs

array([[   0,    0,    0,    0, 6591,   68, 6109, 6515],
       [   0,    0,    0,    0, 6591,   68, 6109, 5666],
       [   0,    0,    0,    0, 6591, 2827, 6109, 9448],
       [   0,    0,    0, 7777, 8569, 5126,  735, 6541],
       [   0,    0,    0, 7777, 8569, 5126,  735, 7969],
       [   0,    0,    0, 3245, 6591, 8167, 6109, 8860],
       [   0,    0,    0,    0,  894, 2044, 4586,  735]])

In [9]:
# Length of the embedding vector used to represent each word.
dim = 10

In [10]:
# Single-layer model: the Embedding layer maps each of the sent_length word
# indices in a sentence to a dim-sized vector.
model = Sequential([
    Embedding(input_dim=voc_size, output_dim=dim, input_length=sent_length),
])
model.compile(optimizer='adam', loss='mse')

In [11]:
# Print the layer table; the embedding table holds voc_size * dim weights.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             100000    
                                                                 
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Look up the embedding vector for every token position of every padded sentence.
# NOTE(review): no fit() call appears in the cells shown, so these are presumably
# the layer's initial (untrained) weights. Padding positions (index 0) all map to
# the same vector.
print(model.predict(embedded_docs))

[[[ 0.02876223 -0.01266718  0.03365619 -0.03290718  0.01985565
   -0.00429195 -0.03213429  0.04465527 -0.0029465  -0.01063075]
  [ 0.02876223 -0.01266718  0.03365619 -0.03290718  0.01985565
   -0.00429195 -0.03213429  0.04465527 -0.0029465  -0.01063075]
  [ 0.02876223 -0.01266718  0.03365619 -0.03290718  0.01985565
   -0.00429195 -0.03213429  0.04465527 -0.0029465  -0.01063075]
  [ 0.02876223 -0.01266718  0.03365619 -0.03290718  0.01985565
   -0.00429195 -0.03213429  0.04465527 -0.0029465  -0.01063075]
  [ 0.00936367 -0.04697746 -0.00582675 -0.02898883 -0.04666133
   -0.02745428  0.03898097 -0.01404786  0.03119533  0.0466743 ]
  [ 0.01909446  0.00896175  0.00649772 -0.02913356 -0.03575172
   -0.02246157 -0.00822574 -0.0158234   0.03836964  0.03144804]
  [ 0.0312984  -0.03500662  0.04555286 -0.03536637 -0.01149725
   -0.01003714 -0.00185329 -0.04409772  0.03972182 -0.043795  ]
  [-0.00126369  0.04125975  0.03944157  0.00516307  0.02001325
   -0.00214316  0.0242287  -0.04759758 -0.026140

In [13]:
# Padded index sequence of the first sentence ('the glass of milk').
embedded_docs[0]

array([   0,    0,    0,    0, 6591,   68, 6109, 6515])

In [14]:
# Embedding vectors of the first sentence: one dim-sized row per token position.
# Runs predict over all sentences again and keeps only the first result.
model.predict(embedded_docs)[0]

array([[ 0.02876223, -0.01266718,  0.03365619, -0.03290718,  0.01985565,
        -0.00429195, -0.03213429,  0.04465527, -0.0029465 , -0.01063075],
       [ 0.02876223, -0.01266718,  0.03365619, -0.03290718,  0.01985565,
        -0.00429195, -0.03213429,  0.04465527, -0.0029465 , -0.01063075],
       [ 0.02876223, -0.01266718,  0.03365619, -0.03290718,  0.01985565,
        -0.00429195, -0.03213429,  0.04465527, -0.0029465 , -0.01063075],
       [ 0.02876223, -0.01266718,  0.03365619, -0.03290718,  0.01985565,
        -0.00429195, -0.03213429,  0.04465527, -0.0029465 , -0.01063075],
       [ 0.00936367, -0.04697746, -0.00582675, -0.02898883, -0.04666133,
        -0.02745428,  0.03898097, -0.01404786,  0.03119533,  0.0466743 ],
       [ 0.01909446,  0.00896175,  0.00649772, -0.02913356, -0.03575172,
        -0.02246157, -0.00822574, -0.0158234 ,  0.03836964,  0.03144804],
       [ 0.0312984 , -0.03500662,  0.04555286, -0.03536637, -0.01149725,
        -0.01003714, -0.00185329, -0.04409772