# Word Embedding Techniques using Embedding Layer in Keras

In [5]:
%pip install tensorflow

Note: you may need to restart the kernel to use updated packages.


In [6]:
from tensorflow.keras.preprocessing.text import one_hot

In [7]:
### sentences
sent=[  'the glass of milk',
     'the glass of juice',
     'the cup of tea',
    'I am a good boy',
     'I am a good developer',
     'understand the meaning of words',
     'your videos are good']

In [8]:
### Vocabulary size
voc_size=500

# One Hot Representation

In [9]:
onehot_repr=[one_hot(words,voc_size)for words in sent] 
print(onehot_repr)

[[443, 220, 70, 251], [443, 220, 70, 180], [443, 48, 70, 15], [324, 131, 176, 397, 149], [324, 131, 176, 397, 186], [144, 443, 90, 70, 80], [103, 343, 283, 397]]


# Word Embedding Represntation

In [10]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences # for post or pre padding 
from tensorflow.keras.models import Sequential # for sequential neural network

In [11]:
import numpy as np

In [12]:
## pre padding
# we are doing padding to make all the sentences of same length
sent_length=8
embedded_docs=pad_sequences(onehot_repr,padding='pre',maxlen=sent_length)
print(embedded_docs)

[[  0   0   0   0 443 220  70 251]
 [  0   0   0   0 443 220  70 180]
 [  0   0   0   0 443  48  70  15]
 [  0   0   0 324 131 176 397 149]
 [  0   0   0 324 131 176 397 186]
 [  0   0   0 144 443  90  70  80]
 [  0   0   0   0 103 343 283 397]]


In [13]:
## 10 feature dimesnions representation
dim=10

In [14]:
model=Sequential()
model.add(Embedding(voc_size,10,input_length=sent_length)) #adding embedding layer
model.compile('adam','mse') # here adam is an optimizer and mse is a loss function

In [15]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             5000      
                                                                 
Total params: 5,000
Trainable params: 5,000
Non-trainable params: 0
_________________________________________________________________


In [16]:
##'the glass of milk',
embedded_docs[0]

array([  0,   0,   0,   0, 443, 220,  70, 251])

In [17]:
model.predict(embedded_docs[0])



array([[ 0.01443127, -0.01734231, -0.04515622, -0.03217013,  0.01302481,
        -0.00855223, -0.01143442,  0.04685256,  0.023409  ,  0.02838391],
       [ 0.01443127, -0.01734231, -0.04515622, -0.03217013,  0.01302481,
        -0.00855223, -0.01143442,  0.04685256,  0.023409  ,  0.02838391],
       [ 0.01443127, -0.01734231, -0.04515622, -0.03217013,  0.01302481,
        -0.00855223, -0.01143442,  0.04685256,  0.023409  ,  0.02838391],
       [ 0.01443127, -0.01734231, -0.04515622, -0.03217013,  0.01302481,
        -0.00855223, -0.01143442,  0.04685256,  0.023409  ,  0.02838391],
       [ 0.03783423,  0.00928871, -0.03506346, -0.02808001, -0.03684388,
        -0.02635704,  0.01290685, -0.00135878,  0.04621149, -0.0039367 ],
       [ 0.01885296,  0.04436511, -0.00283719,  0.03253331,  0.01013098,
        -0.01780714, -0.02910249, -0.03756069,  0.03674093,  0.03980203],
       [-0.02212195,  0.00402122,  0.00715335, -0.01900041, -0.02183359,
        -0.04285866,  0.03372612,  0.04574862

In [19]:
print(model.predict(embedded_docs))

[[[ 0.01443127 -0.01734231 -0.04515622 -0.03217013  0.01302481
   -0.00855223 -0.01143442  0.04685256  0.023409    0.02838391]
  [ 0.01443127 -0.01734231 -0.04515622 -0.03217013  0.01302481
   -0.00855223 -0.01143442  0.04685256  0.023409    0.02838391]
  [ 0.01443127 -0.01734231 -0.04515622 -0.03217013  0.01302481
   -0.00855223 -0.01143442  0.04685256  0.023409    0.02838391]
  [ 0.01443127 -0.01734231 -0.04515622 -0.03217013  0.01302481
   -0.00855223 -0.01143442  0.04685256  0.023409    0.02838391]
  [ 0.03783423  0.00928871 -0.03506346 -0.02808001 -0.03684388
   -0.02635704  0.01290685 -0.00135878  0.04621149 -0.0039367 ]
  [ 0.01885296  0.04436511 -0.00283719  0.03253331  0.01013098
   -0.01780714 -0.02910249 -0.03756069  0.03674093  0.03980203]
  [-0.02212195  0.00402122  0.00715335 -0.01900041 -0.02183359
   -0.04285866  0.03372612  0.04574862  0.01064173  0.02683348]
  [-0.02467957 -0.02736647  0.00146738  0.00445465  0.02796376
   -0.00014289 -0.03673381 -0.02878597 -0.026075