## Word Embedding Techniques using Embedding Layer in Keras

In [1]:
##tensorflow >2.0
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
# Toy corpus: six short sentences that the rest of the notebook encodes,
# pads and feeds through the embedding layer.
Sent = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'understand the meaning of words',
    'South America is the fourth largest country',
]

In [3]:
# Echo the corpus so the reader can check the sentences before encoding them.
Sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'understand the meaning of words',
 'South America is the fourth largest country']

In [4]:
# Vocabulary size: handed to one_hot() below as the bound for the integer
# index assigned to each word.
voc_size = 10_000

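***One-hot representation***

Note: despite its name, Keras's `one_hot` does not build one-hot vectors. It hashes every word to an integer index in the range `[1, voc_size)`, so each sentence becomes a list of integers.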

In [5]:
# Encode each sentence as a list of integer word indices (one index per word),
# using voc_size as the bound passed to one_hot().
one_hot_repr = [
    one_hot(sentence, voc_size)
    for sentence in Sent
]
print(one_hot_repr)

[[7934, 9631, 4928, 8291], [7934, 9631, 4928, 706], [7934, 8892, 4928, 494], [3586, 3322, 9082, 8420, 6786], [8237, 7934, 8212, 4928, 4661], [507, 400, 1348, 7934, 187, 7261, 8332]]


**Word Embedding Representation**

In [6]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [7]:
import numpy as np


In [8]:
# Fixed length that every encoded sentence is padded to.
# NOTE: the name is misspelled ("lenght"), but the model cell further down
# reads it under this exact name, so it is kept unchanged.
sent_lenght = 10

# padding='pre' places the filler zeros in front of the word indices, so every
# row ends with the sentence's own indices.
embedded_docs = pad_sequences(
    one_hot_repr,
    maxlen=sent_lenght,
    padding='pre',
)
print(embedded_docs)

[[   0    0    0    0    0    0 7934 9631 4928 8291]
 [   0    0    0    0    0    0 7934 9631 4928  706]
 [   0    0    0    0    0    0 7934 8892 4928  494]
 [   0    0    0    0    0 3586 3322 9082 8420 6786]
 [   0    0    0    0    0 8237 7934 8212 4928 4661]
 [   0    0    0  507  400 1348 7934  187 7261 8332]]


In [9]:
# Embedding dimension.
# NOTE(review): the Embedding layer created in the next cell hard-codes a width
# of 12 and never reads this variable - confirm which value is intended.
dim = 10

In [10]:
# Single-layer model: an Embedding that maps each of the voc_size possible
# indices to a 12-dimensional vector, for inputs of sent_lenght indices.
# NOTE(review): the width 12 is hard-coded; the `dim` variable defined in the
# previous cell is not used here - confirm which value is intended.
model = Sequential([
    Embedding(voc_size, 12, input_length=sent_lenght),
])
model.compile(optimizer="adam", loss="mse")

In [11]:
# Print the layer table (output shape and parameter count) for the model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 10, 12)            120000    
Total params: 120,000
Trainable params: 120,000
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Run all padded sentences through the embedding layer and print the result:
# one vector per position of each padded sentence.
# NOTE(review): no training step appears before this cell, so these are
# presumably the layer's initial (untrained) weights - confirm.
print(model.predict(embedded_docs))

[[[ 0.04990384  0.01940347 -0.04554195 -0.01175528 -0.02714839
    0.04530302  0.03718981 -0.04820434  0.01778633  0.00334017
   -0.01935947 -0.01570432]
  [ 0.04990384  0.01940347 -0.04554195 -0.01175528 -0.02714839
    0.04530302  0.03718981 -0.04820434  0.01778633  0.00334017
   -0.01935947 -0.01570432]
  [ 0.04990384  0.01940347 -0.04554195 -0.01175528 -0.02714839
    0.04530302  0.03718981 -0.04820434  0.01778633  0.00334017
   -0.01935947 -0.01570432]
  [ 0.04990384  0.01940347 -0.04554195 -0.01175528 -0.02714839
    0.04530302  0.03718981 -0.04820434  0.01778633  0.00334017
   -0.01935947 -0.01570432]
  [ 0.04990384  0.01940347 -0.04554195 -0.01175528 -0.02714839
    0.04530302  0.03718981 -0.04820434  0.01778633  0.00334017
   -0.01935947 -0.01570432]
  [ 0.04990384  0.01940347 -0.04554195 -0.01175528 -0.02714839
    0.04530302  0.03718981 -0.04820434  0.01778633  0.00334017
   -0.01935947 -0.01570432]
  [-0.01387974  0.03135752 -0.03246009 -0.00759978 -0.01011951
    0.0104962

In [13]:
# Padded index sequence of the first sentence, for comparison with its
# embedding output in the next cell.
embedded_docs[0]

array([   0,    0,    0,    0,    0,    0, 7934, 9631, 4928, 8291],
      dtype=int32)

In [14]:
# Embedding output for the first sentence only: one row per position of its
# padded index sequence. Positions holding the same index get the same vector.
print(model.predict(embedded_docs)[0])

[[ 0.04990384  0.01940347 -0.04554195 -0.01175528 -0.02714839  0.04530302
   0.03718981 -0.04820434  0.01778633  0.00334017 -0.01935947 -0.01570432]
 [ 0.04990384  0.01940347 -0.04554195 -0.01175528 -0.02714839  0.04530302
   0.03718981 -0.04820434  0.01778633  0.00334017 -0.01935947 -0.01570432]
 [ 0.04990384  0.01940347 -0.04554195 -0.01175528 -0.02714839  0.04530302
   0.03718981 -0.04820434  0.01778633  0.00334017 -0.01935947 -0.01570432]
 [ 0.04990384  0.01940347 -0.04554195 -0.01175528 -0.02714839  0.04530302
   0.03718981 -0.04820434  0.01778633  0.00334017 -0.01935947 -0.01570432]
 [ 0.04990384  0.01940347 -0.04554195 -0.01175528 -0.02714839  0.04530302
   0.03718981 -0.04820434  0.01778633  0.00334017 -0.01935947 -0.01570432]
 [ 0.04990384  0.01940347 -0.04554195 -0.01175528 -0.02714839  0.04530302
   0.03718981 -0.04820434  0.01778633  0.00334017 -0.01935947 -0.01570432]
 [-0.01387974  0.03135752 -0.03246009 -0.00759978 -0.01011951  0.01049626
   0.01508485  0.00110012  0.020