#  Word Embedding Techniques using Embedding Layer in Keras

In [1]:
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
# Toy corpus: seven short sentences that the rest of the notebook encodes.
sent = [
    "the glass of milk",
    "the glass of juice",
    "the cup of tea",
    "I am a good boy",
    "I am a good developer",
    "understand the meaning of words",
    "your videos are good",
]

## Original text

In [3]:
# Echo the raw sentences so the input text is visible before any encoding.
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [4]:
# Size of the word-index space: passed to one_hot when encoding the sentences
# and to the Embedding layer as its input dimension.
voc_size = 10_000

## One-Hot Representation (integer word indices)

In [5]:
# Encode every sentence as a list of integer word ids (one id per word).
# NOTE(review): Keras documents one_hot as a hashing-based encoder, so distinct
# words may share an id and the ids may differ between interpreter sessions;
# confirm against the installed Keras version before relying on exact values.
onehot_repr = [one_hot(sentence, voc_size) for sentence in sent]
print(onehot_repr)

[[2479, 9010, 6340, 2708], [2479, 9010, 6340, 8116], [2479, 9183, 6340, 1910], [5291, 5091, 3613, 1816, 618], [5291, 5091, 3613, 1816, 6816], [6200, 2479, 3485, 6340, 2130], [3545, 6845, 1690, 1816]]


## Word Embedding Representation

In [6]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [7]:
import numpy as np

In [8]:
# Common length (in word ids) that every encoded sentence is brought to.
sent_length = 6

# Left-pad each id sequence with zeros so all rows have the same length and
# the result is a rectangular array that can be fed to the Embedding layer.
# The longest sentence here has 5 words, so nothing is cut off at length 6.
embedded_docs = pad_sequences(onehot_repr, maxlen=sent_length, padding='pre')
print(embedded_docs)

[[   0    0 2479 9010 6340 2708]
 [   0    0 2479 9010 6340 8116]
 [   0    0 2479 9183 6340 1910]
 [   0 5291 5091 3613 1816  618]
 [   0 5291 5091 3613 1816 6816]
 [   0 6200 2479 3485 6340 2130]
 [   0    0 3545 6845 1690 1816]]


In [9]:
# Number of components in each word vector (the Embedding output dimension).
dim = 10

In [10]:
# Single-layer model: an Embedding lookup table with voc_size rows of dim
# values each, applied to input rows of sent_length word ids.
model = Sequential([
    Embedding(voc_size, dim, input_length=sent_length),
])
# Configure the optimizer and loss; no fit() call appears in the cells shown.
model.compile(optimizer='adam', loss='mse')

In [11]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 6, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Look up the embedding vector for every word id in every padded sentence.
# No fit() call appears in the cells shown, so these are presumably the
# layer's initial (untrained) weights.
print(model.predict(embedded_docs))

[[[ 0.02830229  0.04752325 -0.02123218 -0.03643661  0.00885112
   -0.00248778 -0.00167231  0.01287413  0.01603267 -0.03193973]
  [ 0.02830229  0.04752325 -0.02123218 -0.03643661  0.00885112
   -0.00248778 -0.00167231  0.01287413  0.01603267 -0.03193973]
  [ 0.02762694 -0.02858141  0.0022598   0.02232992  0.01818904
    0.04489087 -0.02411619 -0.0379663   0.00086417 -0.03850863]
  [-0.01667928  0.04007561  0.03153784 -0.00309895  0.02855412
   -0.01503282 -0.01575247 -0.02208949 -0.01922837 -0.02588023]
  [-0.00703995 -0.01440049 -0.03396499  0.03260211  0.01761958
   -0.04316157  0.01216493 -0.01882537  0.03656668  0.02081913]
  [-0.0397227   0.02500897  0.0446477   0.01056538 -0.03773441
   -0.00172304 -0.0058098  -0.01042762 -0.02518382  0.00697591]]

 [[ 0.02830229  0.04752325 -0.02123218 -0.03643661  0.00885112
   -0.00248778 -0.00167231  0.01287413  0.01603267 -0.03193973]
  [ 0.02830229  0.04752325 -0.02123218 -0.03643661  0.00885112
   -0.00248778 -0.00167231  0.01287413  0.0160

In [13]:
# Padded id sequence of the first sentence ('the glass of milk').
embedded_docs[0]

array([   0,    0, 2479, 9010, 6340, 2708])

In [14]:
# Each word id in the padded sentence is mapped to ONE vector of 10 values
# (dim=10), so the 6-id first sentence becomes a 6x10 matrix.
# The two leading rows are identical because both come from the padding id 0.
print(model.predict(embedded_docs)[0])

[[ 0.02830229  0.04752325 -0.02123218 -0.03643661  0.00885112 -0.00248778
  -0.00167231  0.01287413  0.01603267 -0.03193973]
 [ 0.02830229  0.04752325 -0.02123218 -0.03643661  0.00885112 -0.00248778
  -0.00167231  0.01287413  0.01603267 -0.03193973]
 [ 0.02762694 -0.02858141  0.0022598   0.02232992  0.01818904  0.04489087
  -0.02411619 -0.0379663   0.00086417 -0.03850863]
 [-0.01667928  0.04007561  0.03153784 -0.00309895  0.02855412 -0.01503282
  -0.01575247 -0.02208949 -0.01922837 -0.02588023]
 [-0.00703995 -0.01440049 -0.03396499  0.03260211  0.01761958 -0.04316157
   0.01216493 -0.01882537  0.03656668  0.02081913]
 [-0.0397227   0.02500897  0.0446477   0.01056538 -0.03773441 -0.00172304
  -0.0058098  -0.01042762 -0.02518382  0.00697591]]


#### Reference: https://machinelearningmastery.com/use-word-embedding-layers-deep-learning-keras/