### Word embedding technique using the Embedding layer in Keras

In [12]:
from tensorflow.keras.preprocessing.text import one_hot

In [13]:
# Toy corpus: seven short sentences used as input for the encoding and
# embedding steps below.
sent = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [14]:
# Echo the corpus as the cell output to verify its contents.
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [16]:
# Vocabulary size: passed to the encoder as the size of the index space
# that words are mapped into.
voc_size = 10_000

### One-hot representation (each word is encoded as an integer index into the vocabulary)

In [17]:
# Despite its name, one_hot does not build one-hot vectors: it turns each
# sentence into a list of integer word indices (one per word, bounded by
# voc_size) by hashing the words. Identical words get identical indices.
onehot = [
    one_hot(sentence, voc_size)
    for sentence in sent
]
print(onehot)

[[1485, 2195, 9861, 4962], [1485, 2195, 9861, 3264], [1485, 1931, 9861, 3853], [2981, 5529, 3771, 2276, 8525], [2981, 5529, 3771, 2276, 7603], [6414, 1485, 7697, 9861, 6893], [5104, 7634, 9747, 2276]]


In [21]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
import numpy as np

In [23]:
# Bring every encoded sentence to a common length of sent_len by inserting
# zeros at the front ('pre' padding), producing one fixed-width row per
# sentence.
sent_len = 8
embedded_docs = pad_sequences(
    onehot,
    maxlen=sent_len,
    padding='pre',
)
print(embedded_docs)

[[   0    0    0    0 1485 2195 9861 4962]
 [   0    0    0    0 1485 2195 9861 3264]
 [   0    0    0    0 1485 1931 9861 3853]
 [   0    0    0 2981 5529 3771 2276 8525]
 [   0    0    0 2981 5529 3771 2276 7603]
 [   0    0    0 6414 1485 7697 9861 6893]
 [   0    0    0    0 5104 7634 9747 2276]]


In [24]:
# Intended embedding dimensionality: number of features per word vector
# for the Embedding layer.
dim = 10

In [26]:
# Single-layer model: an Embedding lookup table with voc_size rows and dim
# columns, applied to sequences of sent_len word indices. The output width
# comes from `dim` rather than a repeated literal so that changing `dim`
# actually changes the layer.
model = Sequential()
model.add(Embedding(voc_size, dim, input_length=sent_len))
# Compile with the Adam optimizer and mean-squared-error loss. No training
# call appears in the cells shown here, so later predictions reflect the
# layer's initial weights.
model.compile(optimizer='adam', loss='mse')

In [27]:
# Print the layer table (output shape and parameter count per layer).
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             100000    
                                                                 
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [28]:
# Pass the whole padded index matrix through the model: every word index in
# every sentence is replaced by its embedding vector. Repeated indices
# (including the 0 used for padding) yield identical vectors.
all_doc_vectors = model.predict(embedded_docs)
print(all_doc_vectors)

[[[-2.7386487e-02 -2.8704191e-02 -6.7861788e-03  2.7141783e-02
   -3.3525310e-02 -4.2268503e-02 -3.2890595e-02  4.3552842e-02
   -4.3854654e-02  1.8885102e-02]
  [-2.7386487e-02 -2.8704191e-02 -6.7861788e-03  2.7141783e-02
   -3.3525310e-02 -4.2268503e-02 -3.2890595e-02  4.3552842e-02
   -4.3854654e-02  1.8885102e-02]
  [-2.7386487e-02 -2.8704191e-02 -6.7861788e-03  2.7141783e-02
   -3.3525310e-02 -4.2268503e-02 -3.2890595e-02  4.3552842e-02
   -4.3854654e-02  1.8885102e-02]
  [-2.7386487e-02 -2.8704191e-02 -6.7861788e-03  2.7141783e-02
   -3.3525310e-02 -4.2268503e-02 -3.2890595e-02  4.3552842e-02
   -4.3854654e-02  1.8885102e-02]
  [ 1.2480795e-02  1.1499263e-02  4.0355552e-02 -2.8767586e-03
   -2.7539134e-02 -4.9231566e-02  1.7446671e-02 -3.5633218e-02
    4.4672672e-02 -1.5227724e-02]
  [-2.0238305e-02  8.7920800e-03  3.9380465e-02  3.7217047e-02
   -3.4308590e-02 -7.8883879e-03  4.5314804e-03 -1.2002364e-03
    4.9104691e-03 -2.3733020e-02]
  [-1.8017747e-02  8.4364042e-03  3.6935

In [29]:
# Padded index sequence of the first sentence, for comparison with its
# embedding vectors.
embedded_docs[0]

array([   0,    0,    0,    0, 1485, 2195, 9861, 4962])

In [30]:
# Embedding vectors for the first sentence only: one row per position of its
# padded index sequence.
first_doc_vectors = model.predict(embedded_docs)[0]
print(first_doc_vectors)

[[-0.02738649 -0.02870419 -0.00678618  0.02714178 -0.03352531 -0.0422685
  -0.0328906   0.04355284 -0.04385465  0.0188851 ]
 [-0.02738649 -0.02870419 -0.00678618  0.02714178 -0.03352531 -0.0422685
  -0.0328906   0.04355284 -0.04385465  0.0188851 ]
 [-0.02738649 -0.02870419 -0.00678618  0.02714178 -0.03352531 -0.0422685
  -0.0328906   0.04355284 -0.04385465  0.0188851 ]
 [-0.02738649 -0.02870419 -0.00678618  0.02714178 -0.03352531 -0.0422685
  -0.0328906   0.04355284 -0.04385465  0.0188851 ]
 [ 0.0124808   0.01149926  0.04035555 -0.00287676 -0.02753913 -0.04923157
   0.01744667 -0.03563322  0.04467267 -0.01522772]
 [-0.0202383   0.00879208  0.03938046  0.03721705 -0.03430859 -0.00788839
   0.00453148 -0.00120024  0.00491047 -0.02373302]
 [-0.01801775  0.0084364   0.03693532  0.019652    0.03119728 -0.02495444
  -0.04503194 -0.00354055 -0.00675451  0.01392961]
 [ 0.00557112 -0.03923676 -0.01346495  0.0297405  -0.01923785  0.04201258
   0.02973611  0.02511423 -0.00300621 -0.041934  ]]
