## **Word Embedding Techniques using Embedding Layer in Keras**
---
---

### Please Connect a GPU Runtime Before Running

In [4]:
import tensorflow as tf

In [5]:
from tensorflow.keras.preprocessing.text import one_hot

### **Define Example Sentences**

In [6]:
# Example corpus: seven short sentences used by every later cell.
sent = [
    "the glass of milk",
    "the glass of juice",
    "the cup of tea",
    "I am a good boy",
    "I am a good developer",
    "understand the meaning of words",
    "your videos are good",
]

In [7]:
# Display the example sentences to confirm the list contents.
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [9]:
# Vocabulary size: handed to one_hot() and Embedding() below as the size of
# the word-index space.

voc_size = 500

## **One-Hot Representation (Integer Word Indices)**

In [12]:
# Encode each sentence as a list of integer word indices (one list per sentence).
onehot_rep = [
    one_hot(sentence, voc_size)
    for sentence in sent
]
print(onehot_rep)

[[333, 112, 490, 204], [333, 112, 490, 382], [333, 276, 490, 51], [27, 277, 5, 298, 23], [27, 277, 5, 298, 443], [461, 333, 77, 490, 172], [35, 214, 308, 298]]


## **Word Embedding Representation - Pre Padding**

In [23]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Fixed sequence length: every encoded sentence is left-padded ('pre') with
# zeros until it holds exactly this many indices.
sent_length = 8
embedded_docs = pad_sequences(
    onehot_rep,
    maxlen=sent_length,
    padding='pre',
)
print(embedded_docs)

[[  0   0   0   0 333 112 490 204]
 [  0   0   0   0 333 112 490 382]
 [  0   0   0   0 333 276 490  51]
 [  0   0   0  27 277   5 298  23]
 [  0   0   0  27 277   5 298 443]
 [  0   0   0 461 333  77 490 172]
 [  0   0   0   0  35 214 308 298]]


## **Model Preparation**

In [24]:
## Feature Dimension
# Number of features in each word's embedding vector (intended as the
# Embedding layer's output size).

dim = 10

In [25]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding

In [26]:
# Build a single-layer model that maps each word index to a dense vector.
model = Sequential()
# Use the `dim` constant defined earlier instead of repeating the literal 10,
# so the embedding width is configured in exactly one place.
# NOTE(review): `input_length` is deprecated in newer Keras releases; it is
# kept here so summary() still reports the fixed (None, 8, 10) output shape.
model.add(Embedding(voc_size, dim, input_length=sent_length))
# No fit() call appears in the visible cells, so the optimizer and loss only
# matter if the model is trained later.
model.compile(optimizer='adam', loss='mse')

In [27]:
# Print the layers with their output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             5000      
                                                                 
Total params: 5000 (19.53 KB)
Trainable params: 5000 (19.53 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [28]:
# Padded index sequence for the first sentence, 'the glass of milk'.
embedded_docs[0]

array([  0,   0,   0,   0, 333, 112, 490, 204], dtype=int32)

In [29]:
# Embed only the first sentence. predict() expects a batch, so slice with
# [:1] to keep a leading batch axis (shape (1, 8)) rather than passing the
# bare 1-D sequence, which Keras would treat as 8 separate scalar samples.
# The trailing [0] drops the batch axis again, leaving one embedding vector
# per token position.
model.predict(embedded_docs[:1])[0]



array([[ 0.02253313,  0.02364678,  0.03055363, -0.02770517,  0.03163827,
        -0.04949279, -0.04172983, -0.01153203,  0.0097811 ,  0.0315769 ],
       [ 0.02253313,  0.02364678,  0.03055363, -0.02770517,  0.03163827,
        -0.04949279, -0.04172983, -0.01153203,  0.0097811 ,  0.0315769 ],
       [ 0.02253313,  0.02364678,  0.03055363, -0.02770517,  0.03163827,
        -0.04949279, -0.04172983, -0.01153203,  0.0097811 ,  0.0315769 ],
       [ 0.02253313,  0.02364678,  0.03055363, -0.02770517,  0.03163827,
        -0.04949279, -0.04172983, -0.01153203,  0.0097811 ,  0.0315769 ],
       [-0.03002279, -0.02872071, -0.0114452 ,  0.0392057 ,  0.037064  ,
         0.0421629 ,  0.01543096, -0.01658965, -0.00745425,  0.04419034],
       [ 0.04986066, -0.02108379, -0.02789565,  0.00061845, -0.03768307,
        -0.0469161 ,  0.00305456,  0.03591101, -0.02435069,  0.03101785],
       [-0.03560441,  0.02631371, -0.01965072, -0.03698667,  0.02919558,
        -0.01031271, -0.01877086,  0.0088012 

In [30]:
# Embed all padded sentences in one call and print the resulting vectors.
print(model.predict(embedded_docs))

[[[ 0.02253313  0.02364678  0.03055363 -0.02770517  0.03163827
   -0.04949279 -0.04172983 -0.01153203  0.0097811   0.0315769 ]
  [ 0.02253313  0.02364678  0.03055363 -0.02770517  0.03163827
   -0.04949279 -0.04172983 -0.01153203  0.0097811   0.0315769 ]
  [ 0.02253313  0.02364678  0.03055363 -0.02770517  0.03163827
   -0.04949279 -0.04172983 -0.01153203  0.0097811   0.0315769 ]
  [ 0.02253313  0.02364678  0.03055363 -0.02770517  0.03163827
   -0.04949279 -0.04172983 -0.01153203  0.0097811   0.0315769 ]
  [-0.03002279 -0.02872071 -0.0114452   0.0392057   0.037064
    0.0421629   0.01543096 -0.01658965 -0.00745425  0.04419034]
  [ 0.04986066 -0.02108379 -0.02789565  0.00061845 -0.03768307
   -0.0469161   0.00305456  0.03591101 -0.02435069  0.03101785]
  [-0.03560441  0.02631371 -0.01965072 -0.03698667  0.02919558
   -0.01031271 -0.01877086  0.0088012   0.03057882 -0.02010313]
  [-0.03029221 -0.00881622  0.01263868 -0.03177278 -0.03300505
   -0.04584241  0.03215871 -0.02265584  0.0177642 