In [2]:
from tensorflow.keras.preprocessing.text import one_hot

In [4]:
## sentences
sent = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good'
]

In [5]:
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [6]:
## Define vocabulary size
voc_size = 10000

In [7]:
## One hot representation
one_hot_repr = [one_hot(words, voc_size) for words in sent]
one_hot_repr

[[6408, 274, 1441, 6398],
 [6408, 274, 1441, 769],
 [6408, 8524, 1441, 4214],
 [1439, 972, 7234, 3948, 6728],
 [1439, 972, 7234, 3948, 2662],
 [1064, 6408, 3706, 1441, 3378],
 [6424, 2868, 6875, 3948]]

In [10]:
## Word embedding representation
from tensorflow.keras.layers import Embedding
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.models import Sequential

import numpy as np

In [13]:
sent_length = 8
embedded_docs = pad_sequences(one_hot_repr, padding='pre', maxlen=sent_length)
print(embedded_docs)

[[   0    0    0    0 6408  274 1441 6398]
 [   0    0    0    0 6408  274 1441  769]
 [   0    0    0    0 6408 8524 1441 4214]
 [   0    0    0 1439  972 7234 3948 6728]
 [   0    0    0 1439  972 7234 3948 2662]
 [   0    0    0 1064 6408 3706 1441 3378]
 [   0    0    0    0 6424 2868 6875 3948]]


In [14]:
## feature representation
dim = 10

In [16]:
model = Sequential()
model.add(Embedding(voc_size, dim, input_length=sent_length))
model.compile(optimizer='adam', loss='mse')




In [17]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 8, 10)             100000    
                                                                 
Total params: 100000 (390.62 KB)
Trainable params: 100000 (390.62 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [20]:
model.predict(embedded_docs)[0]



array([[ 0.02414849, -0.03012638,  0.04867486, -0.04793939,  0.02923149,
         0.02476618, -0.04032129,  0.00213636,  0.00691742, -0.00754889],
       [ 0.02414849, -0.03012638,  0.04867486, -0.04793939,  0.02923149,
         0.02476618, -0.04032129,  0.00213636,  0.00691742, -0.00754889],
       [ 0.02414849, -0.03012638,  0.04867486, -0.04793939,  0.02923149,
         0.02476618, -0.04032129,  0.00213636,  0.00691742, -0.00754889],
       [ 0.02414849, -0.03012638,  0.04867486, -0.04793939,  0.02923149,
         0.02476618, -0.04032129,  0.00213636,  0.00691742, -0.00754889],
       [-0.03882322, -0.0245204 ,  0.0409398 ,  0.04327375, -0.01878547,
         0.02366594, -0.01987461,  0.02498956,  0.01987013, -0.03823682],
       [-0.00804259,  0.01970047, -0.04978743,  0.04726375, -0.02773323,
        -0.03562165,  0.03684862,  0.04319472,  0.03772352, -0.00397316],
       [ 0.02865184, -0.03902243,  0.0458759 , -0.00443899, -0.03867054,
         0.03220526,  0.0441171 , -0.00771819