In [4]:
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
sent = [
    'the glass of milk',
    'the glass of orange juice',
    'the cup of tea',
    'I am a student',
    'I am a good developer',
    'Understanding the problem',
    'This is a good example',
]
sent

['the glass of milk',
 'the glass of orange juice',
 'the cup of tea',
 'I am a student',
 'I am a good developer',
 'Understanding the problem',
 'This is a good example']

In [5]:
# define the vocabulary size
vocab_size = 10000

# one-hot representation
one_hot_repr = [one_hot(words, vocab_size) for words in sent]
one_hot_repr

[[9806, 247, 5293, 6753],
 [9806, 247, 5293, 4513, 9053],
 [9806, 2025, 5293, 8778],
 [9021, 5845, 4561, 7823],
 [9021, 5845, 4561, 3250, 2013],
 [5589, 9806, 9019],
 [5517, 6304, 4561, 3250, 9933]]

In [8]:
# word embedding representation
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

import numpy as np

In [11]:
sent_length = 8
embedded_docs = pad_sequences(one_hot_repr, maxlen=sent_length, padding='pre')
embedded_docs

array([[   0,    0,    0,    0, 9806,  247, 5293, 6753],
       [   0,    0,    0, 9806,  247, 5293, 4513, 9053],
       [   0,    0,    0,    0, 9806, 2025, 5293, 8778],
       [   0,    0,    0,    0, 9021, 5845, 4561, 7823],
       [   0,    0,    0, 9021, 5845, 4561, 3250, 2013],
       [   0,    0,    0,    0,    0, 5589, 9806, 9019],
       [   0,    0,    0, 5517, 6304, 4561, 3250, 9933]])

In [12]:
# feature representation
dim = 10

In [13]:
model = Sequential()
model.add(Embedding(vocab_size, dim, input_length=sent_length))
model.compile('adam', 'mse')

In [14]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             100000    
                                                                 
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [15]:
model.predict(embedded_docs)

array([[[-0.01923426,  0.02396462,  0.04020219,  0.04571543,
          0.02504296,  0.04664213, -0.04997855,  0.03593931,
         -0.02757558,  0.01338838],
        [-0.01923426,  0.02396462,  0.04020219,  0.04571543,
          0.02504296,  0.04664213, -0.04997855,  0.03593931,
         -0.02757558,  0.01338838],
        [-0.01923426,  0.02396462,  0.04020219,  0.04571543,
          0.02504296,  0.04664213, -0.04997855,  0.03593931,
         -0.02757558,  0.01338838],
        [-0.01923426,  0.02396462,  0.04020219,  0.04571543,
          0.02504296,  0.04664213, -0.04997855,  0.03593931,
         -0.02757558,  0.01338838],
        [-0.0092919 ,  0.02203036, -0.03255007,  0.04799148,
          0.01324851,  0.04069635,  0.01836823, -0.01850816,
         -0.02284502, -0.01177831],
        [-0.02625424,  0.01592163, -0.00513256, -0.03401368,
         -0.00464161, -0.01960353,  0.02762685,  0.00094493,
         -0.00267522,  0.03340397],
        [-0.01875826, -0.04130087,  0.00202908,  0.0

In [16]:
embedded_docs[0]

array([   0,    0,    0,    0, 9806,  247, 5293, 6753])