In [71]:
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.layers import Embedding
from tensorflow.keras.models import Sequential
# pad_sequences pads the encoded sentences so that they all have the same number of tokens
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

In [72]:
# Toy corpus: a handful of short English sentences that the following cells
# encode, pad and feed through an Embedding layer.
sentence = [
    'He is a good boy',
    'Something good is cooking',
    'I am very hungry',
    'You need to understand how Tensorflow works',
    'Let study tensorflow',
    'Machine Learning is the future',
    'Get me some biscuits',
    'Cryptocurrency is the future of finance',
]

display(sentence)

['He is a good boy',
 'Something good is cooking',
 'I am very hungry',
 'You need to understand how Tensorflow works',
 'Let study tensorflow',
 'Machine Learning is the future',
 'Get me some biscuits',
 'Cryptocurrency is the future of finance']

In [73]:
# Turn every sentence into a list of integer word indices.
# 10000 is the vocabulary size, i.e. the size of the index space the words
# are hashed into. Each element of `sentence` is a whole sentence; `one_hot`
# splits it into words itself and returns one index per word.
oneHot = [one_hot(text, 10000) for text in sentence]
display(oneHot)

[[7543, 199, 7464, 619, 2047],
 [6264, 619, 199, 4276],
 [3466, 1391, 9819, 6490],
 [7871, 8721, 781, 2740, 972, 4576, 341],
 [1136, 4606, 4576],
 [7019, 1074, 199, 6766, 8892],
 [1131, 4325, 9387, 1072],
 [4545, 199, 6766, 8892, 8041, 2866]]

In [74]:
# Pad every encoded sentence to a fixed length of 10 tokens.
# padding='pre' inserts the zeros at the start of each sentence, so the real
# word indices stay right-aligned.
# pad_sequences already returns a 2-D NumPy array (one row per sentence), so
# no additional np.array(...) conversion is needed.
oneHotPadded = pad_sequences(oneHot, padding='pre', maxlen=10)
print(oneHotPadded)

[[   0    0    0    0    0 7543  199 7464  619 2047]
 [   0    0    0    0    0    0 6264  619  199 4276]
 [   0    0    0    0    0    0 3466 1391 9819 6490]
 [   0    0    0 7871 8721  781 2740  972 4576  341]
 [   0    0    0    0    0    0    0 1136 4606 4576]
 [   0    0    0    0    0 7019 1074  199 6766 8892]
 [   0    0    0    0    0    0 1131 4325 9387 1072]
 [   0    0    0    0 4545  199 6766 8892 8041 2866]]


In [77]:
# Build a model that consists of a single Embedding layer.
# input_dim    --> vocabulary size; matches the 10000 passed to `one_hot`.
# output_dim   --> number of dimensions of the vector produced for every word.
# input_length --> length of every (padded) sentence.
model = Sequential()
model.add(Embedding(input_dim=10000, output_dim=10, input_length=10))
# Adam optimizer with mean-squared-error loss.
model.compile('adam', 'mse')
# summary() prints the table itself and returns None, so wrapping it in
# display() would only add a stray "None" to the cell output.
model.summary()

Model: "sequential_22"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_20 (Embedding)    (None, 10, 10)            100000    
                                                                 
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


None

In [81]:
# Look up the embedding vectors of the first sentence.
# The model expects a batch of shape (n_sentences, 10). Slicing with [:1]
# keeps the batch axis (shape (1, 10)); indexing with [0] would hand Keras a
# 1-D array that it interprets as ten separate one-token samples.
# The trailing [0] drops the batch axis again, leaving one 10-dimensional
# vector per token of the sentence.
model.predict(oneHotPadded[:1])[0]

array([[-0.04359294,  0.01269324, -0.02753763,  0.03517114,  0.01663211,
         0.00979983,  0.03896698, -0.04224716,  0.03092004, -0.02965454],
       [-0.04359294,  0.01269324, -0.02753763,  0.03517114,  0.01663211,
         0.00979983,  0.03896698, -0.04224716,  0.03092004, -0.02965454],
       [-0.04359294,  0.01269324, -0.02753763,  0.03517114,  0.01663211,
         0.00979983,  0.03896698, -0.04224716,  0.03092004, -0.02965454],
       [-0.04359294,  0.01269324, -0.02753763,  0.03517114,  0.01663211,
         0.00979983,  0.03896698, -0.04224716,  0.03092004, -0.02965454],
       [-0.04359294,  0.01269324, -0.02753763,  0.03517114,  0.01663211,
         0.00979983,  0.03896698, -0.04224716,  0.03092004, -0.02965454],
       [-0.02091444, -0.00288856,  0.03895693, -0.04648236,  0.04838778,
         0.01946336,  0.01361184,  0.00930878, -0.02307057, -0.04326379],
       [-0.02111969, -0.00157107,  0.0294031 , -0.03559632,  0.00358398,
         0.01082584, -0.00157455, -0.04401217

In [82]:
# Show the padded word indices of the first sentence (row 0 of the matrix).
oneHotPadded[0, :]

array([   0,    0,    0,    0,    0, 7543,  199, 7464,  619, 2047],
      dtype=int32)