In [1]:
from tensorflow.keras.preprocessing.text import one_hot

In [6]:
## vocabulary size
voc_size = 10000

In [7]:
## sentences
sent=['the glass of milk',
     'the glass of juice',
     'the cup of tea',
     'I am a good boy',
     'I am a good developer',
     'understand the meaning of words',
     'your videos are good']

In [8]:
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

# One Hot Representation 

In [9]:
# we will get the indexes

onehot_repr=[one_hot(words,voc_size)for words in sent]
print(onehot_repr)

[[7198, 6083, 6946, 6879], [7198, 6083, 6946, 2403], [7198, 85, 6946, 4984], [1764, 5027, 1881, 7530, 4599], [1764, 5027, 1881, 7530, 7019], [2811, 7198, 9479, 6946, 5513], [2684, 4469, 3948, 7530]]


# Word Embedding Representation 

In [10]:
## we are using pad_sequences because we have to make all the sentences size should be same

from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [11]:
import numpy as np

In [12]:
## here we are giving size to all sentences which is padding. it will start with 0 and make all the sentence equal size

sent_length=8
embedded_docs= pad_sequences(onehot_repr,padding='pre',maxlen=sent_length)
print(embedded_docs)

[[   0    0    0    0 7198 6083 6946 6879]
 [   0    0    0    0 7198 6083 6946 2403]
 [   0    0    0    0 7198   85 6946 4984]
 [   0    0    0 1764 5027 1881 7530 4599]
 [   0    0    0 1764 5027 1881 7530 7019]
 [   0    0    0 2811 7198 9479 6946 5513]
 [   0    0    0    0 2684 4469 3948 7530]]


In [13]:
# here we are giving the diminsions or features 
dim=10

In [14]:
# we are implementing the embedding layer with voc_size, dimension=10, sentence length=8
model = Sequential()
model.add(Embedding(voc_size,dim,input_length=sent_length))
model.compile('adam','mse')

In [15]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 8, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [16]:
print(model.predict(embedded_docs))

[[[ 0.00859893 -0.02293475  0.0240513  -0.01923424 -0.04814951
   -0.02305216 -0.03203966 -0.04330948  0.0064633   0.00987948]
  [ 0.00859893 -0.02293475  0.0240513  -0.01923424 -0.04814951
   -0.02305216 -0.03203966 -0.04330948  0.0064633   0.00987948]
  [ 0.00859893 -0.02293475  0.0240513  -0.01923424 -0.04814951
   -0.02305216 -0.03203966 -0.04330948  0.0064633   0.00987948]
  [ 0.00859893 -0.02293475  0.0240513  -0.01923424 -0.04814951
   -0.02305216 -0.03203966 -0.04330948  0.0064633   0.00987948]
  [-0.03321941  0.00623692 -0.04207294  0.02386386  0.03504859
    0.00585292  0.0136993   0.04226683  0.03298299  0.01994372]
  [ 0.03494786 -0.02591912 -0.03240041 -0.02581116  0.00252669
   -0.03294457  0.00451231 -0.0339483   0.01984968  0.01068091]
  [-0.04749812 -0.004496    0.02794597 -0.00421721 -0.01715671
    0.04947526  0.02648664  0.03917713  0.03079074 -0.0460734 ]
  [-0.00292041  0.00959351  0.00471687 -0.02358097  0.03198479
    0.04486436 -0.02319207  0.01363672  0.008398

In [17]:
embedded_docs[0]

array([   0,    0,    0,    0, 7198, 6083, 6946, 6879])

In [19]:
# embedding matrix

print(model.predict(embedded_docs)[0])

[[ 0.00859893 -0.02293475  0.0240513  -0.01923424 -0.04814951 -0.02305216
  -0.03203966 -0.04330948  0.0064633   0.00987948]
 [ 0.00859893 -0.02293475  0.0240513  -0.01923424 -0.04814951 -0.02305216
  -0.03203966 -0.04330948  0.0064633   0.00987948]
 [ 0.00859893 -0.02293475  0.0240513  -0.01923424 -0.04814951 -0.02305216
  -0.03203966 -0.04330948  0.0064633   0.00987948]
 [ 0.00859893 -0.02293475  0.0240513  -0.01923424 -0.04814951 -0.02305216
  -0.03203966 -0.04330948  0.0064633   0.00987948]
 [-0.03321941  0.00623692 -0.04207294  0.02386386  0.03504859  0.00585292
   0.0136993   0.04226683  0.03298299  0.01994372]
 [ 0.03494786 -0.02591912 -0.03240041 -0.02581116  0.00252669 -0.03294457
   0.00451231 -0.0339483   0.01984968  0.01068091]
 [-0.04749812 -0.004496    0.02794597 -0.00421721 -0.01715671  0.04947526
   0.02648664  0.03917713  0.03079074 -0.0460734 ]
 [-0.00292041  0.00959351  0.00471687 -0.02358097  0.03198479  0.04486436
  -0.02319207  0.01363672  0.00839893 -0.02968538]]