In [1]:
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
### Toy corpus: seven short sentences used throughout the notebook
sent = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [3]:
# Echo the corpus so the input sentences are visible in the notebook output.
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [4]:
## Vocabulary size
# Upper bound for the integer word indices: it is handed to one_hot() as the
# hashing range and to the Embedding layer as its number of rows. It is NOT a
# count of the distinct words in the corpus (the sentences above contain far
# fewer); a generous value simply makes hash collisions between words unlikely.
voc_size = 10_000

In [41]:
### Integer ("one hot") encoding
# Despite its name, Keras' one_hot() does not build a vector of size voc_size:
# it splits each sentence into words and hashes every word to an integer index
# below voc_size, so a sentence becomes a list of indices (one per word).
# NOTE: the indices come from a hash, so two different words can collide, and
# the exact numbers are not guaranteed to be the same in another kernel session.
one_hot_repr = [one_hot(sentence, voc_size) for sentence in sent]
one_hot_repr

[[3103, 73, 4717, 509],
 [3103, 73, 4717, 7972],
 [3103, 393, 4717, 5064],
 [3460, 7991, 604, 1308, 7762],
 [3460, 7991, 604, 1308, 1031],
 [5202, 3103, 4219, 4717, 7195],
 [9639, 5945, 335, 1308]]

In [42]:
## word Embedding Representation

from tensorflow.keras.layers import Embedding
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.models import Sequential

In [7]:
import numpy as np

In [None]:
# Bring every encoded sentence to the same length (sent_length) so that they
# stack into one 2-D integer array: shorter sentences get zeros added in front
# because padding='pre'.

sent_length = 8
embedded_docs = pad_sequences(
    one_hot_repr,
    maxlen=sent_length,
    padding='pre',
)
print(embedded_docs)

[[   0    0    0    0 3103   73 4717  509]
 [   0    0    0    0 3103   73 4717 7972]
 [   0    0    0    0 3103  393 4717 5064]
 [   0    0    0 3460 7991  604 1308 7762]
 [   0    0    0 3460 7991  604 1308 1031]
 [   0    0    0 5202 3103 4219 4717 7195]
 [   0    0    0    0 9639 5945  335 1308]]


In [None]:
## feature representation
# Length of the embedding vector produced for each word index; it is passed to
# the Embedding layer as its second argument (the output dimension).
dim = 10

In [36]:
## word embedding representation
# A single Embedding layer acts as a trainable lookup table with voc_size rows
# and dim columns: it receives integer word indices and returns, for each index,
# the corresponding row (a vector of length dim).
# The deprecated `input_length` argument is intentionally not passed: current
# Keras releases ignore it with a deprecation warning, and the sequence length is
# supplied when the model is built with input_shape=(None, sent_length).
model = Sequential()
model.add(Embedding(voc_size, dim))
# compile() is kept for completeness; none of the cells shown here train the
# model, so predict() returns the layer's initial (untrained) embedding vectors.
model.compile(optimizer='adam', loss='mse')



In [None]:
# Build the model for batches of sent_length word indices (batch size left
# open as None) so that summary() can report output shapes and parameter counts.
model.build((None, sent_length))
model.summary()

In [44]:
# Re-display the padded index matrix that is fed to the model below.
embedded_docs

array([[   0,    0,    0,    0, 3103,   73, 4717,  509],
       [   0,    0,    0,    0, 3103,   73, 4717, 7972],
       [   0,    0,    0,    0, 3103,  393, 4717, 5064],
       [   0,    0,    0, 3460, 7991,  604, 1308, 7762],
       [   0,    0,    0, 3460, 7991,  604, 1308, 1031],
       [   0,    0,    0, 5202, 3103, 4219, 4717, 7195],
       [   0,    0,    0,    0, 9639, 5945,  335, 1308]])

In [38]:
# Look up the embedding of every token in every padded sentence. The result has
# one dim-length vector per position, so its shape is (7, 8, 10) here; all
# padding positions (index 0) map to the same vector.
model.predict(embedded_docs)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 249ms/step


array([[[ 2.53770463e-02,  1.49498917e-02,  7.35829026e-03,
          3.35447453e-02,  3.84448878e-02, -5.08405268e-04,
          9.04776901e-03,  3.81303914e-02, -1.05826482e-02,
          3.79535295e-02],
        [ 2.53770463e-02,  1.49498917e-02,  7.35829026e-03,
          3.35447453e-02,  3.84448878e-02, -5.08405268e-04,
          9.04776901e-03,  3.81303914e-02, -1.05826482e-02,
          3.79535295e-02],
        [ 2.53770463e-02,  1.49498917e-02,  7.35829026e-03,
          3.35447453e-02,  3.84448878e-02, -5.08405268e-04,
          9.04776901e-03,  3.81303914e-02, -1.05826482e-02,
          3.79535295e-02],
        [ 2.53770463e-02,  1.49498917e-02,  7.35829026e-03,
          3.35447453e-02,  3.84448878e-02, -5.08405268e-04,
          9.04776901e-03,  3.81303914e-02, -1.05826482e-02,
          3.79535295e-02],
        [-2.56086718e-02, -7.47118145e-03,  3.23604383e-02,
          3.89474146e-02,  3.73188592e-02,  3.27905081e-02,
          9.95839760e-03, -5.11999056e-03,  1.855589

In [39]:
# First padded sentence: four leading zeros followed by the four word indices.
embedded_docs[0]

array([   0,    0,    0,    0, 3103,   73, 4717,  509])

In [40]:
# Embed only the first sentence. The model is fed a 2-D batch, so the row is
# kept as shape (1, sent_length) by slicing rather than indexing and reshaping.
model.predict(embedded_docs[:1])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 143ms/step


array([[[ 0.02537705,  0.01494989,  0.00735829,  0.03354475,
          0.03844489, -0.00050841,  0.00904777,  0.03813039,
         -0.01058265,  0.03795353],
        [ 0.02537705,  0.01494989,  0.00735829,  0.03354475,
          0.03844489, -0.00050841,  0.00904777,  0.03813039,
         -0.01058265,  0.03795353],
        [ 0.02537705,  0.01494989,  0.00735829,  0.03354475,
          0.03844489, -0.00050841,  0.00904777,  0.03813039,
         -0.01058265,  0.03795353],
        [ 0.02537705,  0.01494989,  0.00735829,  0.03354475,
          0.03844489, -0.00050841,  0.00904777,  0.03813039,
         -0.01058265,  0.03795353],
        [-0.02560867, -0.00747118,  0.03236044,  0.03894741,
          0.03731886,  0.03279051,  0.0099584 , -0.00511999,
          0.01855589,  0.01416532],
        [-0.00259968,  0.04167343,  0.04118284,  0.02623457,
         -0.04541204, -0.0436337 ,  0.03331913,  0.00035255,
          0.02343153,  0.04928071],
        [-0.02771413,  0.00115359,  0.02142804, -0.0