In [20]:
from tensorflow.keras.preprocessing.text import one_hot

In [21]:
### sentences
sentences=[  'the glass of milk',
     'the glass of juice',
     'the cup of tea',
    'I am a good boy',
     'I am a good developer',
     'understand the meaning of words',
     'your videos are good',]

In [22]:
sentences

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [23]:
## Define the vocabulary size
voc_size=10000

In [24]:
sentences

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [25]:
# One Hot Representation of each sentence. 
one_hot_repr=[one_hot(words,voc_size) for words in sentences]
one_hot_repr

[[6812, 9322, 4221, 488],
 [6812, 9322, 4221, 5795],
 [6812, 2104, 4221, 821],
 [4420, 1948, 3133, 9077, 4427],
 [4420, 1948, 3133, 9077, 4541],
 [6148, 6812, 7842, 4221, 788],
 [6630, 2478, 803, 9077]]

In [26]:
## word Embedding Representation

from tensorflow.keras.layers import Embedding, Input
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.models import Sequential
import numpy as np

In [27]:
sentences_length=len(sentences)
# Padding the sentences to make them of equal length. Using 0 as padding
embedded_docs=pad_sequences(one_hot_repr,padding='pre',maxlen=sentences_length)
print(embedded_docs)

[[   0    0    0 6812 9322 4221  488]
 [   0    0    0 6812 9322 4221 5795]
 [   0    0    0 6812 2104 4221  821]
 [   0    0 4420 1948 3133 9077 4427]
 [   0    0 4420 1948 3133 9077 4541]
 [   0    0 6148 6812 7842 4221  788]
 [   0    0    0 6630 2478  803 9077]]


In [28]:
## feature representation, 10 dimensional vector
dimensions=10

In [29]:
model=Sequential()
# Add an Input layer to explicitly define the input shape
model.add(Input(shape=(sentences_length,)))
model.add(Embedding(voc_size,dimensions,input_length=sentences_length))
model.compile('adam','mse')



In [30]:
model.summary()

In [31]:
# Each word is presented by 10 dimensions, and each sentence is represented by 8 words.
# So the whole corpus is represented by a 3D array.
model.predict(embedded_docs)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step


array([[[-0.03924055,  0.00573438,  0.02859208,  0.00999935,
         -0.02867278, -0.0340052 , -0.00041204,  0.0473137 ,
          0.04807879, -0.00086183],
        [-0.03924055,  0.00573438,  0.02859208,  0.00999935,
         -0.02867278, -0.0340052 , -0.00041204,  0.0473137 ,
          0.04807879, -0.00086183],
        [-0.03924055,  0.00573438,  0.02859208,  0.00999935,
         -0.02867278, -0.0340052 , -0.00041204,  0.0473137 ,
          0.04807879, -0.00086183],
        [-0.01256957,  0.03839036,  0.0121214 ,  0.03529363,
         -0.01346838, -0.02092313, -0.0017042 ,  0.00325852,
         -0.03350148, -0.03550392],
        [-0.01516712,  0.04828042,  0.03207307, -0.02295617,
          0.00418943,  0.03563832,  0.01290416,  0.04745105,
         -0.02150483,  0.04985965],
        [ 0.0457033 , -0.03071065, -0.02975959, -0.04828147,
          0.00819664, -0.01200432, -0.02476536,  0.01563838,
         -0.01344535,  0.00941537],
        [-0.01499474,  0.04395353, -0.0130306 ,  0.0

In [32]:
embedded_docs[0]

array([   0,    0,    0, 6812, 9322, 4221,  488], dtype=int32)

In [33]:
# Convert the single document to a batch with one sample
single_sample = np.expand_dims(embedded_docs[0], axis=0)
single_sample

array([[   0,    0,    0, 6812, 9322, 4221,  488]], dtype=int32)

In [34]:


# Predict using the model
prediction = model.predict(single_sample)
prediction

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step


array([[[-0.03924055,  0.00573438,  0.02859208,  0.00999935,
         -0.02867278, -0.0340052 , -0.00041204,  0.0473137 ,
          0.04807879, -0.00086183],
        [-0.03924055,  0.00573438,  0.02859208,  0.00999935,
         -0.02867278, -0.0340052 , -0.00041204,  0.0473137 ,
          0.04807879, -0.00086183],
        [-0.03924055,  0.00573438,  0.02859208,  0.00999935,
         -0.02867278, -0.0340052 , -0.00041204,  0.0473137 ,
          0.04807879, -0.00086183],
        [-0.01256957,  0.03839036,  0.0121214 ,  0.03529363,
         -0.01346838, -0.02092313, -0.0017042 ,  0.00325852,
         -0.03350148, -0.03550392],
        [-0.01516712,  0.04828042,  0.03207307, -0.02295617,
          0.00418943,  0.03563832,  0.01290416,  0.04745105,
         -0.02150483,  0.04985965],
        [ 0.0457033 , -0.03071065, -0.02975959, -0.04828147,
          0.00819664, -0.01200432, -0.02476536,  0.01563838,
         -0.01344535,  0.00941537],
        [-0.01499474,  0.04395353, -0.0130306 ,  0.0