In [28]:
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding
from tensorflow.keras.models import Sequential

In [29]:
sentences = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good'
]

In [30]:
voc_size = 10000

In [31]:
one_hot_repr = [one_hot(words, voc_size) for words in sentences]
print("One-hot encoded sentences:")
print(one_hot_repr)

One-hot encoded sentences:
[[4457, 1702, 6900, 7314], [4457, 1702, 6900, 3458], [4457, 7119, 6900, 1793], [5069, 5141, 5596, 3782, 1822], [5069, 5141, 5596, 3782, 4245], [5823, 4457, 7264, 6900, 6299], [4370, 6610, 5729, 3782]]


In [32]:
sent_length = 8  # Fixed length for all sentences
embedded_docs = pad_sequences(one_hot_repr, padding='pre', maxlen=sent_length)
print("Padded sequences:")
print(embedded_docs)

Padded sequences:
[[   0    0    0    0 4457 1702 6900 7314]
 [   0    0    0    0 4457 1702 6900 3458]
 [   0    0    0    0 4457 7119 6900 1793]
 [   0    0    0 5069 5141 5596 3782 1822]
 [   0    0    0 5069 5141 5596 3782 4245]
 [   0    0    0 5823 4457 7264 6900 6299]
 [   0    0    0    0 4370 6610 5729 3782]]


In [33]:
dim = 10

In [34]:
model = Sequential()
model.add(Embedding(input_dim=voc_size, output_dim=dim, input_length=sent_length))
model.build(input_shape=(None, sent_length))  # Force model building
model.compile(optimizer='adam', loss='mse')
model.summary()

In [40]:
import numpy as np
model.predict(embedded_docs)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step


array([[[-4.29514311e-02,  4.04275395e-02,  2.65466683e-02,
         -4.12707403e-03,  2.86945142e-02,  6.60086796e-03,
         -9.28219408e-03, -4.45008539e-02,  3.62881087e-02,
          3.73176001e-02],
        [-4.29514311e-02,  4.04275395e-02,  2.65466683e-02,
         -4.12707403e-03,  2.86945142e-02,  6.60086796e-03,
         -9.28219408e-03, -4.45008539e-02,  3.62881087e-02,
          3.73176001e-02],
        [-4.29514311e-02,  4.04275395e-02,  2.65466683e-02,
         -4.12707403e-03,  2.86945142e-02,  6.60086796e-03,
         -9.28219408e-03, -4.45008539e-02,  3.62881087e-02,
          3.73176001e-02],
        [-4.29514311e-02,  4.04275395e-02,  2.65466683e-02,
         -4.12707403e-03,  2.86945142e-02,  6.60086796e-03,
         -9.28219408e-03, -4.45008539e-02,  3.62881087e-02,
          3.73176001e-02],
        [-4.30595875e-02, -2.22280622e-02,  1.65790953e-02,
         -4.53306101e-02, -2.59096976e-02, -1.99819934e-02,
         -1.38526037e-03,  1.82220452e-02,  2.781852

In [43]:
# Predict for a single sentence
example_input = np.expand_dims(embedded_docs[0], axis=0)  # Add batch dimension
single_prediction = model.predict(example_input)
print("Prediction for the first sentence:")
print(single_prediction)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 310ms/step
Prediction for the first sentence:
[[[-0.04295143  0.04042754  0.02654667 -0.00412707  0.02869451
    0.00660087 -0.00928219 -0.04450085  0.03628811  0.0373176 ]
  [-0.04295143  0.04042754  0.02654667 -0.00412707  0.02869451
    0.00660087 -0.00928219 -0.04450085  0.03628811  0.0373176 ]
  [-0.04295143  0.04042754  0.02654667 -0.00412707  0.02869451
    0.00660087 -0.00928219 -0.04450085  0.03628811  0.0373176 ]
  [-0.04295143  0.04042754  0.02654667 -0.00412707  0.02869451
    0.00660087 -0.00928219 -0.04450085  0.03628811  0.0373176 ]
  [-0.04305959 -0.02222806  0.0165791  -0.04533061 -0.0259097
   -0.01998199 -0.00138526  0.01822205  0.02781853 -0.03817584]
  [-0.01986421  0.04231245 -0.04498339  0.02086207  0.04164578
   -0.03098351  0.02808762  0.03411118  0.02367078 -0.04056127]
  [-0.00169193 -0.00049477 -0.03570456 -0.0143706   0.00604173
    0.0218801   0.0416323  -0.00129708 -0.01108887 -0.02019559]
  [ 