In [18]:
from tensorflow.keras.preprocessing.text import one_hot

In [19]:
sentences = [
    "I love programming in Python",
    "Deep learning is fascinating",
    "Natural Language Processing is a complex field",
    "I enjoy solving problems with code",
    "I love deep learning and AI"
]

In [20]:
# Defining the vocabulary size
vocab_size = 10_000

# one-hot encoding the sentences
one_hot_results = [one_hot(sentence, vocab_size) for sentence in sentences]
one_hot_results

[[1222, 6590, 5844, 9325, 2516],
 [8780, 9611, 6065, 8982],
 [8190, 3827, 8992, 6065, 9165, 3182, 9687],
 [1222, 5583, 3655, 4731, 9114, 9019],
 [1222, 6590, 8780, 9611, 5846, 7164]]

In [21]:
# word embedding representation
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
import numpy as np

In [22]:
# pad sequences to ensure uniform input size
max_length = 10
embedded_docs = pad_sequences(one_hot_results, maxlen=max_length, padding='pre')
embedded_docs

array([[   0,    0,    0,    0,    0, 1222, 6590, 5844, 9325, 2516],
       [   0,    0,    0,    0,    0,    0, 8780, 9611, 6065, 8982],
       [   0,    0,    0, 8190, 3827, 8992, 6065, 9165, 3182, 9687],
       [   0,    0,    0,    0, 1222, 5583, 3655, 4731, 9114, 9019],
       [   0,    0,    0,    0, 1222, 6590, 8780, 9611, 5846, 7164]],
      dtype=int32)

In [26]:
# feature representation through embedding layer
dim = 10

model = Sequential()
model.add(Embedding(vocab_size, dim))
model.compile("adam", "mse")

In [27]:
model.summary()

In [32]:
# predicting
model.predict(embedded_docs)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step


array([[[-2.77968496e-03, -4.63449471e-02, -1.73946992e-02,
         -2.42802259e-02,  1.80366077e-02,  1.09174624e-02,
          4.90615517e-03,  2.66758837e-02,  3.61892618e-02,
         -1.42380483e-02],
        [-2.77968496e-03, -4.63449471e-02, -1.73946992e-02,
         -2.42802259e-02,  1.80366077e-02,  1.09174624e-02,
          4.90615517e-03,  2.66758837e-02,  3.61892618e-02,
         -1.42380483e-02],
        [-2.77968496e-03, -4.63449471e-02, -1.73946992e-02,
         -2.42802259e-02,  1.80366077e-02,  1.09174624e-02,
          4.90615517e-03,  2.66758837e-02,  3.61892618e-02,
         -1.42380483e-02],
        [-2.77968496e-03, -4.63449471e-02, -1.73946992e-02,
         -2.42802259e-02,  1.80366077e-02,  1.09174624e-02,
          4.90615517e-03,  2.66758837e-02,  3.61892618e-02,
         -1.42380483e-02],
        [-2.77968496e-03, -4.63449471e-02, -1.73946992e-02,
         -2.42802259e-02,  1.80366077e-02,  1.09174624e-02,
          4.90615517e-03,  2.66758837e-02,  3.618926

In [29]:
embedded_docs[0]

array([   0,    0,    0,    0,    0, 1222, 6590, 5844, 9325, 2516],
      dtype=int32)

In [None]:
model.predict(np.array([embedded_docs[0]]))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step


array([[[-0.00277968, -0.04634495, -0.0173947 , -0.02428023,
          0.01803661,  0.01091746,  0.00490616,  0.02667588,
          0.03618926, -0.01423805],
        [-0.00277968, -0.04634495, -0.0173947 , -0.02428023,
          0.01803661,  0.01091746,  0.00490616,  0.02667588,
          0.03618926, -0.01423805],
        [-0.00277968, -0.04634495, -0.0173947 , -0.02428023,
          0.01803661,  0.01091746,  0.00490616,  0.02667588,
          0.03618926, -0.01423805],
        [-0.00277968, -0.04634495, -0.0173947 , -0.02428023,
          0.01803661,  0.01091746,  0.00490616,  0.02667588,
          0.03618926, -0.01423805],
        [-0.00277968, -0.04634495, -0.0173947 , -0.02428023,
          0.01803661,  0.01091746,  0.00490616,  0.02667588,
          0.03618926, -0.01423805],
        [ 0.02209621, -0.03429848, -0.03702158, -0.0410597 ,
         -0.04451046,  0.01040596,  0.00781135,  0.0200807 ,
         -0.00355848,  0.025859  ],
        [ 0.00856694,  0.03914991,  0.04113653, -0.0