In [1]:
# importing libraries

from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# The sentence we want to embed
sentences = [
    'the glass of milk',
    'the glass of juice',
    'I amd a LLMs Expert',
    'I am a good NLP Expert',
    'I am a good developer',
    'understand the meaning of words',
    'I like coding',
]

In [3]:
sentences

['the glass of milk',
 'the glass of juice',
 'I amd a LLMs Expert',
 'I am a good NLP Expert',
 'I am a good developer',
 'understand the meaning of words',
 'I like coding']

In [4]:
# Vocabulary Size
voc = 10000

In [6]:
# one hot representation of words in a senetence
onehot_repre = [one_hot(sent, voc) for sent in sentences]
print(onehot_repre)

[[4736, 5916, 305, 5227], [4736, 5916, 305, 3065], [9908, 7822, 4528, 5305, 295], [9908, 6675, 4528, 1947, 2938, 295], [9908, 6675, 4528, 1947, 3718], [1119, 4736, 6571, 305, 741], [9908, 8223, 1825]]


In [8]:
max_sent_length = 7 # maximum length of sentence

padded_sent = pad_sequences(onehot_repre,max_sent_length,padding='pre') # padding 
print(padded_sent)

[[   0    0    0 4736 5916  305 5227]
 [   0    0    0 4736 5916  305 3065]
 [   0    0 9908 7822 4528 5305  295]
 [   0 9908 6675 4528 1947 2938  295]
 [   0    0 9908 6675 4528 1947 3718]
 [   0    0 1119 4736 6571  305  741]
 [   0    0    0    0 9908 8223 1825]]


In [9]:
dim = 10 # dimension of embedding vector

In [10]:
model = Sequential() 
model.add(Embedding(voc,dim,input_length=max_sent_length)) # adding Embedding layer
model.compile('adam','mse')

In [11]:
# summary of model
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 7, 10)             100000    
                                                                 
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [13]:
# Getting embedded vectors
print(model.predict(padded_sent))

[[[-0.01853482  0.01008189  0.01264877 -0.00770773  0.04268706
   -0.02214284  0.04618183  0.00440424 -0.00169808  0.0150709 ]
  [-0.01853482  0.01008189  0.01264877 -0.00770773  0.04268706
   -0.02214284  0.04618183  0.00440424 -0.00169808  0.0150709 ]
  [-0.01853482  0.01008189  0.01264877 -0.00770773  0.04268706
   -0.02214284  0.04618183  0.00440424 -0.00169808  0.0150709 ]
  [-0.01436921  0.03976275  0.00894896 -0.01282896  0.01032519
    0.02276995  0.03655708  0.04597462  0.03022042  0.00968598]
  [-0.02486321 -0.01503862 -0.0470174  -0.0492767  -0.02933103
   -0.02982229 -0.04179402  0.04712344  0.00475221  0.03225943]
  [ 0.01132494  0.04164844 -0.00354914  0.04091844  0.01490058
   -0.04017408  0.04027027  0.00574514  0.03078652 -0.03080601]
  [-0.00941274  0.0361835   0.02997449  0.00571889 -0.02367417
    0.03161124  0.03394028 -0.02610927 -0.00587664  0.01670427]]

 [[-0.01853482  0.01008189  0.01264877 -0.00770773  0.04268706
   -0.02214284  0.04618183  0.00440424 -0.0016

In [16]:
# one hot representation of 1st sentences after adding padding
print(padded_sent[0])

[   0    0    0 4736 5916  305 5227]


In [14]:
# Embedded vector representation of 1st sentences
print(model.predict(padded_sent)[0])

[[-0.01853482  0.01008189  0.01264877 -0.00770773  0.04268706 -0.02214284
   0.04618183  0.00440424 -0.00169808  0.0150709 ]
 [-0.01853482  0.01008189  0.01264877 -0.00770773  0.04268706 -0.02214284
   0.04618183  0.00440424 -0.00169808  0.0150709 ]
 [-0.01853482  0.01008189  0.01264877 -0.00770773  0.04268706 -0.02214284
   0.04618183  0.00440424 -0.00169808  0.0150709 ]
 [-0.01436921  0.03976275  0.00894896 -0.01282896  0.01032519  0.02276995
   0.03655708  0.04597462  0.03022042  0.00968598]
 [-0.02486321 -0.01503862 -0.0470174  -0.0492767  -0.02933103 -0.02982229
  -0.04179402  0.04712344  0.00475221  0.03225943]
 [ 0.01132494  0.04164844 -0.00354914  0.04091844  0.01490058 -0.04017408
   0.04027027  0.00574514  0.03078652 -0.03080601]
 [-0.00941274  0.0361835   0.02997449  0.00571889 -0.02367417  0.03161124
   0.03394028 -0.02610927 -0.00587664  0.01670427]]
