## Word Embedding using Embedding Layer in Keras

In [3]:
import keras

In [4]:
# Display the installed Keras version.
keras.__version__

'2.12.0'

In [5]:
from tensorflow.keras.preprocessing.text import one_hot

In [6]:
# Toy corpus: seven short sentences that the later cells encode and embed.
snt = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'i am good boy',
    'I am good devoloper',
    'understand the meaning of word',
    'your videos are good',
]

In [7]:
# Display the corpus as the cell's output.
snt

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'i am good boy',
 'I am good devoloper',
 'understand the meaning of word',
 'your videos are good']

In [8]:
# Vocabulary size handed to one_hot and to the Embedding layer.
voc_size = 10_000

### One Hot Representation

In [9]:
# Encode every sentence as a list of integers, one integer per word.
onehot_repr = [one_hot(sentence, voc_size) for sentence in snt]
print(onehot_repr)  # one list of word indices per sentence

[[195, 6513, 5982, 8697], [195, 6513, 5982, 7600], [195, 7182, 5982, 2015], [1793, 455, 2741, 8269], [1793, 455, 2741, 9438], [6500, 195, 2857, 5982, 6510], [6780, 4294, 2257, 2741]]


### Word Embedding Representation

In [10]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences  ## in every sentence same number of words(same size)
from tensorflow.keras.models import Sequential
import numpy as np

In [11]:
# Left-pad ('pre') each encoded sentence with zeros so all rows have the same
# length, sent_length.
sent_length = 8
embedded_docs = pad_sequences(onehot_repr, maxlen=sent_length, padding='pre')
print(embedded_docs)

[[   0    0    0    0  195 6513 5982 8697]
 [   0    0    0    0  195 6513 5982 7600]
 [   0    0    0    0  195 7182 5982 2015]
 [   0    0    0    0 1793  455 2741 8269]
 [   0    0    0    0 1793  455 2741 9438]
 [   0    0    0 6500  195 2857 5982 6510]
 [   0    0    0    0 6780 4294 2257 2741]]


In [13]:
# Embedding dimension: number of features in each word vector.
dim = 10

In [15]:
# Build a model that consists solely of an Embedding layer: each of the
# sent_length word indices in a padded sentence is mapped to a dim-sized vector.
model = Sequential()
# Pass the `dim` constant instead of repeating the literal 10, so the embedding
# size is configured in a single place.
model.add(Embedding(voc_size, dim, input_length=sent_length))
# Name the arguments explicitly; no training happens in the cells shown here.
model.compile(optimizer='adam', loss='mse')

In [16]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             100000    
                                                                 
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [17]:
# Run the padded index matrix through the embedding layer and print the
# resulting vectors for every sentence.
all_doc_vectors = model.predict(embedded_docs)
print(all_doc_vectors)

[[[-0.00129722  0.02955873  0.0107665  -0.02178662  0.01819297
    0.03077427 -0.00563002  0.04942629  0.04053222 -0.03041469]
  [-0.00129722  0.02955873  0.0107665  -0.02178662  0.01819297
    0.03077427 -0.00563002  0.04942629  0.04053222 -0.03041469]
  [-0.00129722  0.02955873  0.0107665  -0.02178662  0.01819297
    0.03077427 -0.00563002  0.04942629  0.04053222 -0.03041469]
  [-0.00129722  0.02955873  0.0107665  -0.02178662  0.01819297
    0.03077427 -0.00563002  0.04942629  0.04053222 -0.03041469]
  [-0.04008716 -0.03147718 -0.01468266  0.03388233 -0.04930493
    0.03826338 -0.02215146 -0.00623018 -0.01098312 -0.00349455]
  [ 0.01101459 -0.03163885 -0.04943828  0.04762322  0.01122724
   -0.04815269 -0.04115853  0.02923882  0.00563005 -0.04698339]
  [-0.02240099 -0.03656863 -0.00627693 -0.00586854  0.01776174
   -0.0438928  -0.0306826   0.01558458 -0.0278422   0.01931036]
  [ 0.02441821 -0.04908919  0.04215345  0.0258624   0.00508966
   -0.00302477  0.01695761 -0.01520833 -0.045191

In [18]:
# Padded word indices of the first sentence.
embedded_docs[0]

array([   0,    0,    0,    0,  195, 6513, 5982, 8697])

In [19]:
# Embedding vectors of the first sentence only: one row per padded position.
first_doc_vectors = model.predict(embedded_docs)[0]
print(first_doc_vectors)

[[-0.00129722  0.02955873  0.0107665  -0.02178662  0.01819297  0.03077427
  -0.00563002  0.04942629  0.04053222 -0.03041469]
 [-0.00129722  0.02955873  0.0107665  -0.02178662  0.01819297  0.03077427
  -0.00563002  0.04942629  0.04053222 -0.03041469]
 [-0.00129722  0.02955873  0.0107665  -0.02178662  0.01819297  0.03077427
  -0.00563002  0.04942629  0.04053222 -0.03041469]
 [-0.00129722  0.02955873  0.0107665  -0.02178662  0.01819297  0.03077427
  -0.00563002  0.04942629  0.04053222 -0.03041469]
 [-0.04008716 -0.03147718 -0.01468266  0.03388233 -0.04930493  0.03826338
  -0.02215146 -0.00623018 -0.01098312 -0.00349455]
 [ 0.01101459 -0.03163885 -0.04943828  0.04762322  0.01122724 -0.04815269
  -0.04115853  0.02923882  0.00563005 -0.04698339]
 [-0.02240099 -0.03656863 -0.00627693 -0.00586854  0.01776174 -0.0438928
  -0.0306826   0.01558458 -0.0278422   0.01931036]
 [ 0.02441821 -0.04908919  0.04215345  0.0258624   0.00508966 -0.00302477
   0.01695761 -0.01520833 -0.045191    0.01960868]]
