# Word Embedding Techniques using Embedding Layer in Keras

In [1]:
!pip install tensorflow



In [2]:
from tensorflow.keras.preprocessing.text import one_hot

In [3]:
# Toy corpus: six short sentences that are encoded and embedded below.
sent = [
    "The world is a better place",
    "Marvel series is my favourite movie",
    "I like DC movies",
    "the cat is eating the food",
    "Tom and Jerry is my favourite movie",
    "Python is my favourite programming language",
]

In [4]:
# Echo the sentence list as the cell result to confirm its contents.
sent

['The world is a better place',
 'Marvel series is my favourite movie',
 'I like DC movies',
 'the cat is eating the food',
 'Tom and Jerry is my favourite movie',
 'Python is my favourite programming language']

In [5]:
# Vocabulary size: handed to `one_hot` when encoding the sentences and to the
# Embedding layer as the number of distinct word indices it can look up.
voc_size = 10_000

# One-Hot Representation (integer word indices)

In [6]:
# Encode every sentence as a list of integer word indices, one list per
# sentence, using `voc_size` as the size of the index space.
# NOTE(review): `one_hot` presumably relies on Python's built-in `hash` by
# default, so the indices may differ between kernel sessions — confirm against
# the Keras documentation if reproducible indices are required.
onehot_repr = [
    one_hot(sentence, voc_size)
    for sentence in sent
]

print(onehot_repr)

[[345, 7757, 5678, 194, 7500, 9774], [515, 7641, 5678, 6003, 7797, 7658], [3390, 2732, 3536, 2092], [345, 4081, 5678, 6634, 345, 9610], [1057, 6191, 954, 5678, 6003, 7797, 7658], [7248, 5678, 6003, 7797, 1221, 5740]]


# Word Embedding Representation

In [7]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

import numpy as np

In [8]:
# Pre-padding: bring every encoded sentence to the same fixed length by
# inserting zeros at the front of the shorter ones.
sent_length = 8

embedded_docs = pad_sequences(
    onehot_repr,
    maxlen=sent_length,
    padding='pre',
)

print(embedded_docs)

[[   0    0  345 7757 5678  194 7500 9774]
 [   0    0  515 7641 5678 6003 7797 7658]
 [   0    0    0    0 3390 2732 3536 2092]
 [   0    0  345 4081 5678 6634  345 9610]
 [   0 1057 6191  954 5678 6003 7797 7658]
 [   0    0 7248 5678 6003 7797 1221 5740]]


In [9]:
# Embedding vector size; equal to the output dimension of the Embedding layer
# built in the next cell.
dim = 10

In [10]:
# Single-layer model: an Embedding lookup that maps each of the `voc_size`
# possible word indices to a dense vector of length `dim`.
model = Sequential()

# Use the `dim` constant defined above instead of repeating the literal 10, so
# the embedding size is configured in exactly one place.
model.add(Embedding(input_dim=voc_size, output_dim=dim))

# `input_length` is deprecated in Keras 3 (it only triggers a warning and no
# longer fixes the input shape). Building explicitly with a
# (batch, sent_length) input shape keeps `model.summary()` informative before
# the first call to `predict`.
model.build(input_shape=(None, sent_length))

# The model is never trained in this notebook; compiling just attaches an
# optimizer and a loss as in the original cell.
model.compile(optimizer='adam', loss='mse')



In [11]:
# Print the layer-by-layer overview of the model defined above.
model.summary()

In [12]:
# Run the padded index matrix through the model: every position of every
# sentence is replaced by its embedding vector. The layer is untrained, so the
# values are just its initial weights.
word_vectors = model.predict(embedded_docs)
print(word_vectors)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 197ms/step
[[[ 0.02684449  0.04841875  0.01323238 -0.03764128 -0.04895658
    0.02481129 -0.00081769 -0.00444959 -0.02256937  0.03020643]
  [ 0.02684449  0.04841875  0.01323238 -0.03764128 -0.04895658
    0.02481129 -0.00081769 -0.00444959 -0.02256937  0.03020643]
  [ 0.04368674 -0.00066308  0.03810689 -0.02846077  0.04305739
    0.04496935  0.00130614  0.01924146  0.04987354 -0.03902163]
  [-0.00292604 -0.02464615 -0.01720327 -0.00361962 -0.00486781
   -0.01069207  0.03660892 -0.03931755  0.02434153  0.00751302]
  [ 0.04370015  0.00014519 -0.04188915  0.01578971 -0.04790139
   -0.03342431  0.0192199  -0.0204546  -0.01099417 -0.04030534]
  [-0.04067259  0.03964863  0.02968608  0.03055347  0.01897505
    0.00403186 -0.04217473  0.01032522 -0.03036503  0.04494753]
  [-0.01379765  0.01909081 -0.0440014  -0.01059464  0.04555487
   -0.00657779 -0.02150186  0.03339634  0.03568579  0.02913679]
  [-0.01497857  0.04981879  0.0330425 