<a href="https://colab.research.google.com/github/swarnava-96/Natural-Language-Processing-NLP-/blob/main/Word_Embedding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Word Embedding Techniques using Embedding Layer in Keras**
### Importing the necessary libraries

In [1]:
from tensorflow.keras.preprocessing.text import one_hot

### Sentences

In [2]:
sent = ["the glass of milk",
        "the glass of juice",
        "the cup of tea",
        "I am a good boy",
        "I am a good developer",
        "understand the meaning of words",
        "your videos are good"]

In [3]:
# Checking out our sentences

sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

### Setting Vocabulary size

In [4]:
voc_size = 10000

### One Hot Representation

In [5]:
onehot_repr = [one_hot(words, voc_size) for words in sent]
print(onehot_repr)

[[7645, 7835, 1698, 9368], [7645, 7835, 1698, 1434], [7645, 4423, 1698, 6664], [1236, 8512, 4795, 6404, 388], [1236, 8512, 4795, 6404, 6979], [9451, 7645, 7287, 1698, 1554], [2832, 9694, 9800, 6404]]


### Word Embedding Representation

In [7]:
# Importing the libraries required for Word Embedding

import numpy as np
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [8]:
# Word Embedding

sent_length = 8
embedded_docs = pad_sequences(onehot_repr, padding = "pre", maxlen = sent_length)
print(embedded_docs)

[[   0    0    0    0 7645 7835 1698 9368]
 [   0    0    0    0 7645 7835 1698 1434]
 [   0    0    0    0 7645 4423 1698 6664]
 [   0    0    0 1236 8512 4795 6404  388]
 [   0    0    0 1236 8512 4795 6404 6979]
 [   0    0    0 9451 7645 7287 1698 1554]
 [   0    0    0    0 2832 9694 9800 6404]]


In [9]:
# Lets set the number of dimensions for feature representation
dim = 10

In [10]:
model = Sequential()
model.add(Embedding(voc_size, 10, input_length = sent_length))
model.compile("adam", "mse")

In [11]:
# Lets check the model summary

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 8, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Lets print the predicted embedded docs

print(model.predict(embedded_docs))

[[[ 0.04385605 -0.04427306 -0.01280542  0.02702048  0.00590461
   -0.03008297  0.00358193  0.04501467 -0.0036065  -0.02306447]
  [ 0.04385605 -0.04427306 -0.01280542  0.02702048  0.00590461
   -0.03008297  0.00358193  0.04501467 -0.0036065  -0.02306447]
  [ 0.04385605 -0.04427306 -0.01280542  0.02702048  0.00590461
   -0.03008297  0.00358193  0.04501467 -0.0036065  -0.02306447]
  [ 0.04385605 -0.04427306 -0.01280542  0.02702048  0.00590461
   -0.03008297  0.00358193  0.04501467 -0.0036065  -0.02306447]
  [ 0.02065897  0.03505475 -0.00252882 -0.02693741 -0.00110728
   -0.00746633 -0.0368011  -0.03159845  0.03696335  0.01862318]
  [ 0.04774947 -0.0314851  -0.03344506 -0.02918556 -0.00589155
    0.03298198 -0.02697475 -0.00680984 -0.04018686  0.02878436]
  [ 0.00894778 -0.02650975  0.04831034 -0.00918754 -0.00099257
    0.00219346 -0.04965463  0.03449284 -0.00610434 -0.03959753]
  [ 0.00574357 -0.03479492 -0.01697616 -0.0412372   0.04821521
    0.02283743 -0.03967321  0.02922625  0.027419

In [13]:
embedded_docs[0]

array([   0,    0,    0,    0, 7645, 7835, 1698, 9368], dtype=int32)

In [14]:
print(model.predict(embedded_docs[0]))

[[[ 0.04385605 -0.04427306 -0.01280542  0.02702048  0.00590461
   -0.03008297  0.00358193  0.04501467 -0.0036065  -0.02306447]]

 [[ 0.04385605 -0.04427306 -0.01280542  0.02702048  0.00590461
   -0.03008297  0.00358193  0.04501467 -0.0036065  -0.02306447]]

 [[ 0.04385605 -0.04427306 -0.01280542  0.02702048  0.00590461
   -0.03008297  0.00358193  0.04501467 -0.0036065  -0.02306447]]

 [[ 0.04385605 -0.04427306 -0.01280542  0.02702048  0.00590461
   -0.03008297  0.00358193  0.04501467 -0.0036065  -0.02306447]]

 [[ 0.02065897  0.03505475 -0.00252882 -0.02693741 -0.00110728
   -0.00746633 -0.0368011  -0.03159845  0.03696335  0.01862318]]

 [[ 0.04774947 -0.0314851  -0.03344506 -0.02918556 -0.00589155
    0.03298198 -0.02697475 -0.00680984 -0.04018686  0.02878436]]

 [[ 0.00894778 -0.02650975  0.04831034 -0.00918754 -0.00099257
    0.00219346 -0.04965463  0.03449284 -0.00610434 -0.03959753]]

 [[ 0.00574357 -0.03479492 -0.01697616 -0.0412372   0.04821521
    0.02283743 -0.03967321  0.0292