## Embedding Layers
- An Embedding layer maps each integer word index of a sentence (here produced by `one_hot`) to a dense n-dimensional vector

In [2]:
import keras 
import tensorflow

In [3]:
from tensorflow.keras.preprocessing.text import Tokenizer,one_hot,text_to_word_sequence
from tensorflow.keras.preprocessing.sequence import pad_sequences,skipgrams

In [12]:
# Toy corpus: three short sentences of different lengths (4, 4 and 7 words).
sentences = [
    'My name is Prakash',
    'I have a dog',
    'I have a cat with three legs',
]

# Size of the integer index space that words are mapped into.
vocab_size = 10000

In [13]:
# Encode every sentence as a list of integers, one integer per word.
# NOTE(review): per the Keras docs, one_hot hashes each word into the index
# space instead of building a vocabulary, so collisions between different
# words are possible -- confirm this is acceptable for the use case.
one_hot_sentences = list(
    map(lambda sentence: one_hot(sentence, vocab_size), sentences)
)

In [14]:
# Every word has been replaced by an integer index smaller than vocab_size.
# The same word gets the same index within this run (compare "I have a" in
# the second and third sentences of the output below).
# NOTE(review): one_hot is documented as a hashing-based encoder, not a
# lookup in a fitted vocabulary, so the exact integers may differ between
# runs and distinct words may collide -- verify against the Keras docs.
one_hot_sentences

[[6967, 7645, 7813, 3163],
 [7781, 1502, 1204, 4950],
 [7781, 1502, 1204, 9793, 5713, 805, 4122]]

In [15]:
# Bring every index list to the same length so they stack into one 2-D array.
# The longest sentence has 7 words, so 8 leaves at least one padding slot.
max_length = 8

# padding='pre' puts the filler zeros in front of the real word indices.
padded_one_hot_sentences = pad_sequences(
    one_hot_sentences,
    maxlen=max_length,
    padding='pre',
)

In [16]:
# Inspect the padded result: one row per sentence, max_length columns,
# with zeros filling the left side of the shorter sentences.
padded_one_hot_sentences

array([[   0,    0,    0,    0, 6967, 7645, 7813, 3163],
       [   0,    0,    0,    0, 7781, 1502, 1204, 4950],
       [   0, 7781, 1502, 1204, 9793, 5713,  805, 4122]])

In [18]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding

In [19]:
# Number of features in the dense vector produced for each word index.
out_dim = 10

# A one-layer model: the Embedding holds a trainable table with one row of
# out_dim values per possible index (vocab_size rows in total) and is applied
# to index sequences of length max_length.
model = Sequential()
model.add(
    Embedding(
        input_dim=vocab_size,
        output_dim=out_dim,
        input_length=max_length,
    )
)

# Optimizer and loss are set only so the model is compiled; no training
# happens in this notebook.
model.compile(optimizer="adam", loss="mse")

In [20]:
# Print the architecture. The embedding outputs (batch, max_length, out_dim)
# and its parameter count is vocab_size * out_dim = 10000 * 10 = 100000.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 8, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [26]:
# Show the padded index array again so it can be compared with the
# embedding output produced from it.
padded_one_hot_sentences

array([[   0,    0,    0,    0, 6967, 7645, 7813, 3163],
       [   0,    0,    0,    0, 7781, 1502, 1204, 4950],
       [   0, 7781, 1502, 1204, 9793, 5713,  805, 4122]])

In [21]:
# Run the (untrained) embedding on all three padded sentences. Each index is
# replaced by its out_dim-long vector, giving one (max_length, out_dim) matrix
# per sentence. Positions holding the padding index 0 all map to the same
# vector, which is why the leading rows of the output are identical.
model.predict(padded_one_hot_sentences)

array([[[-0.04975795, -0.00176424, -0.03522616,  0.00721263,
         -0.0445093 ,  0.03277076,  0.02105275, -0.00984788,
         -0.01640121, -0.01726111],
        [-0.04975795, -0.00176424, -0.03522616,  0.00721263,
         -0.0445093 ,  0.03277076,  0.02105275, -0.00984788,
         -0.01640121, -0.01726111],
        [-0.04975795, -0.00176424, -0.03522616,  0.00721263,
         -0.0445093 ,  0.03277076,  0.02105275, -0.00984788,
         -0.01640121, -0.01726111],
        [-0.04975795, -0.00176424, -0.03522616,  0.00721263,
         -0.0445093 ,  0.03277076,  0.02105275, -0.00984788,
         -0.01640121, -0.01726111],
        [ 0.0023398 , -0.00601881,  0.01444662, -0.04768455,
          0.01281433, -0.03632858,  0.02604935,  0.00473189,
         -0.02137638,  0.02137518],
        [-0.00649123, -0.02824244,  0.01313602, -0.01033213,
         -0.04984204,  0.03601568,  0.02464323,  0.03065279,
          0.03606087,  0.02137338],
        [ 0.00977827,  0.00454103,  0.03578228, -0.0

In [27]:
# Embedding of the first sentence only: one 10-value row per position.
# The first four rows are identical because those positions are padding (0).
model.predict(padded_one_hot_sentences)[0]

array([[-0.04975795, -0.00176424, -0.03522616,  0.00721263, -0.0445093 ,
         0.03277076,  0.02105275, -0.00984788, -0.01640121, -0.01726111],
       [-0.04975795, -0.00176424, -0.03522616,  0.00721263, -0.0445093 ,
         0.03277076,  0.02105275, -0.00984788, -0.01640121, -0.01726111],
       [-0.04975795, -0.00176424, -0.03522616,  0.00721263, -0.0445093 ,
         0.03277076,  0.02105275, -0.00984788, -0.01640121, -0.01726111],
       [-0.04975795, -0.00176424, -0.03522616,  0.00721263, -0.0445093 ,
         0.03277076,  0.02105275, -0.00984788, -0.01640121, -0.01726111],
       [ 0.0023398 , -0.00601881,  0.01444662, -0.04768455,  0.01281433,
        -0.03632858,  0.02604935,  0.00473189, -0.02137638,  0.02137518],
       [-0.00649123, -0.02824244,  0.01313602, -0.01033213, -0.04984204,
         0.03601568,  0.02464323,  0.03065279,  0.03606087,  0.02137338],
       [ 0.00977827,  0.00454103,  0.03578228, -0.0451211 , -0.04464176,
        -0.03648539, -0.01050369,  0.00991673

In [28]:
## Here each input word index of a sentence is converted to a 10-dimensional output vector

## Steps
- Convert each sentence to a list of integer word indices with `one_hot`
- Pad the index lists so that they all have the same length
- Use an Embedding layer to convert the padded indices to an n-dimensional (n = 10 above) feature representation for every word