#### Problem Statement: 
How embedding layer works in Keras?

In [42]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding

In [43]:
model=Sequential() #create sequential model 

In [44]:
emb=Embedding(input_dim=10,output_dim=4,input_length=2) # creates embedding for 10 vocabularies with output dimension (10,4) 
                                                        #and expects input sequence length of 2 (gives warning if >< 2)
model.add(emb)# add layer to model
model.compile('adam','mse') #compile above model

In [59]:
import numpy as np
input_data=np.array([[1,2]])# create input data

In [60]:
pred=model.predict(input_data) #Predict for above input

In [61]:
pred

array([[[-0.03176137,  0.04662043, -0.03105542,  0.00867708],
        [ 0.03106494,  0.03647831,  0.04578931,  0.03198867]]],
      dtype=float32)

In [64]:
emb.get_weights()[0].shape

(10, 4)

#### Training an embedding layer

In [23]:
from numpy import array
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import  Flatten,Embedding,Dense
from tensorflow.keras.models import Sequential

In [24]:
reviews=[
        "Never coming back!",
        "Horrible service",
        "Rude waitress",
         "cold food.",
         "Horrible food!",
         "Awesome",
         "Awesome service!",
         "Rocks!",
         "poor work",
        "couldn\'t have done better"
]

In [26]:
labels=array([1,1,1,1,1,0,0,0,0,0]) #1 -negative 0- positive

In [27]:
VOCAB_SIZE=50
encoded_reviews=[one_hot(d,VOCAB_SIZE) for d in reviews]
print(f"Encoded reviews : {encoded_reviews }")

Encoded reviews : [[29, 7, 18], [31, 15], [29, 37], [4, 7], [31, 7], [48], [48, 15], [34], [9, 41], [4, 44, 12, 38]]


In [31]:
pad_sequences??

In [32]:
MAX_LENGTH=4
padded_reviews=pad_sequences(encoded_reviews,maxlen=MAX_LENGTH,padding="post")
print(padded_reviews)

[[29  7 18  0]
 [31 15  0  0]
 [29 37  0  0]
 [ 4  7  0  0]
 [31  7  0  0]
 [48  0  0  0]
 [48 15  0  0]
 [34  0  0  0]
 [ 9 41  0  0]
 [ 4 44 12 38]]


In [34]:
model=Sequential()
embedding_layer=Embedding(VOCAB_SIZE,8,input_length=MAX_LENGTH)
model.add(embedding_layer)
model.add(Flatten())
model.add(Dense(1,activation='sigmoid'))
model.compile(optimizer='adam',loss="binary_crossentropy",metrics=['acc'])


print(model.summary())


Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 4, 8)              400       
_________________________________________________________________
flatten (Flatten)            (None, 32)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 33        
Total params: 433
Trainable params: 433
Non-trainable params: 0
_________________________________________________________________
None


In [35]:
model.fit(padded_reviews,labels,epochs=100,verbose=1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x2e1a6fd1a90>

In [36]:
print(embedding_layer.get_weights()[0])

[[ 1.29968047e-01  6.17162436e-02  1.17176965e-01 -1.02523744e-01
   1.20807678e-01  1.15326509e-01  1.38356417e-01  5.09862900e-02]
 [ 1.09456852e-03 -3.29487696e-02 -2.90077329e-02  8.42285156e-03
  -3.79139557e-02  1.81378052e-03  2.55202688e-02 -3.30099687e-02]
 [-6.96761534e-03 -1.68717019e-02  2.13806070e-02 -1.48048401e-02
   1.65809318e-03 -1.73527114e-02 -8.00790638e-03  3.81978489e-02]
 [ 3.11393030e-02  4.29348685e-02  4.87075932e-02  2.93940939e-02
   1.33677572e-03  5.51140308e-03 -4.67559695e-02  3.51661332e-02]
 [ 8.81352425e-02  5.27929142e-02 -1.05815798e-01 -1.21042423e-01
  -1.00462586e-01 -1.42243803e-01  8.08918551e-02  9.27122682e-02]
 [-4.55707088e-02 -5.52631915e-04  1.98621862e-02 -3.23953778e-02
  -4.88193408e-02 -4.17602062e-02 -4.80681434e-02 -2.98222657e-02]
 [-1.88774951e-02  2.19022967e-02 -2.80194879e-02 -1.30860880e-03
   3.11461203e-02  1.29497908e-02  3.75992097e-02 -2.53645070e-02]
 [-5.86785972e-02 -7.01396540e-02 -1.40762731e-01  1.18695214e-01
   

In [38]:
print(embedding_layer.get_weights()[0].shape)

(50, 8)


In [39]:
loss,accuracy=model.evaluate(padded_reviews,labels,verbose=0)

In [40]:
loss

0.45243263244628906

In [65]:
accuracy

1.0