# Embedding example
https://hk.saowen.com/a/d80aa14a501c9f9d225fe2d67f5403bdbba58b9e62b0ff6a1f46f7422e52459e

In [1]:
from keras.layers import Dense, Flatten, Input
from keras.layers.embeddings import Embedding
from keras.models import Model
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import one_hot
import numpy as np


Using TensorFlow backend.


In [2]:

# define documents
docs = ['Well done!',
        'Good work',
        'Great effort',
        'nice work',
        'Excellent!',
        'Weak',
        'Poor effort!',
        'not good',
        'poor work',
        'Could have done better.']
# define class labels
labels = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
# integer encode the documents
vocab_size = 50
encoded_docs = [one_hot(d, vocab_size) for d in docs]
encoded_docs

for i in range(10):
    print('-----------')
    print(docs[i])
    print(encoded_docs[i])

-----------
Well done!
[22, 20]
-----------
Good work
[1, 26]
-----------
Great effort
[36, 40]
-----------
nice work
[11, 26]
-----------
Excellent!
[21]
-----------
Weak
[21]
-----------
Poor effort!
[39, 40]
-----------
not good
[25, 1]
-----------
poor work
[39, 26]
-----------
Could have done better.
[24, 12, 20, 28]


In [3]:
# pad documents to a max length of 4 words
max_length = 4
padded_docs = pad_sequences(encoded_docs, maxlen=max_length, padding='post')
print(padded_docs)

[[22 20  0  0]
 [ 1 26  0  0]
 [36 40  0  0]
 [11 26  0  0]
 [21  0  0  0]
 [21  0  0  0]
 [39 40  0  0]
 [25  1  0  0]
 [39 26  0  0]
 [24 12 20 28]]


Embedding(50, 8, input_length=4)

50 words in a dict

4 words in a input sequence

one word to a 8dim vector (ex: 1 > [0,0,0,0,0,0,0,1] )

In [4]:

# define the model
input = Input(shape=(4, ))
x = Embedding(vocab_size, 8, input_length=4)(input)
x = Flatten()(x)
x = Dense(1, activation='sigmoid')(x)
model = Model(inputs=input, outputs=x)

# compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])


In [5]:
# summarize the model
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 4)                 0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 4, 8)              400       
_________________________________________________________________
flatten_1 (Flatten)          (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 33        
Total params: 433
Trainable params: 433
Non-trainable params: 0
_________________________________________________________________
None


In [6]:
# fit the model
model.fit(padded_docs, 
          labels, 
          epochs=50, 
          verbose=1)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f06cf48ac88>

In [7]:

# evaluate the model
loss, accuracy = model.evaluate(padded_docs, labels, verbose=1)
print(accuracy)

0.800000011920929


In [8]:
print(padded_docs)
model.predict(padded_docs)

[[22 20  0  0]
 [ 1 26  0  0]
 [36 40  0  0]
 [11 26  0  0]
 [21  0  0  0]
 [21  0  0  0]
 [39 40  0  0]
 [25  1  0  0]
 [39 26  0  0]
 [24 12 20 28]]


array([[0.5680893 ],
       [0.5604374 ],
       [0.5384886 ],
       [0.57842195],
       [0.50213486],
       [0.50213486],
       [0.48496452],
       [0.49553412],
       [0.52664924],
       [0.3772443 ]], dtype=float32)

In [9]:
# 21 for 'good', 20 for 'poor'
arr = [[21,21,21,21],
       [20,20,20,20],
       [36,18,0,0]]
arr = np.array(arr)
model.predict(arr)

array([[0.42756134],
       [0.4483724 ],
       [0.5582324 ]], dtype=float32)