In [1]:
import keras
from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.embeddings import Embedding

Using TensorFlow backend.


# Usage Embedding layer provided by Keras for deep learning 

In [3]:
# we use Keras embedding layer to convert text into numerical value for neural network training
# It requires that the input data be integer encoded, so that each word is represented by a unique integer
# Embedding layer is initialized with random weights and and will 
#learn an embedding for all of the words in the training dataset.
'''
Embedding layer is defined as the first hidden layer of a network. It must specify 3 arguments:
    i. input dim:  It is size of the vocabulary in the text data. 
        For example, if your data is integer encoded to values between 0-6, 
        then the size of the vocabulary would be 7 words.
    ii. output dim: It is the size of the vector space in which words will be embedded.
        It defines the size of the output vectors from this layer for each word. 
        For example, it could be 32 or 100 or even larger. Test different values for your problem.
    iii. input_length: This is the length of input sequences, as you would define 
        for any input layer of a Keras model.
        For example, if all of your input documents are comprised of 1000 words, this would be 1000.
'''

'\nEmbedding layer is defined as the first hidden layer of a network. It must specify 3 arguments:\n    i. input dim:  It is size of the vocabulary in the text data. \n        For example, if your data is integer encoded to values between 0-6, \n        then the size of the vocabulary would be 7 words.\n    ii. output dim: It is the size of the vector space in which words will be embedded.\n        It defines the size of the output vectors from this layer for each word. \n        For example, it could be 32 or 100 or even larger. Test different values for your problem.\n    iii. input_length: This is the length of input sequences, as you would define \n        for any input layer of a Keras model.\n        For example, if all of your input documents are comprised of 1000 words, this would be 1000.\n'

In [4]:
# Define documents 
# 5 positive and 5 negative samples
docs =  ["well done", "nice work", 
         "excellent job", "good work", 
         "nicely done", "poor work", "very bad", 
         "weak effort", "Improper job", 
        "Could have been improved more"]
labels = [1,1,1,1,1,0,0,0,0,0]

# We encode all the docs into integers
# we use one_hot function provided by keras,
# it assigns unique number to each word
vocab_size = 50
encoded_docs = [one_hot(d, vocab_size) for d in docs]
print(encoded_docs)

[[25, 48], [17, 22], [28, 40], [18, 22], [34, 48], [3, 22], [3, 28], [37, 3], [48, 40], [40, 19, 25, 35, 16]]


In [5]:
# According to Keras, all the docs should have same length
# since in our doc, one doc has more words(4) so we need to use padding 
# pad docs to maximum length of 4
pad_max_length = 4
padded_docs = pad_sequences(encoded_docs, maxlen=pad_max_length, padding='post')
print padded_docs

[[25 48  0  0]
 [17 22  0  0]
 [28 40  0  0]
 [18 22  0  0]
 [34 48  0  0]
 [ 3 22  0  0]
 [ 3 28  0  0]
 [37  3  0  0]
 [48 40  0  0]
 [19 25 35 16]]


In [6]:
# Define the model 
model = Sequential()
model.add(Embedding(vocab_size, 8, input_length=pad_max_length))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))


# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
print model.summary()

# fit the model
model.fit(padded_docs, labels, epochs=50, verbose=0)

# evaluate the model
loss, accuracy = model.evaluate(padded_docs, labels, verbose=0)
print('Accuracy: %f' % (accuracy*100))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 4, 8)              400       
_________________________________________________________________
flatten_1 (Flatten)          (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 33        
Total params: 433
Trainable params: 433
Non-trainable params: 0
_________________________________________________________________
None
Accuracy: 89.999998
