# How to Use Word Embedding Layers for Deep Learning with Keras

https://machinelearningmastery.com/use-word-embedding-layers-deep-learning-keras/

In [1]:
from keras.preprocessing.text import one_hot, Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense

import numpy as np

Using TensorFlow backend.
  return f(*args, **kwds)


## Example of Learning an Embedding

In [2]:
docs = ['Well done!',
    'Good work',
    'Great effort',
    'nice work',
    'Excellent!',
    'Weak',
    'Poor effort!',
    'not good',
    'poor work',
    'Could have done better.']
# 1 - positive, 0 - negative
labels = [1,1,1,1,1,0,0,0,0,0]

In [3]:
vocab_size = 50 # estimate

In [4]:
encoded_docs = [one_hot(d, vocab_size) for d in docs]

In [5]:
print(encoded_docs)

[[28, 11], [43, 43], [35, 25], [2, 43], [21], [28], [44, 25], [35, 43], [44, 43], [38, 49, 11, 1]]


In [6]:
max_length=4 # the last word is the longest so 4.
padded_docs = pad_sequences(encoded_docs, maxlen=max_length, padding='post')

In [7]:
print(padded_docs)

[[28 11  0  0]
 [43 43  0  0]
 [35 25  0  0]
 [ 2 43  0  0]
 [21  0  0  0]
 [28  0  0  0]
 [44 25  0  0]
 [35 43  0  0]
 [44 43  0  0]
 [38 49 11  1]]



The Embedding has a vocabulary *of 50 and an input length of 4. We will choose a small embedding space of 8 dimensions*.

The model is a simple binary classification model. Importantly, the output from the Embedding layer will be 4 vectors of 8 dimensions each, one for each word. We flatten this to a one 32-element vector to pass on to the Dense output layer.


In [8]:
model = Sequential()
model.add(Embedding(vocab_size, 8, input_length=max_length))
model.add(Flatten()) # this will be one 32-element vector
model.add(Dense(1, activation='sigmoid'))

# compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])


model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 4, 8)              400       
_________________________________________________________________
flatten_1 (Flatten)          (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 33        
Total params: 433
Trainable params: 433
Non-trainable params: 0
_________________________________________________________________


In [9]:
model.fit(padded_docs, labels, epochs=50, verbose=0)

<keras.callbacks.History at 0x181fae3908>

In [10]:
loss, accuracy = model.evaluate(padded_docs, labels, verbose=0)

In [11]:
print('Accuracy: {}'.format(accuracy*100))

Accuracy: 80.0000011920929


## Example of Using Pre-Trained GloVe Embedding



In [12]:
t = Tokenizer()
t.fit_on_texts(docs)

vocab_size = len(t.word_index) + 1

encoded_docs = t.texts_to_sequences(docs)

padded_docs = pad_sequences(encoded_docs, maxlen=max_length, padding='post')

In [13]:
embeddings_index = dict()
with open('glove.6B/glove.6B.100d.txt') as f:
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
print('Loaded {} word vectors.'.format(len(embeddings_index)))


Loaded 400000 word vectors.


In [14]:
embedding_matrix = np.zeros((vocab_size, 100))

In [15]:
for word, i in t.word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector
    

### Embedding layer
params
- `trainable` = False เพราะใช้ model ที่ train แล้ว
- `weights` = matrix ที่ผ่านการเทรนมาแล้ว จะสังเกตว่าแต่ละคำจะมี weight มาให้แล้ว
- `output_dim` = 100 เพราะเราเลือก glove version 100-dimensional 

In [16]:
model = Sequential()
model.add(Embedding(vocab_size, 100, weights=[embedding_matrix],input_length=max_length, trainable=False))
model.add(Flatten()) # this will be one 32-element vector
model.add(Dense(1, activation='sigmoid'))

# compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 4, 100)            1500      
_________________________________________________________________
flatten_2 (Flatten)          (None, 400)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 401       
Total params: 1,901
Trainable params: 401
Non-trainable params: 1,500
_________________________________________________________________


In [17]:
model.fit(padded_docs, labels, epochs=50, verbose=0)
loss, accuracy = model.evaluate(padded_docs, labels, verbose=0)

In [18]:
print('Accuracy: {}'.format(accuracy*100))

Accuracy: 100.0
