In [73]:
%matplotlib inline
import matplotlib.pyplot as plot
from IPython import display
from __future__ import print_function

import os
import sys
import numpy as np
import numpy.random as nr

from keras.utils import to_categorical
from keras.layers import Dense, Input, Flatten, Dropout
from keras.layers import Conv1D, MaxPooling1D, Embedding, GlobalMaxPooling1D
from keras.models import Model, Sequential


<img src="../figures/Embedding_1.png" width="700">

In [74]:
# Toy dimensions, kept small so every value can be checked by eye.
nb_words, nb_feats, seq_len = 20, 5, 10  # vocabulary size, embedding width, sequence length

# A network made of a single Embedding layer: it maps each integer word index
# to a row of an (nb_words, nb_feats) weight matrix.
model = Sequential([
    Embedding(nb_words, nb_feats, input_length=seq_len),
])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_11 (Embedding)     (None, 10, 5)             100       
Total params: 100
Trainable params: 100
Non-trainable params: 0
_________________________________________________________________


In [75]:
# Hand-crafted embedding matrix: row i equals i * [1, 2, ..., nb_feats], so any
# vector returned by the layer makes it obvious which word index produced it.
W = np.outer(np.arange(nb_words), np.arange(1, nb_feats + 1))

# Replace the model's weights with this known matrix.
model.set_weights([W])
(W[:5, :], W.shape, W.dtype)

(array([[ 0,  0,  0,  0,  0],
        [ 1,  2,  3,  4,  5],
        [ 2,  4,  6,  8, 10],
        [ 3,  6,  9, 12, 15],
        [ 4,  8, 12, 16, 20]]), (20, 5), dtype('int64'))

In [76]:
# One input sequence of word indices, shaped (1, seq_len).
sequences = np.array([[19, 10, 0, 1, 7, 5, 0, 1, 15, 2]])
print(sequences.shape)
# The Embedding layer looks up one row of W per word index.
# Cast with the builtin int: the np.int alias was deprecated in NumPy 1.20 and
# removed in 1.24, so .astype(np.int) raises AttributeError on current NumPy.
p = model.predict(sequences).astype(int)
p, p.shape

(1, 10)


(array([[[19, 38, 57, 76, 95],
         [10, 20, 30, 40, 50],
         [ 0,  0,  0,  0,  0],
         [ 1,  2,  3,  4,  5],
         [ 7, 14, 21, 28, 35],
         [ 5, 10, 15, 20, 25],
         [ 0,  0,  0,  0,  0],
         [ 1,  2,  3,  4,  5],
         [15, 30, 45, 60, 75],
         [ 2,  4,  6,  8, 10]]]), (1, 10, 5))

<img src="../figures/Embedding_neural.png" width="400">

In [77]:
# to_categorical does a ravel() before creating the one-hot rows, so the result
# is reshaped back to (nb_seqs, seq_len, nb_words).
nb_seqs,seq_len = sequences.shape
# Builtin int replaces np.int, which was deprecated in NumPy 1.20 and removed in 1.24.
a = to_categorical(sequences, nb_words).reshape(nb_seqs, seq_len, nb_words).astype(int)
a

array([[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]])

In [78]:
# Multiplying the one-hot tensor by W picks out one row of W per word index,
# which reproduces the Embedding layer's output computed earlier.
# Builtin int replaces np.int, which was deprecated in NumPy 1.20 and removed in 1.24.
pp = np.dot(a, W).astype(int)
pp, pp.shape

(array([[[19, 38, 57, 76, 95],
         [10, 20, 30, 40, 50],
         [ 0,  0,  0,  0,  0],
         [ 1,  2,  3,  4,  5],
         [ 7, 14, 21, 28, 35],
         [ 5, 10, 15, 20, 25],
         [ 0,  0,  0,  0,  0],
         [ 1,  2,  3,  4,  5],
         [15, 30, 45, 60, 75],
         [ 2,  4,  6,  8, 10]]]), (1, 10, 5))

In [79]:
# A batch of two sequences; the prediction gains one (seq_len, nb_feats) slab
# per sequence.
sequences = np.array([
    [19, 10, 0, 1, 7, 5, 0, 1, 15, 2],
    [9, 8, 7, 6, 5, 4, 3, 15, 1, 1],
])
print(sequences.shape)
p = model.predict(sequences).astype(int)
(p, p.shape)

(2, 10)


(array([[[19, 38, 57, 76, 95],
         [10, 20, 30, 40, 50],
         [ 0,  0,  0,  0,  0],
         [ 1,  2,  3,  4,  5],
         [ 7, 14, 21, 28, 35],
         [ 5, 10, 15, 20, 25],
         [ 0,  0,  0,  0,  0],
         [ 1,  2,  3,  4,  5],
         [15, 30, 45, 60, 75],
         [ 2,  4,  6,  8, 10]],
 
        [[ 9, 18, 27, 36, 45],
         [ 8, 16, 24, 32, 40],
         [ 7, 14, 21, 28, 35],
         [ 6, 12, 18, 24, 30],
         [ 5, 10, 15, 20, 25],
         [ 4,  8, 12, 16, 20],
         [ 3,  6,  9, 12, 15],
         [15, 30, 45, 60, 75],
         [ 1,  2,  3,  4,  5],
         [ 1,  2,  3,  4,  5]]]), (2, 10, 5))

In [80]:
# Embedding followed by a 1-D convolution with 1 filter and a kernel of size 3.
model = Sequential()
model.add(Embedding(nb_words, nb_feats, input_length=seq_len))
model.add(Conv1D(1, 3))
# Load W into the Embedding layer only. The full model holds more weight arrays
# than just the embedding matrix (the Conv1D layer adds 16 parameters), so
# model.set_weights([W]) supplies too few arrays and is rejected by Keras
# versions that validate the list length. The Conv1D weights keep their
# initial values, so the numbers predicted below are not fixed by W alone.
model.layers[0].set_weights([W])
model.summary()

model.predict(np.array([[19, 10, 0, 1, 7, 5, 0, 1, 15, 2]]))


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_12 (Embedding)     (None, 10, 5)             100       
_________________________________________________________________
conv1d_6 (Conv1D)            (None, 8, 1)              16        
Total params: 116
Trainable params: 116
Non-trainable params: 0
_________________________________________________________________


array([[[ 44.58876038],
        [ 20.0692482 ],
        [ -3.27068806],
        [  3.13346505],
        [ 17.14089394],
        [  9.76227283],
        [ -7.628304  ],
        [  9.1053791 ]]], dtype=float32)

In [81]:
# Embedding followed by a global max over the sequence axis: the output keeps,
# for each embedding feature, the largest value found along the sequence.
model = Sequential([
    Embedding(nb_words, nb_feats, input_length=seq_len),
    GlobalMaxPooling1D(),
])
# The pooling layer has no parameters, so W is the model's only weight array.
model.set_weights([W])
model.summary()

model.predict(np.array([[19, 10, 0, 1, 7, 5, 0, 1, 15, 2]]))


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_13 (Embedding)     (None, 10, 5)             100       
_________________________________________________________________
global_max_pooling1d_4 (Glob (None, 5)                 0         
Total params: 100
Trainable params: 100
Non-trainable params: 0
_________________________________________________________________


array([[ 19.,  38.,  57.,  76.,  95.]], dtype=float32)