In [1]:
%matplotlib inline
import matplotlib.pyplot as plot
from IPython import display
from __future__ import print_function

import os
import sys
import numpy as np
import numpy.random as nr

from keras.utils import to_categorical
from keras.layers import Dense, Input, Flatten, Dropout
from keras.layers import Conv1D, MaxPooling1D, Embedding, GlobalMaxPooling1D
from keras.models import Model, Sequential


Using TensorFlow backend.


In [2]:
# Vocabulary size, embedding width, and number of tokens per sequence.
nb_words, nb_feats, seq_len = 100, 5, 10

# A model made of a single embedding lookup table.
model = Sequential([Embedding(nb_words, nb_feats, input_length=seq_len)])
model.summary()

# Hand-built embedding matrix of shape (nb_words, nb_feats): row i holds the
# value i in every column, so the embedding of token i is easy to recognise
# in the outputs of the following cells.
W = np.repeat(np.arange(nb_words).reshape(nb_words, 1), nb_feats, axis=1)

model.set_weights([W])
W[:5, :], W.shape

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 10, 5)             500       
Total params: 500
Trainable params: 500
Non-trainable params: 0
_________________________________________________________________


(array([[0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2],
        [3, 3, 3, 3, 3],
        [4, 4, 4, 4, 4]]), (100, 5))

In [3]:
# One sequence of token ids, shaped (1, 10).
sequences = np.array([[2, 1, 3, 4, 5, 2, 1, 3, 9, 7]])
# Read only the batch dimension. Unpacking the shape into `_` would overwrite
# IPython's last-output variable and stop it from being updated afterwards.
nb_seqs = sequences.shape[0]
# The embedding layer replaces each token id by its row of W.
p = model.predict(sequences)
p, p.shape

(array([[[ 2.,  2.,  2.,  2.,  2.],
         [ 1.,  1.,  1.,  1.,  1.],
         [ 3.,  3.,  3.,  3.,  3.],
         [ 4.,  4.,  4.,  4.,  4.],
         [ 5.,  5.,  5.,  5.,  5.],
         [ 2.,  2.,  2.,  2.,  2.],
         [ 1.,  1.,  1.,  1.,  1.],
         [ 3.,  3.,  3.,  3.,  3.],
         [ 9.,  9.,  9.,  9.,  9.],
         [ 7.,  7.,  7.,  7.,  7.]]], dtype=float32), (1, 10, 5))

In [4]:
# Two sequences of token ids, shaped (2, 10).
sequences = np.array([[2, 1, 3, 4, 5, 2, 1, 3, 9, 7],
                      [9, 8, 7, 6, 5, 4, 3, 2, 1, 1]])
# Read only the batch dimension. Unpacking the shape into `_` would overwrite
# IPython's last-output variable and stop it from being updated afterwards.
nb_seqs = sequences.shape[0]
# Each sequence is embedded independently, one row of W per token.
p = model.predict(sequences)
p, p.shape

(array([[[ 2.,  2.,  2.,  2.,  2.],
         [ 1.,  1.,  1.,  1.,  1.],
         [ 3.,  3.,  3.,  3.,  3.],
         [ 4.,  4.,  4.,  4.,  4.],
         [ 5.,  5.,  5.,  5.,  5.],
         [ 2.,  2.,  2.,  2.,  2.],
         [ 1.,  1.,  1.,  1.,  1.],
         [ 3.,  3.,  3.,  3.,  3.],
         [ 9.,  9.,  9.,  9.,  9.],
         [ 7.,  7.,  7.,  7.,  7.]],
 
        [[ 9.,  9.,  9.,  9.,  9.],
         [ 8.,  8.,  8.,  8.,  8.],
         [ 7.,  7.,  7.,  7.,  7.],
         [ 6.,  6.,  6.,  6.,  6.],
         [ 5.,  5.,  5.,  5.,  5.],
         [ 4.,  4.,  4.,  4.,  4.],
         [ 3.,  3.,  3.,  3.,  3.],
         [ 2.,  2.,  2.,  2.,  2.],
         [ 1.,  1.,  1.,  1.,  1.],
         [ 1.,  1.,  1.,  1.,  1.]]], dtype=float32), (2, 10, 5))

In [5]:
# to_categorical does a ravel() before creating the one-hots, so the result is
# reshaped back to (nb_seqs, seq_len, nb_words).
a = to_categorical(sequences, nb_words)
a = a.reshape(nb_seqs, seq_len, nb_words)
# Multiplying a one-hot row by W picks out the matching row of W, which is the
# same lookup the embedding layer performed in the previous cell.
pp = a.dot(W)
pp, pp.shape

(array([[[ 2.,  2.,  2.,  2.,  2.],
         [ 1.,  1.,  1.,  1.,  1.],
         [ 3.,  3.,  3.,  3.,  3.],
         [ 4.,  4.,  4.,  4.,  4.],
         [ 5.,  5.,  5.,  5.,  5.],
         [ 2.,  2.,  2.,  2.,  2.],
         [ 1.,  1.,  1.,  1.,  1.],
         [ 3.,  3.,  3.,  3.,  3.],
         [ 9.,  9.,  9.,  9.,  9.],
         [ 7.,  7.,  7.,  7.,  7.]],
 
        [[ 9.,  9.,  9.,  9.,  9.],
         [ 8.,  8.,  8.,  8.,  8.],
         [ 7.,  7.,  7.,  7.,  7.],
         [ 6.,  6.,  6.,  6.,  6.],
         [ 5.,  5.,  5.,  5.,  5.],
         [ 4.,  4.,  4.,  4.,  4.],
         [ 3.,  3.,  3.,  3.,  3.],
         [ 2.,  2.,  2.,  2.,  2.],
         [ 1.,  1.,  1.,  1.,  1.],
         [ 1.,  1.,  1.,  1.,  1.]]]), (2, 10, 5))

In [6]:
# Embedding followed by a 1-D convolution with 2 filters of kernel size 3.
model = Sequential()
model.add(Embedding(nb_words, nb_feats, input_length=seq_len))
model.add(Conv1D(2, 3))
# Load W into the embedding layer only. The model also owns the Conv1D kernel
# and bias, so a one-element list given to model.set_weights() does not match
# the model's full weight list; Keras versions that validate the length reject
# it. Setting the weights on the layer itself is unambiguous.
model.layers[0].set_weights([W])
model.summary()

# The Conv1D weights keep their default initialisation, so the exact numbers
# printed here are expected to vary from run to run.
model.predict(np.array([[2, 1, 3, 4, 5, 2, 1, 3, 9, 7]]))


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 10, 5)             500       
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 8, 2)              32        
Total params: 532
Trainable params: 532
Non-trainable params: 0
_________________________________________________________________


array([[[ 1.07130241,  0.27366447],
        [ 1.85898256,  0.98257858],
        [ 2.40612602,  0.60883653],
        [ 1.89604783, -0.70369828],
        [ 0.84111482, -1.42720759],
        [ 1.07130241,  0.27366447],
        [ 3.16937852,  2.60109496],
        [ 4.26655149,  1.32922018]]], dtype=float32)

In [7]:
# Embedding followed by global max pooling over the sequence axis.
model = Sequential([
    Embedding(nb_words, nb_feats, input_length=seq_len),
    GlobalMaxPooling1D(),
])
# The pooling layer has no parameters (see the summary below), so W is the
# model's only weight array.
model.set_weights([W])
model.summary()

# Token 40 is the largest id in the sequence, so its embedding row is the
# per-feature maximum returned by the pooling layer.
model.predict(np.array([[2, 1, 3, 40, 5, 2, 1, 3, 9, 7]]))


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 10, 5)             500       
_________________________________________________________________
global_max_pooling1d_1 (Glob (None, 5)                 0         
Total params: 500
Trainable params: 500
Non-trainable params: 0
_________________________________________________________________


array([[ 40.,  40.,  40.,  40.,  40.]], dtype=float32)