In [1]:
import os
import sys

# Put the parent directory on the import path so project modules resolve.
# The entry is stored as an absolute path: a relative '../' is re-resolved
# against the current working directory at import time, so it would silently
# point somewhere else if the notebook ever changes directory.
_parent_dir = os.path.abspath('../')
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

In [2]:
import numpy as np
import tensorflow as tf

# Turn on eager execution so the TensorFlow ops in later cells return concrete
# values immediately instead of graph nodes.
# NOTE(review): the TF 1.x docs say this must run at startup, before any graph
# or op has been created — confirm by re-running from a fresh kernel.
tf.enable_eager_execution()

  from ._conv import register_converters as _register_converters


In [3]:
# Sanity check: displays True once eager mode is active.
tf.executing_eagerly()

True

In [4]:
# Load the preprocessed embedding matrix from disk (the variable name suggests
# GloVe vectors — confirm against the preprocessing step that wrote the file).
glove_embedding = np.load('../data/preprocessed_201805031126/vectors.npy')
# Unpacking the shape requires a 2-D array: V rows by p0 columns, used below as
# the embedding table's row count and vector width.
V, p0 = glove_embedding.shape

In [5]:
# Frozen lookup table initialised from the loaded embedding matrix.
W = tf.get_variable(
    'embedding',
    shape=[V, p0],
    initializer=tf.constant_initializer(glove_embedding),
    trainable=False)
# Single randomly initialised row shared by every position flagged as unknown.
W_unk = tf.get_variable(
    'unk_embedding',
    shape=[1, p0],
    initializer=tf.random_uniform_initializer())

# One toy sentence of three word ids; only the first position is flagged UNK.
word_ids = [[1, 2, 3]]
unk_label = [[True, False, False]]

# Repeat each flag across the embedding axis so the condition has the same
# (1, 3, p0) shape as both branches. The width comes from the loaded matrix
# (p0) rather than a hard-coded 300, so the cell keeps working with
# embeddings of any dimensionality.
emb = tf.where(
    tf.tile(tf.expand_dims(unk_label, -1), [1, 1, p0]),
    tf.nn.embedding_lookup(
        W_unk, tf.zeros_like(unk_label, dtype=tf.int32)),
    tf.nn.embedding_lookup(
        W, word_ids))

# Position 0 is flagged UNK, so it should NOT match row 1 of the pre-trained
# matrix (the displayed array is expected to be False throughout).
np.isclose(emb[0][0], glove_embedding[1])

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [6]:
class WordEmbedding(tf.keras.layers.Layer):
    """Embedding lookup over a frozen matrix with one trainable <UNK> vector.

    Positions flagged as out-of-vocabulary receive the shared, trainable
    ``unk_embedding`` vector; every other position receives its row of the
    supplied (non-trainable) embedding matrix.
    """

    def __init__(self, embeddind_matrix, **kwargs):
        """Store the embedding matrix and derive the table dimensions from it.

        Args:
          embeddind_matrix: 2-D array-like of shape (V, dim) used to
            initialise the frozen table. The misspelled parameter name is
            kept so that existing keyword callers keep working.
          **kwargs: forwarded unchanged to the base ``Layer`` constructor.
        """
        # Initialise the Keras base class before assigning our own attributes,
        # so the base layer's bookkeeping exists when they are set.
        super(WordEmbedding, self).__init__(**kwargs)
        self._embedding_matrix = embeddind_matrix
        self._V = self._embedding_matrix.shape[0]
        self._dim = self._embedding_matrix.shape[1]

    def build(self, input_shape):
        """Create the frozen embedding table and the trainable <UNK> row."""
        # Table initialised from the supplied matrix; excluded from training.
        self._W = self.add_weight(
            'embedding',
            [self._V, self._dim],
            initializer=tf.constant_initializer(self._embedding_matrix),
            trainable=False)

        # One extra row, learned during training, shared by all UNK positions.
        self._W_unk = self.add_weight(
            'unk_embedding',
            [1, self._dim],
            initializer='glorot_uniform')

        super(WordEmbedding, self).build(input_shape)

    def call(self, x):
        """Look up embeddings, substituting the <UNK> vector where flagged.

        Args:
          x: pair ``(words, word_unk_label)``:
            words: (batch_size, N) word ids indexing rows of the table.
            word_unk_label: (batch_size, N) flags; a truthy value marks an
              out-of-vocabulary position.

        Returns:
          Tensor of shape (batch_size, N, dim).
        """
        words, word_unk_label = x
        # The inputs arrive as floats for some reason, so cast them back.
        # NOTE(review): presumably because the Keras Input layers feeding this
        # layer are declared without a dtype — confirm.
        words = tf.cast(words, tf.int32)
        word_unk_label = tf.cast(word_unk_label, tf.bool)

        # All the out-of-vocabulary words are mapped to an <UNK> token,
        # whose embedding is trainable with random initialization.
        pretrained_emb = tf.nn.embedding_lookup(self._W, words)
        # Index 0 everywhere: broadcasts the single <UNK> row to every position.
        unk_emb = tf.nn.embedding_lookup(
            self._W_unk, tf.zeros_like(word_unk_label, dtype=tf.int32))

        # Repeat each flag across the embedding axis so the condition matches
        # the (batch_size, N, dim) shape of both branches.
        is_unk = tf.tile(
            tf.expand_dims(word_unk_label, -1), [1, 1, self._dim])

        # (batch_size, N, dim)
        return tf.where(is_unk, unk_emb, pretrained_emb)

    def compute_output_shape(self, input_shape):
        """Return (batch_size, N, dim), taking the first two dims from the words input."""
        word_shape, _ = input_shape
        return tf.TensorShape([word_shape[0], word_shape[1], self._dim])

In [7]:
batch_size = 2  # not referenced by any of the visible cells
N = 10  # maximum context length

# The embedding layer under test, initialised from the loaded matrix.
word_embedding = WordEmbedding(glove_embedding)

# Two parallel inputs per example: word ids and their UNK flags.
context_words = tf.keras.layers.Input(shape=(N,))
context_word_unk_label = tf.keras.layers.Input(shape=(N,))
context_word_emb = word_embedding([context_words, context_word_unk_label])

# Wrap the layer in a minimal model so it can be driven through predict().
model = tf.keras.models.Model(
    inputs=[context_words, context_word_unk_label],
    outputs=context_word_emb)
model.compile(
    loss='mse',
    optimizer=tf.train.GradientDescentOptimizer(0.001))

In [8]:
# Smoke test: word ids [1, 11, 12] with only the first position flagged as UNK.
# NOTE(review): these arrays have length 3, while the model inputs above were
# declared with shape=(N,) and N = 10 — confirm the mismatch is intentional.
model.predict([np.array([[1, 11, 12]], dtype=np.int32), np.array([[True, False, False]])])

array([[[ 7.68503696e-02,  1.04205996e-01, -1.39469475e-01,
          6.28802031e-02,  2.42887735e-02,  1.00749478e-01,
          1.12519622e-01,  5.13631254e-02, -2.45877504e-02,
          1.03057176e-02,  4.18215990e-04, -5.53228185e-02,
          7.60826170e-02,  5.93279600e-02,  1.09392375e-01,
         -7.96944946e-02,  4.56506014e-03, -1.30267441e-01,
         -2.04834789e-02,  3.85618210e-02,  4.07712460e-02,
         -1.40247315e-01,  6.24878258e-02,  7.34151602e-02,
          4.49982882e-02,  1.27534837e-01,  4.48523909e-02,
          1.37752354e-01,  9.09069777e-02,  9.53004360e-02,
         -2.50213444e-02,  2.07052827e-02,  6.07331842e-02,
          5.18857837e-02, -3.94883156e-02,  2.56035775e-02,
         -1.51342154e-03, -1.02390736e-01,  4.21325564e-02,
          6.12837821e-02,  6.66057914e-02, -4.49351370e-02,
         -9.96994078e-02, -1.03500322e-01,  2.83468515e-02,
          1.32524639e-01, -1.21427476e-02, -1.15223855e-01,
          5.54360151e-02,  1.15253150e-0

In [9]:
import unittest

# Fix NumPy's global seed so the random matrix drawn in the test is repeatable.
np.random.seed(1234)


class TestWordEmbedding(unittest.TestCase):
    """Checks that WordEmbedding swaps in the <UNK> vector only where flagged."""

    def test_embedding(self):
        """Flagged position differs from the matrix row; unflagged one matches it."""
        embedding_matrix = np.random.randn(100, 50)

        in_words = tf.keras.layers.Input(shape=(3,))
        in_unk_label = tf.keras.layers.Input(shape=(3,))
        out_word_emb = WordEmbedding(embedding_matrix)(
            [in_words, in_unk_label])

        model = tf.keras.models.Model(
            inputs=[in_words, in_unk_label],
            outputs=out_word_emb)
        model.compile(
            loss='mse',
            optimizer=tf.train.GradientDescentOptimizer(0.001))

        word_ids = np.array([[1, 2, 3]])
        unk_flags = np.array([[True, False, False]])
        word_emb = model.predict([word_ids, unk_flags])

        # The first position is UNK, so it must not equal the matrix row.
        self.assertFalse(np.allclose(
            word_emb[0][0], embedding_matrix[1]))
        # The second position is identical to its embedding_matrix row.
        self.assertTrue(np.allclose(
            word_emb[0][1], embedding_matrix[2]))

In [10]:
# Run the tests inside the notebook kernel. The dummy argv keeps unittest from
# parsing the kernel's own command-line arguments, and exit=False stops it from
# calling sys.exit() when the run finishes.
unittest.main(argv=['first-arg-is-ignored'], exit=False)

  if d.decorator_argspec is not None), _inspect.getargspec(target))
.
----------------------------------------------------------------------
Ran 1 test in 0.006s

OK


<unittest.main.TestProgram at 0x7f2b8c07bba8>

In [17]:
# Toy batch of shape (2, 10, 20): along axis 1, eight positions of Gaussian
# noise (std 10) followed by two all-zero positions standing in for padding.
x = np.concatenate(
    [np.random.normal(scale=10., size=(2, 8, 20)), np.zeros((2, 2, 20))],
    axis=1)

# Random (20, 22) projection matrix.
# NOTE: this rebinds W, which an earlier cell used for the TF embedding variable.
W = np.random.randn(20, 22)

In [26]:
# Expected (2, 10, 20): 8 random + 2 zero positions concatenated along axis 1.
x.shape

(2, 10, 20)

In [22]:
# Expected (20, 22), as drawn by np.random.randn above.
W.shape

(20, 22)

In [29]:
# Contract the last axis of x with the first axis of W:
# (2, 10, 20) x (20, 22) -> (2, 10, 22).
np.tensordot(x, W, [[2], [0]])

array([[[  16.71082784,   83.0363351 ,  -28.30510987,   55.76282792,
           30.89115186,  -30.38347571,   11.5848846 ,    2.60410708,
          -33.34644981,  -13.10479936,   -4.62272399,   20.63470164,
          -26.05527949,   60.69433053,    6.18806933,  -19.1871707 ,
           -6.36904031,   -9.40577331,   67.98881972,   34.13436679,
           15.61339492,   12.50455664],
        [ -11.72174395,   -0.18392334,    6.30559555,  -71.63065314,
          -34.76865841,   -8.18367206,  -21.69978708,   -3.29398165,
          -23.56795023,    7.00600056,  -31.76877537,    3.29470444,
            5.39307581,    1.66072692,   32.5208641 ,  -16.57816853,
           54.35146685,  -11.85108442,   31.80972209,  -63.42511192,
           20.3478518 ,   10.9459972 ],
        [  74.1796957 ,   69.46761917,  -96.3508304 ,   -4.49621735,
           93.77788171,  -13.76666023,   32.69168675,    3.08627325,
          -47.69619897,    5.5468541 ,   -8.59722253,  -16.13336309,
           39.57750282,

In [30]:
# The same projection for the first batch element only; its rows should match
# the first slice of the tensordot result above.
np.dot(x[0], W)

array([[  16.71082784,   83.0363351 ,  -28.30510987,   55.76282792,
          30.89115186,  -30.38347571,   11.5848846 ,    2.60410708,
         -33.34644981,  -13.10479936,   -4.62272399,   20.63470164,
         -26.05527949,   60.69433053,    6.18806933,  -19.1871707 ,
          -6.36904031,   -9.40577331,   67.98881972,   34.13436679,
          15.61339492,   12.50455664],
       [ -11.72174395,   -0.18392334,    6.30559555,  -71.63065314,
         -34.76865841,   -8.18367206,  -21.69978708,   -3.29398165,
         -23.56795023,    7.00600056,  -31.76877537,    3.29470444,
           5.39307581,    1.66072692,   32.5208641 ,  -16.57816853,
          54.35146685,  -11.85108442,   31.80972209,  -63.42511192,
          20.3478518 ,   10.9459972 ],
       [  74.1796957 ,   69.46761917,  -96.3508304 ,   -4.49621735,
          93.77788171,  -13.76666023,   32.69168675,    3.08627325,
         -47.69619897,    5.5468541 ,   -8.59722253,  -16.13336309,
          39.57750282,   26.15881669, 

In [31]:
# Pairwise dot products between the 10 positions of the first batch element.
# The two zero-padded positions produce all-zero entries in the last two
# rows and columns.
np.dot(x[0], x[0].T)

array([[ 1470.91657366,  -521.5286495 ,   950.38073456,  -121.2938098 ,
          276.86751698,     8.57006884,   272.6677907 ,   461.16299708,
            0.        ,     0.        ],
       [ -521.5286495 ,  1155.6489936 ,  -289.44720528,   153.12724602,
          168.95699663,   -67.10641066,   157.49567838,    17.96330218,
            0.        ,     0.        ],
       [  950.38073456,  -289.44720528,  2163.25525414,   439.24988293,
           41.52247132,   134.42911744,   507.02798888,   382.21580075,
            0.        ,     0.        ],
       [ -121.2938098 ,   153.12724602,   439.24988293,  2711.8954907 ,
        -1098.84687058,   451.54798405,  -673.97825201,  -146.90368422,
            0.        ,     0.        ],
       [  276.86751698,   168.95699663,    41.52247132, -1098.84687058,
         2499.17341731,   117.83034065,   -60.33120901,   848.05513749,
            0.        ,     0.        ],
       [    8.57006884,   -67.10641066,   134.42911744,   451.54798405,
   

In [33]:
# Softmax over the raw, unmasked float64 similarity scores. With scores in the
# thousands, the rows displayed below are effectively one-hot.
tf.nn.softmax(tf.matmul(x[0], x[0].T))

<tf.Tensor: id=137, shape=(10, 10), dtype=float64, numpy=
array([[1.00000000e+000, 0.00000000e+000, 8.59324995e-227,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000],
       [0.00000000e+000, 1.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000],
       [0.00000000e+000, 0.00000000e+000, 1.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000],
       [0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        1.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000],
       [0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 1.00000000e+000, 0.00000000e+000,
        0.00

In [34]:
# Additive offset applied to masked-out logits; large enough that exp() of the
# result underflows to zero in a following softmax.
VERY_NEGATIVE_NUMBER = -1e30


def exp_mask(val, mask):
    """Push masked-out logits towards -inf so a following softmax ignores them.

    Args:
      val: float32 or float64 tensor (or array-like) of logits.
      mask: values broadcastable against `val`; 1/True keeps an entry,
        0/False suppresses it.

    Returns:
      Tensor with the dtype of `val`: unchanged where the mask is 1, and
      offset by VERY_NEGATIVE_NUMBER where the mask is 0. If a whole row is
      masked out, every entry gets the same offset, so a softmax over that
      row comes out uniform rather than zero.
    """
    val = tf.convert_to_tensor(val)
    # Cast the mask to the logits' own dtype instead of a fixed float32, so
    # float64 logits can be masked without the caller casting them down first.
    keep = tf.cast(mask, val.dtype)
    return val + (1. - keep) * VERY_NEGATIVE_NUMBER

In [47]:
# The same similarity scores cast down to float32.
# NOTE(review): presumably needed because exp_mask casts its mask to float32,
# which would not mix with the float64 matmul result — confirm.
tf.cast(tf.matmul(x[0], x[0].T), tf.float32)

<tf.Tensor: id=167, shape=(10, 10), dtype=float32, numpy=
array([[ 1470.9166  ,  -521.5286  ,   950.38074 ,  -121.29381 ,
          276.86752 ,     8.570069,   272.6678  ,   461.163   ,
            0.      ,     0.      ],
       [ -521.5286  ,  1155.649   ,  -289.4472  ,   153.12724 ,
          168.957   ,   -67.10641 ,   157.49568 ,    17.963303,
            0.      ,     0.      ],
       [  950.38074 ,  -289.4472  ,  2163.2554  ,   439.24988 ,
           41.522472,   134.42912 ,   507.02798 ,   382.2158  ,
            0.      ,     0.      ],
       [ -121.29381 ,   153.12724 ,   439.24988 ,  2711.8955  ,
        -1098.8469  ,   451.54797 ,  -673.9783  ,  -146.90369 ,
            0.      ,     0.      ],
       [  276.86752 ,   168.957   ,    41.522472, -1098.8469  ,
         2499.1733  ,   117.83034 ,   -60.331207,   848.0551  ,
            0.      ,     0.      ],
       [    8.570069,   -67.10641 ,   134.42912 ,   451.54797 ,
          117.83034 ,  1220.0077  ,  -618.4055  ,    

In [44]:
# (10, 10) float32 mask: ones in the top-left 8x8 block (real positions),
# zeros in the last two rows and columns (padded positions).
mask = np.zeros((10, 10), dtype=np.float32)
mask[:8, :8] = 1.
mask

array([[1., 1., 1., 1., 1., 1., 1., 1., 0., 0.],
       [1., 1., 1., 1., 1., 1., 1., 1., 0., 0.],
       [1., 1., 1., 1., 1., 1., 1., 1., 0., 0.],
       [1., 1., 1., 1., 1., 1., 1., 1., 0., 0.],
       [1., 1., 1., 1., 1., 1., 1., 1., 0., 0.],
       [1., 1., 1., 1., 1., 1., 1., 1., 0., 0.],
       [1., 1., 1., 1., 1., 1., 1., 1., 0., 0.],
       [1., 1., 1., 1., 1., 1., 1., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)

In [48]:
# Masked softmax over the float32 scores. The two rows whose mask is entirely
# zero (the padded positions) come out uniform at 0.1, because every entry in
# them receives the same large negative offset.
tf.nn.softmax(exp_mask(tf.cast(tf.matmul(x[0], x[0].T), tf.float32), mask))

<tf.Tensor: id=177, shape=(10, 10), dtype=float32, numpy=
array([[1. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 1. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 1. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 1. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 1. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 1. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 1. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. , 0. , 0. ],
       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]], dtype=float32)>

In [49]:
# Unmasked softmax again (same expression as an earlier cell), shown for
# comparison with the masked result above.
tf.nn.softmax(tf.matmul(x[0], x[0].T))

<tf.Tensor: id=182, shape=(10, 10), dtype=float64, numpy=
array([[1.00000000e+000, 0.00000000e+000, 8.59324995e-227,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000],
       [0.00000000e+000, 1.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000],
       [0.00000000e+000, 0.00000000e+000, 1.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000],
       [0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        1.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000],
       [0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 1.00000000e+000, 0.00000000e+000,
        0.00

In [57]:
# exp() applied directly to the first row of the float32 score matrix: in the
# output below the large positive scores overflow to inf and the large negative
# ones underflow to 0.
tf.exp([ 1470.9166  ,  -521.5286  ,   950.38074 ,  -121.29381 ,
          276.86752 ,     8.570069,   272.6678  ,   461.163   ,
            0.      ,     0.      ])

<tf.Tensor: id=210, shape=(10,), dtype=float32, numpy=
array([          inf, 0.0000000e+00,           inf, 0.0000000e+00,
                 inf, 5.2714956e+03,           inf,           inf,
       1.0000000e+00, 1.0000000e+00], dtype=float32)>