In [1]:
import numpy as np
import tensorflow as tf

# Turn on TensorFlow eager execution for the rest of this notebook.
tf.enable_eager_execution()

  from ._conv import register_converters as _register_converters


In [2]:
# Toy inputs for exercising the position-prediction layer.
# Open question carried over from the original note: how should the mask be handled?

# Queries whether eager mode is on; the return value is discarded here.
tf.executing_eagerly()

# Fix the NumPy seed so the random inputs below are reproducible.
np.random.seed(1234)

batch_size = 2
N = 10 # maximum context length
M = 5 # maximum question length
d = 128

# Three random inputs, each of shape (batch_size, N, d * 4).
M0 = np.random.randn(batch_size, N, d * 4)
M1 = np.random.randn(batch_size, N, d * 4)
M2 = np.random.randn(batch_size, N, d * 4)
# Mask of shape (batch_size, N): 1.0 for the first N - 2 positions, 0.0 for the last two.
context_mask = np.array([[1.] * (N - 2) + [0.] * 2] * batch_size)

# Sanity-check the shapes.
print(M0.shape)
print(context_mask.shape)

(2, 10, 512)
(2, 10)


In [3]:
class PositionPrediction(tf.keras.layers.Layer):
    """Predicts a probability distribution over context positions.

    Two input tensors are concatenated along the feature axis, projected to
    one logit per position with a single trainable weight, masked with
    `exp_mask`, and normalised with a softmax over the position axis.
    """

    def __init__(self, initializer='glorot_uniform', **kwargs):
        """Constructor.

        Args:
          initializer: initializer for the projection weight; passed as-is to
            `add_weight` in `build`.
          **kwargs: forwarded to the base `Layer` constructor.
        """
        # Initialise the base class before assigning instance attributes.
        super(PositionPrediction, self).__init__(**kwargs)
        self._initializer = initializer

    def build(self, input_shape):
        """Creates the projection weight of shape (features_a + features_b, 1).

        Args:
          input_shape: shapes of (M_a, M_b, context_mask).
        """
        M_a_shape, M_b_shape, _ = input_shape

        self._W = self.add_weight(
            'weight',
            [M_a_shape[-1] + M_b_shape[-1], 1],
            initializer=self._initializer)

        super(PositionPrediction, self).build(input_shape)

    def call(self, x):
        """Computes the masked softmax over positions.

        Args:
          x: a 3-tuple of
            M_a: (batch_size, N, d * 4)
            M_b: (batch_size, N, d * 4)
            context_mask: (batch_size, N)

        Returns:
          Tensor of shape (batch_size, N) with one probability per position.
        """
        M_a, M_b, context_mask = x
        M = tf.concat([M_a, M_b], axis=2)
        # (batch_size, N, 1) -> (batch_size, N). Squeeze only the trailing unit
        # axis: an unrestricted squeeze would also drop the batch or position
        # axis whenever batch_size == 1 or N == 1.
        logits = tf.squeeze(tf.tensordot(M, self._W, [[2], [0]]), axis=[2])
        return tf.nn.softmax(exp_mask(logits, context_mask))

    def compute_output_shape(self, input_shape):
        """Returns (batch_size, N), matching what `call` produces."""
        M_a_shape, _, _ = input_shape
        return tf.TensorShape([M_a_shape[0], M_a_shape[1]])

# Large negative offset added to masked-out entries.
VERY_NEGATIVE_NUMBER = -1e30


def exp_mask(val, mask):
    """Adds VERY_NEGATIVE_NUMBER to `val` wherever `mask` is 0.

    Args:
      val: values to be masked.
      mask: mask with 1 for entries to keep and 0 for entries to suppress; it
        is cast to float32 before use.

    Returns:
      `val` plus (1 - mask) * VERY_NEGATIVE_NUMBER.
    """
    suppressed = 1. - tf.cast(mask, tf.float32)
    penalty = suppressed * VERY_NEGATIVE_NUMBER
    return val + penalty

In [4]:
# Symbolic inputs: three (N, d * 4) feature tensors and one (N,) context mask.
in_M_0 = tf.keras.layers.Input(shape=(N, d * 4))
in_M_1 = tf.keras.layers.Input(shape=(N, d * 4))
in_M_2 = tf.keras.layers.Input(shape=(N, d * 4))
in_context_mask = tf.keras.layers.Input(shape=(N,))

# Two separate PositionPrediction instances: one over (in_M_0, in_M_1) and one
# over (in_M_0, in_M_2), both using the same context mask.
p_1 = PositionPrediction()((in_M_0, in_M_1, in_context_mask))
p_2 = PositionPrediction()((in_M_0, in_M_2, in_context_mask))

# Four-input, two-output model.
model = tf.keras.models.Model(
    inputs=[in_M_0, in_M_1, in_M_2, in_context_mask], outputs=[p_1, p_2])
# NOTE(review): 'mse' looks like a placeholder loss for this forward-pass check — confirm.
model.compile(
    optimizer=tf.train.GradientDescentOptimizer(0.001),
    loss='mse')

In [5]:
# Forward pass on the toy inputs; the model's two outputs are unpacked as the
# start and end predictions.
p_start, p_end = model.predict([M0, M1, M2, context_mask])

In [6]:
# Display the start predictions; the two masked trailing positions come out as 0.
p_start

array([[0.3444644 , 0.03433581, 0.02684279, 0.06284202, 0.0140499 ,
        0.14457478, 0.06538274, 0.30750763, 0.        , 0.        ],
       [0.00283944, 0.20249864, 0.09099659, 0.05963712, 0.4290224 ,
        0.0200462 , 0.1120894 , 0.08287019, 0.        , 0.        ]],
      dtype=float32)

In [None]:
# loss
# Keras hands the loss function predictions (post-softmax), so recover the logits,
# apply softmax_cross_entropy_with_logits against y_true for start and end separately, and sum the two.

In [None]:
# Random 0/1 label arrays used only to smoke-test the training path. Their
# shapes are taken from the predictions they are compared against, rather than
# repeating the batch size and context length as literals.
# NOTE(review): rows are multi-hot (possibly all-zero), not one-hot position
# targets — confirm that is acceptable for the loss being tried.
print(p_start.shape)
start_labels = np.random.randint(2, size=p_start.shape)
end_labels = np.random.randint(2, size=p_end.shape)

In [None]:
def loss(y_true, y_pred):
    """Cross-entropy between target and predicted position distributions.

    Per the Keras `compile` documentation, a single loss function is applied
    to each model output separately, so the start and end predictions each
    arrive here in their own call.

    Args:
      y_true: target values, same shape as `y_pred`.
      y_pred: predicted probabilities (softmax output), one row per example.

    Returns:
      Scalar tensor: the batch mean of -sum(y_true * log(y_pred)) over axis 1.
    """
    y_true = tf.cast(y_true, y_pred.dtype)
    # Masked positions leave the softmax as exactly 0; clip so log() stays
    # finite and 0 * log(0) cannot turn the loss into NaN.
    y_pred = tf.clip_by_value(y_pred, 1e-7, 1.)
    # NOTE(review): the original TODO asked whether a mask must be applied
    # before taking the mean (i.e. whether some examples have no answer) —
    # still open.
    return tf.reduce_mean(-tf.reduce_sum(y_true * tf.log(y_pred), axis=1))

In [None]:
# Rebuild the model with a per-output categorical cross-entropy loss.
# `in_context_mask` has to be listed as an input: both PositionPrediction
# outputs depend on it, and Keras raises a "Graph disconnected" error when an
# Input tensor the outputs depend on is left out.
model = tf.keras.models.Model(
    inputs=[in_M_0, in_M_1, in_M_2, in_context_mask], outputs=[p_1, p_2])
model.compile(
    optimizer=tf.train.GradientDescentOptimizer(0.001),
    loss='categorical_crossentropy',
    loss_weights=[.5, .5])
# With `loss_weights`, the total loss is reportedly the sum of the per-output
# losses weighted by these coefficients.
# For extra processing such as masking, write a custom loss function instead.

In [None]:
# Inspect the label arrays that are passed to `fit` as targets.
[start_labels, end_labels]

In [None]:
# Train on the toy batch. `context_mask` is fed as well because the
# PositionPrediction layers consume it as a model input.
model.fit([M0, M1, M2, context_mask], [start_labels, end_labels])