In [1]:
import numpy as np
import tensorflow as tf

tf.enable_eager_execution()

  from ._conv import register_converters as _register_converters


In [2]:
tf.executing_eagerly()

np.random.seed(1234)

batch_size = 2
N = 10 # number of maximum context length
M = 5 # number of maximum question length
d = 128

context = np.random.randn(batch_size, N, d)
query = np.random.randn(batch_size, M, d)

In [3]:
c = tf.constant(context)
q = tf.constant(query)

In [4]:
x = tf.tile(tf.expand_dims(c, 2), [1, 1, 5, 1])

print(x.shape)

# axis=2に同じデータが5つづず並んでいる
assert all(np.isclose(x[0][0][0], x[0][0][1]))
assert all(np.isclose(x[0][0][0], x[0][0][2]))
assert all(np.isclose(x[0][0][0], x[0][0][3]))
assert all(np.isclose(x[0][0][0], x[0][0][4]))

assert not all(np.isclose(x[0][0][0], x[0][1][0]))

(2, 10, 5, 128)


In [5]:
y = tf.tile(tf.expand_dims(q, 1), [1, 10, 1, 1])

print(y.shape)

# axis=3にデータが1つづつ並んでいる
assert all(np.isclose(y[0][0][0], y[0][1][0]))
assert all(np.isclose(y[0][0][1], y[0][1][1]))
assert all(np.isclose(y[0][0][2], y[0][1][2]))

assert not all(np.isclose(y[0][0][0], y[0][0][1]))

(2, 10, 5, 128)


In [6]:
x = tf.reshape(x, [-1, 10 * 5, 128])
y = tf.reshape(y, [-1, 10 * 5, 128])

In [7]:
xy = tf.concat([x, y], 2)
print(xy.shape)

(2, 50, 256)


In [8]:
W = tf.constant(1., shape=[256, 1], dtype=tf.float64)

In [9]:
tf.reshape(tf.tensordot(xy, W, [[2], [0]]), [-1, 10, 5])

<tf.Tensor: id=264, shape=(2, 10, 5), dtype=float64, numpy=
array([[[ -8.5116306 ,   1.17680749,   1.1454999 ,  12.24783227,
           3.4070994 ],
        [ -0.49786499,   9.1905731 ,   9.15926551,  20.26159789,
          11.42086501],
        [  7.58958553,  17.27802362,  17.24671603,  28.34904841,
          19.50831553],
        [-28.85097223, -19.16253414, -19.19384173,  -8.09150935,
         -16.93224222],
        [ -2.53536932,   7.15306877,   7.12176118,  18.22409356,
           9.38336069],
        [ -2.56116482,   7.12727327,   7.09596568,  18.19829806,
           9.35756518],
        [-16.05366551,  -6.36522742,  -6.39653501,   4.70579737,
          -4.1349355 ],
        [-12.73966298,  -3.05122489,  -3.08253248,   8.0197999 ,
          -0.82093298],
        [-12.93958368,  -3.25114559,  -3.28245318,   7.81987919,
          -1.02085368],
        [ -7.87552235,   1.81291574,   1.78160815,  12.88394052,
           4.04320765]],

       [[  8.0440727 ,   7.08567584,  13.8063611

In [10]:
c = tf.tile(tf.expand_dims(context, 2), [1, 1, M, 1])

q = tf.tile(tf.expand_dims(query, 2), [1, N, 1, 1])


from functools import reduce
from operator import mul

def flatten(tensor, keep):
    fixed_shape = tensor.get_shape().as_list()
    start = len(fixed_shape) - keep
    left = reduce(mul, [fixed_shape[i] or tf.shape(tensor)[i] for i in range(start)])
    out_shape = [left] + [fixed_shape[i] or tf.shape(tensor)[i] for i in range(start, len(fixed_shape))]
    flat = tf.reshape(tensor, out_shape)
    return flat

def reconstruct(tensor, ref, keep):
    ref_shape = ref.get_shape().as_list()
    tensor_shape = tensor.get_shape().as_list()
    ref_stop = len(ref_shape) - keep
    tensor_start = len(tensor_shape) - keep
    pre_shape = [ref_shape[i] or tf.shape(ref)[i] for i in range(ref_stop)]
    keep_shape = [tensor_shape[i] or tf.shape(tensor)[i] for i in range(tensor_start, len(tensor_shape))]
    # pre_shape = [tf.shape(ref)[i] for i in range(len(ref.get_shape().as_list()[:-keep]))]
    # keep_shape = tensor.get_shape().as_list()[-keep:]
    target_shape = pre_shape + keep_shape
    out = tf.reshape(tensor, target_shape)
    return out

print(c.shape)
print(q.shape)

print(flatten(c, 1).shape)
print(flatten(q, 1).shape)

(2, 10, 5, 128)
(2, 50, 1, 128)
(100, 128)
(100, 128)


In [11]:
def _linear(args,
            output_size,
            bias,
            bias_initializer=None,
            kernel_initializer=None):
  """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.
  Args:
    args: a 2D Tensor or a list of 2D, batch x n, Tensors.
    output_size: int, second dimension of W[i].
    bias: boolean, whether to add a bias term or not.
    bias_initializer: starting value to initialize the bias
      (default is all zeros).
    kernel_initializer: starting value to initialize the weight.
  Returns:
    A 2D Tensor with shape [batch x output_size] equal to
    sum_i(args[i] * W[i]), where W[i]s are newly created matrices.
  Raises:
    ValueError: if some of the arguments has unspecified or wrong shape.
  """
  if args is None or (tf.contrib.framework.nest.is_sequence(args) and not args):
    raise ValueError("`args` must be specified")
  if not tf.contrib.framework.nest.is_sequence(args):
    args = [args]

  # Calculate the total size of arguments on dimension 1.
  total_arg_size = 0
  shapes = [a.get_shape() for a in args]
  for shape in shapes:
    if shape.ndims != 2:
      raise ValueError("linear is expecting 2D arguments: %s" % shapes)
    if shape[1].value is None:
      raise ValueError("linear expects shape[1] to be provided for shape %s, "
                       "but saw %s" % (shape, shape[1]))
    else:
      total_arg_size += shape[1].value

  dtype = [a.dtype for a in args][0]

  # Now the computation.
  scope = tf.get_variable_scope()
  with tf.variable_scope(scope) as outer_scope:
    weights = tf.get_variable(
        "kernel", [total_arg_size, output_size],
        dtype=dtype,
        initializer=tf.constant_initializer(1.))
        #initializer=kernel_initializer)
    if len(args) == 1:
      res = tf.matmul(args[0], weights)
    else:
      res = tf.matmul(tf.concat(args, 1), weights)
    if not bias:
      return res
    with tf.variable_scope(outer_scope) as inner_scope:
      inner_scope.set_partitioner(None)
      if bias_initializer is None:
        bias_initializer = tf.constant_initializer(0.0, dtype=dtype)
      biases = tf.get_variable(
          "bias", [output_size],
          dtype=dtype,
          initializer=bias_initializer)
    return tf.nn.bias_add(res, biases)

In [15]:
# 本家を動かした結果、linearでweightを1に固定している
# この値と合えばOK
x = tf.squeeze(reconstruct(_linear([flatten(c, 1), flatten(q, 1),
                                   flatten(c, 1) * flatten(q, 1)], 1, False), c, 1), 3)

# linearのweightが1なので、単にcontextの単語とqueryの単語とそれらをかけ合わせたものsumになっている
print(x[0][0])
print(np.sum(context[0][0]) + np.sum(query[0][0]) + np.sum(context[0][0] * query[0][0]))
print(np.sum(context[0][0]) + np.sum(query[0][1]) + np.sum(context[0][0] * query[0][1]))
print(np.sum(context[0][0]) + np.sum(query[0][2]) + np.sum(context[0][0] * query[0][2]))

tf.Tensor([-26.68072809   8.70696234   4.05913972   1.02610918  20.79146408], shape=(5,), dtype=float64)
-26.680728087397405
8.706962343102592
4.059139716117496


In [13]:
print(np.sum(context[0][0]) + np.sum(query[0][0]) + np.sum(context[0][0] * query[0][0]))
print(np.sum(context[0][0]) + np.sum(query[0][1]) + np.sum(context[0][0] * query[0][1]))
print(np.sum(context[0][0]) + np.sum(query[0][2]) + np.sum(context[0][0] * query[0][2]))

-26.680728087397405
8.706962343102592
4.059139716117496


In [16]:
print(x[0])

tf.Tensor(
[[-26.68072809   8.70696234   4.05913972   1.02610918  20.79146408]
 [  8.20336242  26.70335526  10.09011566  28.83012336   4.40243293]
 [ -0.28737536  22.45798754  39.04284817  35.48367173  17.64851754]
 [-19.38573035 -25.33182801 -18.60546732  -4.20703449  -8.93810904]
 [ -7.73918602   8.75593932  29.60629937  23.94264757  -5.00442065]
 [ -8.55095274  13.35000193   5.27460929   1.45506716  -5.76866822]
 [-19.97297749  -9.35981837 -22.49833832  11.98896016  16.97111008]
 [ -9.5906913   -0.09886414  -7.15134309 -16.83037913 -17.28229327]
 [ -6.80150567   2.11880275   9.3623348   -0.92955709  -4.84385472]
 [  4.92427787   8.27363957  -4.16703072  24.31109154  -6.91405945]], shape=(10, 5), dtype=float64)


In [22]:
# 行方向のsoftmax
np.exp(x[0][0]) / np.sum(np.exp(x[0][0]))

array([2.41594153e-21, 5.64631497e-06, 5.41054958e-08, 2.60623130e-09,
       9.99994297e-01])

In [29]:
# 列方向
np.exp(x[0][:, 0]) / np.sum(np.exp(x[0][:, 0]))

array([6.82165299e-16, 9.63513129e-01, 1.97868687e-04, 1.00477052e-12,
       1.14840962e-07, 5.09977500e-08, 5.58507094e-13, 1.80301067e-08,
       2.93309564e-07, 3.62885247e-02])

In [27]:
tf.nn.softmax(x, axis=2)

<tf.Tensor: id=559, shape=(2, 10, 5), dtype=float64, numpy=
array([[[2.41594153e-21, 5.64631497e-06, 5.41054958e-08, 2.60623130e-09,
         9.99994297e-01],
        [9.84000473e-10, 1.06522196e-01, 6.49234858e-09, 8.93477796e-01,
         2.19923988e-11],
        [8.07072528e-18, 6.09674685e-08, 9.72325366e-01, 2.76745728e-02,
         4.97017728e-10],
        [2.53608267e-07, 6.63446652e-10, 5.53384941e-07, 9.91259268e-01,
         8.73992467e-03],
        [6.01945887e-17, 8.77605564e-10, 9.96542175e-01, 3.45782458e-03,
         9.27365554e-16],
        [3.07891844e-10, 9.99682170e-01, 3.11002217e-04, 6.82271785e-06,
         4.97426590e-09],
        [8.96226624e-17, 3.64463704e-12, 7.17245064e-18, 6.81257026e-03,
         9.93187430e-01],
        [7.53951561e-05, 9.99060068e-01, 8.64448073e-04, 5.40987174e-08,
         3.44289001e-08],
        [9.54570288e-08, 7.14247349e-04, 9.99251100e-01, 3.38815116e-05,
         6.76090631e-07],
        [3.80551777e-09, 1.08398446e-07, 4.286556

In [30]:
tf.nn.softmax(x, axis=1)

<tf.Tensor: id=612, shape=(2, 10, 5), dtype=float64, numpy=
array([[[6.82165299e-16, 1.50690439e-08, 6.40816707e-16, 1.08318037e-15,
         9.38900516e-01],
        [9.63513129e-01, 9.85870415e-01, 2.66657252e-13, 1.28774749e-03,
         7.16067202e-08],
        [1.97868687e-04, 1.41279797e-02, 9.99920251e-01, 9.98688506e-01,
         4.05186752e-02],
        [1.00477052e-12, 2.48443065e-23, 9.19644666e-26, 5.78063247e-18,
         1.15141006e-13],
        [1.14840962e-07, 1.58254522e-08, 7.97488252e-05, 9.71017251e-06,
         5.88313997e-12],
        [5.09977500e-08, 1.56506424e-06, 2.16075515e-15, 1.66339297e-15,
         2.73968529e-12],
        [5.58507094e-13, 2.14675522e-16, 1.87485490e-27, 6.24894159e-11,
         2.05807367e-02],
        [1.80301067e-08, 2.25820750e-12, 8.67127588e-21, 1.90426085e-23,
         2.73776932e-17],
        [2.93309564e-07, 2.07436101e-11, 1.28790013e-13, 1.53237730e-16,
         6.90783698e-12],
        [3.62885247e-02, 9.77003249e-09, 1.714562

In [None]:
tf.matmul(tf.nn.softmax(x, axis=2), qu)