In [2]:
import theano
import theano.tensor as T
import numpy as np

# computing tanh(x(t).dot(W) + b)

$$
\tanh(x_t W + b)
$$

## 前準備

In [53]:
# Symbolic inputs: scan iterates over the rows of X; W and b are shared by every step.
X = T.matrix('X')
W = T.matrix('W')
b = T.vector('b')


def affine_tanh_step(x_t):
    # One scan iteration: affine map of the current row followed by tanh.
    return T.tanh(T.dot(x_t, W) + b)


results, updates = theano.scan(affine_tanh_step, sequences=X)
f = theano.function(inputs=[X, W, b], outputs=[results])

# Concrete values. NOTE: `b` is rebound from the symbolic vector to a numpy
# array here, after the scan graph and `f` were built from the symbolic one.
x = np.eye(2, dtype=theano.config.floatX)
w = np.ones((2, 2), dtype=theano.config.floatX)
b = np.array([1, 2], dtype=theano.config.floatX)

## データ

In [54]:
print 'x: ', x
print 'w: ', w
print 'b: ', b

x:  [[ 1.  0.]
 [ 0.  1.]]
w:  [[ 1.  1.]
 [ 1.  1.]]
b:  [ 1.  2.]


## numpy

In [55]:
# numpy
print 'numpy: ', np.tanh(x.dot(w) + b)

numpy:  [[ 0.96402758  0.99505478]
 [ 0.96402758  0.99505478]]


## theano

In [56]:
# Evaluate the compiled scan function on the concrete inputs.
# `f` was built with outputs=[results], so the result is a one-element list.
f(x, w, b)

[array([[ 0.96402758,  0.99505478],
        [ 0.96402758,  0.99505478]], dtype=float32)]

# computing the sequence

$$
x_t = \tanh(x_{t-1} W + y_t U + p_{T-t} V)
$$

## 前準備

In [136]:
# Symbolic inputs. X is the initial state handed to scan via outputs_info;
# Y and P are the two sequences scan iterates over; W, U and V are weights.
# (No bias term is used in this recurrence, so no symbolic `b` is declared.)
X = T.vector('X')
W = T.matrix('W')
U = T.matrix('U')
Y = T.matrix('Y')
V = T.matrix('V')
P = T.matrix('P')


def recurrence_step(y_t, p_rev_t, x_tm1):
    # scan passes the sequence slices first (in `sequences` order) and the
    # previous output last.
    return T.tanh(T.dot(x_tm1, W) + T.dot(y_t, U) + T.dot(p_rev_t, V))


# P[::-1] feeds the rows of P in reverse order.
results, updates = theano.scan(recurrence_step,
                               sequences=[Y, P[::-1]], outputs_info=[X])
f = theano.function(inputs=[X, W, Y, U, P, V], outputs=[results])

## データ

In [132]:
x = np.zeros((2), dtype=theano.config.floatX)
x[1] = 1
w = np.ones((2, 2), dtype=theano.config.floatX)
y = np.ones((5, 2), dtype=theano.config.floatX)
y[0, :] = -3
u = np.ones((2, 2), dtype=theano.config.floatX)
p = np.ones((5, 2), dtype=theano.config.floatX)
p[0, :] = 3
v = np.ones((2, 2), dtype=theano.config.floatX)

print 'x: ', x
print 'w: ', w
print 'y: ', y
print 'u: ', u
print 'p: ', p
print 'v: ', v

x:  [ 0.  1.]
w:  [[ 1.  1.]
 [ 1.  1.]]
y:  [[-3. -3.]
 [ 1.  1.]
 [ 1.  1.]
 [ 1.  1.]
 [ 1.  1.]]
u:  [[ 1.  1.]
 [ 1.  1.]]
p:  [[ 3.  3.]
 [ 1.  1.]
 [ 1.  1.]
 [ 1.  1.]
 [ 1.  1.]]
v:  [[ 1.  1.]
 [ 1.  1.]]


## numpy

In [128]:
x_res = np.zeros((5, 2), dtype=theano.config.floatX)
x_res[0] = x.dot(w) + y[0].dot(u) + p[4].dot(v)
for i in range(1, 5):
    x_res[i] = x_res[i-1].dot(w) + y[i].dot(u) + p[4-i].dot(v)
print x_res

x_res = np.zeros((5, 2), dtype=theano.config.floatX)
x_res[0] = np.tanh(x.dot(w) + y[0].dot(u) + p[4].dot(v))
for i in range(1, 5):
    x_res[i] = np.tanh(x_res[i-1].dot(w) + y[i].dot(u) + p[4-i].dot(v))
print x_res

[[ -3.  -3.]
 [ -2.  -2.]
 [  0.   0.]
 [  4.   4.]
 [ 16.  16.]]
[[-0.99505478 -0.99505478]
 [ 0.96471971  0.96471971]
 [ 0.99998587  0.99998587]
 [ 0.99998772  0.99998772]
 [ 1.          1.        ]]


## theano

In [137]:
# Run the compiled recurrence; argument order follows inputs=[X, W, Y, U, P, V].
f(x, w, y, u, p, v)

[array([[-0.99505478, -0.99505478],
        [ 0.96471971,  0.96471971],
        [ 0.99998587,  0.99998587],
        [ 0.99998772,  0.99998772],
        [ 1.        ,  1.        ]], dtype=float32)]

# Computing norms of lines (rows) of X

$$
\|X_{i,:}\|_2 = \sqrt{\sum_j X_{ij}^2}
$$

## 前準備

In [124]:
X = T.matrix('X')


def row_norm(x_i):
    # Euclidean norm of the single row scan hands over.
    return T.sqrt(T.sum(x_i ** 2))


# Iterating over X itself visits one row per step.
results, updates = theano.scan(row_norm, sequences=[X])
f = theano.function(inputs=[X], outputs=[results])

## データ

In [125]:
# 6x6 matrix with 1..5 on the first superdiagonal; the last row is all zeros.
superdiagonal = np.arange(1, 6, dtype=theano.config.floatX)
x = np.diag(superdiagonal, k=1)
x

array([[ 0.,  1.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  2.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  3.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  4.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  5.],
       [ 0.,  0.,  0.,  0.,  0.,  0.]], dtype=float32)

## numpy

In [64]:
# Sum of squares along axis 1 (within each row), then square root.
np.sqrt(np.sum(x ** 2, axis=1))

array([ 1.,  2.,  3.,  4.,  5.,  0.], dtype=float32)

## theano

In [66]:
# Row norms via the compiled scan; returned as a one-element list.
f(x)

[array([ 1.,  2.,  3.,  4.,  5.,  0.], dtype=float32)]

# Computing norms of columns of X

$$
\|X_{:,j}\|_2 = \sqrt{\sum_i X_{ij}^2}
$$

## 前準備

In [67]:
X = T.matrix('X')


def column_norm(x_j):
    # Euclidean norm of the single column scan hands over.
    return T.sqrt(T.sum(x_j ** 2))


# Iterating over X.T visits one column of X per step.
results, updates = theano.scan(column_norm, sequences=[X.T])
f = theano.function(inputs=[X], outputs=[results])

## データ

In [68]:
# 6x6 matrix with 1..5 on the first superdiagonal; the first column is all zeros.
superdiagonal = np.arange(1, 6, dtype=theano.config.floatX)
x = np.diag(superdiagonal, k=1)
x

array([[ 0.,  1.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  2.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  3.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  4.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  5.],
       [ 0.,  0.,  0.,  0.,  0.,  0.]], dtype=float32)

## numpy

In [70]:
# Sum of squares along axis 0 (down each column), then square root.
np.sqrt(np.sum(x ** 2, axis=0))

array([ 0.,  1.,  2.,  3.,  4.,  5.], dtype=float32)

## theano

In [72]:
# Column norms via the compiled scan; returned as a one-element list.
f(x)

[array([ 0.,  1.,  2.,  3.,  4.,  5.], dtype=float32)]

# Computing trace of X

$$
\mathrm{tr}(X) = \sum_i X_{ii}
$$

## 前準備

In [88]:
# Use the configured float type instead of a hard-coded 'float32', so the
# accumulator dtype agrees with T.matrix and with the data cells, which all
# use theano.config.floatX.
floatX = theano.config.floatX

X = T.matrix('X')


def add_diagonal_entry(i, j, running_total):
    # i and j advance in lockstep, so X[i, j] is the next diagonal entry.
    # The cast keeps the step output at the same dtype as the initial value.
    return T.cast(X[i, j] + running_total, floatX)


results, updates = theano.scan(add_diagonal_entry,
                               sequences=[T.arange(X.shape[0]), T.arange(X.shape[1])],
                               outputs_info=np.asarray(0, dtype=floatX))
# scan returns every partial sum; the last one is the trace.
result = results[-1]
f = theano.function(inputs=[X], outputs=[result])

## データ

In [89]:
# Identity matrix whose first row is replaced by 0..4, so the diagonal
# becomes [0, 1, 1, 1, 1].
x = np.eye(5, dtype=theano.config.floatX)
x[0, :] = np.arange(5, dtype=theano.config.floatX)
x

array([[ 0.,  1.,  2.,  3.,  4.],
       [ 0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  1.]], dtype=float32)

## numpy

In [90]:
# Reference trace: add up the main-diagonal entries.
np.sum(np.diagonal(x))

4.0

## theano

In [91]:
# Trace via the compiled scan; returned as a one-element list holding a 0-d array.
f(x)

[array(4.0, dtype=float32)]

# Computing the sequence

$$
\mathbf{x}_t = \mathbf{x}_{t-2}\mathbf{U} + \mathbf{x}_{t-1}\mathbf{V} + \tanh(\mathbf{x}_{t-1}\mathbf{W} + \mathbf{b})
$$

## 前準備

In [92]:
# Symbolic inputs. X supplies the initial states for the two taps; n is the
# number of steps to run.
X = T.matrix('X')
W = T.matrix('W')
b = T.vector('b')
U = T.matrix('U')
V = T.matrix('V')
n = T.iscalar('n')


def two_tap_step(x_tm2, x_tm1):
    # Arguments arrive in the order of `taps` below: the t-2 state, then t-1.
    linear_part = T.dot(x_tm2, U) + T.dot(x_tm1, V)
    return linear_part + T.tanh(T.dot(x_tm1, W) + b)


initial_state = {'initial': X, 'taps': [-2, -1]}
results, updates = theano.scan(two_tap_step, n_steps=n,
                               outputs_info=[initial_state])
f = theano.function(inputs=[X, W, b, U, V, n], outputs=[results])

## データ

In [94]:
x = np.zeros((2, 2), dtype=theano.config.floatX)
x[1, 1] = 1
w = 0.5 * np.ones((2, 2), dtype=theano.config.floatX)
u = 0.5 * np.ones((2, 2), dtype=theano.config.floatX) - np.eye(2, dtype=theano.config.floatX)
v = 0.5 * np.ones((2, 2), dtype=theano.config.floatX)
n = 10
b = np.ones((2), dtype=theano.config.floatX)

print 'x: ', x
print 'w: ', w
print 'u: ', u
print 'v: ', v
print 'n: ', n
print 'b: ', b

x:  [[ 0.  0.]
 [ 0.  1.]]
w:  [[ 0.5  0.5]
 [ 0.5  0.5]]
u:  [[-0.5  0.5]
 [ 0.5 -0.5]]
v:  [[ 0.5  0.5]
 [ 0.5  0.5]]
n:  10
b:  [ 1.  1.]


## numpy

In [97]:
# Reference unrolling of the two-tap recurrence in plain numpy, for the same
# number of steps `n` that is passed to the compiled function (assumes n >= 2).
# x_res uses numpy's default float64, so low-order digits may differ from a
# float32 Theano result.
x_res = np.zeros((n, 2))

# The first two steps take one or both taps from the initial state x.
x_res[0] = x[0].dot(u) + x[1].dot(v) + np.tanh(x[1].dot(w) + b)
x_res[1] = x[1].dot(u) + x_res[0].dot(v) + np.tanh(x_res[0].dot(w) + b)

# From step 2 onwards both taps come from earlier results; no separate
# assignment for x_res[2] is needed because the loop starts there.
for i in range(2, n):
    x_res[i] = x_res[i - 2].dot(u) + x_res[i - 1].dot(v) + np.tanh(x_res[i - 1].dot(w) + b)
x_res

array([[  1.40514827,   1.40514827],
       [  2.888989  ,   1.888989  ],
       [  3.38671444,   3.38671444],
       [  3.88640491,   4.88640491],
       [  5.38636299,   5.38636299],
       [  6.88635731,   5.88635731],
       [  7.38635654,   7.38635654],
       [  7.88635644,   8.88635644],
       [  9.38635643,   9.38635643],
       [ 10.88635642,   9.88635642]])

## theano

In [98]:
# Run the compiled recurrence; argument order follows inputs=[X, W, b, U, V, n].
f(x, w, b, u, v, n)

[array([[  1.40514827,   1.40514827],
        [  2.88898897,   1.88898897],
        [  3.38671446,   3.38671446],
        [  3.88640499,   4.88640499],
        [  5.38636303,   5.38636303],
        [  6.88635731,   5.88635731],
        [  7.38635635,   7.38635635],
        [  7.88635635,   8.88635635],
        [  9.38635635,   9.38635635],
        [ 10.88635635,   9.88635635]], dtype=float32)]

# Computing the Jacobian

$y = \tanh(vA)$ の $v$ に関するヤコビ行列を計算．


## 前準備

In [100]:
# y = tanh(v . A); the Jacobian dy/dv is assembled one output component at a time.
v = T.vector()
A = T.matrix()
y = T.tanh(T.dot(v, A))


def jacobian_row(i):
    # Gradient of the i-th component of y with respect to the whole vector v.
    return T.grad(y[i], v)


component_indices = T.arange(y.shape[0])
results, updates = theano.scan(jacobian_row, sequences=[component_indices])
f = theano.function(inputs=[A, v], outputs=[results], allow_input_downcast=True)

## データ

In [106]:
x = np.eye(5, dtype=theano.config.floatX)[0]
w = np.eye(5, 3, dtype=theano.config.floatX)
w[2] = np.ones((3), dtype=theano.config.floatX)

print 'x: ', x
print 'w: ', w
g = theano.function(inputs=[A, v], outputs=[y], allow_input_downcast=True)
g(w, x)

x:  [ 1.  0.  0.  0.  0.]
w:  [[ 1.  0.  0.]
 [ 0.  1.  0.]
 [ 1.  1.  1.]
 [ 0.  0.  0.]
 [ 0.  0.  0.]]


[array([ 0.76159418,  0.        ,  0.        ], dtype=float32)]

## numpy

In [108]:
# Closed form: the tanh derivative (1 - tanh^2) scales each column of w;
# transposing puts one output component per row.
np.transpose((1 - np.tanh(np.dot(x, w)) ** 2) * w)

array([[ 0.41997433,  0.        ,  0.41997433,  0.        ,  0.        ],
       [ 0.        ,  1.        ,  1.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  1.        ,  0.        ,  0.        ]], dtype=float32)

## theano

In [109]:
# Jacobian via the compiled scan; note the argument order follows inputs=[A, v].
f(w, x)

[array([[ 0.41997433,  0.        ,  0.41997433,  0.        ,  0.        ],
        [ 0.        ,  1.        ,  1.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  1.        ,  0.        ,  0.        ]], dtype=float32)]