In [1]:
import numpy as np
import scipy.linalg
import theano
import theano.tensor as T
import theano.tensor.slinalg

In [2]:
# Symbolic double-precision scalar input; `y` is the expression x**2 built on it.
x = T.dscalar('x')
y = x ** 2
# Symbolic derivative of y with respect to x.
gy = T.grad(y, x)
# Pretty-print the gradient expression graph as the cell's output.
theano.pp(gy)

'((fill((x ** TensorConstant{2}), TensorConstant{1.0}) * TensorConstant{2}) * (x ** (TensorConstant{2} - TensorConstant{1})))'

In [3]:
# Compile the symbolic gradient into a callable and evaluate it at x = 4.
f = theano.function(inputs=[x], outputs=gy)
f(4)

array(8.0)

In [4]:
x = T.dvector('x')
y = x ** 2


def _row_of_jacobian(idx, outputs, inputs):
    """Return the gradient of ``outputs[idx]`` with respect to ``inputs``."""
    return T.grad(outputs[idx], inputs)


# Scan over the indices of `y`, producing one gradient row per step; the
# stacked rows form the Jacobian of `y` with respect to `x`.
J, updates = theano.scan(
    fn=_row_of_jacobian,
    sequences=T.arange(y.shape[0]),
    non_sequences=[y, x],
)
f = theano.function(inputs=[x], outputs=J, updates=updates)
f([4, 4])

array([[ 8.,  0.],
       [ 0.,  8.]])

### Define a helper that maps a function elementwise over a matrix, and test it on a very simple example

In [68]:
def elementwise_map(fn, matrix, non_sequences=None):
    """Apply ``fn`` to every element of ``matrix`` and return a same-shaped result.

    ``matrix`` is flattened and ``fn`` is invoked through ``theano.map`` once
    per flat index as ``fn(i, flattened_matrix, *non_sequences)``. The
    collected results are reshaped back to ``matrix.shape``, so ``fn`` is
    expected to produce one scalar per element.

    Parameters
    ----------
    fn : callable
        Receives the flat index, the flattened matrix and then any extra
        non-sequence arguments.
    matrix : Theano variable
        Tensor whose elements are mapped over.
    non_sequences : None, Theano variable, list or tuple, optional
        Extra arguments forwarded to ``fn`` after the flattened matrix.

    Returns
    -------
    Theano variable
        The per-element results reshaped to ``matrix.shape``.
    """
    flattened_m = T.flatten(matrix)

    # Normalize the extras to a list. Concatenating with `+` only works for
    # lists: a tuple raises TypeError, and a bare Theano variable may be
    # interpreted as a symbolic addition instead of being appended.
    if non_sequences is None:
        extra_args = []
    elif isinstance(non_sequences, (list, tuple)):
        extra_args = list(non_sequences)
    else:
        extra_args = [non_sequences]

    # The updates returned alongside the results are not needed here.
    results, _ = theano.map(fn,
                            sequences=T.arange(flattened_m.shape[0]),
                            non_sequences=[flattened_m] + extra_args)
    return T.reshape(results, matrix.shape)
M = T.dmatrix('M')


def map_func(i, X):
    """Return element ``i`` of the flattened matrix ``X`` shifted by 0.5."""
    shifted = X[i] + .5
    return shifted


# Sanity check: mapping `map_func` over M should add 0.5 to every entry.
Mplus = elementwise_map(map_func, M)
f = theano.function(inputs=[M], outputs=Mplus)
f([[1, 2], [3, 4]])

array([[ 1.5,  2.5],
       [ 3.5,  4.5]])

### Gradient of $z = 5 x_1 + 7 x_2$ with respect to $\boldsymbol{x}\equiv (x_1, x_2)$:

In [128]:
x = T.dvector('x')
# Coefficients of the linear form z = 5*x_1 + 7*x_2.
coefficients = np.array([5, 7])
z = T.dot(x, coefficients)
# The gradient of a linear form is its coefficient vector, whatever x is.
gz = T.grad(cost=z, wrt=x)
output = theano.function([x], gz)
output([1, 20])

array([ 5.,  7.])

### Given a vector $\boldsymbol x=(x_1, x_2)$ and two matrices $H_1, H_2$, compute the gradient (partial derivatives) of $x_1(x_1 H_1 + x_2 H_2)$ with respect to $x_1$ and $x_2$:

In [35]:
# Two constant 3x3 matrices: H_1 = rm[0] and H_2 = rm[1].
rm = np.arange(18).reshape(2, 3, 3)
x = T.dvector('x')

# H(x) = x_1 * (x_1 H_1 + x_2 H_2); tensordot contracts x with rm's first axis.
H = x[0] * T.tensordot(x, rm, axes=1)
Hflat = T.flatten(H)


def update_func(i, matrix, x):
    """Return the gradient of the ``i``-th entry of ``matrix`` w.r.t. ``x``."""
    return T.grad(matrix[i], x)


# One gradient row per flattened entry of H.
results, updates = theano.map(
    fn=update_func,
    sequences=T.arange(H.shape[0] * H.shape[1]),
    non_sequences=[Hflat, x],
)

# Column k of `results` holds the derivative with respect to x_{k+1}; fold
# each column back into the shape of H.
J1 = results[:, 0].reshape(H.shape)
J2 = results[:, 1].reshape(H.shape)
grad_f = theano.function(inputs=[x], outputs=[J1, J2])
grad_f([1, 3.12])

[array([[ 28.08,  33.2 ,  38.32],
        [ 43.44,  48.56,  53.68],
        [ 58.8 ,  63.92,  69.04]]), array([[  9.,  10.,  11.],
        [ 12.,  13.,  14.],
        [ 15.,  16.,  17.]])]

### Derivative of $e^{x_1 H_1 + x_2 H_2}$ with respect to $x_1$ and $x_2$, with
$$ 100 \, H_1 = \begin{pmatrix}0&1&2\\3&4&5\\6&7&8\end{pmatrix}, \qquad
100 \, H_2 = \begin{pmatrix}9&10&11\\12&13&14\\15&16&17\end{pmatrix},
$$
### calculated at $(x_1, x_2) = (1., 3.12)$.

In [30]:
def elementwise_grad_map(fn, matrix, non_sequences=None):
    """Map ``fn`` over the elements of ``matrix`` and regroup its vector outputs.

    ``matrix`` is flattened and ``fn`` is invoked through ``theano.map`` once
    per flat index as ``fn(i, flattened_matrix, *non_sequences)``. The stacked
    results are transposed, and each row of the transpose is reshaped to
    ``matrix.shape``. With ``fn`` returning a vector per element (e.g. a
    gradient), slice ``k`` of the output is component ``k`` of ``fn`` laid out
    like ``matrix``.

    Parameters
    ----------
    fn : callable
        Receives the flat index, the flattened matrix and then any extra
        non-sequence arguments.
    matrix : Theano variable
        Tensor whose elements are mapped over.
    non_sequences : None, Theano variable, list or tuple, optional
        Extra arguments forwarded to ``fn`` after the flattened matrix.

    Returns
    -------
    Theano variable
        Stack of tensors, each with the shape of ``matrix``.
    """
    flattened_m = T.flatten(matrix)

    # Normalize the extras to a list. Concatenating with `+` only works for
    # lists: a tuple raises TypeError, and a bare Theano variable may be
    # interpreted as a symbolic addition instead of being appended.
    if non_sequences is None:
        extra_args = []
    elif isinstance(non_sequences, (list, tuple)):
        extra_args = list(non_sequences)
    else:
        extra_args = [non_sequences]

    # One row of results per element of `matrix`; the updates are not needed.
    per_element, _ = theano.map(fn,
                                sequences=T.arange(flattened_m.shape[0]),
                                non_sequences=[flattened_m] + extra_args)

    # Each row of the transpose gathers one output component across all
    # elements; reshape it to the symbolic shape of `matrix` so no output
    # shape has to be hard-coded.
    regrouped, _ = theano.map(lambda m_i: T.reshape(m_i, matrix.shape),
                              sequences=per_element.T)
    return regrouped

# Divide by a float so the scaling is a true division even where `/` on an
# integer array means floor division (Python 2 without
# `from __future__ import division`), which would turn every entry into 0.
rm = np.arange(18).reshape(2, 3, 3) / 100.0
x = T.dvector('x')
# H(x) = x_1 H_1 + x_2 H_2, with H_k = rm[k-1].
H = T.tensordot(x, rm, axes=1)
expH = theano.tensor.slinalg.expm(H)


def update_func(i, matrix, x):
    """Return the gradient of the ``i``-th entry of ``matrix`` w.r.t. ``x``."""
    return T.grad(matrix[i], x)


# results[k] is the derivative of exp(H) with respect to x_{k+1}.
results = elementwise_grad_map(update_func, expH, non_sequences=[x])
grad_f = theano.function(inputs=[x], outputs=results)
grad_f([1., 3.12])

array([[[ 0.05591533,  0.07680824,  0.09770114],
        [ 0.13211221,  0.1597642 ,  0.18741618],
        [ 0.20830909,  0.24272016,  0.27713123]],

       [[ 0.34543385,  0.37919178,  0.41294972],
        [ 0.46303871,  0.50496217,  0.54688563],
        [ 0.58064356,  0.63073255,  0.68082154]]])

### Testing with complex numbers

In [45]:
x = T.dscalar('x')
coefficient = 2j

# Differentiate the real and imaginary parts of coefficient * x separately,
# then recombine the two real-valued gradients into one complex number.
y_R = T.real(coefficient) * x
y_I = T.imag(coefficient) * x
gy_R = T.grad(cost=y_R, wrt=x)
gy_I = T.grad(cost=y_I, wrt=x)
gy = gy_R + 1j * gy_I
f = theano.function(inputs=[x], outputs=gy)
f(1.34)

array(2j)

In [71]:
# NOTE: np.arange(9).reshape(3, 3) is singular (its rows are in arithmetic
# progression), so its matrix logarithm is not well defined; presumably this
# call is only meant to show logm returning a complex-valued result.
scipy.linalg.logm(np.arange(9).reshape(3, 3))

array([[ -5.46674063+2.90138724j,  12.10807062-0.30984047j,
         -5.66408476-0.37947553j],
       [ 12.42003105-0.73735708j, -22.77095536+2.19047727j,
         12.73199149-1.16487369j],
       [ -5.04016389-1.23450875j,  13.04395193-1.5923903j ,
         -4.21889888+1.1913208j ]])

### ...and a big fail for Theano: `T.grad` cannot differentiate through the real part of a complex-valued expression

In [85]:
# Attempt to differentiate exp(i * x * H) with respect to the scalar x by
# splitting the result into real and imaginary parts. Per the recorded output,
# this cell fails with a TypeError raised from the gradient of `real` on a
# complex128 input.
#
# NOTE(review): `x` is rebound to a fresh scalar here, while `H` is not defined
# in this cell and comes from an earlier one, where it was built from the
# previous vector-valued `x` — confirm which `H` is intended.
x = T.dscalar('x')
expH = T.slinalg.expm(1j * x * H)
# NOTE(review): `expH_flat` is not used below; the next two lines flatten again.
expH_flat = T.flatten(expH)
# Real and imaginary parts of the flattened matrix exponential.
expH_flat_R = T.real(T.flatten(expH))
expH_flat_I = T.imag(T.flatten(expH))
def fn(i, mat, x):
    # Gradient of the i-th flattened entry with respect to x.
    return T.grad(mat[i], x)
# One scan per part, each stepping over the flat indices of that part.
J_R, updates = theano.scan(fn, sequences=T.arange(expH_flat_R.shape[0]), non_sequences=[expH_flat_R, x])
J_I, updates = theano.scan(fn, sequences=T.arange(expH_flat_I.shape[0]), non_sequences=[expH_flat_I, x])
# Fold the per-entry derivatives back into the shape of expH and recombine
# them into a single complex-valued result.
expH_J_R = J_R.reshape(expH.shape)
expH_J_I = J_I.reshape(expH.shape)
expH_J = expH_J_R + 1j * expH_J_I
f = theano.function([x], expH_J)
f(2)

TypeError: Elemwise{real,no_inplace}.grad illegally  returned an integer-valued variable. (Input index 0, dtype complex128)