# In [5]:
from IPython.display import display
import numpy as np
import scipy.sparse
import scipy.linalg as la
import theano
import theano.tensor.slinalg
import theano.tensor as T

# Side length of the square test matrix.
n = 4

# Random dense n-by-n test matrix: draw a sparse matrix with about 80% of its
# entries populated, then densify it into a plain float64 ndarray.
# `np.float64` replaces the `np.float` alias, which recent NumPy releases no
# longer provide, and the shape now follows `n` instead of a hard-coded 4.
# No seed is set, so the values differ from run to run.
A = scipy.sparse.rand(n, n, density=.8, dtype=np.float64).toarray()


# ----------- numerical differentiation
# Forward finite difference of expm(x * A) with respect to the scalar x:
#     d/dx expm(x * A)  ~=  (expm((x + h) * A) - expm(x * A)) / h
x = 2.     # point at which the derivative is estimated
h = 0.001  # step size of the finite difference
expm_at_x = la.expm(x * A)
expm_at_x_plus_h = la.expm((x + h) * A)
numerical_diff = (expm_at_x_plus_h - expm_at_x) / h

# show the finite-difference estimate of the derivative
display(numerical_diff)



# ----------- automatic differentiation with theano
# Symbolic scalar with respect to which the matrix exponential is
# differentiated (this rebinds the name `x`).
x = T.dscalar('x')
expA = T.slinalg.expm(x * A)
# theano.grad only accepts a scalar cost, so the matrix is flattened into a
# vector whose entries can be differentiated one at a time.
expA_flat = T.flatten(expA)


def compute_element_grad(idx, flattened_matrix):
    """Return the symbolic gradient of one entry of a flattened matrix.

    The entry `flattened_matrix[idx]` is differentiated with respect to the
    module-level symbolic scalar `x`.
    """
    element = flattened_matrix[idx]
    return T.grad(element, wrt=x)


# `theano.scan` loops over the indices of the flattened matrix, calls
# `compute_element_grad` for each of them and collects the gradients.
element_indices = T.arange(expA_flat.shape[0])
g_x_flat, _ = theano.scan(
    fn=compute_element_grad,
    sequences=element_indices,
    non_sequences=[expA_flat],
)
# reshape the flat result back to the shape of the matrix
g_x = T.reshape(g_x_flat, newshape=expA.shape)

# this is where the computational graph is actually compiled
gradient = theano.function(inputs=[x], outputs=g_x)


# evaluate the gradient obtained with AD at x = 2 and display it
display(gradient(2.))

# Output of display(numerical_diff) for one random draw of A:
# array([[  47.30089773,   41.58751062,   11.79613877,   66.60876992],
#        [  78.81358454,   75.40439209,   24.08270378,  111.51496174],
#        [  39.3271675 ,   37.86378273,   12.24411345,   55.23865786],
#        [  84.22034043,   77.52048578,   23.29762595,  118.82826057]])

# Output of display(gradient(2.)) for the same run:
# array([[  47.35543075,   41.6371479 ,   11.8108558 ,   66.68567484],
#        [  78.9067194 ,   75.49150835,   24.10962867,  111.64639196],
#        [  39.373539  ,   37.90726408,   12.25764708,   55.30415484],
#        [  84.31863947,   77.61125404,   23.32517586,  118.96702555]])