# Derivates in Theano

## Computing Gradients

In [1]:
import numpy
import theano
import theano.tensor as T
from theano import pp
# Symbolic double-precision scalar input.
x = T.dscalar('x')
# Symbolic expression y = x^2; nothing is evaluated numerically yet.
y = x ** 2
# Symbolic gradient dy/dx, built by differentiating the expression graph.
gy = T.grad(y, x)
pp(gy)  # print out the gradient prior to optimization

'((fill((x ** TensorConstant{2}), TensorConstant{1.0}) * TensorConstant{2}) * (x ** (TensorConstant{2} - TensorConstant{1})))'

In [2]:
# Compile the symbolic gradient into a callable taking a value for x.
f = theano.function([x], gy)

In [3]:
# d(x^2)/dx evaluated at x = 4 is 2 * 4 = 8.
f(4)

array(8.0)

In [4]:
# Sanity check with a tolerance-based comparison: 2 * 94.2 = 188.4.
numpy.allclose(f(94.2), 188.4)

True

In [5]:
# Pretty-print the output of the compiled function's graph, to compare with
# the unoptimized expression printed for gy above.
pp(f.maker.fgraph.outputs[0])

'(TensorConstant{2.0} * x)'

In [7]:
# Elementwise derivative of the logistic function.
# Summing the logistic over all entries yields a scalar cost, so T.grad
# returns a matrix of the same shape as x holding d logistic(x_ij) / d x_ij.
x = T.dmatrix('x')
logistic = 1 / (1 + T.exp(-x))
s = T.sum(logistic)
gs = T.grad(s, x)
dlogistic = theano.function(inputs=[x], outputs=gs)
dlogistic([[0, 1],
           [-1, -2]])

array([[ 0.25      ,  0.19661193],
       [ 0.19661193,  0.10499359]])

## Computing the Jacobian

In [10]:
import theano
import theano.tensor as T
x = T.dvector('x')
y = x ** 2


def _jacobian_row(i, y, x):
    """Return the gradient of the i-th output y[i] with respect to x.

    scan calls this once per index i; each call yields one row of the
    Jacobian.
    """
    return T.grad(y[i], x)


# Iterate over the output indices symbolically and stack the per-output
# gradients into the Jacobian matrix J.
J, updates = theano.scan(
    fn=_jacobian_row,
    sequences=T.arange(y.shape[0]),
    non_sequences=[y, x],
)
f = theano.function([x], J, updates=updates)
f([4, 4])

array([[ 8.,  0.],
       [ 0.,  8.]])

## Computing the Hessian

In [14]:
x = T.dvector('x')
y = x ** 2
# T.grad needs a scalar cost, so reduce y before differentiating.
cost = y.sum()
gy = T.grad(cost, x)


def _hessian_row(i, gy, x):
    """Return the gradient of the i-th gradient component gy[i] w.r.t. x.

    scan calls this once per index i; each call yields one row of the
    Hessian.
    """
    return T.grad(gy[i], x)


# The Hessian is the Jacobian of the gradient: differentiate each component
# of gy with respect to x and stack the rows.
H, updates = theano.scan(
    fn=_hessian_row,
    sequences=T.arange(gy.shape[0]),
    non_sequences=[gy, x],
)
f = theano.function([x], H, updates=updates)
# Evaluate the compiled function so the cell actually shows the Hessian.
# For cost = sum(x ** 2) it is 2 * I, i.e. [[2, 0], [0, 2]] here.
f([4, 4])

## Jacobian times a Vector

## R-operator

In [17]:
# R-operator: Jacobian of y with respect to W, right-multiplied by V,
# computed without forming the full Jacobian.
W = T.dmatrix('W')
V = T.dmatrix('V')  # evaluation point, built with the same type as W
x = T.dvector('x')
y = T.dot(x, W)
JV = T.Rop(y, wrt=W, eval_points=V)
f = theano.function(inputs=[W, V, x], outputs=JV)
f(
    [[1, 1], [1, 1]],  # W
    [[2, 2], [2, 2]],  # V
    [0, 1],            # x
)

array([ 2.,  2.])

## L-operator

In [18]:
# L-operator: row vector v left-multiplied with the Jacobian of y with
# respect to W.
W = T.dmatrix('W')
v = T.dvector('v')  # evaluation point, built with the same type as y
x = T.dvector('x')
y = T.dot(x, W)
VJ = T.Lop(y, wrt=W, eval_points=v)
# W is not listed as an input: the compiled function below is called with
# values for v and x only.
f = theano.function(inputs=[v, x], outputs=VJ)
f(
    [2, 2],  # v
    [0, 1],  # x
)

array([[ 0.,  0.],
       [ 2.,  2.]])

## Hessian times a vector

In [20]:
x = T.dvector('x')
# The vector multiplying the Hessian must be bound to `v`. Binding it to `y`
# (immediately overwritten below) made this cell depend on a `v` left over
# from an earlier cell, so it failed on a fresh kernel.
v = T.dvector('v')
y = T.sum(x ** 2)
gy = T.grad(y, x)
# v . H is obtained as the gradient of the scalar (gy . v) with respect to x,
# which avoids building the full Hessian matrix.
vH = T.grad(T.sum(gy * v), x)
f = theano.function([x, v], vH)
f([4, 4], [2, 2])

array([ 4.,  4.])

Or, making use of R-operator

In [21]:
x = T.dvector('x')
# The vector multiplying the Hessian must be bound to `v`. Binding it to `y`
# (immediately overwritten below) made this cell depend on a `v` left over
# from an earlier cell, so it failed on a fresh kernel.
v = T.dvector('v')
y = T.sum(x ** 2)
gy = T.grad(y, x)
# H . v via the R-operator: the Jacobian of the gradient gy with respect to x
# (i.e. the Hessian), right-multiplied by v.
Hv = T.Rop(gy, x, v)
f = theano.function([x, v], Hv)
f([4, 4], [2, 2])

array([ 4.,  4.])