In [1]:
import autograd.numpy as np
from autograd.core import primitive
from autograd import grad, jacobian, hessian
from autograd.numpy.numpy_grads import unbroadcast
import scipy.stats

In [2]:
foo = [1, 2, 3, 4, 5]
print foo[0:2]
print foo[2:5]

x = np.matrix([1., 3., 4.]).T
x_info = np.outer(0.1 * x, x) + np.eye(3)
mu = np.matrix([0.1, 0.2, 0.5]).T
mu_cov = np.matrix(np.outer(mu, mu) + np.eye(3))
print mu
print mu_cov
print np.concatenate((mu.A1, mu_cov.A1))

[1, 2]
[3, 4, 5]
[[ 0.1]
 [ 0.2]
 [ 0.5]]
[[ 1.01  0.02  0.05]
 [ 0.02  1.04  0.1 ]
 [ 0.05  0.1   1.25]]
[ 0.1   0.2   0.5   1.01  0.02  0.05  0.02  1.04  0.1   0.05  0.1   1.25]


In [3]:
x = np.array([1., 3., 5.])

def Mag(x):
    return np.dot(x.T, x)

MagGrad = grad(Mag)
MagGrad(x)

array([  2.,   6.,  10.])

In [10]:
def LogLikelihood(x_row, x_info, e_mu, e_mu_cov):
    # TODO: if you're not using autodiff you can just do the matrix multiply once.
    retval =  0.5 * (np.dot(e_mu.T, np.matmul(x_info, x_row)) + \
                     np.dot(x_row.T, np.matmul(x_info, e_mu)) - \
                     np.dot(e_mu.T, np.matmul(x_info, e_mu)) - \
                     np.trace(np.matmul(x_info, e_mu_cov)))
    return retval

@primitive
def LogLikelihoodAD(x_row, x_info, e_mu, e_mu_cov):
    return LogLikelihood(x_row, x_info, e_mu, e_mu_cov)

def LogLikelihoodGrad_e_mu(x_row, x_info, e_mu):
    return np.matmul(x_info, x_row - e_mu)

def LogLikelihoodGrad_e_mu_cov(x_info):
    return -0.5 * x_info

def LogLikelihoodAD_e_mu_vjp(g, ans, vs, gvs, x_row, x_info, e_mu, e_mu_cov):
    return g * LogLikelihoodGrad_e_mu(x_row, x_info, e_mu)

def LogLikelihoodAD_e_mu_cov_vjp(g, ans, vs, gvs, x_row, x_info, e_mu, e_mu_cov):
    return g * LogLikelihoodGrad_e_mu_cov(x_info)


LogLikelihoodAD.defvjp(LogLikelihoodAD_e_mu_vjp, argnum=2)
LogLikelihoodAD.defvjp(LogLikelihoodAD_e_mu_cov_vjp, argnum=3)

print LogLikelihood(x, x_info, mu, mu_cov)
print LogLikelihoodAD(x, x_info, mu, mu_cov)

x = np.array([1., 3., 4.])
x_info = np.outer(0.1 * x, x) + np.eye(3)
mu = np.array([0.1, 0.2, 0.5])
mu_cov = np.array(np.outer(mu, mu) + np.eye(3))

LogLikelihoodADGrad2 = grad(LogLikelihoodAD, argnum=2)
LogLikelihoodADGrad3 = grad(LogLikelihoodAD, argnum=3)
LogLikelihoodGrad2 = grad(LogLikelihood, argnum=2)
LogLikelihoodGrad3 = grad(LogLikelihood, argnum=3)

print LogLikelihoodADGrad2(x, x_info, mu, mu_cov)
print LogLikelihoodGrad2(x, x_info, mu, mu_cov)
print LogLikelihoodADGrad3(x, x_info, mu, mu_cov)
print LogLikelihoodGrad3(x, x_info, mu, mu_cov)

LogLikelihoodHess2 = hessian(LogLikelihood, argnum=2)
print LogLikelihoodHess2(x, x_info, mu, mu_cov)

LogLikelihoodHess3 = hessian(LogLikelihood, argnum=3)
print LogLikelihoodHess3(x, x_info, mu, mu_cov)


5.891
5.891
[  3.23   9.79  12.82]
[  3.23   9.79  12.82]
[[-0.55 -0.15 -0.2 ]
 [-0.15 -0.95 -0.6 ]
 [-0.2  -0.6  -1.3 ]]
[[-0.55 -0.15 -0.2 ]
 [-0.15 -0.95 -0.6 ]
 [-0.2  -0.6  -1.3 ]]
[[-1.1 -0.3 -0.4]
 [-0.3 -1.9 -1.2]
 [-0.4 -1.2 -2.6]]
[[[[ 0.  0.  0.]
   [ 0.  0.  0.]
   [ 0.  0.  0.]]

  [[ 0.  0.  0.]
   [ 0.  0.  0.]
   [ 0.  0.  0.]]

  [[ 0.  0.  0.]
   [ 0.  0.  0.]
   [ 0.  0.  0.]]]


 [[[ 0.  0.  0.]
   [ 0.  0.  0.]
   [ 0.  0.  0.]]

  [[ 0.  0.  0.]
   [ 0.  0.  0.]
   [ 0.  0.  0.]]

  [[ 0.  0.  0.]
   [ 0.  0.  0.]
   [ 0.  0.  0.]]]


 [[[ 0.  0.  0.]
   [ 0.  0.  0.]
   [ 0.  0.  0.]]

  [[ 0.  0.  0.]
   [ 0.  0.  0.]
   [ 0.  0.  0.]]

  [[ 0.  0.  0.]
   [ 0.  0.  0.]
   [ 0.  0.  0.]]]]




In [5]:

x = np.array([1., 3., 4.])
x_info = np.outer(0.1 * x, x) + np.eye(3)
mu = np.array([0.1, 0.2, 0.5]).T
mu_cov = np.array(np.outer(mu, mu) + np.eye(3))

print np.dot(x.T, np.matmul(x_info, mu))
print np.trace(np.matmul(x_info, mu_cov))


9.72
6.629


In [6]:
# It can handle matrices, but not matrix return types.
def MyFun(x):
    x_mat = np.matrix(x).T
    return 3 * np.dot(x_mat.T, x_mat)[0,0]

x = np.array([3., 2., 1.])
x_mat = np.matrix(x).T
print 3 * np.ravel(np.dot(x_mat.T, x_mat))
MyFunGrad = grad(MyFun)
MyFunGrad(x)

[ 42.]


array([ 18.,  12.,   6.])

In [52]:
def Wrapper(vec, betax, betay):
    return(MyTwoArgFun(vec[0:2], vec[2:4], betax, betay))

def WrapperRaw(vec, betax, betay):
    return(MyTwoArgFunRaw(vec[0:2], vec[2:4], betax, betay))

def MyTwoArgFunRaw(x, y, betax, betay):
    return np.exp(np.dot(betax, x) + np.dot(betay, y))

@primitive
def MyTwoArgFun(x, y, betax, betay):
    return MyTwoArgFunRaw(x, y, betax, betay)

@primitive
def MyTwoArgFunGrad_x(x, y, betax, betay):
    return MyTwoArgFun(x, y, betax, betay) * betax

@primitive
def MyTwoArgFunGrad_y(x, y, betax, betay):
    return MyTwoArgFun(x, y, betax, betay) *  betay

def MyTwoArgFun_x_vjp(g, ans, vs, gvs, x, y, betax, betay):
    return g * MyTwoArgFunGrad_x(x, y, betax, betay)
MyTwoArgFun.defvjp(MyTwoArgFun_x_vjp, argnum=0)

def MyTwoArgFun_y_vjp(g, ans, vs, gvs, x, y, betax, betay):
    return g * MyTwoArgFunGrad_y(x, y, betax, betay)
MyTwoArgFun.defvjp(MyTwoArgFun_y_vjp, argnum=1)

# Terms of the Hessian
def MyTwoArgFunHess_x_x(x, y, betax, betay):
    return MyTwoArgFun(x, y, betax, betay) * np.outer(betax, betax)

def MyTwoArgFunHess_x_y(x, y, betax, betay):
    return MyTwoArgFun(x, y, betax, betay) * np.outer(betax, betay)

def MyTwoArgFunHess_y_x(x, y, betax, betay):
    return MyTwoArgFunHess_x_y(x, y, betax, betay).T

def MyTwoArgFunHess_y_y(x, y, betax, betay):
    return MyTwoArgFun(x, y, betax, betay) * np.outer(betay, betay)

def MyTwoArgFunGrad_x_x_vjp(g, ans, vs, gvs, x, y, betax, betay):
    return np.matmul(MyTwoArgFunHess_x_x(x, y, betax, betay), g)
MyTwoArgFunGrad_x.defvjp(MyTwoArgFunGrad_x_x_vjp, argnum=0)

# This returns a derivative with respect to y, given a derivative
# with respect to MyTwoArgFunGrad_x, which explains the use of hess_y_x.
def MyTwoArgFunGrad_x_y_vjp(g, ans, vs, gvs, x, y, betax, betay):
    return np.matmul(MyTwoArgFunHess_y_x(x, y, betax, betay), g)
MyTwoArgFunGrad_x.defvjp(MyTwoArgFunGrad_x_y_vjp, argnum=1)

def MyTwoArgFunGrad_y_y_vjp(g, ans, vs, gvs, x, y, betax, betay):
    return np.matmul(MyTwoArgFunHess_y_y(x, y, betax, betay), g)
MyTwoArgFunGrad_y.defvjp(MyTwoArgFunGrad_y_x_vjp, argnum=0)

# This returns a derivative with respect to y, given a derivative
# with respect to MyTwoArgFunGrad_y, which explains the use of hess_x_y.
def MyTwoArgFunGrad_y_x_vjp(g, ans, vs, gvs, x, y, betax, betay):
    return np.matmul(MyTwoArgFunHess_x_y(x, y, betax, betay), g)
MyTwoArgFunGrad_y.defvjp(MyTwoArgFunGrad_y_y_vjp, argnum=1)


x = np.array([3., 2. ])
y = x + 5.
betax = np.array([1.1, 2.4]) * 0.1
betay = np.array([-0.7, 0.6]) * 0.2
print np.concatenate((x, y))

WrapperHess = hessian(Wrapper)
WrapperRawHess = hessian(WrapperRaw)
print WrapperHess(np.concatenate((x, y)), betax, betay) - WrapperRawHess(np.concatenate((x, y)), betax, betay)

[ 3.  2.  8.  7.]
[[  0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00]
 [  0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00]
 [ -3.46944695e-18   7.95100320e-02   0.00000000e+00  -3.46944695e-18]
 [ -7.95100320e-02   0.00000000e+00   0.00000000e+00   0.00000000e+00]]


In [99]:
print beta
print beta.T
print np.outer(beta, beta.T)
print np.outer(beta, beta)
print np.dot(beta, beta) # WTF
print np.dot(beta, beta.T) # WTF
print sum(beta * beta)
print np.outer(np.array([1, 2]), np.array([1, 1]))

foo = np.random.rand(9).reshape(3, 3)
bar = np.random.rand(3)
print foo.shape
print bar.shape
print np.outer(foo, bar).shape
print np.outer(bar, foo).shape

[ 0.11  0.24]
[ 0.11  0.24]
[[ 0.0121  0.0264]
 [ 0.0264  0.0576]]
[[ 0.0121  0.0264]
 [ 0.0264  0.0576]]
0.0697
0.0697
0.0697
[[1 1]
 [2 2]]
(3, 3)
(3,)
(9, 3)
(3, 9)


In [74]:
def ArrayFunAD(x, b11, b12, b21, b22):
    return np.array([[np.dot(b11, x), np.dot(b12, x)], [np.dot(b21, x), np.dot(b22, x)]])

@primitive
def ArrayFun(x, b11, b12, b21, b22):
    return ArrayFunAD(x, b11, b12, b21, b22)

def ArrayFun_grad_x(x, b11, b12, b21, b22):
    # The last array axis is the x dimension.
    return np.array([[b11, b12], [b21, b22]])

def ArrayFun_vjp_x(g, ans, vs, gvs, x, b11, b12, b21, b22):
    mul = np.expand_dims(g, 2) * ArrayFun_grad_x(x, b11, b12, b21, b22)
    return mul.sum(axis=(0,1))
ArrayFun.defvjp(ArrayFun_vjp_x, argnum=0)


K = 3
b11 = np.random.rand(K)
b12 = np.random.rand(K)
b21 = np.random.rand(K)
b22 = np.random.rand(K)
x = np.random.rand(K)

# print ArrayFun(x, b11, b12, b21, b22)
# print ArrayFun_grad_x(x, b11, b12, b21, b22)

ArrayFunJac = jacobian(ArrayFun)
ArrayFunADJac = jacobian(ArrayFunAD)
print ArrayFunJac(x, b11, b12, b21, b22) - ArrayFunADJac(x, b11, b12, b21, b22)

[[[ 0.  0.  0.]
  [ 0.  0.  0.]]

 [[ 0.  0.  0.]
  [ 0.  0.  0.]]]
[[[ 0.07231331  0.05633414  0.72212465]
  [ 0.68453149  0.80449399  0.80044326]]

 [[ 0.06477375  0.5687207   0.64336569]
  [ 0.32819827  0.25632403  0.8521798 ]]]


In [92]:
# Propotional only.
def MVNLikAD(mu, info, obs):
    obs_bar = obs - mu
    sign, info_logdet = np.linalg.slogdet(info)
    assert sign > 0
    return np.exp(-0.5 * np.dot(obs_bar, np.matmul(info, obs_bar)) - 0.5 * info_logdet)

@primitive
def MVNLik(mu, info, obs):
    return MVNLikAD(mu, info, obs)

# Mu gradient
def MVNLik_grad_mu(mu, info, obs, ans):
    return ans * (np.matmul(info, obs - mu))

def MVNLik_vjp_mu(g, ans, vs, gvs, mu, info, obs):
    return g * MVNLik_grad_mu(mu, info, obs, ans)
MVNLik.defvjp(MVNLik_vjp_mu, argnum=0)

# Info gradient
def MVNLik_grad_info(mu, info, obs, ans):
    return ans * (-0.5 * np.outer(obs - mu, obs - mu) - 0.5 * np.linalg.inv(info))

def MVNLik_vjp_info(g, ans, vs, gvs, mu, info, obs):
    return g * MVNLik_grad_info(mu, info, obs, ans)
MVNLik.defvjp(MVNLik_vjp_info, argnum=1)

# Hessians
def MVNLik_grad_mu_grad_mu(mu, info, obs):
    # Derivative wrt the exponential term pulls down another info * (obs - mu)
    mvn_lik = MVNLik(mu, info, obs)
    info_obs_bar = np.matmul(info, obs - mu)
    exp_grad = mvn_lik * np.outer(info_obs_bar, info_obs_bar)

    # Derivative wr the obs - mu term
    mat_grad = -MVNLikAD(mu, info, obs) * info
    return exp_grad = mat_grad

def MVNLik_grad_mu_grad_info(mu, info, obs):
    mvn_lik = MVNLik(mu, info, obs)
    info_obs_bar = np.matmul(info, obs - mu)
    log_mvn_info_grad = -0.5 * np.outer(obs - mu, obs - mu) - 0.5 * np.linalg.inv(info)

    exp_grad = np.outer(ans, np.matmul(info, obs - mu))
    mat_grad = -MVNLikAD(mu, info, obs) * info
    return mvn_lik * (np.einsum('i,jk->ijk', log_mvn_info_grad, log_mvn_info_grad) +
                      )


K = 2
mu = np.random.rand(K)
info = np.full((K, K), 0.1) + np.eye(K)
obs = np.random.rand(K)

MVNLikGrad0 = grad(MVNLik, argnum=0)
MVNLikADGrad0 = grad(MVNLikAD, argnum=0)
MVNLikGrad1 = grad(MVNLik, argnum=1)
MVNLikADGrad1 = grad(MVNLikAD, argnum=1)

print MVNLikAD(mu, info, obs) - MVNLik(mu, info, obs)
print MVNLikADGrad0(mu, info, obs) - MVNLikGrad0(mu, info, obs)
print MVNLikADGrad1(mu, info, obs) - MVNLikGrad1(mu, info, obs)




0.0
[  0.00000000e+00  -6.93889390e-18]
[[ -5.55111512e-17  -6.93889390e-18]
 [ -6.93889390e-18  -5.55111512e-17]]


In [107]:
x = (np.arange(3) + 1) * 10
y = np.arange(9).reshape(3, 3)
print x
print y
print np.einsum('i,jk->ijk', x, y) # This produces a general outer product

np.full((3, 3), x)

[10 20 30]
[[0 1 2]
 [3 4 5]
 [6 7 8]]
[[[  0  10  20]
  [ 30  40  50]
  [ 60  70  80]]

 [[  0  20  40]
  [ 60  80 100]
  [120 140 160]]

 [[  0  30  60]
  [ 90 120 150]
  [180 210 240]]]


array([10, 20, 30])