In [1]:
import autograd.numpy as np
from autograd.core import primitive
from autograd import grad, jacobian, hessian
from autograd.numpy.numpy_grads import unbroadcast
import scipy.stats

In [28]:
def LogitTermAD(beta, beta_cov, x_mat, std_vec):
    """Sum of log(1 + exp(z)) over all rows of x_mat and all entries of std_vec.

    Built only from differentiable numpy operations so that autograd can
    differentiate it directly.

    Args:
        beta: length-K vector.
        beta_cov: K x K matrix.
        x_mat: N x K matrix (one row per observation).
        std_vec: length-J vector of multipliers applied to sigma.

    Returns:
        Scalar ``sum_{i,j} log(1 + exp(z[i, j]))`` where
        ``z[i, j] = sigma[i] * std_vec[j] + mu[i]``,
        ``mu[i] = x_mat[i] . beta`` and
        ``sigma[i] = x_mat[i]^T beta_cov x_mat[i]``.
    """
    # x_outer[i] is the outer product of row i of x_mat with itself.
    x_outer = np.einsum('ij,ik->ijk', x_mat, x_mat)
    # Quadratic form x_i^T beta_cov x_i for every row i.
    # NOTE(review): this quadratic form is used unscaled as the multiplier of
    # std_vec; if std_vec holds standard-normal points a square root may have
    # been intended -- confirm with the model derivation.
    sigma = np.einsum('ijk,jk->i', x_outer, beta_cov)
    mu = np.einsum('ij,j->i', x_mat, beta)
    z = np.einsum('i,j->ij', sigma, std_vec) + np.expand_dims(mu, 1)
    # logaddexp(0, z) == log(1 + exp(z)) but does not overflow to inf when z
    # is large, unlike the naive log(1 + exp(z)).
    return np.sum(np.logaddexp(0, z))

@primitive
def LogitTerm(beta, beta_cov, x_mat, std_vec):
    """Autograd primitive returning the same value as ``LogitTermAD``.

    Because this is a primitive, autograd does not trace into the body; the
    derivatives with respect to ``beta`` and ``beta_cov`` are the ones
    registered on it with ``LogitTerm.defvjp`` elsewhere in this file.
    """
    value = LogitTermAD(beta=beta, beta_cov=beta_cov, x_mat=x_mat, std_vec=std_vec)
    return value

# All gradients of the logit term are weighted sums of p, so expressing each
# of them through this one function makes them easy to differentiate.
@primitive
def WeightedPSum(beta, beta_cov, p, x_mat, std_vec, weights):
    """Return the sum of the elementwise product of ``p`` and ``weights``.

    ``beta``, ``beta_cov``, ``x_mat`` and ``std_vec`` are accepted but not
    read by the body; only ``p`` and ``weights`` enter the result.
    """
    weighted = np.multiply(p, weights)
    return np.sum(weighted)


@primitive
def LogitTerm_grad_beta_term(beta, beta_cov, x_mat, std_vec, a):
    """Component ``a`` of the gradient of the logit term with respect to beta.

    Recomputes ``z`` exactly as ``LogitTermAD`` does, forms the logistic
    ``p = exp(z) / (1 + exp(z))`` and returns ``sum_{i,j} p[i, j] * x_mat[i, a]``
    through ``WeightedPSum``.

    Args:
        beta: length-K vector.
        beta_cov: K x K matrix.
        x_mat: N x K matrix.
        std_vec: length-J vector.
        a: integer column index into ``x_mat`` (the component of beta).

    Returns:
        Scalar partial derivative with respect to ``beta[a]``.
    """
    x_outer = np.einsum('ij,ik->ijk', x_mat, x_mat)
    sigma = np.einsum('ijk,jk->i', x_outer, beta_cov)
    mu = np.einsum('ij,j->i', x_mat, beta)
    z = np.einsum('i,j->ij', sigma, std_vec) + np.expand_dims(mu, 1)
    # Logistic function written as exp(-log(1 + exp(-z))).  The direct form
    # exp(z) / (1 + exp(z)) evaluates to inf / inf = nan once exp(z) overflows.
    p = np.exp(-np.logaddexp(0, -z))
    # Column a of x_mat as an (N, 1) array so it broadcasts across std_vec.
    weights = np.expand_dims(x_mat[:, a], 1)
    # Equivalent to np.sum(p * weights).
    return WeightedPSum(beta, beta_cov, p, x_mat, std_vec, weights)

def LogitTerm_grad_beta(beta, beta_cov, x_mat, std_vec):
    """Assemble the gradient with respect to beta, one component at a time.

    Returns an array with ``beta.size`` entries; entry ``a`` is
    ``LogitTerm_grad_beta_term(beta, beta_cov, x_mat, std_vec, a)``.
    """
    components = []
    for a in range(beta.size):
        components.append(
            LogitTerm_grad_beta_term(beta, beta_cov, x_mat, std_vec, a))
    return np.array(components)

def LogitTerm_vjp_beta(g, ans, vs, gvs, beta, beta_cov, x_mat, std_vec):
    """Vector-Jacobian product of ``LogitTerm`` for its first argument (beta).

    Scales the hand-written beta gradient by ``g``; ``ans``, ``vs`` and
    ``gvs`` are part of the callback signature but are not used here.
    """
    beta_gradient = LogitTerm_grad_beta(beta, beta_cov, x_mat, std_vec)
    return g * beta_gradient
# Register the callback above as the VJP for argument 0 of LogitTerm.
LogitTerm.defvjp(LogitTerm_vjp_beta, argnum=0)


@primitive
def LogitTerm_grad_beta_cov_term(beta, beta_cov, x_mat, std_vec, a, b):
    """Entry ``(a, b)`` of the gradient of the logit term w.r.t. beta_cov.

    Recomputes ``z`` exactly as ``LogitTermAD`` does, forms the logistic
    ``p = exp(z) / (1 + exp(z))`` and returns
    ``sum_{i,j} p[i, j] * std_vec[j] * x_mat[i, a] * x_mat[i, b]``
    through ``WeightedPSum``.

    Args:
        beta: length-K vector.
        beta_cov: K x K matrix.
        x_mat: N x K matrix.
        std_vec: length-J vector.
        a: integer row index into beta_cov.
        b: integer column index into beta_cov.

    Returns:
        Scalar partial derivative with respect to ``beta_cov[a, b]``.
    """
    x_outer = np.einsum('ij,ik->ijk', x_mat, x_mat)
    sigma = np.einsum('ijk,jk->i', x_outer, beta_cov)
    mu = np.einsum('ij,j->i', x_mat, beta)
    z = np.einsum('i,j->ij', sigma, std_vec) + np.expand_dims(mu, 1)
    # Logistic function written as exp(-log(1 + exp(-z))).  The direct form
    # exp(z) / (1 + exp(z)) evaluates to inf / inf = nan once exp(z) overflows.
    p = np.exp(-np.logaddexp(0, -z))
    # weights[i, j] = std_vec[j] * x_mat[i, a] * x_mat[i, b], same shape as p.
    weights = np.einsum('j,i->ij', std_vec, x_outer[:, a, b])
    # Equivalent to np.sum(p * weights).
    return WeightedPSum(beta, beta_cov, p, x_mat, std_vec, weights)

def LogitTerm_grad_beta_cov(beta, beta_cov, x_mat, std_vec):
    """Assemble the K x K gradient with respect to beta_cov entry by entry.

    The outer index of the result runs over ``b`` and the inner index over
    ``a``, i.e. ``result[b][a]`` holds
    ``LogitTerm_grad_beta_cov_term(beta, beta_cov, x_mat, std_vec, a, b)``.
    """
    K = beta.size
    rows = []
    for b in range(K):
        row = []
        for a in range(K):
            row.append(
                LogitTerm_grad_beta_cov_term(beta, beta_cov, x_mat, std_vec, a, b))
        rows.append(row)
    return np.array(rows)

def LogitTerm_vjp_beta_cov(g, ans, vs, gvs, beta, beta_cov, x_mat, std_vec):
    """Vector-Jacobian product of ``LogitTerm`` for its second argument (beta_cov).

    Scales the hand-written beta_cov gradient by ``g``; ``ans``, ``vs`` and
    ``gvs`` are part of the callback signature but are not used here.
    """
    beta_cov_gradient = LogitTerm_grad_beta_cov(beta, beta_cov, x_mat, std_vec)
    return g * beta_cov_gradient
# Register the callback above as the VJP for argument 1 of LogitTerm.
LogitTerm.defvjp(LogitTerm_vjp_beta_cov, argnum=1)


# Define Hessians



# Wrapping functions
def UnWrap(par_vec, K):
    """Split a flat parameter vector into ``(beta, beta_cov)``.

    The first ``K`` entries of ``par_vec`` become ``beta``; everything after
    them is reshaped into the ``K x K`` matrix ``beta_cov``.
    """
    mean_part, cov_part = par_vec[:K], par_vec[K:]
    return mean_part, cov_part.reshape(K, K)

def Wrap(beta, beta_cov):
    """Pack ``beta`` and ``beta_cov`` into one flat parameter vector.

    The result is ``beta`` followed by the entries of ``beta_cov`` as
    returned by ``beta_cov.ravel()``, which is the layout ``UnWrap`` splits
    apart again.
    """
    return np.concatenate((beta, beta_cov.ravel()))

def LogitTermWrap(par_vec, x_mat, std_vec):
    """Evaluate ``LogitTerm`` from a flat parameter vector.

    Args:
        par_vec: flat vector holding beta followed by the flattened
            beta_cov (the layout that ``UnWrap`` splits).
        x_mat: N x K matrix.
        std_vec: length-J vector.

    Returns:
        ``LogitTerm(beta, beta_cov, x_mat, std_vec)`` for the unpacked
        parameters.
    """
    # beta is contracted against the columns of x_mat, so the number of
    # columns fixes K; this replaces a previously hard-coded K = 2.
    K = np.shape(x_mat)[1]
    beta, beta_cov = UnWrap(par_vec, K)
    return LogitTerm(beta, beta_cov, x_mat, std_vec)

def LogitTermWrapAD(par_vec, x_mat, std_vec):
    """Evaluate ``LogitTermAD`` from a flat parameter vector.

    Args:
        par_vec: flat vector holding beta followed by the flattened
            beta_cov (the layout that ``UnWrap`` splits).
        x_mat: N x K matrix.
        std_vec: length-J vector.

    Returns:
        ``LogitTermAD(beta, beta_cov, x_mat, std_vec)`` for the unpacked
        parameters.
    """
    # beta is contracted against the columns of x_mat, so the number of
    # columns fixes K; this replaces a previously hard-coded K = 2.
    K = np.shape(x_mat)[1]
    beta, beta_cov = UnWrap(par_vec, K)
    return LogitTermAD(beta, beta_cov, x_mat, std_vec)


In [27]:
# Seed the RNG so the random design matrix, and therefore the printed check,
# is the same on every run.
np.random.seed(42)

N = 10
beta = np.array([1., -0.5])
K = beta.size
beta_cov = np.full((K, K), 0.1) + np.eye(K)
# Size the design matrix from N and K instead of a hard-coded 10 * 2, so
# changing N above cannot break the reshape.
x_mat = np.random.rand(N, K)
std_vec = np.array([-0.8, -0.3, 0.3, 0.8])

# Smoke test: the primitive evaluates without raising.
LogitTerm(beta, beta_cov, x_mat, std_vec)

# Smoke test: packing and unpacking the parameters runs without raising.
par_vec = Wrap(beta, beta_cov)
UnWrap(par_vec, K)

# Compare the gradient autograd derives by tracing LogitTermAD with the
# gradient assembled from the hand-written VJPs registered on LogitTerm.
LogitTermWrapADGrad = grad(LogitTermWrapAD, argnum=0)
LogitTermWrapGrad = grad(LogitTermWrap, argnum=0)
print(np.max(np.abs(LogitTermWrapADGrad(par_vec, x_mat, std_vec) -
                    LogitTermWrapGrad(par_vec, x_mat, std_vec))))

# Hessian comparison, left disabled as in the original cell.  The second
# operand is the custom Hessian (the original compared the AD Hessian with
# itself, which is always zero).
# LogitTermWrapADHess = hessian(LogitTermWrapAD, argnum=0)
# LogitTermWrapHess = hessian(LogitTermWrap, argnum=0)
# print(np.max(np.abs(LogitTermWrapADHess(par_vec, x_mat, std_vec) -
#                     LogitTermWrapHess(par_vec, x_mat, std_vec))))


10.0964436978


[0, 10, 20, 1, 11, 21]