In [24]:
import autograd.numpy as np
import autograd.scipy as sp
import autograd
from autograd.core import primitive
import copy
import time

from collections import OrderedDict

from VariationalBayes import Parameters
from VariationalBayes import \
    ScalarParam, VectorParam, ArrayParam, \
    PosDefMatrixParam, PosDefMatrixParamVector, SimplexParam
from VariationalBayes.ParameterDictionary import ModelParamsDict
import scipy as osp
from scipy.sparse import csr_matrix
from scipy import sparse

In [15]:
k = 2

# Test matrix: 0.2 in every entry plus the identity on the diagonal.
mat = np.full((k, k), 0.2) + np.eye(k)

vp_array = ArrayParam('array', shape=(4, 5, 7))
vp_mat = PosDefMatrixParam('mat', k, val=mat)
vp_simplex = SimplexParam('simplex', shape=(5, 3))

# Collect the parameters in one dictionary.  The push order is kept as
# mat, simplex, array.
mp = ModelParamsDict()
for vp in (vp_mat, vp_simplex, vp_array):
    mp.push_param(vp)

def model(mp):
    """Toy objective: the product of the squared sums of three parameters.

    Args:
        mp: A mapping whose 'mat', 'array' and 'simplex' entries each expose
            a ``get()`` method returning an array-like value.

    Returns:
        ``sum(mat)**2 * sum(array)**2 * sum(simplex)**2``.
    """
    squared_sums = [
        np.sum(mp[name].get()) ** 2
        for name in ('mat', 'array', 'simplex')
    ]
    return squared_sums[0] * squared_sums[1] * squared_sums[2]

def model_wrap_free(free_param, mp):
    """Evaluate the model as a function of the free parameter vector.

    Sets ``mp`` from ``free_param``, reads back the vector representation,
    and delegates to ``model_wrap_vec``.

    Args:
        free_param: Value passed to ``mp.set_free``.
        mp: The parameter container; it is modified in place.

    Returns:
        Whatever ``model_wrap_vec`` returns for the resulting vector.
    """
    mp.set_free(free_param)
    vec_param = mp.get_vector()
    return model_wrap_vec(vec_param, mp)

def model_wrap_vec(vec_param, mp):
    """Evaluate the model as a function of the vector parameterization.

    Args:
        vec_param: Value passed to ``mp.set_vector``.
        mp: The parameter container; it is modified in place.

    Returns:
        The value of ``model(mp)`` after the update.
    """
    mp.set_vector(vec_param)
    objective = model(mp)
    return objective

# Seed the global RNG so the random free vector, and therefore the printed
# objective values, are the same on every Restart & Run All.
np.random.seed(42)

free_vec = np.random.random(mp.free_size())
mp.set_free(free_vec)
mp_vec = mp.get_vector()

# Evaluate the same point through both parameterizations.
free_objective = model_wrap_free(free_vec, mp)
vec_objective = model_wrap_vec(mp_vec, mp)
print(free_objective)
print(vec_objective)

# The two wrappers describe the same parameters, so they must agree.
assert np.allclose(free_objective, vec_objective), \
    'free and vector parameterizations give different objective values'





3123354.63772
3123354.63772


In [31]:
# Two small 4x4 sparse matrices given as (values, (row indices, column indices)).
foo = csr_matrix(([2., 3.], ([1, 1], [2, 3])), shape=(4, 4))
bar = csr_matrix(([4., 4.], ([0, 3], [1, 2])), shape=(4, 4))
print(foo)
print(foo.toarray())

# Shift foo down and right by two by putting an empty 2x2 block ahead of it
# on the block diagonal.
empty_block = csr_matrix((2, 2))
foo_offset = sparse.block_diag((empty_block, foo))
print(foo_offset.toarray())
print(foo_offset)


  (1, 2)	2.0
  (1, 3)	3.0
[[ 0.  0.  0.  0.]
 [ 0.  0.  2.  3.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]]
[[ 0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  2.  3.]
 [ 0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.]]
  (3, 4)	2.0
  (3, 5)	3.0


I think we need two steps.  Let $L$ be the objective and $f$ be the constraining function that maps the free (unconstrained) parameters $z$ to the constrained parameters $\theta$, so that 

$$
\begin{aligned}
\theta &= f(z) \\
L(\theta) &= L(f(z))
\end{aligned}
$$

We need

$$
\frac{dL}{dz^T} = \frac{dL}{d\theta^T} \frac{d\theta}{dz^T} = \frac{dL}{d\theta^T} \frac{df}{dz^T}
$$

and, using Einstein summation notation,

$$
\frac{d^2 L}{dz_i dz_j} =
    \frac{d^2 L}{d\theta_a d\theta_b} \frac{d\theta_a}{dz_i} \frac{d\theta_b}{dz_j} +
    \frac{d L}{d\theta_a} \frac{d^2 \theta_a}{dz_i dz_j}
$$

The term $\frac{d^2 L}{d\theta_a d\theta_b}$ can be expressed using a combination of our `get_vector()` functions and a sparse matrix for the local variables.  $\frac{d\theta_a}{dz_i}$ can also be represented as a sparse matrix.  It may be best to store the term $\frac{d^2 \theta_a}{dz_i dz_j}$ in `(value, a, i, j)` format, and write a custom aggregator that returns a sparse matrix when it is contracted with $\frac{d L}{d\theta_a}$ over the index $a$, since a general-purpose multi-dimensional sparse array may not be efficient [(discussion)](https://stackoverflow.com/questions/29871669/python-multi-dimensional-sparse-array).


In [10]:
# For an ArrayParam: compare the parameter's own sparse Jacobian and Hessians
# of free_to_vector against the dense ones computed by autograd.
s = ArrayParam(name='scalar', shape=(5, 3), lb=0.0)
s.set(np.exp(np.random.random(s.shape())))

set_free_and_get_jacobian = autograd.jacobian(s.free_to_vector)
set_free_and_get_hessian = autograd.hessian(s.free_to_vector)

# Every derivative is evaluated at the current free value.
array_free = s.get_free()

target_jac = set_free_and_get_jacobian(array_free)
target_hess = set_free_and_get_hessian(array_free)

sparse_jac = s.free_to_vector_jac(array_free)
sparse_hess = s.free_to_vector_hess(array_free)

# One Hessian per vector entry; compare each against the matching dense slice.
hess_errors = [
    sparse_hess[ind].toarray() - target_hess[ind]
    for ind in range(s.vector_size())
]

print(np.max(np.abs(sparse_jac - target_jac)))
print(np.max(np.abs(hess_errors)))

0.0
0.0


In [None]:
# For a SimplexParam: time the dense autograd Jacobian and Hessian of the
# free -> vector map.  These serve as the reference values and the timing
# baseline for the sparse constructions.

from VariationalBayes.MultinomialParams import SimplexParam
s = SimplexParam(name='simplex', shape=(10, 3))
s_val = np.random.random(s.shape())
# Normalize each row so that it sums to one.
s_val = s_val / np.expand_dims(np.sum(s_val, 1), axis=1)
s.set(s_val)

free_val = s.get_free()

# Rebuild the derivative functions for *this* parameter.  The earlier ones
# were bound to the ArrayParam's free_to_vector and take a single argument,
# so calling them as (s, free_val) would not differentiate the simplex map.
# NOTE(review): assumes SimplexParam exposes free_to_vector the same way
# ArrayParam does -- confirm.
set_free_and_get_jacobian = autograd.jacobian(s.free_to_vector)
set_free_and_get_hessian = autograd.hessian(s.free_to_vector)

jac_time = time.time()
target_jac = set_free_and_get_jacobian(free_val)
jac_time = time.time() - jac_time
print(jac_time)

# Evidently the free params are in the columns and the vector params are in the rows.
# Printed after target_jac is recomputed so the shape is the simplex Jacobian's,
# not a stale value left over from an earlier cell.
print(target_jac.shape)

hess_time = time.time()
target_hess = set_free_and_get_hessian(free_val)
hess_time = time.time() - hess_time
print(hess_time)


In [None]:
import time

def constrain_simplex_vector(free_vec):
    """Map an unconstrained vector to a vector of positive entries summing to one.

    A zero is prepended to ``free_vec`` as the reference value, and the
    augmented vector is exponentiated after subtracting its log-sum-exp, so
    the result has one more entry than ``free_vec``.

    Args:
        free_vec: One-dimensional array-like of free values.

    Returns:
        The normalized exponentials of ``[0, free_vec...]``.
    """
    # The leading zero is the reference value.
    augmented = np.hstack([[0.], free_vec])
    return np.exp(augmented - sp.misc.logsumexp(augmented))

# Autograd Jacobian and Hessian of the simplex constraint.
constrain_grad = autograd.jacobian(constrain_simplex_vector)
constrain_hess = autograd.hessian(constrain_simplex_vector)

# Smoke-test the constraint and its Jacobian on free_vec; the results are
# discarded.
constrain_simplex_vector(free_vec)
constrain_grad(free_vec)

# Only this last expression is displayed by the cell.
constrain_hess(free_vec).shape


In [None]:
# Assemble the sparse Jacobian of the simplex constraint row by row and time it.
sparse_jac_time = time.time()

jac_rows = []
jac_cols = []
grads = []
free_cols = range(s.free_shape()[1])
vec_cols = range(s.shape()[1])
for row in range(s.shape()[0]):
    # Each of the output depends only on one row of the input.
    free_inds = np.ravel_multi_index([[row], free_cols], s.free_shape())
    vec_inds = np.ravel_multi_index([[row], vec_cols], s.shape())
    row_jac = constrain_grad(free_val[free_inds])
    # Scatter this row's dense block into (row, col, value) triplets.
    for vec_col in vec_cols:
        for free_col in free_cols:
            jac_rows.append(vec_inds[vec_col])
            jac_cols.append(free_inds[free_col])
            grads.append(row_jac[vec_col, free_col])

jac_sparse = csr_matrix((grads, (jac_rows, jac_cols)), (s.vector_size(), s.free_size()))

# Stop the clock before verifying, so the timing covers only the sparse
# construction (the sparse Hessian timing is measured the same way).
sparse_jac_time = time.time() - sparse_jac_time

# Maximum absolute difference from the dense autograd Jacobian.
print(np.max(np.abs(jac_sparse - target_jac)))


In [None]:
# Assemble one sparse Hessian per vector entry of the simplex constraint and
# time the construction.
sparse_hess_time = time.time()

free_cols = range(s.free_shape()[1])
vec_cols = range(s.shape()[1])
hess_shape = (s.free_size(), s.free_size())
hesses = []

for row in range(s.shape()[0]):
    # Each of the output depends only on one row of the input.
    free_inds = np.ravel_multi_index([[row], free_cols], s.free_shape())
    vec_inds = np.ravel_multi_index([[row], vec_cols], s.shape())
    row_hess = constrain_hess(free_val[free_inds])

    # All ordered pairs of this row's free columns.  The sparsity pattern is
    # shared by every vector entry in the row.
    col_pairs = [(col1, col2) for col1 in free_cols for col2 in free_cols]
    hess_rows = [free_inds[col1] for col1, _ in col_pairs]
    hess_cols = [free_inds[col2] for _, col2 in col_pairs]

    # Matrices are appended row by row, then column by column, which is the
    # order in which the check at the end indexes them.
    for vec_col in vec_cols:
        hess_vals = [row_hess[vec_col, col1, col2] for col1, col2 in col_pairs]
        hesses.append(csr_matrix((hess_vals, (hess_rows, hess_cols)), hess_shape))

sparse_hess_time = time.time() - sparse_hess_time

# Maximum absolute difference from the dense autograd Hessian.
hess_errors = [
    hesses[ind].toarray() - target_hess[ind]
    for ind in range(s.vector_size())
]
print(np.max(np.abs(hess_errors)))

In [None]:
# Report sparse vs. dense timings: Hessian first, then Jacobian.
separator = '-----------'
timing_pairs = (
    (sparse_hess_time, hess_time),
    (sparse_jac_time, jac_time),
)
for sparse_seconds, dense_seconds in timing_pairs:
    print(separator)
    print(sparse_seconds)
    print(dense_seconds)
