In [1]:
import LinearResponseVariationalBayes as vb

import matplotlib.pyplot as plt
%matplotlib inline

import autograd
import autograd.numpy as np

from copy import deepcopy
import scipy
from scipy import optimize

Define a model with observations $y_n$ and $x_n$, for $n = 1,...,N$, and matrix parameters $\beta$ and $\Lambda$, where

$$
y_n \vert x_n, \beta, \Lambda \sim \mathcal{N} \left(\beta x_n, \Lambda^{-1} \right)
$$

For fun, let's assume that all the entries of $\beta$ are constrained to be positive. We will compute the maximum likelihood estimate for $\beta$ and $\Lambda$.


In [2]:
# Define a multivariate normal model with a constrained mean.
x_dim = 2
n_obs = 10000

true_beta = np.exp(np.random.random((x_dim, x_dim)))
true_lambda = np.eye(x_dim) + np.full((x_dim, x_dim), 0.5)
true_cov = np.linalg.inv(true_lambda)

x = np.random.random((n_obs, x_dim))
true_mean = np.matmul(x, true_beta)
y = np.array([ np.random.multivariate_normal(true_mean[n], true_cov) for n in range(n_obs) ])

We'll define the model as a function of parameters.  A ```Parameter``` object called, say, ```par```, needs to be able to do a few things:

Read and set in its standard form:
* ```par.get()```
* ```par.set(value)```

These methods return or set a parameter value that you can use in a model.  For example, if the parameter is a matrix, ```get()``` returns a positive definite matrice, and ```set()``` must be passed a positive definite matrix.

Convert to and from vectors:
* ```par.get_vector()```
* ```par.set_vector(vector_value)```
* ```par.vector_size()```

These methods unpack or pack the parameter into a one-dimensional numpy array without changing any of the values.  The length of this vector is given by ```vector_size()```.  If you run ```set_vector()```, the expectation is that you are setting the parameter to a legal value.

Convert to and from unconstrained parameterizations:
* ```par.get_free()```
* ```par.set_free(free_vector_value)```
* ```par.free_size()```

These methods also use 1d representations of the parameter, only they are expressed in an unconstrained space.  Up to numerical issues, you should be able run ```set_free(free_vector_value)``` with any ```free_vector_value``` that is the correct length.  The length of the free vector is given by ```free_size()```.  Note that ```free_size()``` and ```vector_size()``` may not be the same.  For example, a simplex parameter requires fewer free values than are contained in the vectorized representation of the simplex.

Importantly, all these methods must be differentiable with autograd.  See the test ```execute_required_methods``` for full details of what is required of a ```Parameter```.



In [3]:
# Define our parameters.

# By setting lb = 0., we make all the entries of beta positive.
beta = vb.ArrayParam(name='beta', shape=(x_dim, x_dim), lb=0.)
lamb = vb.PosDefMatrixParam('lambda', size=x_dim)

print(beta)
print(lamb)

print('Unconstrained values:')
print('beta: ', beta.get_free())
print('lamb: ', lamb.get_free())

print('Vector values:')
print('beta: ', beta.get_vector())
print('lamb: ', lamb.get_vector())

beta:
[[ 1.  1.]
 [ 1.  1.]]
lambda:
[[ 1.  0.]
 [ 0.  1.]]
Unconstrained values:
beta:  [ 0.  0.  0.  0.]
lamb:  [ 0.  0.  0.]
Vector values:
beta:  [ 1.  1.  1.  1.]
lamb:  [ 1.  0.  1.]


Parameters can be combined into parameter dictionaries using the ```ModelParamsDict``` type.  Importantly, a ```ModelParamsDict``` is also a ```Parameter``` with all the methods described above.  By combining parameter into a dictionary, you can easily get or set an unconstrained or vectorized representation of a parameter set all at once. 

In [4]:
# Combine the parameters into a dictionary:

par = vb.ModelParamsDict('params')
par.push_param(beta)
par.push_param(lamb)

initial_free_par = deepcopy(par.get_free())

print(par)
print(par['beta'])
print(par['lambda'])

params:
	beta:
[[ 1.  1.]
 [ 1.  1.]]
	lambda:
[[ 1.  0.]
 [ 0.  1.]]
beta:
[[ 1.  1.]
 [ 1.  1.]]
lambda:
[[ 1.  0.]
 [ 0.  1.]]


In [5]:
# Generate some random parameters for demonstration purposes.
beta_free_param = np.random.random(beta.free_size())
lamb_free_param = np.random.random(lamb.free_size())

par['beta'].set_free(beta_free_param)
par['lambda'].set_free(lamb_free_param)

print(par)
par_free = par.get_free()

print('\nSet par back to initial values:')
par.set_free(initial_free_par)
print(par)

print('\nSet par using the combined free vector:')
par.set_free(par_free)
print(par)


params:
	beta:
[[ 1.99935453  1.25489014]
 [ 1.37444099  2.31354934]]
	lambda:
[[ 2.27497606  0.74887345]
 [ 0.74887345  2.73479722]]

Set par back to initial values:
params:
	beta:
[[ 1.  1.]
 [ 1.  1.]]
	lambda:
[[ 1.  0.]
 [ 0.  1.]]

Set par using the combined free vector:
params:
	beta:
[[ 1.99935453  1.25489014]
 [ 1.37444099  2.31354934]]
	lambda:
[[ 2.27497606  0.74887345]
 [ 0.74887345  2.73479722]]


You can build a model with parameters and data as attributes and a method that evaluates to an objective.

In [6]:
class Model(object):
    def __init__(self, y, x, par):
        # You may want to deepcopy the parameters to avoid confusing things happening,
        # especially with autograd.
        self.par = deepcopy(par)
        self.x = x
        self.y = y
        self.weights = np.full(x.shape[0], 1.0)
        
    def loglik(self):
        # Evaluate the objective at the current parameter value.
        beta = self.par['beta'].get()
        lamb = self.par['lambda'].get()

        y_centered = self.y - np.matmul(self.x, beta)
        y_term = -0.5 * np.einsum('ni,ij,nj,n', y_centered, lamb, y_centered, self.weights)
        
        s, logdet = np.linalg.slogdet(lamb)
        assert s > 0
        
        return y_term + 0.5 * np.sum(self.weights) * logdet
    
    def eval_objective(self, free_par):
        # scipy minimizes, so return the negative log likelihood.
        self.par.set_free(free_par)
        ll = self.loglik()
        return -1. * ll

The model's objective function can now be passed directly to autograd and optimization routines.

Some of this boilerplate is wrapper in the module ```SparseObjectives.py```, which also tries to accommodate hand-coded sparse Hessians, though that is still in development. 

In [7]:
model = Model(y, x, par)

# Now the objective is a function of the free parameter:
print(model.eval_objective(par_free - 1.0))
print(model.eval_objective(par_free))
print(model.eval_objective(par_free + 1.0))

# And it is differentiable.
eval_objective_grad = autograd.grad(model.eval_objective)
eval_objective_hess = autograd.hessian(model.eval_objective)
eval_objective_hvp = autograd.hessian_vector_product(model.eval_objective)

grad = eval_objective_grad(par_free)
hess = eval_objective_hess(par_free)
hvp = eval_objective_hvp(par_free, grad)

16721.6275444
11598.8477658
3071779.63632


In [8]:
# Drop into an optimization method

print('Running Newton Trust Region')
vb_opt = optimize.minimize(
    model.eval_objective,
    jac=eval_objective_grad,
    hessp=eval_objective_hvp,
    x0=initial_free_par,
    method='trust-ncg')

print('Done.')
opt_free_par = deepcopy(vb_opt.x)

Running Newton Trust Region
Done.


In [9]:
model.par.set_free(opt_free_par)

print('True beta:\n', true_beta)
print('Fit beta: ', model.par['beta'])

print('True lambda:\n', true_lambda)
print('Fit lambda: ', model.par['lambda'])

True beta:
 [[ 2.22639279  1.53736923]
 [ 1.22294226  1.22112609]]
Fit beta:  beta:
[[ 2.20862199  1.57028409]
 [ 1.23491386  1.21065658]]
True lambda:
 [[ 1.5  0.5]
 [ 0.5  1.5]]
Fit lambda:  lambda:
[[ 1.51052283  0.49646839]
 [ 0.49646839  1.53826346]]


By defining a vector of summarizing statistics (the things you want to know about the model), you can then do sensitivity calculations.

In [10]:
# Suppose we're interested in the sensitivity of beta to the weights.  Define a
# summary class that takes in the free parameters are returns the summary of
# interest.

class Summary(object):
    def __init__(self, par):
        self.par = deepcopy(par)
        self.summary_jacobian = autograd.jacobian(self.eval_summary)
        
    def summary(self):
        return self.par['beta'].get_vector()
    
    def eval_summary(self, par_free):
        self.par.set_free(par_free)
        return self.summary()

summary = Summary(par)
summary_jac = summary.summary_jacobian(opt_free_par)

objective_hess = eval_objective_hess(opt_free_par)

summary_sens_operator = -1. * np.linalg.solve(objective_hess, summary_jac.T)

In [11]:
# Define a new objective that's a function of the weights.
def objective_weights(weights, free_par, model):
    model.weights = weights
    return model.eval_objective(free_par)

# Get the cross Hessian between the weights and parameters by taking the
# Jacobian of a gradient.

# Autograd runs faster when you do the smaller dimension first.
eval_objective_par_grad = autograd.grad(objective_weights, argnum=1)
eval_objective_par_weight_hess = autograd.jacobian(eval_objective_par_grad, argnum=0)

initial_weights = np.full(x.shape[0], 1.)
par_weight_hess = eval_objective_par_weight_hess(initial_weights, opt_free_par, model)

# Weight sens now contains the sensitivity of beta to individual points' weights
# in its rows.
weight_sens = np.matmul(par_weight_hess.T, summary_sens_operator)

In [12]:
# Let's check the accuracy of weight_sens by manually removing a data point and re-fitting.

eval_objective_par_hvp = \
    autograd.hessian_vector_product(objective_weights, argnum=1)

# In order to avoid some lambda function weirdness, wrap the hessian vector product.
def eval_objective_par_hvp_wrapper(free_par, grad):
    return eval_objective_par_hvp(weights, free_par, model, grad)

# To verify this, perturb a weight and re-fit.
perturb_row = 25
weights = np.full(x.shape[0], 1.)
weights[perturb_row] = 0.
print('Running Newton Trust Region')
vb_opt_perturbed = optimize.minimize(
    lambda free_par: objective_weights(weights, free_par, model),
    jac=lambda free_par: eval_objective_par_grad(weights, free_par, model),
    hessp=eval_objective_par_hvp_wrapper,
    x0=opt_free_par,
    method='trust-ncg')
print('Done.')

opt_free_par_perturbed = deepcopy(vb_opt_perturbed.x)

Running Newton Trust Region
Done.


In [13]:
# The perturbation matches the prediction.

print('Preturbing row ', perturb_row)

print('Before:\t', summary.eval_summary(opt_free_par))
print('After:\t', summary.eval_summary(opt_free_par_perturbed))

print('Actual difference:\t',
      summary.eval_summary(opt_free_par) -
      summary.eval_summary(opt_free_par_perturbed))
print('Predicted difference:\t',
      weight_sens[perturb_row])

Preturbing row  25
Before:	 [ 2.20862199  1.57028409  1.23491386  1.21065658]
After:	 [ 2.20855098  1.57009578  1.23496579  1.21079429]
Actual difference:	 [  7.10120342e-05   1.88308808e-04  -5.19255622e-05  -1.37705753e-04]
Predicted difference:	 [  7.09962804e-05   1.88263485e-04  -5.19199793e-05  -1.37678146e-04]
