# Sensitivity Analysis.

In [1]:
import paragami

import autograd
from autograd import numpy as np
import copy

# Use the original scipy for functions we don't need to differentiate.
import scipy as osp

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Fix the seed so the simulated data, and every output below, is reproducible.
np.random.seed(42)

num_obs = 500
data_dim = 3

# True values of parameters.
# The covariance is diag(0, 1, ..., data_dim - 1) plus a small random
# perturbation, symmetrized on the next line.  Every shape is derived from
# data_dim (no literal 3) so the cell keeps working if data_dim is changed.
true_sigma = \
    np.eye(data_dim) * np.diag(np.arange(0, data_dim)) + \
    np.random.random((data_dim, data_dim)) * 0.1
true_sigma = 0.5 * (true_sigma + true_sigma.T)
true_mu = np.arange(0, data_dim)

true_norm_param_dict = dict()
true_norm_param_dict['mu'] = true_mu
true_norm_param_dict['sigma'] = true_sigma

# Data: num_obs draws from the multivariate normal defined above.
data = np.random.multivariate_normal(
    mean=true_norm_param_dict['mu'],
    cov=true_norm_param_dict['sigma'],
    size=(num_obs, ))

In [3]:
def get_mvn_log_probs(obs, mean, cov):
    """Log density of each row of ``obs`` under N(mean, cov), up to a constant.

    The additive constant ``-0.5 * k * log(2 * pi)`` is omitted; it does not
    depend on ``mean`` or ``cov``.

    Parameters
    ----------
    obs : array, shape (n, k)
        One observation per row.
    mean : array, shape (k,)
        Mean vector, broadcast against every row of ``obs``.
    cov : array, shape (k, k)
        Covariance matrix.

    Returns
    -------
    array, shape (n,)
        ``-0.5 * ((x - mean)^T cov^{-1} (x - mean) + log det(cov))`` per row.
        If the determinant of ``cov`` is not strictly positive, every entry
        is ``-inf`` (zero likelihood), so a loss built from the negative log
        probability is ``+inf``.
    """
    cov_det_sign, cov_log_det = np.linalg.slogdet(cov)
    if cov_det_sign <= 0:
        # Check the determinant *before* inverting: a singular covariance
        # would otherwise raise inside np.linalg.inv and never reach here.
        # np.full takes (shape, fill_value), in that order.
        return np.full(obs.shape[0], -float('inf'))

    cov_inv = np.linalg.inv(cov)
    obs_centered = obs - np.expand_dims(mean, axis=0)
    # Quadratic form (x - mean)^T cov^{-1} (x - mean) for every row at once.
    return -0.5 * (
        np.einsum('ni,ij,nj->n', obs_centered, cov_inv, obs_centered) +
        cov_log_det)

def get_data_lp(data, norm_param_dict, weights):
    """Weighted sum of the per-observation log probabilities of ``data``.

    ``norm_param_dict`` supplies the mean under ``'mu'`` and the covariance
    under ``'sigma'``; ``weights`` multiplies the per-observation log
    probabilities elementwise before they are summed to a scalar.
    """
    per_obs_lp = get_mvn_log_probs(
        data, mean=norm_param_dict['mu'], cov=norm_param_dict['sigma'])
    return np.sum(weights * per_obs_lp)

def get_prior_lp(norm_param_dict, prior_param_dict):
    """Log prior of the mean ``norm_param_dict['mu']``, returned as a scalar.

    The prior is a normal centered at ``prior_param_dict['prior_mean']`` with
    covariance ``prior_sd ** 2`` times the identity.  Only ``'mu'`` is read
    from ``norm_param_dict``.
    """
    prior_mean = prior_param_dict['prior_mean']
    prior_var = prior_param_dict['prior_sd'] ** 2
    prior_cov = np.eye(len(prior_mean)) * prior_var

    # Treat mu as a single observation (one row) drawn from the prior.
    mu_as_row = np.expand_dims(norm_param_dict['mu'], axis=0)
    prior_lp = get_mvn_log_probs(obs=mu_as_row, mean=prior_mean, cov=prior_cov)

    # Sum so as to return a scalar.
    return np.sum(prior_lp)

def get_loss(data, norm_param_dict, prior_param_dict, weights):
    """Negative of (log prior + weighted data log probability)."""
    prior_lp = get_prior_lp(norm_param_dict, prior_param_dict)
    data_lp = get_data_lp(data, norm_param_dict, weights)
    return -1 * (prior_lp + data_lp)
    
class NormalModel():
    """Normal model holding data, prior hyperparameters and data weights.

    The three ``get_loss_*`` methods all evaluate the same loss; they differ
    only in which inputs are passed explicitly and which are read from the
    instance.
    """

    def __init__(self, data):
        """Store ``data`` (observations along axis 0) and set defaults."""
        self.data = data
        self.num_obs, self.data_dim = self.data.shape[0], self.data.shape[1]

        # Reasonable defaults for the priors and weights: a zero prior mean
        # with prior sd 10, and unit weight on every observation.
        self.set_prior(np.zeros(self.data_dim), 10)
        self.set_weights(np.ones(self.num_obs))

    def set_weights(self, weights):
        """Replace the per-observation weights."""
        self.weights = weights

    def set_prior(self, prior_mean, prior_sd):
        """Replace the prior hyperparameters with a freshly built dict."""
        self.prior_dict = {
            'prior_mean': prior_mean,
            'prior_sd': prior_sd,
        }

    def get_loss_for_opt(self, norm_param_dict):
        """Loss as a function of the model parameters only."""
        return get_loss(
            self.data, norm_param_dict, self.prior_dict, self.weights)

    def get_loss_by_prior(self, norm_param_dict, prior_dict):
        """Loss with the prior passed explicitly; weights come from the instance."""
        return get_loss(
            self.data, norm_param_dict, prior_dict, self.weights)

    def get_loss_by_weights(self, norm_param_dict, weights):
        """Loss with the weights passed explicitly; prior comes from the instance."""
        return get_loss(
            self.data, norm_param_dict, self.prior_dict, weights)

    
# Build the model and snapshot its default prior and weights so that later
# cells can restore them after experimenting.
model = NormalModel(data)
orig_prior_dict, orig_weights = (
    copy.deepcopy(model.prior_dict),
    copy.deepcopy(model.weights))

# Sanity check: the loss should be finite at the data-generating parameters.
print('Loss at true parameter: {}'.format(
    model.get_loss_for_opt(true_norm_param_dict)))

Loss at true parameter: 69.74233683848566


Define the paragami patterns that describe the model parameters, the prior hyperparameters, and the data weights.

In [4]:
# Pattern for the parameters being optimized: a length-data_dim array 'mu'
# and a data_dim x data_dim matrix 'sigma' described by
# PSDSymmetricMatrixPattern.
norm_pattern = paragami.PatternDict()
norm_pattern['mu'] = paragami.NumericArrayPattern(shape=(data_dim, ))
norm_pattern['sigma'] = paragami.PSDSymmetricMatrixPattern(size=data_dim)

# Pattern for the prior hyperparameters.  The keys match those of
# NormalModel.prior_dict; 'prior_sd' is a length-1 array with lb=0.0
# (presumably a lower bound of zero -- confirm against paragami).
prior_pattern = paragami.PatternDict()
prior_pattern['prior_mean'] = paragami.NumericArrayPattern(shape=(data_dim, ))
prior_pattern['prior_sd'] = paragami.NumericArrayPattern(shape=(1, ), lb=0.0)

# Pattern for the data weights: one entry per observation.
weight_pattern = paragami.NumericArrayPattern(shape=(num_obs, ))

In [5]:
# Optimize.
# Wrap the model loss with norm_pattern and free=True so that it can be
# called on a flat parameter vector (presumably the unconstrained "free"
# parameterization -- confirm against paragami's FlattenedFunction).
opt_fun = paragami.FlattenedFunction(
    original_fun=model.get_loss_for_opt,
    patterns=norm_pattern,
    free=True)
# Gradient and Hessian of the flattened loss, obtained by automatic
# differentiation.
opt_fun_grad = autograd.grad(opt_fun)
opt_fun_hessian = autograd.hessian(opt_fun)

def get_optimum(init_param):
    """Minimize ``opt_fun`` with the 'trust-ncg' method, starting at ``init_param``.

    Uses the autograd gradient and Hessian of ``opt_fun`` and returns the
    result object produced by ``scipy.optimize.minimize``.  ``opt_fun`` reads
    the prior and weights currently set on ``model``, so the optimum reflects
    the model state at call time.
    """
    # A very tight gradient tolerance; progress is printed via 'disp'.
    solver_options = {'gtol': 1e-16, 'disp': True}
    return osp.optimize.minimize(
        opt_fun,
        init_param,
        method='trust-ncg',
        jac=opt_fun_grad,
        hess=opt_fun_hessian,
        options=solver_options)

# Initialize with zeros.
# The starting point is a zero vector whose length is given by
# norm_pattern.flat_length(free=True).
init_param = np.zeros(norm_pattern.flat_length(free=True))
# Note: despite the name, the loss being minimized includes the log prior
# (see get_loss), so this is a MAP estimate rather than a pure MLE.
mle_opt = get_optimum(init_param)

         Current function value: 65.200672
         Iterations: 18
         Function evaluations: 20
         Gradient evaluations: 17
         Hessian evaluations: 17


In [6]:
# Fold the flat optimum back into a dict with 'mu' and 'sigma', then print
# it next to the data-generating parameters for comparison.
opt_norm_param_dict = norm_pattern.fold(mle_opt.x, free=True)
print(opt_norm_param_dict)
print(true_norm_param_dict)

OrderedDict([('mu', array([0.00837093, 0.95353189, 1.89852799])), ('sigma', array([[0.03865698, 0.08549516, 0.03813797],
       [0.08549516, 1.05532368, 0.09781193],
       [0.03813797, 0.09781193, 1.91488823]]))])
{'mu': array([0, 1, 2]), 'sigma': array([[0.03745401, 0.07746864, 0.03950388],
       [0.07746864, 1.01560186, 0.05110853],
       [0.03950388, 0.05110853, 2.0601115 ]])}


In [7]:
# Restore the original prior and weights before building the sensitivity
# object.  get_loss_by_prior reads the weights from the model, so they must
# be the ones under which opt_norm_param_dict was computed.
model.prior_dict = orig_prior_dict
model.weights = orig_weights
# Linear approximation of the optimal parameters as a function of the prior
# hyperparameters, built at the optimum found above and the original prior.
# Both the optimization parameters and the hyperparameters are flagged as
# free (opt_par_is_free=True, hyper_par_is_free=True).
prior_sens = \
    paragami.HyperparameterSensitivityLinearApproximation(
        objective_fun=model.get_loss_by_prior,
        opt_par_pattern=norm_pattern,
        hyper_par_pattern=prior_pattern,
        opt_par_folded_value=opt_norm_param_dict,
        hyper_par_folded_value=orig_prior_dict,
        validate_optimum=False,
        opt_par_is_free=True,
        hyper_par_is_free=True)

In [8]:
# Helper for comparing two sets of model parameters entry by entry.
def get_norm_param_diff(par1, par2):
    """Return ``par1 - par2`` folded back into the shape of ``norm_pattern``.

    Both inputs are flattened with ``free=False``, subtracted, and the
    difference is folded again.  Validation is switched off in all three
    calls, so the result is not checked against the pattern.
    """
    flat_par1 = norm_pattern.flatten(par1, free=False, validate=False)
    flat_par2 = norm_pattern.flatten(par2, free=False, validate=False)
    return norm_pattern.fold(
        flat_par1 - flat_par2, free=False, validate=False)

In [13]:
# Change the prior.
new_prior_dict = copy.deepcopy(orig_prior_dict)
change_mean = False

# Note: the linear approximation is poor when the prior sd is changed; the
# predicted changes in mu printed below are roughly 20x smaller than the
# re-optimized ones.  This looks like genuine nonlinearity rather than a bug:
# the prior enters the loss through the precision 1 / prior_sd ** 2 (see
# get_prior_lp), and shrinking prior_sd by 10x multiplies that precision by
# 100.  NOTE(review): assuming the free parameterization of prior_sd is
# log(prior_sd) -- confirm against paragami -- a first-order expansion
# captures only about 1 / 21.5 of the true change in precision
# (2 * log(10) / 99), which is close to the observed ratio.
change_cov = True
if change_mean:
    new_prior_dict['prior_mean'] = orig_prior_dict['prior_mean'] + 100
if change_cov:
    new_prior_dict['prior_sd'] = 0.1 * orig_prior_dict['prior_sd']

# Make sure we use the original weights.
model.weights = orig_weights

# Get the linear prediction at the new prior.
pred_norm_param_dict = \
    prior_sens.predict_opt_par_from_hyper_par(new_prior_dict)

# Re-optimize to check the prior sensitivity.
# The optimizer reads the prior from the model, so install the new prior
# first and warm-start from the original optimum.
model.prior_dict = new_prior_dict
new_opt_par = get_optimum(mle_opt.x)
new_norm_param_dict = norm_pattern.fold(new_opt_par.x, free=True)


         Current function value: 60.519174
         Iterations: 5
         Function evaluations: 7
         Gradient evaluations: 6
         Hessian evaluations: 6


In [14]:
# Look at the differences.
# Both differences are taken relative to the optimum at the original prior:
# pred_diff uses the linear prediction, true_diff uses the re-optimized value.
pred_diff = get_norm_param_diff(
    pred_norm_param_dict, opt_norm_param_dict)
true_diff = get_norm_param_diff(
    new_norm_param_dict, opt_norm_param_dict)
for param in ['mu', 'sigma']:
    print('Predicted {} differences:\n{}'.format(param, pred_diff[param]))
    print('Actual {} differences:\n{}'.format(param, true_diff[param]))


Predicted mu differences:
[-1.42066697e-05 -1.09848638e-04 -3.43445277e-04]
Actual mu differences:
[-0.00030444 -0.00235516 -0.00735512]
Predicted sigma differences:
[[8.76560699e-11 6.77772394e-10 2.11908112e-09]
 [6.77772394e-10 5.24065769e-09 1.63851163e-08]
 [2.11908112e-09 1.63851163e-08 5.12286895e-08]]
Actual sigma differences:
[[9.45669713e-08 7.31584245e-07 2.28446229e-06]
 [7.31584245e-07 5.65931792e-06 1.76737509e-05]
 [2.28446229e-06 1.76737509e-05 5.51946612e-05]]


In [11]:
# Sensitivity to data weights.
# Restore the original prior and weights first: get_loss_by_weights reads
# the prior from the model, and an earlier cell may have replaced it.
model.prior_dict = orig_prior_dict
model.weights = orig_weights
# Linear approximation of the optimal parameters as a function of the
# per-observation weights, built at the original optimum and the original
# weights.  Unlike the prior case, the hyperparameter is flagged as not free
# (hyper_par_is_free=False).
weight_sens = \
    paragami.HyperparameterSensitivityLinearApproximation(
        objective_fun=model.get_loss_by_weights,
        opt_par_pattern=norm_pattern,
        hyper_par_pattern=weight_pattern,
        opt_par_folded_value=opt_norm_param_dict,
        hyper_par_folded_value=orig_weights,
        validate_optimum=False,
        opt_par_is_free=True,
        hyper_par_is_free=False)


In [16]:
# Change the weights.
# Set the weight of a few observations to zero; all others keep weight one.
obs = [ 15, 20, 50 ]
new_weights = np.ones(num_obs)
new_weights[obs] = 0.0

# Make sure we use the original prior.
model.prior_dict = orig_prior_dict

# Get the linear prediction at the new weights.
pred_norm_param_dict = \
    weight_sens.predict_opt_par_from_hyper_par(new_weights)

# Re-optimize to check the weight sensitivity.
# The optimizer reads the weights from the model, so install the new weights
# first and warm-start from the original optimum.
model.weights = new_weights
new_opt_par = get_optimum(mle_opt.x)
new_norm_param_dict = norm_pattern.fold(new_opt_par.x, free=True)

# Look at the differences.
# Both differences are taken relative to the optimum at the original weights.
pred_diff = get_norm_param_diff(pred_norm_param_dict, opt_norm_param_dict)
true_diff = get_norm_param_diff(new_norm_param_dict, opt_norm_param_dict)
for param in ['mu', 'sigma']:
    print('Predicted {} differences:\n{}'.format(param, pred_diff[param]))
    print('Actual {} differences:\n{}'.format(param, true_diff[param]))

         Current function value: 61.428577
         Iterations: 7
         Function evaluations: 9
         Gradient evaluations: 8
         Hessian evaluations: 8
Predicted mu differences:
[-0.00097272  0.0014095  -0.00779786]
Actual mu differences:
[-0.00097859  0.00141801 -0.00784493]
Predicted sigma differences:
[[-0.00050164  0.00077615 -0.00224773]
 [ 0.00077615  0.00481816  0.00155291]
 [-0.00224773  0.00155291 -0.00174182]]
Actual sigma differences:
[[-0.00050893  0.00078918 -0.00228297]
 [ 0.00078918  0.00479762  0.00164688]
 [-0.00228297  0.00164688 -0.00194664]]
