In [1]:
import autograd
import autograd.numpy as np
import autograd.scipy as sp
from autograd.test_util import check_grads

import paragami
import scipy as osp

import copy
import example_utils

import matplotlib.pyplot as plt
%matplotlib inline
def comparison_plot(x, y):
    """Plot ``y`` against ``x`` as red crosses on top of a black identity line.

    Points lying on the black line are those where ``y`` equals ``x``.
    Draws on the current matplotlib axes and returns nothing.
    """
    identity_style, point_style = 'k', 'r+'
    plt.plot(x, x, identity_style)
    plt.plot(x, y, point_style)

In [2]:
np.random.seed(42)

num_obs = 20

# Ground truth covariance: diag(1, 2, 3) plus a small uniform perturbation,
# symmetrized by averaging the matrix with its transpose.
true_sigma = np.diag(np.array([1, 2, 3])) + 0.1 * np.random.random((3, 3))
true_sigma = 0.5 * (true_sigma.T + true_sigma)

# Ground truth mean.
true_mu = np.array([0, 1, 2])

# Simulated observations, shape (num_obs, 3).
x = np.random.multivariate_normal(true_mu, true_sigma, num_obs)

# Baseline hyperparameters: unit observation weights and unit regularization.
original_weights = np.ones(num_obs)
original_lam = 1.0

In [3]:
def model_logpdf(norm_par, weights, x):
    """Weighted sum of multivariate normal log densities of the rows of ``x``.

    The density is written out with ``slogdet`` and ``solve`` rather than
    calling ``autograd.scipy.stats.multivariate_normal.logpdf``: the
    reverse-mode gradient check of the objective built on that call was
    recorded as failing in this notebook, and this form is differentiated
    through ordinary linear-algebra operations only.  The returned value is
    the same quantity, including the ``-d/2 * log(2 * pi)`` constant.
    NOTE(review): re-run ``check_grads`` to confirm that this resolves the
    mismatch; the regularizer is the other term in the objective.

    Parameters
    ----------
    norm_par : dict
        Must contain ``'mu'`` (mean vector of length ``d``) and ``'sigma'``
        (``d`` x ``d`` covariance matrix, assumed positive definite).
    weights : array
        One weight per observation; multiplied elementwise with the
        per-observation log densities.
    x : array
        Observations of shape ``(num_obs, d)``, or a single observation of
        shape ``(d,)``.

    Returns
    -------
    Scalar ``sum(weights * logpdf(x))``.
    """
    mu = norm_par['mu']
    sigma = norm_par['sigma']
    dim = np.shape(x)[-1]

    x_centered = x - mu
    # The sign is ignored: sigma is assumed positive definite.
    _, sigma_logdet = np.linalg.slogdet(sigma)
    # Solve sigma @ z = x_centered^T rather than forming the inverse.
    solved = np.linalg.solve(sigma, x_centered.T).T
    mahalanobis = np.sum(x_centered * solved, axis=-1)

    data_lpdf = -0.5 * (dim * np.log(2 * np.pi) + sigma_logdet + mahalanobis)
    return np.sum(weights * data_lpdf)

def objective_fun(norm_par, weights, lam, x):
    """Regularized objective: negative weighted log likelihood plus a penalty.

    ``norm_par``, ``weights`` and ``x`` are passed to ``model_logpdf``;
    ``norm_par`` and ``lam`` are passed to ``example_utils.regularizer``.
    """
    neg_log_lik = -model_logpdf(norm_par, weights, x)
    penalty = example_utils.regularizer(norm_par, lam)
    return neg_log_lik + penalty

# Parameter dictionary holding the true mean and covariance.
norm_par = {'mu': true_mu, 'sigma': true_sigma}

# Objective at the true parameters; displayed as the cell output.
objective_fun(norm_par, original_weights, original_lam, x)

110.0015452043328

In [14]:
np.random.seed(42)

# Pattern describing the parameter dictionary, used below to convert between
# the dictionary form and a flat vector.
norm_param_pattern = paragami.PatternDict()
# Covariance: a size-3 PSD symmetric matrix pattern.
norm_param_pattern['sigma'] = paragami.PSDSymmetricMatrixPattern(size=3)
# Mean: a numeric array of shape (3,).
norm_param_pattern['mu'] = paragami.NumericArrayPattern(shape=(3, ))

def objective_fun_lambda(par_dict):
    """Objective as a function of the parameter dictionary only.

    The weights, regularization strength and data are fixed at the
    notebook-level ``original_weights``, ``original_lam`` and ``x``.
    """
    return objective_fun(
        par_dict, weights=original_weights, lam=original_lam, x=x)

# Objective as a function of the flat parameter vector in the free
# parameterization (free=True).  A named function is passed here instead of
# an inline lambda over objective_fun.

objective_flat = paragami.FlattenFunctionInput(
    objective_fun_lambda,
    patterns=norm_param_pattern,
    free=True)

# It's always a good idea to double check for bugs.

# The flattened objective must agree with the dictionary-based objective
# when both are evaluated at the same parameters.
norm_par_flat = norm_param_pattern.flatten(norm_par, free=True)
assert np.linalg.norm(
    objective_flat(norm_par_flat) -
    objective_fun(norm_par, original_weights, original_lam, x)) < 1e-8

# Numerically verify the reverse-mode derivative of the flattened objective.
# NOTE(review): the recorded run of this cell raised an AssertionError
# (analytic and numeric VJP disagree), so gradients of the objective should
# not be trusted until this check passes.
#check_grads(objective_flat)(norm_par_flat) # FWD mode not working
check_grads(objective_flat, modes=['rev'])(norm_par_flat)

AssertionError: Derivative (VJP) check of [unknown name] failed with arg [0.01838482 0.0760574  0.34902187 0.03878424 0.03397008 0.55879116
 0.         1.         2.        ]:
analytic: 2.1897183261330797
numeric:  -4.661007817642502

In [None]:
# Wrap the flattened objective; the wrapper supplies the value, gradient and
# Hessian-vector product callables handed to the optimizer below.
objective_wrapper = paragami.OptimizationObjective(objective_flat)
objective_wrapper.set_print_every(5)

# Start from a vector of ones in the free parameterization.
init_param = np.ones(norm_param_pattern.flat_length(free=True))

# Trust-region Newton-CG with a very tight gradient tolerance.
mle_opt = osp.optimize.minimize(
    objective_wrapper.f,
    init_param,
    method='trust-ncg',
    jac=objective_wrapper.grad,
    hessp=objective_wrapper.hessian_vector_product,
    options=dict(gtol=1e-12, disp=False))

In [None]:
# Flat optimum, then the folded estimates each followed by the true value.
print(mle_opt.x)
norm_par_opt = norm_param_pattern.fold(mle_opt.x, free=True)
print(norm_par_opt['sigma'], true_sigma, sep='\n')
print(norm_par_opt['mu'], true_mu, sep='\n')

# Base point and Hessian of the objective there, reused by the sensitivity
# expansions in later cells.
par_flat0 = copy.deepcopy(mle_opt.x)
hess0 = objective_wrapper.hessian(par_flat0)

In [None]:
lam_free = True # Works better

# Pattern for the scalar regularization parameter, with lower bound 0.
lam_pattern = paragami.NumericScalarPattern(lb=0)

# Objective as a function of (flat parameters, flat lam), with the
# observation weights held at original_weights.
objective_reg_flat = paragami.FlattenFunctionInput(
    lambda par_dict, lam: objective_fun(par_dict, original_weights, lam, x),
    patterns=[norm_param_pattern, lam_pattern],
    free=[True, lam_free])
lam_flat0 = lam_pattern.flatten(original_lam, free=lam_free)

# Sanity check: the flattened objective reproduces the original objective at
# the optimum.  Compared with a tolerance rather than `==`, because the
# flatten/fold round trip is floating-point arithmetic and need not be
# bit-exact.
assert np.linalg.norm(
    objective_reg_flat(par_flat0, lam_flat0) -
    objective_fun(norm_par_opt, original_weights, original_lam, x)) < 1e-8

# Taylor expansion (up to order 4) of the optimum as a function of lam.
# NOTE(review): despite its name, `weight_sens` holds the sensitivity to the
# regularization parameter here; the name is kept because later cells use it.
weight_sens = paragami.ParametricSensitivityTaylorExpansion(
    objective_reg_flat,
    input_val0=par_flat0,
    hyper_val0=lam_flat0,
    hess0=hess0,
    order=4)

In [None]:
def reoptimize_lam(lam_flat1, print_every=10):
    """Re-minimize the objective at a new value of the regularization parameter.

    Parameters
    ----------
    lam_flat1 :
        Flattened regularization value, passed as the second argument of
        ``objective_reg_flat``.
    print_every : int, optional
        Forwarded to ``OptimizationObjective.set_print_every``.  Defaults
        to 10.

    Returns
    -------
    The result object returned by ``scipy.optimize.minimize``; the new
    optimum is its ``x`` attribute.  The search starts from ``par_flat0``.
    """
    new_objective_wrapper = paragami.OptimizationObjective(
        lambda par_flat: objective_reg_flat(par_flat, lam_flat1))
    # Use the caller's setting; it was previously ignored in favor of a
    # hard-coded 10.
    new_objective_wrapper.set_print_every(print_every)
    new_mle_opt = osp.optimize.minimize(
        method='trust-ncg',
        x0=par_flat0,
        fun=new_objective_wrapper.f,
        jac=new_objective_wrapper.grad,
        hessp=new_objective_wrapper.hessian_vector_product,
        options={'gtol': 1e-12, 'disp': False})
    return new_mle_opt

In [None]:
new_lam = 5
#new_lam = 0.001

lam_flat1 = lam_pattern.flatten(new_lam, free=lam_free)
print('dhyper norm', np.linalg.norm(lam_flat1 - lam_flat0))

# Exact re-optimized parameters at the new regularization value.
par1 = reoptimize_lam(lam_flat1).x

# Compare the Taylor-series prediction with the re-optimized parameters for
# each order from 1 to 4: one figure per order, followed by the norm of the
# prediction error.
for order in range(1, 5):
    plt.figure()
    plt.title('Order {}'.format(order))
    par1_pred = weight_sens.evaluate_taylor_series(lam_flat1, max_order=order)
    comparison_plot(par1, par1_pred)
    print(np.linalg.norm(par1 - par1_pred))


In [None]:
# Pattern for the vector of observation weights (one entry per observation).
weight_pattern = paragami.NumericVectorPattern(length=num_obs)

# Objective as a function of (flat parameters, flat weights), with the
# regularization parameter held at original_lam.
objective_weight_flat = paragami.FlattenFunctionInput(
    lambda par_dict, weights: objective_fun(par_dict, weights, original_lam, x),
    patterns=[norm_param_pattern, weight_pattern],
    free=[True, False])
weight_flat0 = weight_pattern.flatten(original_weights, free=False)

# Sanity check: the flattened objective reproduces the original objective at
# the optimum.  Compared with a tolerance rather than `==`, because the
# flatten/fold round trip is floating-point arithmetic and need not be
# bit-exact.
assert np.linalg.norm(
    objective_weight_flat(par_flat0, weight_flat0) -
    objective_fun(norm_par_opt, original_weights, original_lam, x)) < 1e-8

# Taylor expansion (up to order 4) of the optimum as a function of the
# observation weights.
weight_sens = paragami.ParametricSensitivityTaylorExpansion(
    objective_weight_flat,
    input_val0=par_flat0,
    hyper_val0=weight_flat0,
    hess0=hess0,
    order=4)

In [None]:
def reoptimize_weights(weight_flat1, print_every=10):
    """Re-minimize the objective under a new set of observation weights.

    Parameters
    ----------
    weight_flat1 :
        Flattened weight vector, passed as the second argument of
        ``objective_weight_flat``.
    print_every : int, optional
        Forwarded to ``OptimizationObjective.set_print_every``.  Defaults
        to 10.

    Returns
    -------
    The result object returned by ``scipy.optimize.minimize``; the new
    optimum is its ``x`` attribute.  The search starts from ``par_flat0``.
    """
    new_objective_wrapper = paragami.OptimizationObjective(
        lambda par_flat: objective_weight_flat(par_flat, weight_flat1))
    # Use the caller's setting; it was previously ignored in favor of a
    # hard-coded 10.
    new_objective_wrapper.set_print_every(print_every)
    new_mle_opt = osp.optimize.minimize(
        method='trust-ncg',
        x0=par_flat0,
        fun=new_objective_wrapper.f,
        jac=new_objective_wrapper.grad,
        hessp=new_objective_wrapper.hessian_vector_product,
        options={'gtol': 1e-12, 'disp': False})
    return new_mle_opt

In [None]:
# Draw new observation weights and flatten them for the weight objective.
new_weights = example_utils.draw_bootstrap_weights(num_obs)
weight_flat1 = weight_pattern.flatten(new_weights, free=False)

print('dhyper norm', np.linalg.norm(weight_flat1 - weight_flat0))

# Exact re-optimized parameters under the new weights.
par1 = reoptimize_weights(weight_flat1).x

# One figure per Taylor order, each followed by the norm of the gap between
# the re-optimized parameters and the series prediction.
for order in range(1, 5):
    plt.figure()
    par1_pred = weight_sens.evaluate_taylor_series(
        weight_flat1, max_order=order)
    comparison_plot(par1, par1_pred)
    plt.title('Order {}'.format(order))
    print(np.linalg.norm(par1 - par1_pred))
