In [95]:
import os
os.environ['PYTENSOR_FLAGS'] = 'mode=FAST_COMPILE,optimizer=fast_compile,floatX=float32,cxx='

import numpy as np
import pymc as pm
import pytensor.tensor as pt

In this notebook, we demonstrate the initialization capability of PyMC's ADVI.

We will fit a simple 1d normal distribution using ADVI and compare the initializations.


In [96]:
# Start value handed to ADVI for `mu` in the first model below.
init_mean = 0.2

# Model 1: the ADVI approximation is built with an explicit start value for `mu`.
with pm.Model() as init_model:
    mu = pm.Normal("mu", mu=0, sigma=1)
    # No data is attached to `y` (observed=None).
    likelihood = pm.Normal("y", mu=mu, sigma=1, observed=None)
    advi = pm.ADVI(start={"mu": init_mean})
    # Zero optimisation steps: we only want the initial state of the approximation.
    init_fit = advi.fit(n=0)

# Model 2: the same model, but ADVI is left to pick its default initialization.
with pm.Model() as default_model:
    mu = pm.Normal("mu", mu=0, sigma=1)
    likelihood = pm.Normal("y", mu=mu, sigma=1, observed=None)
    default_fit = pm.fit(n=0, method=pm.ADVI())



Output()

Initialization only


Output()

Initialization only


In [97]:
# Look up the position of "mu" among each model's unobserved variables,
# so that the cells below index the entry that belongs to mu.
default_rv_names = [rv.name for rv in default_model.unobserved_RVs]
init_rv_names = [rv.name for rv in init_model.unobserved_RVs]

default_mu_idx = default_rv_names.index("mu")
init_mu_idx = init_rv_names.index("mu")
default_mu_idx, init_mu_idx

(0, 0)

In [98]:
# Initial variational mean and std for `mu` when ADVI is given no start value
default_mu_mean = default_fit.mean.eval()[default_mu_idx]
default_mu_std = default_fit.std.eval()[default_mu_idx]
default_mu_mean, default_mu_std

(np.float32(0.0), np.float32(0.6931472))

In [99]:
# Initial variational mean and std for `mu` when ADVI was built with
# start={'mu': init_mean}, i.e. init_mean = 0.2
init_mu_mean = init_fit.mean.eval()[init_mu_idx]
init_mu_std = init_fit.std.eval()[init_mu_idx]
init_mu_mean, init_mu_std

(np.float32(0.2), np.float32(0.6931472))

Now, let's use a Beta model to determine whether the `start` value sets the constrained value in $(0, 1)$ or the unconstrained mean in $\mathbb{R}$.

We will sample from the initialized (not yet fitted) approximation, take the empirical mean and std of the samples, and compare them to the initial parameter value of $0.2$.

In [None]:
# Start value handed to ADVI for `theta`.
init_mean = 0.2

with pm.Model() as beta_model:
    # Beta prior on theta with a Binomial observation (2 successes out of 10).
    theta = pm.Beta("theta", alpha=2, beta=5)
    likelihood = pm.Binomial("obs", n=10, p=theta, observed=2)
    advi = pm.ADVI(start={"theta": init_mean})
    # Zero optimisation steps: keep the approximation exactly as initialized.
    approx = advi.fit(n=0)


Output()

Initialization only


Let's look at what samples from the posterior give us empirically for the constrained mean and std.

In [126]:

# Draw from the initialized approximation; `theta` is read back from the posterior group.
idata = approx.sample(100_000, random_seed=0)
theta_samps = np.asarray(idata.posterior["theta"]).ravel()
print("Posterior samples (constrained theta):")
print("  mean =", theta_samps.mean(), "std =", theta_samps.std(ddof=1))

# ADVI mean-field uses 'mu' and 'rho' (rho -> sigma via softplus) in *unconstrained space*.
mu = approx.params_dict.get("mu")
rho = approx.params_dict.get("rho")

# Entry 0 is used for theta.
mu_z = mu[0].eval()
rho_z = rho[0].eval()
# softplus(rho) computed as logaddexp(0, rho): same value as log1p(exp(rho)),
# but it does not overflow for large rho. zeros_like keeps rho's dtype.
sigma_z = float(np.logaddexp(np.zeros_like(rho_z), rho_z))

print("\nUnconstrained (R) params for theta after initialization:")
print("  mu_z    =", mu_z)
print("  sigma_z =", sigma_z)

# Let's take samples in unconstrained space and transform them to constrained space
# to verify the empirical mean/std.
# A seeded generator makes this check reproducible on Restart & Run All,
# matching the seeded approx.sample call above.
rng = np.random.default_rng(0)
eps = rng.standard_normal(200_000)
# Sigmoid maps the unconstrained draws z = mu_z + sigma_z * eps into (0, 1).
theta_mc = 1.0 / (1.0 + np.exp(-(mu_z + sigma_z * eps)))
print("\nMC from z-space params (should match approx.sample mean):")
print("  mean =", theta_mc.mean(), "std =", theta_mc.std(ddof=1))


Posterior samples (constrained theta):
  mean = 0.22040887 std = 0.11290903

Unconstrained (R) params for theta after initialization:
  mu_z    = -1.3862944
  sigma_z = 0.6931471824645996

MC from z-space params (should match approx.sample mean):
  mean = 0.22057717815746278 std = 0.11323499329891867


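Since the approximation's samples and our manual MC samples have matching summary statistics, we can see what happens under the hood when the "constrained mean" is initialized: the `start` value is given in the constrained space $(0, 1)$ and mapped to the unconstrained space $\mathbb{R}$ (here $\mathrm{logit}(0.2) \approx -1.386$) to set the mean of the approximation, and samples are then transported back to $(0, 1)$ through the inverse transform, which is exactly the measure transport we just performed by hand. Because that transform is nonlinear, the mean of the constrained samples ($\approx 0.22$) is close to, but not exactly, the start value of $0.2$.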