In this script, we demonstrate how to initialize and change values in TFP.

In [9]:
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
tfd = tfp.distributions
tfb = tfp.bijectors


Let's set up a Beta prior/posterior so that we have a clearly constrained posterior support of (0, 1).

In [29]:
# Surrogate posterior using TFP's default parameter initialization. The
# Sigmoid bijector constrains the transformed Normal to the (0, 1) support.
default_fit = tfp.experimental.vi.build_factored_surrogate_posterior(
    event_shape=(),
    bijector=tfb.Sigmoid(),
)

# Same surrogate, but with explicit starting values for the base Normal's
# parameters. `initial_parameters` takes plain initial values (TFP builds its
# own trainable variables from them), so both entries are given as floats
# instead of wrapping only one of them in a `tf.Variable`.
init_fit = tfp.experimental.vi.build_factored_surrogate_posterior(
    event_shape=(),
    bijector=tfb.Sigmoid(),
    initial_parameters={
        'loc': 1.0,
        'scale': 0.8,
    },
)

help(tfp.experimental.vi.build_factored_surrogate_posterior)

Help on function build_stateful_trainable in module tensorflow_probability.python.internal.trainable_state_util:

build_stateful_trainable(*args, seed=None, **kwargs)
    Builds a joint variational posterior that factors over model variables.

    By default, this method creates an independent trainable Normal distribution
    for each variable, transformed using a bijector (if provided) to
    match the support of that variable. This makes extremely strong
    assumptions about the posterior: that it is approximately normal (or
    transformed normal), and that all model variables are independent.

    Args:
      event_shape: `Tensor` shape, or nested structure of `Tensor` shapes,
        specifying the event shape(s) of the posterior variables.
      bijector: Optional `tfb.Bijector` instance, or nested structure of such
        instances, defining support(s) of the posterior variables. The structure
        must match that of `event_shape` and may contain `None` values. A
        p

Let's examine whether the initialized values are in the constrained or unconstrained space.

Note: on lines 62-64 of `_default_parameter_init_fn` in TFP's `trainable.py`, the scale values get cast to float32, which cannot represent 0.01 or 0.8 exactly, so we check that what TFP returns matches the float32 representation of those values. Note that the values are first stored as float64, so we have to cast to float64 and then to float32 to reproduce this loss of precision.

In [None]:
# Grab the distribution wrapped by each surrogate once, then read its current
# loc and scale parameters as NumPy values.
default_base = default_fit.distribution
init_base = init_fit.distribution
default_initial_values = (default_base.loc.numpy(), default_base.scale.numpy())
init_initial_values = (init_base.loc.numpy(), init_base.scale.numpy())
print(f"The initialized values by default are {default_initial_values[0]}, {default_initial_values[1]}")
print(f"The initialized values when we set loc=1.0 and scale=0.8 are {init_initial_values[0]}, {init_initial_values[1]}")


def pass_through_scale_initialization(initial_value, print_steps=True):
    """Replay, step by step, how an initial scale ends up as `Normal.scale`.

    The steps emulate what happens internally when `.distribution.scale` is
    requested from a surrogate distribution. The TFP source locations quoted
    in the step comments are the ones noted by the notebook author and may
    differ between TFP versions.

    Args:
        initial_value: Initial scale value to push through the pipeline.
        print_steps: If True, print the intermediate value after every step.

    Returns:
        The `scale` attribute of the `tfd.Normal` constructed in the last step.
    """
    # Step 1: the raw value exactly as supplied by the caller.
    if print_steps:
        print(f"Step 1 - Initial value: {initial_value}")

    # Step 2: cast to float32
    # (_default_parameter_init_fn, experimental/util/trainable.py line 62).
    float_dtype = tf.float32
    as_float32 = tf.cast(initial_value, dtype=float_dtype)
    if print_steps:
        print(f"Step 2 - After tf.cast: {as_float32.numpy()}")

    # Step 3: broadcast to the parameter shape; event_shape=() means a scalar
    # (_default_parameter_init_fn, experimental/util/trainable.py lines 62-64).
    target_shape = ()
    broadcasted = tf.broadcast_to(as_float32, target_shape)
    if print_steps:
        print(f"Step 3 - After tf.broadcast_to: {broadcasted.numpy()}")

    # Step 4: constraining bijector for Normal's scale, a Softplus with
    # low=eps, where eps for float32 is approximately 1.1920929e-07
    # (distributions/normal.py lines 153-155 and trainable.py line 168).
    float32_eps = tf.cast(1.1920929e-07, dtype=float_dtype)
    softplus = tfb.Softplus(low=float32_eps)
    if print_steps:
        print(f"Step 4 - Bijector: {softplus}")

    # Step 5: invert the bijector to obtain the unconstrained value
    # (_initialize_parameters, internal/trainable_state_util.py lines 234, 236).
    raw_scale = softplus.inverse(broadcasted)
    if print_steps:
        print(f"Step 5 - After inverse bijector (unconstrained): {raw_scale.numpy()}")

    # Step 6: store the unconstrained value in a Variable
    # (as_stateful_builder, internal/trainable_state_util.py line 338).
    scale_variable = tf.Variable(raw_scale, name='scale')
    if print_steps:
        print(f"Step 6 - Variable value: {scale_variable.numpy()}")

    # Step 7: read the Variable back, as happens when accessing .distribution
    # (util/deferred_module.py line 152 and trainable_state_util.py line 257).
    stored = scale_variable.value()
    if print_steps:
        print(f"Step 7 - Variable read: {stored.numpy()}")

    # Step 8: tf.identity on the value that was read
    # (_apply_parameters, internal/trainable_state_util.py line 286).
    passed_through = tf.identity(stored)
    if print_steps:
        print(f"Step 8 - After tf.identity: {passed_through.numpy()}")

    # Step 9: forward bijector to return to the constrained space
    # (_apply_parameters, internal/trainable_state_util.py line 284).
    round_tripped = softplus.forward(passed_through)
    if print_steps:
        print(f"Step 9 - After forward bijector (constrained): {round_tripped.numpy()}")

    # Step 10: hand the constrained value to the Normal constructor, which
    # stores it in _scale (internal/trainable_state_util.py line 287).
    zero_loc = tf.constant(0.0, dtype=float_dtype)
    base_normal = tfd.Normal(loc=zero_loc, scale=round_tripped)
    if print_steps:
        print(f"Step 10 - Normal.scale: {base_normal.scale.numpy()}")

    # Step 11: read it back through the .scale property, which returns
    # self._scale (distributions/normal.py lines 163-166).
    scale_out = base_normal.scale
    if print_steps:
        print(f"Step 11 - Final value (.scale): {scale_out.numpy()}")
    return scale_out

# Replay the emulated pipeline for a scale of 1e-2 (the default case examined
# above) and for the custom scale of 0.8, printing every intermediate step.
final_default_scale = pass_through_scale_initialization(
    initial_value=1e-2,
    print_steps=True,
)
final_init_scale = pass_through_scale_initialization(
    initial_value=0.8,
    print_steps=True,
)
print(f"Final scale from default initialization after emulating tfp's steps: {final_default_scale.numpy()}")
print(f"Final scale from custom initialization after emulating tfp's steps: {final_init_scale.numpy()}")

The initialized values by default are 1.9238139390945435, 0.009999997913837433
The initialized values when we set loc=1.0 and scale=0.8 are 1.0, 0.8000000715255737
Step 1 - Initial value: 0.01
Step 2 - After tf.cast: 0.009999999776482582
Step 3 - After tf.broadcast_to: 0.009999999776482582
Step 4 - Bijector: tfp.bijectors.Softplus("softplus", batch_shape=[], min_event_ndims=0, dtype=float32)
Step 5 - After inverse bijector (unconstrained): -4.600178241729736
Step 6 - Variable value: -4.600178241729736
Step 7 - Variable read: -4.600178241729736
Step 8 - After tf.identity: -4.600178241729736
Step 9 - After forward bijector (constrained): 0.009999997913837433
Step 10 - Normal.scale: 0.009999997913837433
Step 11 - Final value (.scale): 0.009999997913837433
Step 1 - Initial value: 0.8
Step 2 - After tf.cast: 0.800000011920929
Step 3 - After tf.broadcast_to: 0.800000011920929
Step 4 - Bijector: tfp.bijectors.Softplus("softplus", batch_shape=[], min_event_ndims=0, dtype=float32)
Step 5 - Afte

In [4]:
# Draw from each surrogate and summarize the draws by their empirical mean and
# standard deviation. The draws come from the surrogate itself, so they are
# already in the constrained space.
num_draws = 100_000
init_samples = init_fit.sample(num_draws)
default_samples = default_fit.sample(num_draws)
print(f"Default fit (constrained) empirical mean and std: {np.mean(default_samples.numpy())}, {np.std(default_samples.numpy())}")
print(f"Init fit (constrained) empirical mean and std: {np.mean(init_samples.numpy())}, {np.std(init_samples.numpy())}")

Default fit (constrained) empirical mean and std: 0.48628485202789307, 0.002491940511390567
Init fit (constrained) empirical mean and std: 0.7071442008018494, 0.15043577551841736


It seems that the values we observe empirically are different from the initialized values, so let's transform the initialized values and see if they match the empirical values. (We use measure transport, i.e. we draw samples from the unconstrained distribution, transform the samples, and then take the empirical mean and std of the transformed samples.)

In [5]:
# Rebuild a Normal from each surrogate's initialized loc/scale, draw from it in
# the unconstrained space, and push the draws through a Sigmoid to obtain the
# empirical constrained mean and std implied by the initial values.
to_unit_interval = tfb.Sigmoid()

init_unconstrained = tfd.Normal(
    loc=init_fit.distribution.loc,
    scale=init_fit.distribution.scale,
)
init_constrained_samples = to_unit_interval.forward(init_unconstrained.sample(100_000))

default_unconstrained = tfd.Normal(
    loc=default_fit.distribution.loc,
    scale=default_fit.distribution.scale,
)
default_constrained_samples = to_unit_interval.forward(default_unconstrained.sample(100_000))

print(f"Default fit empirical constrained mean and std from initial values: {np.mean(default_constrained_samples.numpy())}, {np.std(default_constrained_samples.numpy())}")
print(f"Init fit empirical constrained mean and std from initial values: {np.mean(init_constrained_samples.numpy())}, {np.std(init_constrained_samples.numpy())}")

Default fit empirical constrained mean and std from initial values: 0.4862838387489319, 0.002490274142473936
Init fit empirical constrained mean and std from initial values: 0.7078702449798584, 0.14973874390125275


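Since these values match the constrained empirical mean and std above, we conclude that TFP's initialization control (`initial_parameters`) sets the parameters of the underlying unconstrained Gaussian distribution in ADVI, rather than the mean and std of the constrained distribution.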