# Synthetic models for posterior distributions

Marco Raveri (<marco.raveri@unige.it>), Cyrille Doux (<doux@lpsc.in2p3.fr>), Shivam Pandey (<shivampcosmo@gmail.com>)

In this notebook we show how to build normalizing flow synthetic models for posterior distributions, as in [Raveri, Doux and Pandey (2024), arXiv:XXXX.XXXX](https://arxiv.org/abs/XXXX.XXXXX).

## Notebook setup:

In [None]:
# Show plots inline, and load main getdist plot module and samples class
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
%load_ext autoreload
%autoreload 2

# import libraries:
import sys, os
os.environ['TF_USE_LEGACY_KERAS'] = '1'  # needed for tensorflow KERAS compatibility
os.environ['DISPLAY'] = 'inline'  # hack to get getdist working
sys.path.insert(0,os.path.realpath(os.path.join(os.getcwd(),'../..')))
from getdist import plots, MCSamples
from getdist.gaussian_mixtures import GaussianND
import getdist
getdist.chains.print_load_details = False
import scipy
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# tensorflow imports:
import tensorflow as tf
import tensorflow_probability as tfp

# import the tensiometer tools that we need:
import tensiometer
from tensiometer import utilities
from tensiometer.synthetic_probability import synthetic_probability as sp

# getdist settings to ensure consistency of plots
# (the same dictionary is passed to every chain created or loaded in this notebook):
getdist_settings = {'ignore_rows': 0.0,  # do not discard any initial rows as burn-in
                    'smooth_scale_2D': 0.3,  # NOTE(review): smoothing scale of the 2D density estimates, see the getdist analysis settings for its definition
                    'smooth_scale_1D': 0.3,  # NOTE(review): same as above for the 1D density estimates
                    }

We start by building a random Gaussian mixture that we are going to use for tests:

In [None]:
# define the parameters of the problem:
dim = 6
num_gaussians = 3
num_samples = 10000

# we seed the random number generators to get reproducible results.
# Seeding numpy only fixes the parameters of the mixture, so the same seed
# is also passed to the tensorflow sampling call below:
seed = 100
np.random.seed(seed)
# we define the range for the means and covariances:
mean_range = (-0.5, 0.5)
cov_scale = 0.4**2
# means, weights and covs (drawn in this order from the numpy generator):
means = np.random.uniform(mean_range[0], mean_range[1], num_gaussians*dim).reshape(num_gaussians, dim)
weights = np.random.rand(num_gaussians)
weights = weights / np.sum(weights)  # normalize the weights to sum to one
# number of independent entries of a symmetric (dim x dim) matrix, computed with integer arithmetic:
num_cov_entries = dim * (dim + 1) // 2
# NOTE(review): vector_to_PDM presumably maps a vector to a positive definite matrix - confirm in tensiometer.utilities
covs = [cov_scale*utilities.vector_to_PDM(np.random.rand(num_cov_entries)) for _ in range(num_gaussians)]

# cast to required precision:
means = means.astype(np.float32)
weights = weights.astype(np.float32)
covs = [cov.astype(np.float32) for cov in covs]

# initialize distribution (a weighted mixture of multivariate Gaussians,
# each one specified through the Cholesky factor of its covariance):
distribution = tfp.distributions.Mixture(
    cat=tfp.distributions.Categorical(probs=weights),
    components=[
        tfp.distributions.MultivariateNormalTriL(loc=_m, scale_tril=tf.linalg.cholesky(_c))
        for _m, _c in zip(means, covs)
    ], name='Mixture')

# sample the distribution (seeded, so that the samples are reproducible as well):
samples = distribution.sample(num_samples, seed=seed).numpy()
# calculate log posteriors:
logP = distribution.log_prob(samples).numpy()

# create MCSamples from the samples
# (getdist stores minus the log probability, hence the sign of loglikes):
chain = MCSamples(samples=samples,
                  settings=getdist_settings,
                  loglikes=-logP,
                  name_tag='Mixture',
                  )

# we make a sanity check plot:
g = plots.get_subplot_plotter()
g.triangle_plot(chain, filled=True)
    

## Base example: 

We train a normalizing flow on samples of a given distribution.

We initialize and train the normalizing flow on samples of the distribution we have just defined:

In [None]:
# settings passed to the flow constructor:
kwargs = {
    'feedback': 2,
    'plot_every': 1000,
    'pop_size': 1,
    # 'cache_dir': 'test',  # set this to a directory to cache the results
    # 'root_name': 'test',  # sets the name of the flow for the cache files
}

# initialize and train the flow on the samples of the mixture chain:
flow = sp.flow_from_chain(chain, **kwargs)

In [None]:
# we can plot the training summaries of the flow to make sure training went smoothly:
flow.training_plot()

In [None]:
# and we can print the training summary of the flow:
flow.print_training_summary()

In [None]:
# compare the original samples with samples drawn from the flow,
# to see how well it has learned the target distribution:
g = plots.get_subplot_plotter()
_to_plot = [chain, flow.MCSamples(20000)]
g.triangle_plot(_to_plot, params=flow.param_names, filled=True)

In [None]:
# this looks nice but not perfect, let's train for longer
# (the feedback level is lowered from the value used at construction;
# the trailing semicolon suppresses the notebook echo of the return value):
flow.feedback = 1
flow.train(epochs=300, verbose=-1);  # verbose = -1 uses tqdm progress bar

In [None]:
# we plot the training summaries again, now including the additional epochs, to make sure training went smoothly:
flow.training_plot()

If you train for long enough you should start seeing the learning rate adapting to the non-improving (noisy)  loss function.

This means that the flow is learning finer and finer features, and it is a good indication that training is converging.
If you push it further, at some point, the flow will start overfitting and training will stop.

Now let's look at what the marginal distributions look like:

In [None]:
# compare again the original samples with samples drawn from the flow:
g = plots.get_subplot_plotter()
_flow_chain = flow.MCSamples(20000)  # this flow method returns a MCSamples object
g.triangle_plot([chain, _flow_chain], params=flow.param_names, filled=True)

This is now much better!

We can use the trained flow to perform several operations. For example, let's compute the log-probability of samples drawn from the flow:

In [None]:
# draw samples from the flow and evaluate the flow log probability on each of them:
samples = flow.MCSamples(20000)
_points = flow.cast(samples.samples)
logP = flow.log_probability(_points).numpy()
# add it to the samples as a derived parameter, so that it can be used in the plot:
samples.addDerived(logP, name='logP', label='\\log P')
samples.updateBaseStatistics();

# now let's plot everything, with logP as the third plotted quantity:
g = plots.get_subplot_plotter()
g.triangle_plot([samples, chain], plot_3d_with_param='logP', filled=False)

We can appreciate here a beautiful display of a projection effect. The marginal distribution of $p_5$ is peaked at a positive value while the logP plot clearly shows that the peak of the full distribution is the negative one.

If you are interested in systematically understanding these types of effects, check the corresponding tensiometer tutorial!

## Average flow example:

A more advanced flow model consists in training several flows and using a weighted mixture normalizing flow model.

This flow model reduces the variance of the flow in regions that are sparse in samples (as different flow models will hallucinate differently)...

Let's try averaging 5 flow models (note that we could do this in parallel with MPI on bigger machines):

In [None]:
# settings for the average flow: five flows, each trained for 400 epochs:
kwargs = {
    'feedback': 1,
    'verbose': -1,
    'plot_every': 1000,
    'pop_size': 1,
    'num_flows': 5,
    'epochs': 400,
}

# build the average flow from the samples of the mixture chain:
average_flow = sp.average_flow_from_chain(chain, **kwargs)

In [None]:
# most methods are implemented for the average flow as well, for example the training summary plot:
average_flow.training_plot()

In [None]:
# and we can print the training summary of the average flow, which in this case contains more info:
average_flow.print_training_summary()

In [None]:
# samples from the average flow:
avg_samples = average_flow.MCSamples(20000)
avg_samples.name_tag = 'Average Flow'
# samples from each of the flows that are being averaged, tagged with the index of the flow:
temp_samples = []
for i, _f in enumerate(average_flow.flows):
    _s = _f.MCSamples(20000)
    _s.name_tag = _s.name_tag + f'_{i}'
    temp_samples.append(_s)
# let's plot the flows:
g = plots.get_subplot_plotter()
_all_samples = [chain, avg_samples] + temp_samples
g.triangle_plot(_all_samples, filled=False)

In [None]:
# evaluate the average flow log probability on its own samples:
_points = average_flow.cast(avg_samples.samples)
logP = average_flow.log_probability(_points).numpy()
# and add it to the samples as a derived parameter:
avg_samples.addDerived(logP, name='logP', label='\\log P')
avg_samples.updateBaseStatistics();

# now let's plot everything, with logP as the third plotted quantity:
g = plots.get_subplot_plotter()
g.triangle_plot([avg_samples, chain], plot_3d_with_param='logP', filled=False)

## Real world application: joint parameter estimation

In this example we perform a flow-based analysis of a joint posterior.

The idea is that we have posterior samples from two independent experiments; we learn the two posteriors and then combine them to form the joint posterior.

Note that we are assuming - as is true in this example - that the prior is the same for the two experiments and flat (so that we are not counting the prior twice).

This procedure was used, for example, in [Gatti, Campailla et al (2024), arXiv:2405.10881](https://arxiv.org/abs/2405.10881).


In [None]:
# we start by loading up the posteriors:

# directory with the example chains (no burn-in is removed on loading,
# since the example chains have already been cleaned):
chains_dir = os.path.realpath(os.path.join(os.getcwd(), '../..', 'test_chains'))


def _load_example_chain(root_name):
    """Load the example chain with the given file root from chains_dir, without using the getdist cache."""
    return getdist.mcsamples.loadMCSamples(file_root=os.path.join(chains_dir, root_name),
                                           no_cache=True,
                                           settings=getdist_settings)


# the Planck 2018 TTTEEE chain:
chain_1 = _load_example_chain('Planck18TTTEEE')
# the DES Y1 3x2 chain:
chain_2 = _load_example_chain('DES')
# the joint chain:
chain_12 = _load_example_chain('Planck18TTTEEE_DES')

# let's add omegab as a derived parameter, computed from omegabh2 and H0:
for _ch in (chain_1, chain_2, chain_12):
    _pars = _ch.getParams()
    _little_h = _pars.H0 / 100.
    _omegab = _pars.omegabh2 / _little_h**2
    _ch.addDerived(_omegab, name='omegab', label='\\Omega_b')
    _ch.updateBaseStatistics()

# we define the parameters of the problem:
param_names = ['H0', 'omegam', 'sigma8', 'ns', 'omegab']

# and then do a sanity check plot:
g = plots.get_subplot_plotter()
g.triangle_plot([chain_1, chain_2, chain_12], params=param_names, filled=True)

In [None]:
# we then train the flows on the base parameters that we want to combine
# (note that for this exercise we should include all shared parameters):
kwargs = {
    'feedback': 1,
    'verbose': -1,
    'plot_every': 1000,
    'pop_size': 1,
    'num_flows': 3,
    'epochs': 400,
}

# actual flow training, one average flow for each of the three chains:
flow_1 = sp.average_flow_from_chain(chain_1, param_names=param_names, **kwargs)
flow_2 = sp.average_flow_from_chain(chain_2, param_names=param_names, **kwargs)
flow_12 = sp.average_flow_from_chain(chain_12, param_names=param_names, **kwargs)

# plot to make sure training went well:
for _trained_flow in (flow_1, flow_2, flow_12):
    _trained_flow.training_plot()

In [None]:
# sanity check triangle plot, each chain is followed by samples from the flow trained on it:
g = plots.get_subplot_plotter()
g.triangle_plot([chain_1, flow_1.MCSamples(20000, settings=getdist_settings),
                 chain_2, flow_2.MCSamples(20000, settings=getdist_settings),
                 chain_12, flow_12.MCSamples(20000, settings=getdist_settings),
                 ],
                params=param_names,
                filled=False)
# we log scale the y axis of the 1D panels on the diagonal so that we can appreciate the accuracy of the flow on the tails:
for i in range(len(param_names)):
    _ax = g.subplots[i, i]
    _ax.set_yscale('log')
    _ax.set_ylim([1.e-5, 1.0])
    _ax.set_ylabel('$\\log P$')
    # show the tick labels and the axis label on the right side of the panel
    # (tick_params expects a boolean here, not the string 'on'):
    _ax.tick_params(axis='y', which='both', labelright=True)
    _ax.yaxis.set_label_position("right")

In [None]:
# now we can define the joint posterior:
def joint_log_posterior(H0, omegam, sigma8, ns, omegab):
    """
    Joint log posterior, obtained as the sum of the log probabilities of flow_1 and flow_2
    evaluated at the same point in parameter space.
    The value is returned as a single-element list.
    """
    # a batch containing a single point, in the order of the parameters the flows were trained on:
    _point = [[H0, omegam, sigma8, ns, omegab]]
    _log_p_1 = flow_1.log_probability(flow_1.cast(_point)).numpy()[0]
    _log_p_2 = flow_2.log_probability(flow_2.cast(_point)).numpy()[0]
    return [_log_p_1 + _log_p_2]

# and sample it:
from cobaya.run import run
from getdist.mcsamples import MCSamplesFromCobaya

def _shrink_range(lower, upper):
    """
    Pull both ends of the interval [lower, upper] inwards by 1% of their own value.

    For non-negative bounds this is 1.01*lower and 0.99*upper. For negative bounds
    the two factors are swapped: multiplying a negative lower bound by 1.01 (or a
    negative upper bound by 0.99) would move it outwards, i.e. outside the range
    covered by the flows.
    """
    _lower = 1.01 * lower if lower >= 0 else 0.99 * lower
    _upper = 0.99 * upper if upper >= 0 else 1.01 * upper
    return _lower, _upper


# for each parameter we use a flat prior on the intersection of the ranges of
# the two flows, slightly shrunk so that we stay inside both of them:
parameters = {}
for key in param_names:
    _lower = max(flow_1.parameter_ranges[key][0], flow_2.parameter_ranges[key][0])
    _upper = min(flow_1.parameter_ranges[key][1], flow_2.parameter_ranges[key][1])
    _lower, _upper = _shrink_range(_lower, _upper)
    parameters[key] = {"prior": {"min": _lower, "max": _upper},
                       "latex": flow_1.param_labels[flow_1.param_names.index(key)]}
# cobaya input: the joint posterior is passed as an external likelihood function:
info = {
    "likelihood": {"joint_log_posterior": joint_log_posterior},
    "params": parameters,
    }

In [None]:
# MCMC sample:

# we need a \sim good initial proposal and starting point, we get them from one of the flows.
# Draw samples from flow_1 and find the one with the highest flow_1 probability:
flow_1_samples = flow_1.sample(10000)
flow_1_logPs = flow_1.log_probability(flow_1_samples).numpy()
_best_index = np.argmax(flow_1_logPs)
flow_1_maxP_sample = flow_1_samples[_best_index].numpy()

# we need a good starting point otherwise this will take long,
# so the reference point of each parameter is set to its value in that sample:
for _i, _k in enumerate(parameters):
    info['params'][_k]['ref'] = flow_1_maxP_sample[_i]

# settings of the MCMC sampler, the initial proposal is the covariance of the flow_1 samples:
_mcmc_settings = {
    'covmat': np.cov(flow_1_samples.numpy().T),
    'covmat_params': param_names,
    'max_tries': np.inf,
    'Rminus1_stop': 0.01,
    'learn_proposal_Rminus1_max': 30.,
    'learn_proposal_Rminus1_max_early': 30.,
    'measure_speeds': False,
    'Rminus1_single_split': 10,
}
info["sampler"] = {"mcmc": _mcmc_settings}
info['debug'] = 100  # note this is an insane hack to disable very verbose output...

# run the sampler and convert the result to a getdist chain, discarding the first 30% of the rows:
updated_info, sampler = run(info)
_products = sampler.products()
joint_chain = MCSamplesFromCobaya(updated_info, _products["sample"], ignore_rows=0.3, settings=getdist_settings)

In [None]:
## Nested sampling sample:
#_dim = len(flow_1.param_names)
#
#info["sampler"] = {"polychord": {'nlive': 50*_dim,
#                                 'measure_speeds': False,
#                                 'num_repeats': 2*_dim,
#                                 'nprior': 10*25*_dim,
#                                 'do_clustering': True,
#                                 'precision_criterion': 0.01,
#                                 'boost_posterior': 10, 
#                                 'feedback': 0,
#                                 },
#                    }
#info['debug'] = 100  # note this is an insane hack to disable very verbose output...
#updated_info, sampler = run(info)
#joint_chain = MCSamplesFromCobaya(updated_info, sampler.products()["sample"], settings=getdist_settings)

In [None]:
# label the two chains for the plot legend:
joint_chain.name_tag = 'Flow Joint'
chain_12.name_tag = 'Real Joint (Planck + DES)'

# sanity check triangle plot, comparing the flow-based joint posterior with the real one:
g = plots.get_subplot_plotter()
_joint_chains = [joint_chain, chain_12]
g.triangle_plot(_joint_chains, params=param_names, filled=False)


As we can see this works fairly well, given that the two experiments are in some tension, i.e. their posteriors do not overlap significantly.

Make sure you check for the consistency of the experiments you are combining before doing so, to ensure that the joint flow posterior samples a well-trained part of the flows.

You can check the example notebook in this documentation for how to compute tensions between two experiments.

## Advanced Topic: accurate likelihood values

For some applications we need to push the local accuracy of the flow model. In this case we need to provide exact probability values (up to normalization constant) for the training set.

These are then used to build a part of the loss function that rewards local accuracy of probability values. This second part of the loss function is the estimated evidence error.
By default the code adaptively mixes the two loss functions to find an optimal solution.

As a downside we can only train a flow that preserves all the parameters of the distribution, i.e. we cannot train on marginalized parameters (as we have done in the previous examples).

For more details see the reference paper for this example notebook.

In [None]:
# log evidence estimated with the single flow, and its estimated error:
ev, eer = flow.evidence()
print(f'log(Z) = {ev} +- {eer}')

We can see that the value is close to what it should be (zero since the original distribution is normalized) but the estimated error is still fairly high.

Since we have (normalized) log P values we can check the local reliability of the normalizing flow:

In [None]:
# log10 probabilities from the flow and from the chain, on the validation (test) and training samples:
validation_flow_log10_P = flow.log_probability(flow.cast(chain.samples[flow.test_idx, :])).numpy()/np.log(10.)
validation_samples_log10_P = -chain.loglikes[flow.test_idx]/np.log(10.)  # notice the minus sign due to the definition of logP in getdist
training_flow_log10_P = flow.log_probability(flow.cast(chain.samples[flow.training_idx, :])).numpy()/np.log(10.)
training_samples_log10_P = -chain.loglikes[flow.training_idx]/np.log(10.)  # notice the minus sign due to the definition of logP in getdist

# difference between the flow and the true log10 probabilities:
_training_diff = training_flow_log10_P - training_samples_log10_P
_validation_diff = validation_flow_log10_P - validation_samples_log10_P

# axis labels, as raw strings so that the LaTeX backslashes are not parsed as (invalid) escape sequences:
_label_true = r'$\log_{10}(P_{\rm true}/P_{\rm max})$'
_label_diff = r'$\log_{10}(P_{\rm flow}) - \log_{10}(P_{\rm true})$'

# do the plot:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

# left panel: difference as a function of the true probability, relative to its maximum:
ax1.scatter(training_samples_log10_P - np.amax(training_samples_log10_P), _training_diff, s=0.1, label='training')
ax1.scatter(validation_samples_log10_P - np.amax(validation_samples_log10_P), _validation_diff, s=0.5, label='validation')
ax1.legend()
ax1.axhline(0, color='k', linestyle='--')
ax1.set_xlabel(_label_true)
ax1.set_ylabel(_label_diff)
ax1.set_ylim(-1.0, 1.0)

# right panel: normalized histograms of the difference:
ax2.hist(_training_diff, bins=50, range=[-1., 1.], density=True, alpha=0.5, label='training')
ax2.hist(_validation_diff, bins=50, range=[-1., 1.], density=True, alpha=0.5, label='validation')
ax2.legend()
ax2.axvline(0, color='k', linestyle='--')
ax2.set_xlabel(_label_diff)
ax2.set_xlim([-1.0, 1.0])

plt.tight_layout()
plt.show()

We can clearly see that the local accuracy of the flow in full dimension is not high. As we move to the tails we easily have large errors. The variance of this plot is the estimated error on the evidence, which is rather large and dominated by the outliers in the tails.

Considering average flows usually improves the situation, in particular on the validation sample.

In [None]:
# log evidence estimated with the average flow, and its estimated error:
ev, eer = average_flow.evidence()
print(f'log(Z) = {ev} +- {eer}')

In [None]:
# log10 probabilities from the average flow and from the chain, on the validation (test) and training samples:
validation_flow_log10_P = average_flow.log_probability(average_flow.cast(chain.samples[average_flow.test_idx, :])).numpy()/np.log(10.)
validation_samples_log10_P = -chain.loglikes[average_flow.test_idx]/np.log(10.)  # notice the minus sign due to the definition of logP in getdist
training_flow_log10_P = average_flow.log_probability(average_flow.cast(chain.samples[average_flow.training_idx, :])).numpy()/np.log(10.)
training_samples_log10_P = -chain.loglikes[average_flow.training_idx]/np.log(10.)  # notice the minus sign due to the definition of logP in getdist

# difference between the flow and the true log10 probabilities:
_training_diff = training_flow_log10_P - training_samples_log10_P
_validation_diff = validation_flow_log10_P - validation_samples_log10_P

# axis labels, as raw strings so that the LaTeX backslashes are not parsed as (invalid) escape sequences:
_label_true = r'$\log_{10}(P_{\rm true}/P_{\rm max})$'
_label_diff = r'$\log_{10}(P_{\rm flow}) - \log_{10}(P_{\rm true})$'

# do the plot:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

# left panel: difference as a function of the true probability, relative to its maximum:
ax1.scatter(training_samples_log10_P - np.amax(training_samples_log10_P), _training_diff, s=0.1, label='training')
ax1.scatter(validation_samples_log10_P - np.amax(validation_samples_log10_P), _validation_diff, s=0.5, label='validation')
ax1.legend()
ax1.axhline(0, color='k', linestyle='--')
ax1.set_xlabel(_label_true)
ax1.set_ylabel(_label_diff)
ax1.set_ylim(-1.0, 1.0)

# right panel: normalized histograms of the difference:
ax2.hist(_training_diff, bins=50, range=[-1., 1.], density=True, alpha=0.5, label='training')
ax2.hist(_validation_diff, bins=50, range=[-1., 1.], density=True, alpha=0.5, label='validation')
ax2.legend()
ax2.axvline(0, color='k', linestyle='--')
ax2.set_xlabel(_label_diff)
ax2.set_xlim([-1.0, 1.0])

plt.tight_layout()
plt.show()

This looks significantly better, and in fact the error on the evidence estimate is lower...

If we want to do better we need to train with evidence error loss, as discussed in the reference paper for this example notebook.

In [None]:
# settings for a single flow trained with the 'softadapt' loss mode:
kwargs = {
    'feedback': 1,
    'verbose': -1,
    'plot_every': 1000,
    'pop_size': 1,
    'num_flows': 1,
    'epochs': 400,
    'loss_mode': 'softadapt',
}

# build the flow from the samples of the mixture chain:
average_flow_2 = sp.average_flow_from_chain(chain, **kwargs)

In [None]:
# plot the training summaries of the flow trained with the 'softadapt' loss mode:
average_flow_2.training_plot()

As we can see the training plots are substantially more complicated as we are monitoring several additional quantities.

In [None]:
# log evidence estimated with the flow trained with the 'softadapt' loss mode, and its estimated error:
ev, eer = average_flow_2.evidence()
print(f'log(Z) = {ev} +- {eer}')

In [None]:
# log10 probabilities from the flow and from the chain, on the validation (test) and training samples:
validation_flow_log10_P = average_flow_2.log_probability(average_flow_2.cast(chain.samples[average_flow_2.test_idx, :])).numpy()/np.log(10.)
validation_samples_log10_P = -chain.loglikes[average_flow_2.test_idx]/np.log(10.)  # notice the minus sign due to the definition of logP in getdist
training_flow_log10_P = average_flow_2.log_probability(average_flow_2.cast(chain.samples[average_flow_2.training_idx, :])).numpy()/np.log(10.)
training_samples_log10_P = -chain.loglikes[average_flow_2.training_idx]/np.log(10.)  # notice the minus sign due to the definition of logP in getdist

# difference between the flow and the true log10 probabilities:
_training_diff = training_flow_log10_P - training_samples_log10_P
_validation_diff = validation_flow_log10_P - validation_samples_log10_P

# axis labels, as raw strings so that the LaTeX backslashes are not parsed as (invalid) escape sequences:
_label_true = r'$\log_{10}(P_{\rm true}/P_{\rm max})$'
_label_diff = r'$\log_{10}(P_{\rm flow}) - \log_{10}(P_{\rm true})$'

# do the plot:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

# left panel: difference as a function of the true probability, relative to its maximum:
ax1.scatter(training_samples_log10_P - np.amax(training_samples_log10_P), _training_diff, s=0.1, label='training')
ax1.scatter(validation_samples_log10_P - np.amax(validation_samples_log10_P), _validation_diff, s=0.5, label='validation')
ax1.legend()
ax1.axhline(0, color='k', linestyle='--')
ax1.set_xlabel(_label_true)
ax1.set_ylabel(_label_diff)
ax1.set_ylim(-1.0, 1.0)

# right panel: normalized histograms of the difference:
ax2.hist(_training_diff, bins=50, range=[-1., 1.], density=True, alpha=0.5, label='training')
ax2.hist(_validation_diff, bins=50, range=[-1., 1.], density=True, alpha=0.5, label='validation')
ax2.legend()
ax2.axvline(0, color='k', linestyle='--')
ax2.set_xlabel(_label_diff)
ax2.set_xlim([-1.0, 1.0])

plt.tight_layout()
plt.show()

As we can see this achieves performance that is very close to that of averaging flows. Combining the two strategies achieves the best performance (but is slower to train).

## Advanced Topic: Spline Flows

When more flexibility in the normalizing flow model is needed we provide an implementation of neural spline flows as discussed in [Durkan et al (2019), arXiv:1906.04032](https://arxiv.org/abs/1906.04032).

In [None]:
kwargs = {
    # flow settings, selecting the spline transformation with a masked autoregressive network:
    'pop_size': 1,
    'num_flows': 1,
    'epochs': 400,
    'transformation_type': 'spline',
    'autoregressive_type': 'masked',
    # feedback flags:
    'feedback': 1,
    'verbose': -1,
    'plot_every': 1000,
}

# build the spline flow from the samples of the mixture chain:
spline_flow = sp.flow_from_chain(chain, **kwargs)

In [None]:
# we can plot the training summaries of the spline flow to make sure training went smoothly:
spline_flow.training_plot()

In [None]:
# we can triangle plot the spline flow to see how well it has learned the target distribution.
# The parameter names are taken from the spline flow itself, so that this cell
# does not depend on the flow trained in the base example:
g = plots.get_subplot_plotter()
g.triangle_plot([chain,
                 spline_flow.MCSamples(20000)  # this flow method returns a MCSamples object
                 ],
                params=spline_flow.param_names,
                filled=True)

In [None]:
# draw samples from the spline flow and evaluate its log probability on each of them:
samples = spline_flow.MCSamples(20000)
_points = spline_flow.cast(samples.samples)
logP = spline_flow.log_probability(_points).numpy()
# add it to the samples as a derived parameter, so that it can be used in the plot:
samples.addDerived(logP, name='logP', label='\\log P')
samples.updateBaseStatistics();

# now let's plot everything, with logP as the third plotted quantity:
g = plots.get_subplot_plotter()
g.triangle_plot([samples, chain], plot_3d_with_param='logP', filled=False)

In [None]:
# log10 probabilities from the spline flow and from the chain, on the validation (test) and training samples:
validation_flow_log10_P = spline_flow.log_probability(spline_flow.cast(chain.samples[spline_flow.test_idx, :])).numpy()/np.log(10.)
validation_samples_log10_P = -chain.loglikes[spline_flow.test_idx]/np.log(10.)  # notice the minus sign due to the definition of logP in getdist
training_flow_log10_P = spline_flow.log_probability(spline_flow.cast(chain.samples[spline_flow.training_idx, :])).numpy()/np.log(10.)
training_samples_log10_P = -chain.loglikes[spline_flow.training_idx]/np.log(10.)  # notice the minus sign due to the definition of logP in getdist

# log evidence estimated with the spline flow, and its estimated error:
ev, eer = spline_flow.evidence()
print(f'log(Z) = {ev} +- {eer}')

# difference between the flow and the true log10 probabilities:
_training_diff = training_flow_log10_P - training_samples_log10_P
_validation_diff = validation_flow_log10_P - validation_samples_log10_P

# axis labels, as raw strings so that the LaTeX backslashes are not parsed as (invalid) escape sequences:
_label_true = r'$\log_{10}(P_{\rm true}/P_{\rm max})$'
_label_diff = r'$\log_{10}(P_{\rm flow}) - \log_{10}(P_{\rm true})$'

# do the plot:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

# left panel: difference as a function of the true probability, relative to its maximum:
ax1.scatter(training_samples_log10_P - np.amax(training_samples_log10_P), _training_diff, s=0.1, label='training')
ax1.scatter(validation_samples_log10_P - np.amax(validation_samples_log10_P), _validation_diff, s=0.5, label='validation')
ax1.legend()
ax1.axhline(0, color='k', linestyle='--')
ax1.set_xlabel(_label_true)
ax1.set_ylabel(_label_diff)
ax1.set_ylim(-1.0, 1.0)

# right panel: normalized histograms of the difference:
ax2.hist(_training_diff, bins=50, range=[-1., 1.], density=True, alpha=0.5, label='training')
ax2.hist(_validation_diff, bins=50, range=[-1., 1.], density=True, alpha=0.5, label='validation')
ax2.legend()
ax2.axvline(0, color='k', linestyle='--')
ax2.set_xlabel(_label_diff)
ax2.set_xlim([-1.0, 1.0])

plt.tight_layout()
plt.show()

We can check what happens across the bijector layers:

In [None]:
from tensiometer.synthetic_probability import flow_utilities as flow_utils

# get the training and validation samples across the bijector layers of the spline flow:
_spaces = flow_utils.get_samples_bijectors(spline_flow, feedback=True)
training_samples_spaces, validation_samples_spaces = _spaces

# one triangle plot for each element of the list, comparing training and validation samples:
for i, _s in enumerate(training_samples_spaces):
    print('*  ', _s.name_tag)
    g = plots.get_subplot_plotter()
    _pair = [_s, validation_samples_spaces[i]]
    g.triangle_plot(_pair, filled=True)
    plt.show()