From 2d42d5526f1a170184dbfc15acb3e2cd5ed70447 Mon Sep 17 00:00:00 2001 From: Mitzi Morris Date: Thu, 21 Sep 2023 20:02:35 -0400 Subject: [PATCH 1/6] checkpointing --- docsrc/internal_api.rst | 4 +- docsrc/users-guide/examples.rst | 1 + .../users-guide/examples/MCMC Sampling.ipynb | 4 +- docsrc/users-guide/examples/Pathfinder.ipynb | 91 +++++++++++++++++++ .../examples/VI as Sampler Inits.ipynb | 4 +- .../examples/Variational Inference.ipynb | 39 ++++---- 6 files changed, 117 insertions(+), 26 deletions(-) create mode 100644 docsrc/users-guide/examples/Pathfinder.ipynb diff --git a/docsrc/internal_api.rst b/docsrc/internal_api.rst index c8a4d59e..dffbfca8 100644 --- a/docsrc/internal_api.rst +++ b/docsrc/internal_api.rst @@ -15,7 +15,7 @@ Classes InferenceMetadata ================= -.. autoclass:: cmdstanpy.InferenceMetadata +.. autoclass:: cmdstanpy.stanfit.InferenceMetadata :members: RunSet @@ -27,7 +27,7 @@ RunSet CompilerOptions =============== -.. autoclass:: cmdstanpy.compiler_opts.CompilerOptions +.. autoclass:: cmdstanpy.compilation.CompilerOptions :members: CmdStanArgs diff --git a/docsrc/users-guide/examples.rst b/docsrc/users-guide/examples.rst index 9c9dbe32..38ff37e7 100644 --- a/docsrc/users-guide/examples.rst +++ b/docsrc/users-guide/examples.rst @@ -6,6 +6,7 @@ __________________ examples/MCMC Sampling.ipynb examples/Maximum Likelihood Estimation.ipynb + examples/Pathfinder.ipynb examples/Variational Inference.ipynb examples/VI as Sampler Inits.ipynb examples/Run Generated Quantities.ipynb diff --git a/docsrc/users-guide/examples/MCMC Sampling.ipynb b/docsrc/users-guide/examples/MCMC Sampling.ipynb index 2f027b10..b948457e 100644 --- a/docsrc/users-guide/examples/MCMC Sampling.ipynb +++ b/docsrc/users-guide/examples/MCMC Sampling.ipynb @@ -1975,7 +1975,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1989,7 +1989,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.9.16" } }, "nbformat": 4, diff --git a/docsrc/users-guide/examples/Pathfinder.ipynb b/docsrc/users-guide/examples/Pathfinder.ipynb new file mode 100644 index 00000000..40ea005c --- /dev/null +++ b/docsrc/users-guide/examples/Pathfinder.ipynb @@ -0,0 +1,91 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Variational Inference using Pathfinder\n", + "\n", + "Stan supports the Pathfinder algorithm\n", + "([Zhang, 2022](https://jmlr.org/papers/v23/21-0889.html)).\n", + "Pathfinder is a variational method for approximately\n", + "sampling from differentiable log densities. Starting from a random\n", + "initialization, Pathfinder locates normal approximations to the target\n", + "density along a quasi-Newton optimization path, with local covariance\n", + "estimated using the negative inverse Hessian estimates produced by the\n", + "LBFGS optimizer. Pathfinder returns draws from the Gaussian approximation\n", + "with the lowest estimated Kullback-Leibler (KL) divergence to the true\n", + "posterior.\n", + "\n", + "There are two Stan implementations of the Pathfinder algorithm:\n", + "single-path Pathfinder and multi-path Pathfinder.\n", + "Single-path Pathfinder generates a set of approximate draws from one run of the basic Pathfinder algorithm.\n", + "Multi-path Pathfinder uses importance resampling over the draws from multiple runs of Pathfinder.\n", + "This better matches non-normal target densities and also mitigates\n", + "the problem of L-BFGS getting stuck at local optima or in saddle points on plateaus." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example: variational inference with Pathfinder for model ``bernoulli.stan``\n", + "\n", + "The [CmdStanModel pathfinder](https://mc-stan.org/cmdstanpy/api.html#cmdstanpy.CmdStanModel.pathfinder ) method\n", + "wraps the CmdStan [pathfinder ](https://mc-stan.org/docs/cmdstan-guide/pathfinder-config.html) method.\n", + "\n", + "By default, CmdStanPy runs multi-path Pathfinder which returns an importance-resampled set of draws over the outputs of 4 independent single-path Pathfinders." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from cmdstanpy.model import CmdStanModel\n", + "from cmdstanpy.utils import cmdstan_path\n", + "\n", + "bernoulli_dir = os.path.join(cmdstan_path(), 'examples', 'bernoulli')\n", + "stan_file = os.path.join(bernoulli_dir, 'bernoulli.stan')\n", + "data_file = os.path.join(bernoulli_dir, 'bernoulli.data.json')\n", + "# instantiate, compile bernoulli model\n", + "model = CmdStanModel(stan_file=stan_file)\n", + "# run CmdStan's pathfinder method, returns object `CmdStanPathfinder`\n", + "pathfinder = model.pathfinder(data=data_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(pathfinder)\n", + "print(pathfinder.metadata)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docsrc/users-guide/examples/VI as Sampler Inits.ipynb b/docsrc/users-guide/examples/VI as Sampler Inits.ipynb index ccb6a792..d77ca78e 100644 --- a/docsrc/users-guide/examples/VI as Sampler Inits.ipynb +++ b/docsrc/users-guide/examples/VI as Sampler Inits.ipynb @@ -162,7 +162,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -176,7 +176,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.9.16" } }, "nbformat": 4, diff --git a/docsrc/users-guide/examples/Variational Inference.ipynb b/docsrc/users-guide/examples/Variational Inference.ipynb index cb4434a0..64304034 100644 --- a/docsrc/users-guide/examples/Variational Inference.ipynb +++ b/docsrc/users-guide/examples/Variational Inference.ipynb @@ -4,18 +4,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Variational Inference in Stan\n", + "# Variational Inference using ADVI\n", "\n", - "Variational inference is a scalable technique for approximate Bayesian inference.\n", - "Stan implements an automatic variational inference algorithm,\n", - "called Automatic Differentiation Variational Inference (ADVI)\n", - "which searches over a family of simple densities to find the best\n", + "Variational inference is a method for approximating complex Bayesian posterior distributions using simpler, parameterized distributions.\n", + "The Automatic Differentiation Variational Inference (ADVI) algorithm\n", + "searches over a family of simple densities to find the best\n", "approximate posterior density.\n", "ADVI produces an estimate of the parameter means together with a sample\n", "from the approximate posterior density.\n", "\n", - "ADVI approximates the variational objective function, the evidence lower bound or ELBO,\n", - "using stochastic gradient ascent.\n", + "ADVI uses stochastic gradient ascent to approximate the variational objective function, the evidence lower bound or ELBO.\n", "The algorithm ascends these gradients using an adaptive stepsize sequence\n", "that has one parameter ``eta`` which is adjusted during warmup.\n", "The number of draws used to approximate the ELBO is denoted by ``elbo_samples``. \n", @@ -31,10 +29,8 @@ "source": [ "### Example: variational inference for model ``bernoulli.stan``\n", "\n", - "In CmdStanPy, the `CmdStanModel` class method `variational` invokes CmdStan with\n", - "`method=variational` and returns an estimate of the approximate posterior\n", - "mean of all model parameters as well as a set of draws from this approximate\n", - "posterior." + "The [CmdStanModel variational](https://mc-stan.org/cmdstanpy/api.html#cmdstanpy.CmdStanModel.variational) method\n", + "wraps the CmdStan [variational](https://mc-stan.org/docs/cmdstan-guide/variational-config.html) method." ] }, { @@ -60,12 +56,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The class [`CmdStanVB`](https://cmdstanpy.readthedocs.io/en/latest/api.html#stanvariational) provides the following properties to access information about the parameter names, estimated means, and the sample:\n", - " + `column_names`\n", - " + `variational_params_dict`\n", - " + `variational_params_np`\n", - " + `variational_params_pd`\n", - " + `variational_sample`" + "The class [`CmdStanVB`](https://mc-stan.org/cmdstanpy/api.html#cmdstanvb) provides the following properties to access information about the parameter names, estimated means, and the sample:\n", + " + `column_names` - list of column names\n", + " + `columns` - number of columns\n", + " + `eta` - step size scaling parameter\n", + " + `variational_params_dict` - inferred parameter means as a Dict.\n", + " + `variational_params_np` - inferred parameter means as a numpy.ndarray.\n", + " + `variational_params_pd` - inferred parameter means as a pandas.DataFrame.\n", + " + `variational_sample` - the set of approximate posterior output draws ad a numpy.ndarray.\n", + " + `variational_sample_pd` - the set of approximate posterior output draws ad a pandas.DataFrame." ] }, { @@ -156,13 +155,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "See the [api docs](https://cmdstanpy.readthedocs.io/en/latest/api.html), section [`CmdStanModel.variational`](https://cmdstanpy.readthedocs.io/en/latest/api.html#cmdstanpy.CmdStanModel.variational) for a full description of all arguments." + "See the [API documentation](https://mc-stan.org/cmdstanpy/api.html#cmdstanpy.CmdStanModel.variational) for a full description of all arguments." ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -176,7 +175,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.5" + "version": "3.9.16" } }, "nbformat": 4, From 9c535bd8b3edd6116c4186a4c6d05958ed741e87 Mon Sep 17 00:00:00 2001 From: Mitzi Morris Date: Fri, 22 Sep 2023 03:13:21 -0400 Subject: [PATCH 2/6] checkpointing --- docsrc/api.rst | 32 +++++++++---------- docsrc/internal_api.rst | 6 ++++ .../examples/Variational Inference.ipynb | 17 +++++----- docsrc/users-guide/workflow.rst | 32 +++++++++++++------ 4 files changed, 53 insertions(+), 34 deletions(-) diff --git a/docsrc/api.rst b/docsrc/api.rst index 4773f21a..b6412857 100644 --- a/docsrc/api.rst +++ b/docsrc/api.rst @@ -25,15 +25,15 @@ A CmdStanModel object encapsulates the Stan program. It manages program compilat :meth:`~CmdStanModel.sample` runs the HMC-NUTS sampler to produce a set of draws from the posterior distribution. -:meth:`~CmdStanModel.pathfinder` - runs the Pathfinder variational inference parameters to recieve approximate draws from the posterior. - :meth:`~CmdStanModel.optimize` produce a penalized maximum likelihood estimate or maximum a posteriori estimate (point estimate) of the model parameters. :meth:`~CmdStanModel.laplace_sample` draw from a Laplace approximatation centered at the posterior mode found by ``optimize``. +:meth:`~CmdStanModel.pathfinder` + runs the Pathfinder variational inference parameters to recieve approximate draws from the posterior. + :meth:`~CmdStanModel.variational` run CmdStan’s automatic differentiation variational inference (ADVI) algorithm to approximate the posterior distribution. @@ -43,30 +43,28 @@ A CmdStanModel object encapsulates the Stan program. It manages program compilat .. autoclass:: cmdstanpy.CmdStanModel :members: - CmdStanMCMC =========== .. autoclass:: cmdstanpy.CmdStanMCMC :members: - -CmdStanPathfinder -================= - -.. autoclass:: cmdstanpy.CmdStanPathfinder - :members: - CmdStanMLE ========== .. autoclass:: cmdstanpy.CmdStanMLE :members: -CmdStanGQ -========= +CmdStanLaplace +============== -.. autoclass:: cmdstanpy.CmdStanGQ +.. autoclass:: cmdstanpy.CmdStanLaplace + :members: + +CmdStanPathfinder +================= + +.. autoclass:: cmdstanpy.CmdStanPathfinder :members: CmdStanVB @@ -75,10 +73,10 @@ CmdStanVB .. autoclass:: cmdstanpy.CmdStanVB :members: -CmdStanLaplace -============== +CmdStanGQ +========= -.. autoclass:: cmdstanpy.CmdStanLaplace +.. autoclass:: cmdstanpy.CmdStanGQ :members: ********* diff --git a/docsrc/internal_api.rst b/docsrc/internal_api.rst index dffbfca8..9747f9b9 100644 --- a/docsrc/internal_api.rst +++ b/docsrc/internal_api.rst @@ -48,6 +48,12 @@ OptimizeArgs .. autoclass:: cmdstanpy.cmdstan_args.OptimizeArgs :members: +PathfinderArgs +============== + +.. autoclass:: cmdstanpy.cmdstan_args.PathfinderArgs + :members: + VariationalArgs =============== diff --git a/docsrc/users-guide/examples/Variational Inference.ipynb b/docsrc/users-guide/examples/Variational Inference.ipynb index 64304034..18bffac6 100644 --- a/docsrc/users-guide/examples/Variational Inference.ipynb +++ b/docsrc/users-guide/examples/Variational Inference.ipynb @@ -57,14 +57,15 @@ "metadata": {}, "source": [ "The class [`CmdStanVB`](https://mc-stan.org/cmdstanpy/api.html#cmdstanvb) provides the following properties to access information about the parameter names, estimated means, and the sample:\n", - " + `column_names` - list of column names\n", - " + `columns` - number of columns\n", - " + `eta` - step size scaling parameter\n", - " + `variational_params_dict` - inferred parameter means as a Dict.\n", - " + `variational_params_np` - inferred parameter means as a numpy.ndarray.\n", - " + `variational_params_pd` - inferred parameter means as a pandas.DataFrame.\n", - " + `variational_sample` - the set of approximate posterior output draws ad a numpy.ndarray.\n", - " + `variational_sample_pd` - the set of approximate posterior output draws ad a pandas.DataFrame." + "\n", + " + `column_names` - list of column names\n", + " + `columns` - number of columns\n", + " + `eta` - step size scaling parameter\n", + " + `variational_params_dict` - inferred parameter means as a Dict.\n", + " + `variational_params_np` - inferred parameter means as a numpy.ndarray.\n", + " + `variational_params_pd` - inferred parameter means as a pandas.DataFrame.\n", + " + `variational_sample` - the set of approximate posterior output draws ad a numpy.ndarray.\n", + " + `variational_sample_pd` - the set of approximate posterior output draws ad a pandas.DataFrame." ] }, { diff --git a/docsrc/users-guide/workflow.rst b/docsrc/users-guide/workflow.rst index 2aa56d92..4171e7bd 100644 --- a/docsrc/users-guide/workflow.rst +++ b/docsrc/users-guide/workflow.rst @@ -134,22 +134,34 @@ An example of each is provided in the `next section `__. It returns a :class:`CmdStanMCMC` object which contains a sample from the posterior distribution of the model conditioned on the data. -* The :meth:`~CmdStanModel.variational` method runs Stan's - `Automatic Differentiation Variational Inference (ADVI) algorithm `__. - - It returns a :class:`CmdStanVB` object which contains - an approximation the posterior distribution in the unconstrained variable space. - -* The :meth:`~CmdStanModel.optimize` runs one of +* The :meth:`~CmdStanModel.optimize` method runs one of `Stan's optimization algorithms `__. to find a mode of the density specified by the Stan program. It returns a :class:`CmdStanMLE` object. +* The :meth:`~CmdStanModel.laplace` method runs Stan's + `Laplace approximation algorithm `__. + and returns a sample from a Laplace approximatation centered at the posterior mode found by optimization. + + It returns a :class:`CmdStanLaplace` object. + +* The :meth:`~CmdStanModel.pathfinder` method runs Stan's + `Pathfinder Variational Inference algorithm `__. + + It returns a :class:`CmdStanPathfinder` object which contains a sample from a Guassian approximation to the posterior. + +* The :meth:`~CmdStanModel.variational` method runs Stan's + `Automatic Differentiation Variational Inference (ADVI) algorithm `__. + + It returns a :class:`CmdStanVB` object which contains + an approximation to the posterior distribution in the unconstrained variable space. + * The :meth:`~CmdStanModel.generate_quantities` method runs Stan's `generate_quantities method `__. which generates additional quantities of interest from a mode. Its take an existing fit as input and - uses the parameter estimates in the fit to run the Stan program's `generated quantities block `__. + uses the parameter estimates in the fit to run the Stan program's + `generated quantities block `__. It returns a :class:`CmdStanGQ` object. @@ -170,8 +182,10 @@ Output data The resulting Stan CSV file or set of files are assembled into an inference result object. + :class:`CmdStanMCMC` object contains the :meth:`~CmdStanModel.sample` outputs -+ :class:`CmdStanVB` object contains the :meth:`~CmdStanModel.variational` outputs + :class:`CmdStanMLE` object contains the :meth:`~CmdStanModel.optimize` outputs ++ :class:`CmdStanLaplace` object contains the :meth:`~CmdStanModel.laplace` outputs ++ :class:`CmdStanPathfinder` object contains the :meth:`~CmdStanModel.pathfinder` outputs ++ :class:`CmdStanVB` object contains the :meth:`~CmdStanModel.variational` outputs + :class:`CmdStanGQ` object contains the :meth:`~CmdStanModel.generate_quantities` outputs From 2d0d0e39b58cecad6bee5087b3fdec6e4351f66d Mon Sep 17 00:00:00 2001 From: Mitzi Morris Date: Fri, 22 Sep 2023 15:55:42 -0400 Subject: [PATCH 3/6] update users guide --- .../users-guide/examples/MCMC Sampling.ipynb | 1431 +---------------- docsrc/users-guide/examples/Pathfinder.ipynb | 61 + .../examples/VI as Sampler Inits.ipynb | 110 +- docsrc/users-guide/workflow.rst | 27 +- 4 files changed, 212 insertions(+), 1417 deletions(-) diff --git a/docsrc/users-guide/examples/MCMC Sampling.ipynb b/docsrc/users-guide/examples/MCMC Sampling.ipynb index b948457e..020be6a8 100644 --- a/docsrc/users-guide/examples/MCMC Sampling.ipynb +++ b/docsrc/users-guide/examples/MCMC Sampling.ipynb @@ -46,7 +46,7 @@ "\n", "- The [draws](https://mc-stan.org/cmdstanpy/api.html#cmdstanpy.CmdStanMCMC.draws) method returns the sample as either a 2-D or 3-D numpy.ndarray.\n", "\n", - "- The [draws_pd](https://mc-stan.org/cmdstanpy/api.html#cmdstanpy.CmdStanMCMC.draws) method returns the entire sample or selected variables as a pandas.DataFrame.\n", + "- The [draws_pd](https://mc-stan.org/cmdstanpy/api.html#cmdstanpy.CmdStanMCMC.draws_pd) method returns the entire sample or selected variables as a pandas.DataFrame.\n", "\n", "- The [draws_xr](https://mc-stan.org/cmdstanpy/api.html#cmdstanpy.CmdStanMCMC.draws_xr) method returns a structured Xarray dataset over the Stan model variables.\n", "\n", @@ -78,18 +78,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Enabling notebook extension jupyter-js-widgets/extension...\n", - " - Validating: \u001b[32mOK\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "!jupyter nbextension enable --py widgetsnbextension" ] @@ -122,7 +113,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -144,94 +135,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "10:52:33 - cmdstanpy - INFO - CmdStan start processing\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "872ca7b6b43d4195830069900ff9b43a", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 1 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "7f94c4b7a03c4178be2d84e3c58378b3", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 2 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "a83b62b0bd704af4a0830ff8381ff00b", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 3 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "dfc39d2bf5c74cfc8e6d9dcbc3cbd849", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 4 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " " - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "10:52:33 - cmdstanpy - INFO - CmdStan done processing.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], + "outputs": [], "source": [ "# run CmdStan's sample method, returns object `CmdStanMCMC`\n", "fit = model.sample(data='bernoulli.data.json')" @@ -249,53 +155,20 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "scrolled": true }, - "outputs": [ - { - "data": { - "text/plain": [ - "CmdStanMCMC: model=bernoulli chains=4['method=sample', 'algorithm=hmc', 'adapt', 'engaged=1']\n", - " csv_files:\n", - "\t/tmp/tmp1c58xoau/bernoullibcep5e9d/bernoulli-20230911105233_1.csv\n", - "\t/tmp/tmp1c58xoau/bernoullibcep5e9d/bernoulli-20230911105233_2.csv\n", - "\t/tmp/tmp1c58xoau/bernoullibcep5e9d/bernoulli-20230911105233_3.csv\n", - "\t/tmp/tmp1c58xoau/bernoullibcep5e9d/bernoulli-20230911105233_4.csv\n", - " output_files:\n", - "\t/tmp/tmp1c58xoau/bernoullibcep5e9d/bernoulli-20230911105233_0-stdout.txt\n", - "\t/tmp/tmp1c58xoau/bernoullibcep5e9d/bernoulli-20230911105233_1-stdout.txt\n", - "\t/tmp/tmp1c58xoau/bernoullibcep5e9d/bernoulli-20230911105233_2-stdout.txt\n", - "\t/tmp/tmp1c58xoau/bernoullibcep5e9d/bernoulli-20230911105233_3-stdout.txt" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "fit" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "draws as array: (1000, 4, 8)\n", - "draws as structured object:\n", - "\tdict_keys(['theta'])\n", - "sampler diagnostics:\n", - "\tdict_keys(['lp__', 'accept_stat__', 'stepsize__', 'treedepth__', 'n_leapfrog__', 'divergent__', 'energy__'])\n" - ] - } - ], + "outputs": [], "source": [ "print(f'draws as array: {fit.draws().shape}')\n", "print(f'draws as structured object:\\n\\t{fit.stan_variables().keys()}')\n", @@ -319,94 +192,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "10:52:33 - cmdstanpy - INFO - CmdStan start processing\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "cb7ff406992d40bfad0723d5336648d1", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 1 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "18b6c2dd786949599cf4ae30728effcd", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 2 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f90f9ca6848541898445f3e1f2df1716", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 3 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "bb2f46fa30a441a082c366e7e10093eb", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 4 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " " - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "10:52:35 - cmdstanpy - INFO - CmdStan done processing.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], + "outputs": [], "source": [ "fit = model.sample(data='bernoulli.data.json', iter_warmup=100000, iter_sampling=100000, show_progress=True)\n" ] @@ -422,174 +210,11 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "10:52:37 - cmdstanpy - INFO - Chain [1] start processing\n", - "10:52:37 - cmdstanpy - INFO - Chain [1] done processing\n", - "10:52:37 - cmdstanpy - INFO - Chain [2] start processing\n", - "10:52:37 - cmdstanpy - INFO - Chain [2] done processing\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Chain [1] method = sample (Default)\n", - "Chain [1] sample\n", - "Chain [1] num_samples = 1000 (Default)\n", - "Chain [1] num_warmup = 1000 (Default)\n", - "Chain [1] save_warmup = 0 (Default)\n", - "Chain [1] thin = 1 (Default)\n", - "Chain [1] adapt\n", - "Chain [1] engaged = 1 (Default)\n", - "Chain [1] gamma = 0.050000000000000003 (Default)\n", - "Chain [1] delta = 0.80000000000000004 (Default)\n", - "Chain [1] kappa = 0.75 (Default)\n", - "Chain [1] t0 = 10 (Default)\n", - "Chain [1] init_buffer = 75 (Default)\n", - "Chain [1] term_buffer = 50 (Default)\n", - "Chain [1] window = 25 (Default)\n", - "Chain [1] algorithm = hmc (Default)\n", - "Chain [1] hmc\n", - "Chain [1] engine = nuts (Default)\n", - "Chain [1] nuts\n", - "Chain [1] max_depth = 10 (Default)\n", - "Chain [1] metric = diag_e (Default)\n", - "Chain [1] metric_file = (Default)\n", - "Chain [1] stepsize = 1 (Default)\n", - "Chain [1] stepsize_jitter = 0 (Default)\n", - "Chain [1] num_chains = 1 (Default)\n", - "Chain [1] id = 1 (Default)\n", - "Chain [1] data\n", - "Chain [1] file = bernoulli.data.json\n", - "Chain [1] init = 2 (Default)\n", - "Chain [1] random\n", - "Chain [1] seed = 22120\n", - "Chain [1] output\n", - "Chain [1] file = /tmp/tmp1c58xoau/bernoulli5_ykvxrn/bernoulli-20230911105237_1.csv\n", - "Chain [1] diagnostic_file = (Default)\n", - "Chain [1] refresh = 100 (Default)\n", - "Chain [1] sig_figs = -1 (Default)\n", - "Chain [1] profile_file = profile.csv (Default)\n", - "Chain [1] num_threads = 1 (Default)\n", - "Chain [1] \n", - "Chain [1] \n", - "Chain [1] Gradient evaluation took 3e-06 seconds\n", - "Chain [1] 1000 transitions using 10 leapfrog steps per transition would take 0.03 seconds.\n", - "Chain [1] Adjust your expectations accordingly!\n", - "Chain [1] \n", - "Chain [1] \n", - "Chain [1] Iteration: 1 / 2000 [ 0%] (Warmup)\n", - "Chain [1] Iteration: 100 / 2000 [ 5%] (Warmup)\n", - "Chain [1] Iteration: 200 / 2000 [ 10%] (Warmup)\n", - "Chain [1] Iteration: 300 / 2000 [ 15%] (Warmup)\n", - "Chain [1] Iteration: 400 / 2000 [ 20%] (Warmup)\n", - "Chain [1] Iteration: 500 / 2000 [ 25%] (Warmup)\n", - "Chain [1] Iteration: 600 / 2000 [ 30%] (Warmup)\n", - "Chain [1] Iteration: 700 / 2000 [ 35%] (Warmup)\n", - "Chain [1] Iteration: 800 / 2000 [ 40%] (Warmup)\n", - "Chain [1] Iteration: 900 / 2000 [ 45%] (Warmup)\n", - "Chain [1] Iteration: 1000 / 2000 [ 50%] (Warmup)\n", - "Chain [1] Iteration: 1001 / 2000 [ 50%] (Sampling)\n", - "Chain [1] Iteration: 1100 / 2000 [ 55%] (Sampling)\n", - "Chain [1] Iteration: 1200 / 2000 [ 60%] (Sampling)\n", - "Chain [1] Iteration: 1300 / 2000 [ 65%] (Sampling)\n", - "Chain [1] Iteration: 1400 / 2000 [ 70%] (Sampling)\n", - "Chain [1] Iteration: 1500 / 2000 [ 75%] (Sampling)\n", - "Chain [1] Iteration: 1600 / 2000 [ 80%] (Sampling)\n", - "Chain [1] Iteration: 1700 / 2000 [ 85%] (Sampling)\n", - "Chain [1] Iteration: 1800 / 2000 [ 90%] (Sampling)\n", - "Chain [1] Iteration: 1900 / 2000 [ 95%] (Sampling)\n", - "Chain [1] Iteration: 2000 / 2000 [100%] (Sampling)\n", - "Chain [1] \n", - "Chain [1] Elapsed Time: 0.005 seconds (Warm-up)\n", - "Chain [1] 0.01 seconds (Sampling)\n", - "Chain [1] 0.015 seconds (Total)\n", - "Chain [1] \n", - "Chain [1] \n", - "Chain [2] method = sample (Default)\n", - "Chain [2] sample\n", - "Chain [2] num_samples = 1000 (Default)\n", - "Chain [2] num_warmup = 1000 (Default)\n", - "Chain [2] save_warmup = 0 (Default)\n", - "Chain [2] thin = 1 (Default)\n", - "Chain [2] adapt\n", - "Chain [2] engaged = 1 (Default)\n", - "Chain [2] gamma = 0.050000000000000003 (Default)\n", - "Chain [2] delta = 0.80000000000000004 (Default)\n", - "Chain [2] kappa = 0.75 (Default)\n", - "Chain [2] t0 = 10 (Default)\n", - "Chain [2] init_buffer = 75 (Default)\n", - "Chain [2] term_buffer = 50 (Default)\n", - "Chain [2] window = 25 (Default)\n", - "Chain [2] algorithm = hmc (Default)\n", - "Chain [2] hmc\n", - "Chain [2] engine = nuts (Default)\n", - "Chain [2] nuts\n", - "Chain [2] max_depth = 10 (Default)\n", - "Chain [2] metric = diag_e (Default)\n", - "Chain [2] metric_file = (Default)\n", - "Chain [2] stepsize = 1 (Default)\n", - "Chain [2] stepsize_jitter = 0 (Default)\n", - "Chain [2] num_chains = 1 (Default)\n", - "Chain [2] id = 2\n", - "Chain [2] data\n", - "Chain [2] file = bernoulli.data.json\n", - "Chain [2] init = 2 (Default)\n", - "Chain [2] random\n", - "Chain [2] seed = 22120\n", - "Chain [2] output\n", - "Chain [2] file = /tmp/tmp1c58xoau/bernoulli5_ykvxrn/bernoulli-20230911105237_2.csv\n", - "Chain [2] diagnostic_file = (Default)\n", - "Chain [2] refresh = 100 (Default)\n", - "Chain [2] sig_figs = -1 (Default)\n", - "Chain [2] profile_file = profile.csv (Default)\n", - "Chain [2] num_threads = 1 (Default)\n", - "Chain [2] \n", - "Chain [2] \n", - "Chain [2] Gradient evaluation took 3e-06 seconds\n", - "Chain [2] 1000 transitions using 10 leapfrog steps per transition would take 0.03 seconds.\n", - "Chain [2] Adjust your expectations accordingly!\n", - "Chain [2] \n", - "Chain [2] \n", - "Chain [2] Iteration: 1 / 2000 [ 0%] (Warmup)\n", - "Chain [2] Iteration: 100 / 2000 [ 5%] (Warmup)\n", - "Chain [2] Iteration: 200 / 2000 [ 10%] (Warmup)\n", - "Chain [2] Iteration: 300 / 2000 [ 15%] (Warmup)\n", - "Chain [2] Iteration: 400 / 2000 [ 20%] (Warmup)\n", - "Chain [2] Iteration: 500 / 2000 [ 25%] (Warmup)\n", - "Chain [2] Iteration: 600 / 2000 [ 30%] (Warmup)\n", - "Chain [2] Iteration: 700 / 2000 [ 35%] (Warmup)\n", - "Chain [2] Iteration: 800 / 2000 [ 40%] (Warmup)\n", - "Chain [2] Iteration: 900 / 2000 [ 45%] (Warmup)\n", - "Chain [2] Iteration: 1000 / 2000 [ 50%] (Warmup)\n", - "Chain [2] Iteration: 1001 / 2000 [ 50%] (Sampling)\n", - "Chain [2] Iteration: 1100 / 2000 [ 55%] (Sampling)\n", - "Chain [2] Iteration: 1200 / 2000 [ 60%] (Sampling)\n", - "Chain [2] Iteration: 1300 / 2000 [ 65%] (Sampling)\n", - "Chain [2] Iteration: 1400 / 2000 [ 70%] (Sampling)\n", - "Chain [2] Iteration: 1500 / 2000 [ 75%] (Sampling)\n", - "Chain [2] Iteration: 1600 / 2000 [ 80%] (Sampling)\n", - "Chain [2] Iteration: 1700 / 2000 [ 85%] (Sampling)\n", - "Chain [2] Iteration: 1800 / 2000 [ 90%] (Sampling)\n", - "Chain [2] Iteration: 1900 / 2000 [ 95%] (Sampling)\n", - "Chain [2] Iteration: 2000 / 2000 [100%] (Sampling)\n", - "Chain [2] \n", - "Chain [2] Elapsed Time: 0.003 seconds (Warm-up)\n", - "Chain [2] 0.013 seconds (Sampling)\n", - "Chain [2] 0.016 seconds (Total)\n", - "Chain [2] \n", - "Chain [2] \n" - ] - } - ], + "outputs": [], "source": [ "fit = model.sample(data='bernoulli.data.json', chains=2, parallel_chains=1, show_console=True)\n", "\n" @@ -632,32 +257,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "data {\n", - " int J; // number of schools\n", - " array[J] real y; // estimated treatment effect (school j)\n", - " array[J] real sigma; // std err of effect estimate (school j)\n", - "}\n", - "parameters {\n", - " real mu;\n", - " array[J] real theta;\n", - " real tau;\n", - "}\n", - "model {\n", - " theta ~ normal(mu, tau);\n", - " y ~ normal(theta, sigma);\n", - "}\n", - "\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "with open('eight_schools.stan', 'r') as fd:\n", " print(fd.read())" @@ -672,23 +274,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"J\" : 8,\n", - " \"y\" : [28,8,-3,7,-1,1,18,12],\n", - " \"sigma\" : [15,10,16,11,9,11,10,18],\n", - " \"tau\" : 25\n", - "}\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "with open('eight_schools.data.json', 'r') as fd:\n", " print(fd.read())" @@ -712,102 +300,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "10:52:37 - cmdstanpy - INFO - compiling stan file /home/brian/Dev/py/cmdstanpy/docsrc/users-guide/examples/eight_schools.stan to exe file /home/brian/Dev/py/cmdstanpy/docsrc/users-guide/examples/eight_schools\n", - "10:52:57 - cmdstanpy - INFO - compiled model executable: /home/brian/Dev/py/cmdstanpy/docsrc/users-guide/examples/eight_schools\n", - "10:52:57 - cmdstanpy - INFO - CmdStan start processing\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "27bd6ca586c34d049c98baf587b0cd3a", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 1 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e11c1c7d655b43d4843aa9737433031f", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 2 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "c85ce416cdfc440eb36850aa0d250985", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 3 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "698a83d7802543d78d383f1a2cc0eb69", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 4 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " " - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "10:52:57 - cmdstanpy - INFO - CmdStan done processing.\n", - "10:52:57 - cmdstanpy - WARNING - Some chains may have failed to converge.\n", - "\tChain 1 had 29 divergent transitions (2.9%)\n", - "\tChain 2 had 208 divergent transitions (20.8%)\n", - "\tChain 3 had 17 divergent transitions (1.7%)\n", - "\tChain 4 had 31 divergent transitions (3.1%)\n", - "\tUse the \"diagnose()\" method on the CmdStanMCMC object to see further information.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], + "outputs": [], "source": [ "eight_schools_model = CmdStanModel(stan_file='eight_schools.stan')\n", "eight_schools_fit = eight_schools_model.sample(data='eight_schools.data.json', seed=55157)" @@ -824,20 +319,9 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "divergences:\n", - "[ 29 208 17 31]\n", - "iterations at max_treedepth:\n", - "[0 0 0 0]\n" - ] - } - ], + "outputs": [], "source": [ "print(f'divergences:\\n{eight_schools_fit.divergences}\\niterations at max_treedepth:\\n{eight_schools_fit.max_treedepths}')" ] @@ -853,213 +337,11 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": { "scrolled": true }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
MeanMCSEStdDev5%50%95%N_EffN_Eff/sR_hat
lp__-17.804201.2976005.69607-26.306500-18.54890-8.1872319.26950100.362001.18588
mu7.980880.1961415.17030-0.8470438.4613516.43540694.855003619.040001.00830
theta[1]11.695300.3573248.65978-0.38425110.3276028.16590587.339003059.060001.00867
theta[2]7.766560.2013296.38375-2.5185407.2541918.443901005.400005236.470001.00249
theta[3]5.968520.2398678.27095-8.7562707.2148718.390701188.960006192.520001.01065
theta[4]7.716600.2015246.85139-3.6447108.2686618.807801155.860006020.090001.00593
theta[5]4.976210.4475976.65889-6.7018805.6381114.88990221.325001152.730001.03708
theta[6]5.880400.2129336.89437-6.3161406.8341416.413701048.340005460.110001.01177
theta[7]10.952500.2437376.901530.3849189.9328523.58810801.764004175.850001.00408
theta[8]8.473010.2180128.06921-4.2225108.1447522.197201369.940007135.110001.00222
tau7.015150.7728705.538961.0229305.7858217.3253051.36157267.508191.07508
\n", - "
" - ], - "text/plain": [ - " Mean MCSE StdDev 5% 50% 95% \\\n", - "lp__ -17.80420 1.297600 5.69607 -26.306500 -18.54890 -8.18723 \n", - "mu 7.98088 0.196141 5.17030 -0.847043 8.46135 16.43540 \n", - "theta[1] 11.69530 0.357324 8.65978 -0.384251 10.32760 28.16590 \n", - "theta[2] 7.76656 0.201329 6.38375 -2.518540 7.25419 18.44390 \n", - "theta[3] 5.96852 0.239867 8.27095 -8.756270 7.21487 18.39070 \n", - "theta[4] 7.71660 0.201524 6.85139 -3.644710 8.26866 18.80780 \n", - "theta[5] 4.97621 0.447597 6.65889 -6.701880 5.63811 14.88990 \n", - "theta[6] 5.88040 0.212933 6.89437 -6.316140 6.83414 16.41370 \n", - "theta[7] 10.95250 0.243737 6.90153 0.384918 9.93285 23.58810 \n", - "theta[8] 8.47301 0.218012 8.06921 -4.222510 8.14475 22.19720 \n", - "tau 7.01515 0.772870 5.53896 1.022930 5.78582 17.32530 \n", - "\n", - " N_Eff N_Eff/s R_hat \n", - "lp__ 19.26950 100.36200 1.18588 \n", - "mu 694.85500 3619.04000 1.00830 \n", - "theta[1] 587.33900 3059.06000 1.00867 \n", - "theta[2] 1005.40000 5236.47000 1.00249 \n", - "theta[3] 1188.96000 6192.52000 1.01065 \n", - "theta[4] 1155.86000 6020.09000 1.00593 \n", - "theta[5] 221.32500 1152.73000 1.03708 \n", - "theta[6] 1048.34000 5460.11000 1.01177 \n", - "theta[7] 801.76400 4175.85000 1.00408 \n", - "theta[8] 1369.94000 7135.11000 1.00222 \n", - "tau 51.36157 267.50819 1.07508 " - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "eight_schools_fit.summary()" ] @@ -1075,40 +357,9 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing csv files: /tmp/tmp1c58xoau/eight_schoolsmty8utjy/eight_schools-20230911105257_1.csv, /tmp/tmp1c58xoau/eight_schoolsmty8utjy/eight_schools-20230911105257_2.csv, /tmp/tmp1c58xoau/eight_schoolsmty8utjy/eight_schools-20230911105257_3.csv, /tmp/tmp1c58xoau/eight_schoolsmty8utjy/eight_schools-20230911105257_4.csv\n", - "\n", - "Checking sampler transitions treedepth.\n", - "Treedepth satisfactory for all transitions.\n", - "\n", - "Checking sampler transitions for divergences.\n", - "285 of 4000 (7.12%) transitions ended with a divergence.\n", - "These divergent transitions indicate that HMC is not fully able to explore the posterior distribution.\n", - "Try increasing adapt delta closer to 1.\n", - "If this doesn't remove all divergences, try to reparameterize the model.\n", - "\n", - "Checking E-BFMI - sampler transitions HMC potential energy.\n", - "The E-BFMI, 0.28, is below the nominal threshold of 0.30 which suggests that HMC may have trouble exploring the target distribution.\n", - "If possible, try to reparameterize the model.\n", - "\n", - "Effective sample size satisfactory.\n", - "\n", - "The following parameters had split R-hat greater than 1.05:\n", - " tau\n", - "Such high values indicate incomplete mixing and biased estimation.\n", - "You should consider regularizating your model with additional prior information or a more effective parameterization.\n", - "\n", - "Processing complete.\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "print(eight_schools_fit.diagnose())" ] @@ -1122,94 +373,9 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "10:52:58 - cmdstanpy - INFO - CmdStan start processing\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "cf0dd603beb54875a1c17e75e540fa4b", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 1 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f2b3df4b82f94bb68893b6e55b1911c6", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 2 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "920ded59dd8d423c9df5c75572a51778", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 3 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "d55c88bc05574a489338e88f23bc2342", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 4 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " " - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "10:52:58 - cmdstanpy - INFO - CmdStan done processing.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], + "outputs": [], "source": [ "fit = model.sample(data='bernoulli.data.json')" ] @@ -1226,17 +392,9 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0.458989 0.557688 0.423626 ... 0.343436 0.543817 0.235329]\n" - ] - } - ], + "outputs": [], "source": [ "print(fit.stan_variable('theta'))" ] @@ -1250,17 +408,9 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "name: theta, shape: (4000,)\n" - ] - } - ], + "outputs": [], "source": [ "for k, v in fit.stan_variables().items():\n", " print(f'name: {k}, shape: {v.shape}')" @@ -1268,27 +418,9 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Dimensions: (draw: 1000, chain: 4)\n", - "Coordinates:\n", - " * chain (chain) int64 1 2 3 4\n", - " * draw (draw) int64 0 1 2 3 4 5 6 7 8 ... 992 993 994 995 996 997 998 999\n", - "Data variables:\n", - " theta (chain, draw) float64 0.459 0.5577 0.4236 ... 0.3434 0.5438 0.2353\n", - "Attributes:\n", - " stan_version: 2.33.0\n", - " model: bernoulli_model\n", - " num_draws_sampling: 1000\n" - ] - } - ], + "outputs": [], "source": [ "print(fit.draws_xr('theta'))" ] @@ -1304,161 +436,18 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sample as ndarray: (1000, 4, 8)\n", - "first 2 draws, chain 1:\n", - "[[-7.86503 0.766382 1.01693 1. 1. 0. 7.90921\n", - " 0.458989]\n", - " [-9.09353 0.641941 1.01693 1. 1. 0. 9.34714\n", - " 0.557688]]\n" - ] - } - ], + "outputs": [], "source": [ "print(f'sample as ndarray: {fit.draws().shape}\\nfirst 2 draws, chain 1:\\n{fit.draws()[:2, 0, :]}')" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
chain__iter__draw__lp__accept_stat__stepsize__treedepth__n_leapfrog__divergent__energy__theta
01.01.01.0-7.865030.7663821.016931.01.00.07.909210.458989
11.02.02.0-9.093530.6419411.016931.01.00.09.347140.557688
21.03.03.0-7.535701.0000001.016931.01.00.08.661560.423626
31.04.04.0-7.529561.0000001.016931.01.00.07.792340.422905
41.05.05.0-6.876641.0000001.016932.03.00.07.451030.190439
\n", - "
" - ], - "text/plain": [ - " chain__ iter__ draw__ lp__ accept_stat__ stepsize__ treedepth__ \\\n", - "0 1.0 1.0 1.0 -7.86503 0.766382 1.01693 1.0 \n", - "1 1.0 2.0 2.0 -9.09353 0.641941 1.01693 1.0 \n", - "2 1.0 3.0 3.0 -7.53570 1.000000 1.01693 1.0 \n", - "3 1.0 4.0 4.0 -7.52956 1.000000 1.01693 1.0 \n", - "4 1.0 5.0 5.0 -6.87664 1.000000 1.01693 2.0 \n", - "\n", - " n_leapfrog__ divergent__ energy__ theta \n", - "0 1.0 0.0 7.90921 0.458989 \n", - "1 1.0 0.0 9.34714 0.557688 \n", - "2 1.0 0.0 8.66156 0.423626 \n", - "3 1.0 0.0 7.79234 0.422905 \n", - "4 3.0 0.0 7.45103 0.190439 " - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "fit.draws_pd().head()" ] @@ -1472,23 +461,9 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "name: lp__, shape: (1000, 4)\n", - "name: accept_stat__, shape: (1000, 4)\n", - "name: stepsize__, shape: (1000, 4)\n", - "name: treedepth__, shape: (1000, 4)\n", - "name: n_leapfrog__, shape: (1000, 4)\n", - "name: divergent__, shape: (1000, 4)\n", - "name: energy__, shape: (1000, 4)\n" - ] - } - ], + "outputs": [], "source": [ "for k, v in fit.method_variables().items():\n", " print(f'name: {k}, shape: {v.shape}')" @@ -1503,24 +478,9 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "adapted step_size per chain\n", - "[1.01693 0.960321 0.964455 0.928652]\n", - "metric_type: diag_e\n", - "metric:\n", - "[[0.466677]\n", - " [0.518573]\n", - " [0.452713]\n", - " [0.554712]]\n" - ] - } - ], + "outputs": [], "source": [ "print(f'adapted step_size per chain\\n{fit.step_size}\\nmetric_type: {fit.metric_type}\\nmetric:\\n{fit.metric}')" ] @@ -1534,21 +494,9 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sample method variables:\n", - "dict_keys(['lp__', 'accept_stat__', 'stepsize__', 'treedepth__', 'n_leapfrog__', 'divergent__', 'energy__'])\n", - "\n", - "stan model variables:\n", - "dict_keys(['theta'])\n" - ] - } - ], + "outputs": [], "source": [ "print('sample method variables:\\n{}\\n'.format(fit.metadata.method_vars.keys()))\n", "print('stan model variables:\\n{}'.format(fit.metadata.stan_vars.keys()))" @@ -1582,35 +530,9 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "10:52:59 - cmdstanpy - INFO - compiling stan file /home/brian/Dev/py/cmdstanpy/docsrc/users-guide/examples/bernoulli.stan to exe file /home/brian/Dev/py/cmdstanpy/docsrc/users-guide/examples/bernoulli\n", - "10:55:23 - cmdstanpy - INFO - compiled model executable: /home/brian/Dev/py/cmdstanpy/docsrc/users-guide/examples/bernoulli\n" - ] - }, - { - "data": { - "text/plain": [ - "{'stan_version_major': '2',\n", - " 'stan_version_minor': '33',\n", - " 'stan_version_patch': '0',\n", - " 'STAN_THREADS': 'true',\n", - " 'STAN_MPI': 'false',\n", - " 'STAN_OPENCL': 'false',\n", - " 'STAN_NO_RANGE_CHECKS': 'false',\n", - " 'STAN_CPP_OPTIMS': 'false'}" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "model = CmdStanModel(stan_file='bernoulli.stan',\n", " cpp_options={'STAN_THREADS': 'TRUE'},\n", @@ -1641,94 +563,9 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "10:55:23 - cmdstanpy - INFO - CmdStan start processing\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9a8714ee97f74813867a9b5a8a566110", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 1 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "5dbeba7b79534e8f95a6c0baf16e4cec", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 2 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "cd68a79b291e4a449df712136bbe1e37", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 3 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2736078d35c44361b297d2e3f244dd77", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 4 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " " - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "10:55:23 - cmdstanpy - INFO - CmdStan done processing.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], + "outputs": [], "source": [ "fit = model.sample(data='bernoulli.data.json', parallel_chains=4)" ] @@ -1753,43 +590,9 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "functions {\n", - " real partial_sum(array[] int slice_n_redcards, int start, int end,\n", - " array[] int n_games, vector rating, vector beta) {\n", - " return binomial_logit_lpmf(slice_n_redcards | n_games[start : end], beta[1]\n", - " + beta[2]\n", - " * rating[start : end]);\n", - " }\n", - "}\n", - "data {\n", - " int N;\n", - " array[N] int n_redcards;\n", - " array[N] int n_games;\n", - " vector[N] rating;\n", - " int grainsize;\n", - "}\n", - "parameters {\n", - " vector[2] beta;\n", - "}\n", - "model {\n", - " beta[1] ~ normal(0, 10);\n", - " beta[2] ~ normal(0, 1);\n", - " \n", - " target += reduce_sum(partial_sum, n_redcards, grainsize, n_games, rating,\n", - " beta);\n", - "}\n", - "\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "with open('redcard_reduce_sum.stan', 'r') as fd:\n", " print(fd.read())" @@ -1804,35 +607,9 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "10:55:23 - cmdstanpy - INFO - compiling stan file /home/brian/Dev/py/cmdstanpy/docsrc/users-guide/examples/redcard_reduce_sum.stan to exe file /home/brian/Dev/py/cmdstanpy/docsrc/users-guide/examples/redcard_reduce_sum\n", - "10:55:45 - cmdstanpy - INFO - compiled model executable: /home/brian/Dev/py/cmdstanpy/docsrc/users-guide/examples/redcard_reduce_sum\n" - ] - }, - { - "data": { - "text/plain": [ - "{'stan_version_major': '2',\n", - " 'stan_version_minor': '33',\n", - " 'stan_version_patch': '0',\n", - " 'STAN_THREADS': 'true',\n", - " 'STAN_MPI': 'false',\n", - " 'STAN_OPENCL': 'false',\n", - " 'STAN_NO_RANGE_CHECKS': 'false',\n", - " 'STAN_CPP_OPTIMS': 'false'}" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "redcard_model = CmdStanModel(stan_file='redcard_reduce_sum.stan',\n", " cpp_options={'STAN_THREADS': 'TRUE'},\n", @@ -1849,94 +626,9 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "10:55:45 - cmdstanpy - INFO - CmdStan start processing\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "c54894e0923947f78ec0ef1221e8a7e5", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 1 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b86fbbff13624730adbb55be2f7fa0ec", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 2 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "621528cbedbf4679b0e4a5fb8f299bb6", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 3 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "47232e3b40b3484f9290574d4d828ec6", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "chain 4 | | 00:00 Status" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " " - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "10:56:58 - cmdstanpy - INFO - CmdStan done processing.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], + "outputs": [], "source": [ "redcard_fit = redcard_model.sample(data='redcard.json', threads_per_chain=4)" ] @@ -1954,20 +646,9 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'16'" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "os.environ['STAN_NUM_THREADS']" ] diff --git a/docsrc/users-guide/examples/Pathfinder.ipynb b/docsrc/users-guide/examples/Pathfinder.ipynb index 40ea005c..efbd48a6 100644 --- a/docsrc/users-guide/examples/Pathfinder.ipynb +++ b/docsrc/users-guide/examples/Pathfinder.ipynb @@ -65,6 +65,67 @@ "print(pathfinder)\n", "print(pathfinder.metadata)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `pathfinder` method returns a [CmdStanPathfinder](https://mc-stan.org/cmdstanpy/api.html#cmdstanpathfinder) object,\n", + "which provides access to the disparate information from the Stan CSV files.\n", + "\n", + "\n", + "- The [stan_variable](https://mc-stan.org/cmdstanpy/api.html#cmdstanpy.CmdStanPathfinder.stan_variable) and\n", + "[stan_variables](https://mc-stan.org/cmdstanpy/api.html#cmdstanpy.CmdStanPathfinder.stan_variables) methods \n", + "return a Python [numpy.ndarray](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html#numpy.ndarray)\n", + "containing all draws from the sample where the structure of each draw corresponds to the structure of the\n", + "Stan variable.\n", + "\n", + "- The [draws](https://mc-stan.org/cmdstanpy/api.html#cmdstanpy.CmdStanPathfinder.draws) method returns the sample as a numpy.ndarray." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pathfinder.stan_variable(\"theta\").shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pathfinder.column_names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pathfinder.draws().shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The method [create_inits](https://mc-stan.org/cmdstanpy/api.html#cmdstanpy.CmdStanPathfinder.create_inits) returns a Python Dict containing a set of per-chain initializations for the model parameters. Each set of initializations is a random draw from the Pathfinder sample." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "inits = pathfinder.create_inits()\n", + "print(inits)" + ] } ], "metadata": { diff --git a/docsrc/users-guide/examples/VI as Sampler Inits.ipynb b/docsrc/users-guide/examples/VI as Sampler Inits.ipynb index d77ca78e..2e2a2bca 100644 --- a/docsrc/users-guide/examples/VI as Sampler Inits.ipynb +++ b/docsrc/users-guide/examples/VI as Sampler Inits.ipynb @@ -42,10 +42,22 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Run Stan's variational inference algorithm, obtain fitted estimates\n", + "### Run Stan's `pathfinder` or `variational` algorithm, obtain fitted estimates\n", "\n", - "The `CmdStanModel` method `variational` runs CmdStan's ADVI algorithm.\n", - "Because this algorithm is unstable and may fail to converge, we run it with argument `require_converged` set to `False`. We also specify a seed, to avoid instabilities as well as for reproducibility." + "The [CmdStanModel pathfinder](https://mc-stan.org/cmdstanpy/api.html#cmdstanpy.CmdStanModel.pathfinder ) method\n", + "wraps the CmdStan [pathfinder ](https://mc-stan.org/docs/cmdstan-guide/pathfinder-config.html) method.\n", + "\n", + "Pathfinder locates normal approximations to the target\n", + "density along a quasi-Newton optimization path, with local covariance\n", + "estimated using the negative inverse Hessian estimates produced by the\n", + "LBFGS optimizer. Pathfinder returns draws from the Gaussian approximation\n", + "with the lowest estimated Kullback-Leibler (KL) divergence to the true\n", + "posterior.\n", + "By default, CmdStanPy runs multi-path Pathfinder which returns an importance-resampled set of draws over the outputs of 4 independent single-path Pathfinders.\n", + "This better matches non-normal target densities and also mitigates\n", + "the problem of L-BFGS getting stuck at local optima or in saddle points on plateaus.\n", + "\n", + "The method [create_inits](https://mc-stan.org/cmdstanpy/api.html#cmdstanpy.CmdStanPathfinder.create_inits) returns a Python Dict containing a set of per-chain initializations for the model parameters. Each set of initializations is a random draw from the Pathfinder sample." ] }, { @@ -54,18 +66,19 @@ "metadata": {}, "outputs": [], "source": [ - "vb_fit = model.variational(data=data_file, require_converged=False, seed=123)" + "pathfinder_fit = model.pathfinder(data=data_file, seed=123)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The ADVI algorithm provides estimates of all model parameters.\n", + "Posteriordb provides reference posteriors for all models. For the blr model, conditioned on the dataset `sblri.json`, the reference posteriors are in file [sblri-blr.json](https://github.com/stan-dev/posteriordb/blob/master/posterior_database/reference_posteriors/summary_statistics/mean/mean/sblri-blr.json)\n", "\n", - "The `variational` method returns a `CmdStanVB` object, with method `stan_variables`, which\n", - "returns the approximat posterior samples of all model parameters as a Python dictionary. \n", - "Here, we report the approximate posterior mean." + "The reference posteriors for all elements of `beta` and `sigma` are all very close to $1.0$.\n", + "\n", + "The experiments reported in Figure 3 of the paper [Pathfinder: Parallel quasi-Newton variational inference](https://arxiv.org/abs/2108.03782) by Zhang et al. show that Pathfinder provides a better estimate of the posterior, as measured by the 1-Wasserstein distance to the reference posterior, than 75 iterations of the warmup Phase I algorithm used by the NUTS-HMC sampler.\n", + "furthermore, Pathfinder is more computationally efficient, requiring fewer evaluations of the log density and gradient functions. Therefore, using the estimates from Pathfinder to initialize the parameter values for the NUTS-HMC sampler will allow the sampler to do a better job of adapting the stepsize and metric during warmup, resulting in better performance and estimation." ] }, { @@ -74,19 +87,35 @@ "metadata": {}, "outputs": [], "source": [ - "vb_mean = {var: samples.mean(axis=0) for var, samples in vb_fit.stan_variables().items()}\n", - "print(vb_mean)" + "pathfinder_inits = pathfinder_fit.create_inits()\n", + "print(pathfinder_inits)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mcmc_pathfinder_inits_fit = model.sample(\n", + " data=data_file, inits=pathfinder_inits, iter_warmup=75, seed=12345\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mcmc_pathfinder_inits_fit.summary()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Posteriordb provides reference posteriors for all models. For the blr model, conditioned on the dataset `sblri.json`, the reference posteriors are in file [sblri-blr.json](https://github.com/stan-dev/posteriordb/blob/master/posterior_database/reference_posteriors/summary_statistics/mean/mean/sblri-blr.json)\n", - "\n", - "The reference posteriors for all elements of `beta` and `sigma` are all very close to $1.0$.\n", - "\n", - "The experiments reported in the paper [Pathfinder: Parallel quasi-Newton variational inference](https://arxiv.org/abs/2108.03782) by Zhang et al. show that mean-field ADVI provides a better estimate of the posterior, as measured by the 1-Wasserstein distance to the reference posterior, than 75 iterations of the warmup Phase I algorithm used by the NUTS-HMC sampler, furthermore, ADVI is more computationally efficient, requiring fewer evaluations of the log density and gradient functions. Therefore, using the estimates from ADVI to initialize the parameter values for the NUTS-HMC sampler will allow the sampler to do a better job of adapting the stepsize and metric during warmup, resulting in better performance and estimation.\n" + "Using the default random parameter initializations, we need to run more warmup iterations. If we only run 75 warmup iterations with random inits, the result fails to estimate `sigma` correctly. It is necessary to run the model with at least 150 warmup iterations to produce a good set of estimates." ] }, { @@ -95,9 +124,7 @@ "metadata": {}, "outputs": [], "source": [ - "mcmc_vb_inits_fit = model.sample(\n", - " data=data_file, inits=vb_mean, iter_warmup=75, seed=12345\n", - ")" + "mcmc_random_inits_fit = model.sample(data=data_file, iter_warmup=75, seed=12345)" ] }, { @@ -106,14 +133,24 @@ "metadata": {}, "outputs": [], "source": [ - "mcmc_vb_inits_fit.summary()" + "mcmc_random_inits_fit.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(mcmc_random_inits_fit.diagnose())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The sampler estimates match the reference posterior." + "The `CmdStanModel` method `variational` runs CmdStan's ADVI algorithm.\n", + "Because this algorithm is unstable and may fail to converge, we run it with argument `require_converged` set to `False`. We also specify a seed, to avoid instabilities as well as for reproducibility." ] }, { @@ -122,14 +159,18 @@ "metadata": {}, "outputs": [], "source": [ - "print(mcmc_vb_inits_fit.diagnose())" + "vb_fit = model.variational(data=data_file, require_converged=False, seed=123)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Using the default random parameter initializations, we need to run more warmup iterations. If we only run 75 warmup iterations with random inits, the result fails to estimate `sigma` correctly. It is necessary to run the model with at least 150 warmup iterations to produce a good set of estimates." + "The ADVI algorithm provides estimates of all model parameters.\n", + "\n", + "The `variational` method returns a `CmdStanVB` object, with method `stan_variables`, which\n", + "returns the approximat posterior samples of all model parameters as a Python dictionary. \n", + "Here, we report the approximate posterior mean." ] }, { @@ -138,7 +179,8 @@ "metadata": {}, "outputs": [], "source": [ - "mcmc_random_inits_fit = model.sample(data=data_file, iter_warmup=75, seed=12345)" + "vb_mean = {var: samples.mean(axis=0) for var, samples in vb_fit.stan_variables().items()}\n", + "print(vb_mean)" ] }, { @@ -147,7 +189,9 @@ "metadata": {}, "outputs": [], "source": [ - "mcmc_random_inits_fit.summary()" + "mcmc_vb_inits_fit = model.sample(\n", + " data=data_file, inits=vb_mean, iter_warmup=75, seed=12345\n", + ")" ] }, { @@ -156,7 +200,23 @@ "metadata": {}, "outputs": [], "source": [ - "print(mcmc_random_inits_fit.diagnose())" + "mcmc_vb_inits_fit.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The sampler estimates match the reference posterior." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(mcmc_vb_inits_fit.diagnose())" ] } ], diff --git a/docsrc/users-guide/workflow.rst b/docsrc/users-guide/workflow.rst index 4171e7bd..1e934f6d 100644 --- a/docsrc/users-guide/workflow.rst +++ b/docsrc/users-guide/workflow.rst @@ -134,28 +134,22 @@ An example of each is provided in the `next section `__. It returns a :class:`CmdStanMCMC` object which contains a sample from the posterior distribution of the model conditioned on the data. -* The :meth:`~CmdStanModel.optimize` method runs one of - `Stan's optimization algorithms `__. - to find a mode of the density specified by the Stan program. - - It returns a :class:`CmdStanMLE` object. - -* The :meth:`~CmdStanModel.laplace` method runs Stan's - `Laplace approximation algorithm `__. - and returns a sample from a Laplace approximatation centered at the posterior mode found by optimization. - - It returns a :class:`CmdStanLaplace` object. - * The :meth:`~CmdStanModel.pathfinder` method runs Stan's `Pathfinder Variational Inference algorithm `__. - It returns a :class:`CmdStanPathfinder` object which contains a sample from a Guassian approximation to the posterior. + It returns a :class:`CmdStanPathfinder` object which contains + a sample from a Gaussian approximation the posterior distribution. * The :meth:`~CmdStanModel.variational` method runs Stan's `Automatic Differentiation Variational Inference (ADVI) algorithm `__. - It returns a :class:`CmdStanVB` object which contains - an approximation to the posterior distribution in the unconstrained variable space. + It returns a :class:`CmdStanVB` object which contains an approximation the posterior distribution. + +* The :meth:`~CmdStanModel.optimize` runs one of + `Stan's optimization algorithms `__. + to find a mode of the density specified by the Stan program. + + It returns a :class:`CmdStanMLE` object. * The :meth:`~CmdStanModel.generate_quantities` method runs Stan's `generate_quantities method `__. @@ -182,10 +176,9 @@ Output data The resulting Stan CSV file or set of files are assembled into an inference result object. + :class:`CmdStanMCMC` object contains the :meth:`~CmdStanModel.sample` outputs -+ :class:`CmdStanMLE` object contains the :meth:`~CmdStanModel.optimize` outputs -+ :class:`CmdStanLaplace` object contains the :meth:`~CmdStanModel.laplace` outputs + :class:`CmdStanPathfinder` object contains the :meth:`~CmdStanModel.pathfinder` outputs + :class:`CmdStanVB` object contains the :meth:`~CmdStanModel.variational` outputs ++ :class:`CmdStanMLE` object contains the :meth:`~CmdStanModel.optimize` outputs + :class:`CmdStanGQ` object contains the :meth:`~CmdStanModel.generate_quantities` outputs From 0de3448377d14a27db8d9e82052c210d12251243 Mon Sep 17 00:00:00 2001 From: Mitzi Morris Date: Fri, 22 Sep 2023 19:43:45 -0400 Subject: [PATCH 4/6] code and docs cleanup --- cmdstanpy/model.py | 4 +- cmdstanpy/stanfit/gq.py | 4 + cmdstanpy/stanfit/laplace.py | 6 +- cmdstanpy/stanfit/mcmc.py | 4 + cmdstanpy/stanfit/mle.py | 4 + cmdstanpy/stanfit/pathfinder.py | 6 +- cmdstanpy/stanfit/vb.py | 4 + docsrc/api.rst | 3 +- docsrc/changes.rst | 2 +- docsrc/community.rst | 12 +- docsrc/index.rst | 2 +- docsrc/installation.rst | 26 +- docsrc/internal_api.rst | 2 +- .../users-guide/examples/MCMC Sampling.ipynb | 1102 ++++++++++++++++- docsrc/users-guide/hello_world.rst | 1 + docsrc/users-guide/overview.rst | 3 +- docsrc/users-guide/workflow.rst | 5 +- 17 files changed, 1122 insertions(+), 68 deletions(-) diff --git a/cmdstanpy/model.py b/cmdstanpy/model.py index 2380b1b2..c7454a44 100644 --- a/cmdstanpy/model.py +++ b/cmdstanpy/model.py @@ -1882,8 +1882,8 @@ def laplace_sample( either as a dictionary with entries matching the data variables, or as the path of a data file in JSON or Rdump format. - :param mode: The mode around which to place the approximation. - This can be: + :param mode: The mode around which to place the approximation, either + * A :class:`CmdStanMLE` object * A path to a CSV file containing the output of an optimization run. * ``None`` - use default optimizer settings and/or any ``opt_args``. diff --git a/cmdstanpy/stanfit/gq.py b/cmdstanpy/stanfit/gq.py index 7bde1d6a..1997dbc3 100644 --- a/cmdstanpy/stanfit/gq.py +++ b/cmdstanpy/stanfit/gq.py @@ -566,7 +566,9 @@ def stan_variable( CmdStanGQ.stan_variables CmdStanMCMC.stan_variable CmdStanMLE.stan_variable + CmdStanPathfinder.stan_variable CmdStanVB.stan_variable + CmdStanLaplace.stan_variable """ model_var_names = self.previous_fit._metadata.stan_vars.keys() gq_var_names = self._metadata.stan_vars.keys() @@ -613,7 +615,9 @@ def stan_variables(self, inc_warmup: bool = False) -> Dict[str, np.ndarray]: CmdStanGQ.stan_variable CmdStanMCMC.stan_variables CmdStanMLE.stan_variables + CmdStanPathfinder.stan_variables CmdStanVB.stan_variables + CmdStanLaplace.stan_variables """ result = {} sample_var_names = self.previous_fit._metadata.stan_vars.keys() diff --git a/cmdstanpy/stanfit/laplace.py b/cmdstanpy/stanfit/laplace.py index ce56eac1..741593e7 100644 --- a/cmdstanpy/stanfit/laplace.py +++ b/cmdstanpy/stanfit/laplace.py @@ -82,6 +82,7 @@ def stan_variable(self, var: str) -> np.ndarray: -------- CmdStanMLE.stan_variables CmdStanMCMC.stan_variable + CmdStanPathfinder.stan_variable CmdStanVB.stan_variable CmdStanGQ.stan_variable """ @@ -113,6 +114,7 @@ def stan_variables(self) -> Dict[str, np.ndarray]: CmdStanGQ.stan_variable CmdStanMCMC.stan_variables CmdStanMLE.stan_variables + CmdStanPathfinder.stan_variables CmdStanVB.stan_variables """ result = {} @@ -235,7 +237,9 @@ def mode(self) -> CmdStanMLE: @property def metadata(self) -> InferenceMetadata: """ - Return the inference metadata as an :class:`InferenceMetadata` object. + Returns object which contains CmdStan configuration as well as + information about the names and structure of the inference method + and model output variables. """ return self._metadata diff --git a/cmdstanpy/stanfit/mcmc.py b/cmdstanpy/stanfit/mcmc.py index 73f3062b..53fb4de1 100644 --- a/cmdstanpy/stanfit/mcmc.py +++ b/cmdstanpy/stanfit/mcmc.py @@ -748,8 +748,10 @@ def stan_variable( -------- CmdStanMCMC.stan_variables CmdStanMLE.stan_variable + CmdStanPathfinder.stan_variable CmdStanVB.stan_variable CmdStanGQ.stan_variable + CmdStanLaplace.stan_variable """ try: draws = self.draws(inc_warmup=inc_warmup, concat_chains=True) @@ -774,8 +776,10 @@ def stan_variables(self) -> Dict[str, np.ndarray]: -------- CmdStanMCMC.stan_variable CmdStanMLE.stan_variables + CmdStanPathfinder.stan_variables CmdStanVB.stan_variables CmdStanGQ.stan_variables + CmdStanLaplace.stan_variables """ result = {} for name in self._metadata.stan_vars: diff --git a/cmdstanpy/stanfit/mle.py b/cmdstanpy/stanfit/mle.py index b8905324..0cc6895b 100644 --- a/cmdstanpy/stanfit/mle.py +++ b/cmdstanpy/stanfit/mle.py @@ -189,8 +189,10 @@ def stan_variable( -------- CmdStanMLE.stan_variables CmdStanMCMC.stan_variable + CmdStanPathfinder.stan_variable CmdStanVB.stan_variable CmdStanGQ.stan_variable + CmdStanLaplace.stan_variable """ if var not in self._metadata.stan_vars: raise ValueError( @@ -241,8 +243,10 @@ def stan_variables( -------- CmdStanMLE.stan_variable CmdStanMCMC.stan_variables + CmdStanPathfinder.stan_variables CmdStanVB.stan_variables CmdStanGQ.stan_variables + CmdStanLaplace.stan_variables """ if not self.runset._check_retcodes(): get_logger().warning( diff --git a/cmdstanpy/stanfit/pathfinder.py b/cmdstanpy/stanfit/pathfinder.py index a9c802d3..8c7d867c 100644 --- a/cmdstanpy/stanfit/pathfinder.py +++ b/cmdstanpy/stanfit/pathfinder.py @@ -106,7 +106,8 @@ def stan_variable(self, var: str) -> np.ndarray: See Also -------- - CmdStanMLE.stan_variables + CmdStanPathfinder.stan_variables + CmdStanMLE.stan_variable CmdStanMCMC.stan_variable CmdStanVB.stan_variable CmdStanGQ.stan_variable @@ -133,10 +134,11 @@ def stan_variables(self) -> Dict[str, np.ndarray]: See Also -------- - CmdStanGQ.stan_variable + CmdStanPathfinder.stan_variable CmdStanMCMC.stan_variables CmdStanMLE.stan_variables CmdStanVB.stan_variables + CmdStanGQ.stan_variables CmdStanLaplace.stan_variables """ result = {} diff --git a/cmdstanpy/stanfit/vb.py b/cmdstanpy/stanfit/vb.py index b2daccd3..3eae7d32 100644 --- a/cmdstanpy/stanfit/vb.py +++ b/cmdstanpy/stanfit/vb.py @@ -140,7 +140,9 @@ def stan_variable(self, var: str) -> np.ndarray: CmdStanVB.stan_variables CmdStanMCMC.stan_variable CmdStanMLE.stan_variable + CmdStanPathfinder.stan_variable CmdStanGQ.stan_variable + CmdStanLaplace.stan_variable """ try: out: np.ndarray = self._metadata.stan_vars[var].extract_reshape( @@ -166,6 +168,8 @@ def stan_variables(self) -> Dict[str, np.ndarray]: CmdStanMCMC.stan_variables CmdStanMLE.stan_variables CmdStanGQ.stan_variables + CmdStanPathfinder.stan_variables + CmdStanLaplace.stan_variables """ result = {} for name in self._metadata.stan_vars: diff --git a/docsrc/api.rst b/docsrc/api.rst index b6412857..5ec5867c 100644 --- a/docsrc/api.rst +++ b/docsrc/api.rst @@ -6,7 +6,8 @@ API Reference The following documents the public API of CmdStanPy. It is expected to be stable between versions, with backwards compatibility between minor versions and deprecation warnings preceding breaking changes. -There is also the `internal API `__, which is makes no such guarantees. +The documentation for the `internal API `_ is also provided, but the internal API +does not guarantee either stability and backwards compatibility. .. toctree:: :hidden: diff --git a/docsrc/changes.rst b/docsrc/changes.rst index ca118109..c5dce4f7 100644 --- a/docsrc/changes.rst +++ b/docsrc/changes.rst @@ -5,7 +5,7 @@ What's New ========== -For full changes, see the `Releases page `__ on GitHub. +For full changes, see the `Releases page `_ on GitHub. CmdStanPy 1.1.0 diff --git a/docsrc/community.rst b/docsrc/community.rst index f766ca3b..82ef9373 100644 --- a/docsrc/community.rst +++ b/docsrc/community.rst @@ -9,26 +9,26 @@ Project templates Templates are a great way to piggy back on other users' work, saving you time when you start a new project. -- `cookiecutter-cmdstanpy-analysis `__ +- `cookiecutter-cmdstanpy-analysis `_ A cookiecutter template for cmdstanpy-based statistical analysis projects. -- `cookiecutter-cmdstanpy-wrapper `__ +- `cookiecutter-cmdstanpy-wrapper `_ A cookiecutter template using Stan models in Python packages, including the ability to pre-compile the model as part of the package distribution. Software -------- -- `Prophet `__ A procedure for forecasting +- `Prophet `_ A procedure for forecasting time series data based on an additive model where non-linear trends are fit with yearly, weekly, and daily seasonality, plus holiday effects. -- `ArviZ `__ A Python package (with a `Julia - interface `__) for exploratory analysis of +- `ArviZ `__ A Python package (with a + `Julia interface `_) for exploratory analysis of Bayesian models. Includes functions for posterior analysis, data storage, model checking, comparison and diagnostics. -- `BridgeStan `__ A project which provides efficient +- `BridgeStan `_ A project which provides efficient in-memory access through Python, Julia, and R to the methods of a Stan model, including log densities, gradients, Hessians, and constraining and unconstraining transforms. diff --git a/docsrc/index.rst b/docsrc/index.rst index 886359c8..caeafd9b 100644 --- a/docsrc/index.rst +++ b/docsrc/index.rst @@ -16,7 +16,7 @@ CmdStanPy is a lightweight interface to Stan for Python users which provides the necessary objects and functions to do Bayesian inference given a probability model and data. It wraps the -`CmdStan `__ +`CmdStan `_ command line interface in a small set of Python classes which provide methods to do analysis and manage the resulting set of model, data, and posterior estimates. diff --git a/docsrc/installation.rst b/docsrc/installation.rst index 2f6ed60a..17ab059c 100644 --- a/docsrc/installation.rst +++ b/docsrc/installation.rst @@ -9,11 +9,11 @@ There are several ways to install CmdStanPy and the underlying CmdStan component * You can download CmdStanPy, CmdStan, and the C++ toolchain from conda-forge. -* You can download the CmdStanPy package from `PyPI `__ - using `pip `__. +* You can download the CmdStanPy package from `PyPI `_ + using `pip `_. * If you want the current development version, you can clone the - GitHub `CmdStanPy `__ repository. + GitHub `CmdStanPy `_ repository. If you install CmdStanPy from PyPI or GitHub you will need to install CmdStan as well, see section :ref:`CmdStan Installation ` below. @@ -22,9 +22,9 @@ install CmdStan as well, see section :ref:`CmdStan Installation `__, +If you use `conda `_, you can install CmdStanPy and the underlying CmdStan components from the -`conda-forge `__ repository +`conda-forge `_ repository via the following command: @@ -60,7 +60,7 @@ If you require a specific release of CmdStan, CmdStan versions ``cmdstan==VERSION`` in the install command. Versions before 2.26.1 are not available from conda but can be downloaded from the CmdStan -`releases `__ page. +`releases `_ page. A Conda environment is a directory that contains a specific collection of Conda packages. To see the locations of your conda environments, use the command @@ -120,12 +120,12 @@ To install the current develop branch from GitHub: **Jupyter notebook users:** If you intend to run CmdStanPy from within a Jupyter notebook, you may need to install the - `ipywidgets `__. - This will allow for progress bars implemented using the `tqdm `__ + `ipywidgets `_. + This will allow for progress bars implemented using the `tqdm `_ to display properly in the browser. For further help on Jupyter notebook installation and configuration , see - `ipywidgets installation instructions `__ - and `this tqdm GitHub issue `__. + `ipywidgets installation instructions `_ + and `this tqdm GitHub issue `_. .. _cmdstan-install: @@ -156,7 +156,7 @@ There is usually a pre-installed C++ compiler as well, but not necessarily new e **MacOS** The Xcode and Xcode command line tools must be installed. Xcode is available for free from the Mac App Store. To install the Xcode command line tools, run the shell command: ``xcode-select --install``. -**Windows** We recommend using the `RTools 4.0 `__ toolchain +**Windows** We recommend using the `RTools 4.0 `_ toolchain which contains a ``g++ 8`` compiler and ``Mingw``, the native Windows equivalent of the GNU-Make utility. This can be installed along with CmdStan when you invoke the function :meth:`cmdstanpy.install_cmdstan` with argument ``compiler=True``. @@ -206,7 +206,7 @@ can be used to override these defaults: .. code-block:: bash - install_cmdstan -d my_local_cmdstan -v 2.27.0 + install_cmdstan -d my_local_cmdstan -v 2.33.0 ls -F my_local_cmdstan Alternate Linux Architectures @@ -224,7 +224,7 @@ DIY Installation ^^^^^^^^^^^^^^^^ If you with to install CmdStan yourself, follow the instructions -in the `CmdStan User's Guide `__. +in the `CmdStan User's Guide `_. Locating the CmdStan installation directory ------------------------------------------- diff --git a/docsrc/internal_api.rst b/docsrc/internal_api.rst index 9747f9b9..20e63582 100644 --- a/docsrc/internal_api.rst +++ b/docsrc/internal_api.rst @@ -6,7 +6,7 @@ Internal API Reference The following documents the internal API of CmdStanPy. No guarantees are made about backwards compatibility between minor versions and refactors are expected. If you find yourself needing something exposed here, please -`open an issue `__ requesting it be added to the `public API `__. +`open an issue `_ requesting it be added to the public `API `_. ******* Classes diff --git a/docsrc/users-guide/examples/MCMC Sampling.ipynb b/docsrc/users-guide/examples/MCMC Sampling.ipynb index 020be6a8..4808a441 100644 --- a/docsrc/users-guide/examples/MCMC Sampling.ipynb +++ b/docsrc/users-guide/examples/MCMC Sampling.ipynb @@ -78,9 +78,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "usage: jupyter [-h] [--version] [--config-dir] [--data-dir] [--runtime-dir]\n", + " [--paths] [--json] [--debug]\n", + " [subcommand]\n", + "\n", + "Jupyter: Interactive Computing\n", + "\n", + "positional arguments:\n", + " subcommand the subcommand to launch\n", + "\n", + "optional arguments:\n", + " -h, --help show this help message and exit\n", + " --version show the versions of core jupyter packages and exit\n", + " --config-dir show Jupyter config dir\n", + " --data-dir show Jupyter data dir\n", + " --runtime-dir show Jupyter runtime dir\n", + " --paths show all Jupyter paths. Add --json for machine-readable\n", + " format.\n", + " --json output paths as machine-readable json\n", + " --debug output debug information about paths\n", + "\n", + "Available subcommands: dejavu events execute kernel kernelspec lab\n", + "labextension labhub migrate nbconvert run server troubleshoot trust\n", + "\n", + "Jupyter command `jupyter-nbextension` not found.\n" + ] + } + ], "source": [ "!jupyter nbextension enable --py widgetsnbextension" ] @@ -113,7 +144,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -135,9 +166,94 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "17:37:25 - cmdstanpy - INFO - CmdStan start processing\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f0d740bc35774bb59e63a11e01290851", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 1 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "bf042311b4fb4994a4357832bcd2bf1d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 2 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "62f7533de2e644a38a35e933bdfcd9e3", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 3 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f6b35c2c78c24aee93d088a1c956dbcc", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 4 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "17:37:25 - cmdstanpy - INFO - CmdStan done processing.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], "source": [ "# run CmdStan's sample method, returns object `CmdStanMCMC`\n", "fit = model.sample(data='bernoulli.data.json')" @@ -155,20 +271,53 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "CmdStanMCMC: model=bernoulli chains=4['method=sample', 'algorithm=hmc', 'adapt', 'engaged=1']\n", + " csv_files:\n", + "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/bernoullicweszbso/bernoulli-20230922173725_1.csv\n", + "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/bernoullicweszbso/bernoulli-20230922173725_2.csv\n", + "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/bernoullicweszbso/bernoulli-20230922173725_3.csv\n", + "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/bernoullicweszbso/bernoulli-20230922173725_4.csv\n", + " output_files:\n", + "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/bernoullicweszbso/bernoulli-20230922173725_0-stdout.txt\n", + "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/bernoullicweszbso/bernoulli-20230922173725_1-stdout.txt\n", + "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/bernoullicweszbso/bernoulli-20230922173725_2-stdout.txt\n", + "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/bernoullicweszbso/bernoulli-20230922173725_3-stdout.txt" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "fit" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "draws as array: (1000, 4, 8)\n", + "draws as structured object:\n", + "\tdict_keys(['theta'])\n", + "sampler diagnostics:\n", + "\tdict_keys(['lp__', 'accept_stat__', 'stepsize__', 'treedepth__', 'n_leapfrog__', 'divergent__', 'energy__'])\n" + ] + } + ], "source": [ "print(f'draws as array: {fit.draws().shape}')\n", "print(f'draws as structured object:\\n\\t{fit.stan_variables().keys()}')\n", @@ -192,9 +341,94 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "17:37:27 - cmdstanpy - INFO - CmdStan start processing\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ee12fa3bf16845ba8cfa3a376ee328a2", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 1 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "503c539d798e477695a57f0c934ba0b1", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 2 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "90a432b9392a46a5ae57ea38606acb2f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 3 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "bdd07b318c4246148c233ae31ce0f0c7", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 4 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "17:37:29 - cmdstanpy - INFO - CmdStan done processing.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], "source": [ "fit = model.sample(data='bernoulli.data.json', iter_warmup=100000, iter_sampling=100000, show_progress=True)\n" ] @@ -210,11 +444,174 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "17:37:31 - cmdstanpy - INFO - Chain [1] start processing\n", + "17:37:31 - cmdstanpy - INFO - Chain [1] done processing\n", + "17:37:31 - cmdstanpy - INFO - Chain [2] start processing\n", + "17:37:31 - cmdstanpy - INFO - Chain [2] done processing\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Chain [1] method = sample (Default)\n", + "Chain [1] sample\n", + "Chain [1] num_samples = 1000 (Default)\n", + "Chain [1] num_warmup = 1000 (Default)\n", + "Chain [1] save_warmup = 0 (Default)\n", + "Chain [1] thin = 1 (Default)\n", + "Chain [1] adapt\n", + "Chain [1] engaged = 1 (Default)\n", + "Chain [1] gamma = 0.050000000000000003 (Default)\n", + "Chain [1] delta = 0.80000000000000004 (Default)\n", + "Chain [1] kappa = 0.75 (Default)\n", + "Chain [1] t0 = 10 (Default)\n", + "Chain [1] init_buffer = 75 (Default)\n", + "Chain [1] term_buffer = 50 (Default)\n", + "Chain [1] window = 25 (Default)\n", + "Chain [1] algorithm = hmc (Default)\n", + "Chain [1] hmc\n", + "Chain [1] engine = nuts (Default)\n", + "Chain [1] nuts\n", + "Chain [1] max_depth = 10 (Default)\n", + "Chain [1] metric = diag_e (Default)\n", + "Chain [1] metric_file = (Default)\n", + "Chain [1] stepsize = 1 (Default)\n", + "Chain [1] stepsize_jitter = 0 (Default)\n", + "Chain [1] num_chains = 1 (Default)\n", + "Chain [1] id = 1 (Default)\n", + "Chain [1] data\n", + "Chain [1] file = bernoulli.data.json\n", + "Chain [1] init = 2 (Default)\n", + "Chain [1] random\n", + "Chain [1] seed = 75498\n", + "Chain [1] output\n", + "Chain [1] file = /var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/bernoulliffqp_qzu/bernoulli-20230922173731_1.csv\n", + "Chain [1] diagnostic_file = (Default)\n", + "Chain [1] refresh = 100 (Default)\n", + "Chain [1] sig_figs = -1 (Default)\n", + "Chain [1] profile_file = profile.csv (Default)\n", + "Chain [1] num_threads = 1 (Default)\n", + "Chain [1] \n", + "Chain [1] \n", + "Chain [1] Gradient evaluation took 6e-06 seconds\n", + "Chain [1] 1000 transitions using 10 leapfrog steps per transition would take 0.06 seconds.\n", + "Chain [1] Adjust your expectations accordingly!\n", + "Chain [1] \n", + "Chain [1] \n", + "Chain [1] Iteration: 1 / 2000 [ 0%] (Warmup)\n", + "Chain [1] Iteration: 100 / 2000 [ 5%] (Warmup)\n", + "Chain [1] Iteration: 200 / 2000 [ 10%] (Warmup)\n", + "Chain [1] Iteration: 300 / 2000 [ 15%] (Warmup)\n", + "Chain [1] Iteration: 400 / 2000 [ 20%] (Warmup)\n", + "Chain [1] Iteration: 500 / 2000 [ 25%] (Warmup)\n", + "Chain [1] Iteration: 600 / 2000 [ 30%] (Warmup)\n", + "Chain [1] Iteration: 700 / 2000 [ 35%] (Warmup)\n", + "Chain [1] Iteration: 800 / 2000 [ 40%] (Warmup)\n", + "Chain [1] Iteration: 900 / 2000 [ 45%] (Warmup)\n", + "Chain [1] Iteration: 1000 / 2000 [ 50%] (Warmup)\n", + "Chain [1] Iteration: 1001 / 2000 [ 50%] (Sampling)\n", + "Chain [1] Iteration: 1100 / 2000 [ 55%] (Sampling)\n", + "Chain [1] Iteration: 1200 / 2000 [ 60%] (Sampling)\n", + "Chain [1] Iteration: 1300 / 2000 [ 65%] (Sampling)\n", + "Chain [1] Iteration: 1400 / 2000 [ 70%] (Sampling)\n", + "Chain [1] Iteration: 1500 / 2000 [ 75%] (Sampling)\n", + "Chain [1] Iteration: 1600 / 2000 [ 80%] (Sampling)\n", + "Chain [1] Iteration: 1700 / 2000 [ 85%] (Sampling)\n", + "Chain [1] Iteration: 1800 / 2000 [ 90%] (Sampling)\n", + "Chain [1] Iteration: 1900 / 2000 [ 95%] (Sampling)\n", + "Chain [1] Iteration: 2000 / 2000 [100%] (Sampling)\n", + "Chain [1] \n", + "Chain [1] Elapsed Time: 0.004 seconds (Warm-up)\n", + "Chain [1] 0.012 seconds (Sampling)\n", + "Chain [1] 0.016 seconds (Total)\n", + "Chain [1] \n", + "Chain [1] \n", + "Chain [2] method = sample (Default)\n", + "Chain [2] sample\n", + "Chain [2] num_samples = 1000 (Default)\n", + "Chain [2] num_warmup = 1000 (Default)\n", + "Chain [2] save_warmup = 0 (Default)\n", + "Chain [2] thin = 1 (Default)\n", + "Chain [2] adapt\n", + "Chain [2] engaged = 1 (Default)\n", + "Chain [2] gamma = 0.050000000000000003 (Default)\n", + "Chain [2] delta = 0.80000000000000004 (Default)\n", + "Chain [2] kappa = 0.75 (Default)\n", + "Chain [2] t0 = 10 (Default)\n", + "Chain [2] init_buffer = 75 (Default)\n", + "Chain [2] term_buffer = 50 (Default)\n", + "Chain [2] window = 25 (Default)\n", + "Chain [2] algorithm = hmc (Default)\n", + "Chain [2] hmc\n", + "Chain [2] engine = nuts (Default)\n", + "Chain [2] nuts\n", + "Chain [2] max_depth = 10 (Default)\n", + "Chain [2] metric = diag_e (Default)\n", + "Chain [2] metric_file = (Default)\n", + "Chain [2] stepsize = 1 (Default)\n", + "Chain [2] stepsize_jitter = 0 (Default)\n", + "Chain [2] num_chains = 1 (Default)\n", + "Chain [2] id = 2\n", + "Chain [2] data\n", + "Chain [2] file = bernoulli.data.json\n", + "Chain [2] init = 2 (Default)\n", + "Chain [2] random\n", + "Chain [2] seed = 75498\n", + "Chain [2] output\n", + "Chain [2] file = /var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/bernoulliffqp_qzu/bernoulli-20230922173731_2.csv\n", + "Chain [2] diagnostic_file = (Default)\n", + "Chain [2] refresh = 100 (Default)\n", + "Chain [2] sig_figs = -1 (Default)\n", + "Chain [2] profile_file = profile.csv (Default)\n", + "Chain [2] num_threads = 1 (Default)\n", + "Chain [2] \n", + "Chain [2] \n", + "Chain [2] Gradient evaluation took 6e-06 seconds\n", + "Chain [2] 1000 transitions using 10 leapfrog steps per transition would take 0.06 seconds.\n", + "Chain [2] Adjust your expectations accordingly!\n", + "Chain [2] \n", + "Chain [2] \n", + "Chain [2] Iteration: 1 / 2000 [ 0%] (Warmup)\n", + "Chain [2] Iteration: 100 / 2000 [ 5%] (Warmup)\n", + "Chain [2] Iteration: 200 / 2000 [ 10%] (Warmup)\n", + "Chain [2] Iteration: 300 / 2000 [ 15%] (Warmup)\n", + "Chain [2] Iteration: 400 / 2000 [ 20%] (Warmup)\n", + "Chain [2] Iteration: 500 / 2000 [ 25%] (Warmup)\n", + "Chain [2] Iteration: 600 / 2000 [ 30%] (Warmup)\n", + "Chain [2] Iteration: 700 / 2000 [ 35%] (Warmup)\n", + "Chain [2] Iteration: 800 / 2000 [ 40%] (Warmup)\n", + "Chain [2] Iteration: 900 / 2000 [ 45%] (Warmup)\n", + "Chain [2] Iteration: 1000 / 2000 [ 50%] (Warmup)\n", + "Chain [2] Iteration: 1001 / 2000 [ 50%] (Sampling)\n", + "Chain [2] Iteration: 1100 / 2000 [ 55%] (Sampling)\n", + "Chain [2] Iteration: 1200 / 2000 [ 60%] (Sampling)\n", + "Chain [2] Iteration: 1300 / 2000 [ 65%] (Sampling)\n", + "Chain [2] Iteration: 1400 / 2000 [ 70%] (Sampling)\n", + "Chain [2] Iteration: 1500 / 2000 [ 75%] (Sampling)\n", + "Chain [2] Iteration: 1600 / 2000 [ 80%] (Sampling)\n", + "Chain [2] Iteration: 1700 / 2000 [ 85%] (Sampling)\n", + "Chain [2] Iteration: 1800 / 2000 [ 90%] (Sampling)\n", + "Chain [2] Iteration: 1900 / 2000 [ 95%] (Sampling)\n", + "Chain [2] Iteration: 2000 / 2000 [100%] (Sampling)\n", + "Chain [2] \n", + "Chain [2] Elapsed Time: 0.004 seconds (Warm-up)\n", + "Chain [2] 0.012 seconds (Sampling)\n", + "Chain [2] 0.016 seconds (Total)\n", + "Chain [2] \n", + "Chain [2] \n" + ] + } + ], "source": [ "fit = model.sample(data='bernoulli.data.json', chains=2, parallel_chains=1, show_console=True)\n", "\n" @@ -257,9 +654,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "data {\n", + " int J; // number of schools\n", + " array[J] real y; // estimated treatment effect (school j)\n", + " array[J] real sigma; // std err of effect estimate (school j)\n", + "}\n", + "parameters {\n", + " real mu;\n", + " array[J] real theta;\n", + " real tau;\n", + "}\n", + "model {\n", + " theta ~ normal(mu, tau);\n", + " y ~ normal(theta, sigma);\n", + "}\n", + "\n", + "\n" + ] + } + ], "source": [ "with open('eight_schools.stan', 'r') as fd:\n", " print(fd.read())" @@ -274,9 +694,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"J\" : 8,\n", + " \"y\" : [28,8,-3,7,-1,1,18,12],\n", + " \"sigma\" : [15,10,16,11,9,11,10,18],\n", + " \"tau\" : 25\n", + "}\n", + "\n" + ] + } + ], "source": [ "with open('eight_schools.data.json', 'r') as fd:\n", " print(fd.read())" @@ -300,9 +734,103 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "17:37:31 - cmdstanpy - INFO - compiling stan file /Users/mitzi/github/stan-dev/cmdstanpy/docsrc/users-guide/examples/eight_schools.stan to exe file /Users/mitzi/github/stan-dev/cmdstanpy/docsrc/users-guide/examples/eight_schools\n", + "17:37:40 - cmdstanpy - INFO - compiled model executable: /Users/mitzi/github/stan-dev/cmdstanpy/docsrc/users-guide/examples/eight_schools\n", + "17:37:40 - cmdstanpy - INFO - CmdStan start processing\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "72f46b1ff85d4b0685cbc6379f655aa1", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 1 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e48bd901ace04b69a006a362cdaba14d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 2 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "31e4d838ecd04603b39ae9de97371bdd", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 3 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7716552fa3a943619a6be38a50d0a064", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 4 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "17:37:41 - cmdstanpy - INFO - CmdStan done processing.\n", + "17:37:41 - cmdstanpy - WARNING - Some chains may have failed to converge.\n", + "\tChain 1 had 10 divergent transitions (1.0%)\n", + "\tChain 2 had 143 divergent transitions (14.3%)\n", + "\tChain 3 had 5 divergent transitions (0.5%)\n", + "\tChain 4 had 4 divergent transitions (0.4%)\n", + "\tChain 4 had 6 iterations at max treedepth (0.6%)\n", + "\tUse the \"diagnose()\" method on the CmdStanMCMC object to see further information.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], "source": [ "eight_schools_model = CmdStanModel(stan_file='eight_schools.stan')\n", "eight_schools_fit = eight_schools_model.sample(data='eight_schools.data.json', seed=55157)" @@ -319,9 +847,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "divergences:\n", + "[ 10 143 5 4]\n", + "iterations at max_treedepth:\n", + "[0 0 0 6]\n" + ] + } + ], "source": [ "print(f'divergences:\\n{eight_schools_fit.divergences}\\niterations at max_treedepth:\\n{eight_schools_fit.max_treedepths}')" ] @@ -337,11 +876,213 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MeanMCSEStdDev5%50%95%N_EffN_Eff/sR_hat
lp__-16.475101.3938307.33884-26.342300-17.71920-5.8221427.7228040.178001.12057
mu7.365800.3584415.36980-0.0901576.9870816.39460224.43000325.261001.03328
theta[1]11.020400.6939698.596680.3989029.6709427.12670153.45500222.398001.03967
theta[2]7.445470.2229706.32424-2.1931706.7745818.33640804.499001165.940001.01501
theta[3]5.468140.2301467.65499-7.4367105.2009417.526001106.330001603.370001.00842
theta[4]7.072080.2576626.80149-3.2966206.5840118.50130696.798001009.850001.01756
theta[5]4.596640.1932926.13195-6.0676504.0453714.533601006.390001458.540001.00407
theta[6]5.693260.1990796.77053-5.4029005.3026516.486801156.630001676.270001.00729
theta[7]10.049500.6281187.100790.6752469.1740223.36110127.80000185.218001.03588
theta[8]7.840090.2915268.04207-4.0164207.0073121.58680760.994001102.890001.01463
tau6.603330.5573605.774921.0198505.2141917.68580107.35287155.583871.04858
\n", + "
" + ], + "text/plain": [ + " Mean MCSE StdDev 5% 50% 95% \\\n", + "lp__ -16.47510 1.393830 7.33884 -26.342300 -17.71920 -5.82214 \n", + "mu 7.36580 0.358441 5.36980 -0.090157 6.98708 16.39460 \n", + "theta[1] 11.02040 0.693969 8.59668 0.398902 9.67094 27.12670 \n", + "theta[2] 7.44547 0.222970 6.32424 -2.193170 6.77458 18.33640 \n", + "theta[3] 5.46814 0.230146 7.65499 -7.436710 5.20094 17.52600 \n", + "theta[4] 7.07208 0.257662 6.80149 -3.296620 6.58401 18.50130 \n", + "theta[5] 4.59664 0.193292 6.13195 -6.067650 4.04537 14.53360 \n", + "theta[6] 5.69326 0.199079 6.77053 -5.402900 5.30265 16.48680 \n", + "theta[7] 10.04950 0.628118 7.10079 0.675246 9.17402 23.36110 \n", + "theta[8] 7.84009 0.291526 8.04207 -4.016420 7.00731 21.58680 \n", + "tau 6.60333 0.557360 5.77492 1.019850 5.21419 17.68580 \n", + "\n", + " N_Eff N_Eff/s R_hat \n", + "lp__ 27.72280 40.17800 1.12057 \n", + "mu 224.43000 325.26100 1.03328 \n", + "theta[1] 153.45500 222.39800 1.03967 \n", + "theta[2] 804.49900 1165.94000 1.01501 \n", + "theta[3] 1106.33000 1603.37000 1.00842 \n", + "theta[4] 696.79800 1009.85000 1.01756 \n", + "theta[5] 1006.39000 1458.54000 1.00407 \n", + "theta[6] 1156.63000 1676.27000 1.00729 \n", + "theta[7] 127.80000 185.21800 1.03588 \n", + "theta[8] 760.99400 1102.89000 1.01463 \n", + "tau 107.35287 155.58387 1.04858 " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "eight_schools_fit.summary()" ] @@ -357,9 +1098,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing csv files: /var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/eight_schools_svuyncu/eight_schools-20230922173740_1.csv, /var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/eight_schools_svuyncu/eight_schools-20230922173740_2.csv, /var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/eight_schools_svuyncu/eight_schools-20230922173740_3.csv, /var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/eight_schools_svuyncu/eight_schools-20230922173740_4.csv\n", + "\n", + "Checking sampler transitions treedepth.\n", + "6 of 4000 (0.15%) transitions hit the maximum treedepth limit of 10, or 2^10 leapfrog steps.\n", + "Trajectories that are prematurely terminated due to this limit will result in slow exploration.\n", + "For optimal performance, increase this limit.\n", + "\n", + "Checking sampler transitions for divergences.\n", + "162 of 4000 (4.05%) transitions ended with a divergence.\n", + "These divergent transitions indicate that HMC is not fully able to explore the posterior distribution.\n", + "Try increasing adapt delta closer to 1.\n", + "If this doesn't remove all divergences, try to reparameterize the model.\n", + "\n", + "Checking E-BFMI - sampler transitions HMC potential energy.\n", + "The E-BFMI, 0.18, is below the nominal threshold of 0.30 which suggests that HMC may have trouble exploring the target distribution.\n", + "If possible, try to reparameterize the model.\n", + "\n", + "Effective sample size satisfactory.\n", + "\n", + "Split R-hat values satisfactory all parameters.\n", + "\n", + "Processing complete.\n", + "\n" + ] + } + ], "source": [ "print(eight_schools_fit.diagnose())" ] @@ -373,9 +1144,94 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "17:37:41 - cmdstanpy - INFO - CmdStan start processing\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "238ba9344948495c804c0252670bdc0f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 1 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9e6f7f62d204450780262b047d718c19", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 2 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b52eb94eb7534d8c92ba4b5549db8b01", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 3 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "99e31e6382a5407b941c3c21fe1359df", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 4 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "17:37:41 - cmdstanpy - INFO - CmdStan done processing.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], "source": [ "fit = model.sample(data='bernoulli.data.json')" ] @@ -392,9 +1248,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.134474 0.0987819 0.209948 ... 0.211388 0.178338 0.336799 ]\n" + ] + } + ], "source": [ "print(fit.stan_variable('theta'))" ] @@ -408,9 +1272,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "name: theta, shape: (4000,)\n" + ] + } + ], "source": [ "for k, v in fit.stan_variables().items():\n", " print(f'name: {k}, shape: {v.shape}')" @@ -418,9 +1290,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "RuntimeError", + "evalue": "Package \"xarray\" is not installed, cannot produce draws array.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[17], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mfit\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdraws_xr\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtheta\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m)\n", + "File \u001b[0;32m~/github/stan-dev/cmdstanpy/cmdstanpy/stanfit/mcmc.py:663\u001b[0m, in \u001b[0;36mCmdStanMCMC.draws_xr\u001b[0;34m(self, vars, inc_warmup)\u001b[0m\n\u001b[1;32m 647\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 648\u001b[0m \u001b[38;5;124;03mReturns the sampler draws as a xarray Dataset.\u001b[39;00m\n\u001b[1;32m 649\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 660\u001b[0m \u001b[38;5;124;03mCmdStanGQ.draws_xr\u001b[39;00m\n\u001b[1;32m 661\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 662\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m XARRAY_INSTALLED:\n\u001b[0;32m--> 663\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 664\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPackage \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mxarray\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m is not installed, cannot produce draws array.\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 665\u001b[0m )\n\u001b[1;32m 666\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m inc_warmup \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_save_warmup:\n\u001b[1;32m 667\u001b[0m get_logger()\u001b[38;5;241m.\u001b[39mwarning(\n\u001b[1;32m 668\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDraws from warmup iterations not available,\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 669\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m must run sampler with \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msave_warmup=True\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 670\u001b[0m )\n", + "\u001b[0;31mRuntimeError\u001b[0m: Package \"xarray\" is not installed, cannot produce draws array." + ] + } + ], "source": [ "print(fit.draws_xr('theta'))" ] @@ -436,18 +1321,161 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sample as ndarray: (1000, 4, 8)\n", + "first 2 draws, chain 1:\n", + "[[-7.31892 0.838317 0.951607 1. 3. 0.\n", + " 7.62054 0.134474 ]\n", + " [-7.88059 0.911954 0.951607 1. 1. 0.\n", + " 7.89482 0.0987819]]\n" + ] + } + ], "source": [ "print(f'sample as ndarray: {fit.draws().shape}\\nfirst 2 draws, chain 1:\\n{fit.draws()[:2, 0, :]}')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
chain__iter__draw__lp__accept_stat__stepsize__treedepth__n_leapfrog__divergent__energy__theta
01.01.01.0-7.318920.8383170.9516071.03.00.07.620540.134474
11.02.02.0-7.880590.9119540.9516071.01.00.07.894820.098782
21.03.03.0-6.803590.9968580.9516072.03.00.07.863000.209948
31.04.04.0-6.748730.9718150.9516072.03.00.06.999500.245320
41.05.05.0-7.129600.8558230.9516072.03.00.07.558700.368130
\n", + "
" + ], + "text/plain": [ + " chain__ iter__ draw__ lp__ accept_stat__ stepsize__ treedepth__ \\\n", + "0 1.0 1.0 1.0 -7.31892 0.838317 0.951607 1.0 \n", + "1 1.0 2.0 2.0 -7.88059 0.911954 0.951607 1.0 \n", + "2 1.0 3.0 3.0 -6.80359 0.996858 0.951607 2.0 \n", + "3 1.0 4.0 4.0 -6.74873 0.971815 0.951607 2.0 \n", + "4 1.0 5.0 5.0 -7.12960 0.855823 0.951607 2.0 \n", + "\n", + " n_leapfrog__ divergent__ energy__ theta \n", + "0 3.0 0.0 7.62054 0.134474 \n", + "1 1.0 0.0 7.89482 0.098782 \n", + "2 3.0 0.0 7.86300 0.209948 \n", + "3 3.0 0.0 6.99950 0.245320 \n", + "4 3.0 0.0 7.55870 0.368130 " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "fit.draws_pd().head()" ] diff --git a/docsrc/users-guide/hello_world.rst b/docsrc/users-guide/hello_world.rst index 02db629d..3476eb56 100644 --- a/docsrc/users-guide/hello_world.rst +++ b/docsrc/users-guide/hello_world.rst @@ -107,6 +107,7 @@ By default, the `sample` method runs 4 sampler chains. *Note* this model can be fit using other methods ++ the :meth:`~CmdStanModel.pathfinder` method does approximate Bayesian inference and returns a :class:`CmdStanPathfinder` object + the :meth:`~CmdStanModel.variational` method does approximate Bayesian inference and returns a :class:`CmdStanVB` object + the :meth:`~CmdStanModel.optimize` method does maximum likelihood estimation and returns a :class:`CmdStanMLE` object diff --git a/docsrc/users-guide/overview.rst b/docsrc/users-guide/overview.rst index da1490f5..a7957834 100644 --- a/docsrc/users-guide/overview.rst +++ b/docsrc/users-guide/overview.rst @@ -12,7 +12,8 @@ With CmdStanPy, you can: + Exact Bayesian estimation using the `NUTS-HMC sampler `__. - + Approximate Bayesian estimation using `ADVI `__. + + Approximate Bayesian estimation algorithms `Pathfinder `__ + and `ADVI `__. + MAP estimation by `optimization `__. diff --git a/docsrc/users-guide/workflow.rst b/docsrc/users-guide/workflow.rst index 1e934f6d..ebca82cb 100644 --- a/docsrc/users-guide/workflow.rst +++ b/docsrc/users-guide/workflow.rst @@ -163,8 +163,9 @@ An example of each is provided in the `next section `__. Validate, view, export the inference engine outputs ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The inference engine results objects -:class:`CmdStanMCMC`, :class:`CmdStanVB`, :class:`CmdStanMLE` and :class:`CmdStanGQ,` +The inference method-specific results objects +:class:`CmdStanMCMC`, :class:`CmdStanPathfinder`, :class:`CmdStanVB`, +:class:`CmdStanMLE`, and :class:`CmdStanGQ` contain the CmdStan method configuration information and the location of all output files produced. The provide a common set methods for accessing the inference results and metadata, From b90740d389e92759492007386e4fdcc7eea6fe51 Mon Sep 17 00:00:00 2001 From: Mitzi Morris Date: Fri, 22 Sep 2023 20:08:52 -0400 Subject: [PATCH 5/6] code cleanup --- cmdstanpy/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmdstanpy/model.py b/cmdstanpy/model.py index c7454a44..a6449da4 100644 --- a/cmdstanpy/model.py +++ b/cmdstanpy/model.py @@ -1883,7 +1883,7 @@ def laplace_sample( or as the path of a data file in JSON or Rdump format. :param mode: The mode around which to place the approximation, either - + * A :class:`CmdStanMLE` object * A path to a CSV file containing the output of an optimization run. * ``None`` - use default optimizer settings and/or any ``opt_args``. From 3ba5f117a18c649be97cbd8b9c104ae54a45f6b8 Mon Sep 17 00:00:00 2001 From: Mitzi Morris Date: Mon, 25 Sep 2023 17:43:40 -0400 Subject: [PATCH 6/6] changes per docs review --- docsrc/conf.py | 8 +- docsrc/internal_api.rst | 7 + .../users-guide/examples/MCMC Sampling.ipynb | 695 ++++++++++++------ docsrc/users-guide/examples/Pathfinder.ipynb | 36 +- .../examples/VI as Sampler Inits.ipynb | 155 +++- 5 files changed, 674 insertions(+), 227 deletions(-) diff --git a/docsrc/conf.py b/docsrc/conf.py index 484e19a8..40c6ae27 100644 --- a/docsrc/conf.py +++ b/docsrc/conf.py @@ -133,7 +133,7 @@ def emit(self, record): # General information about the project. project = 'CmdStanPy' -copyright = '2022, Stan Development Team' +copyright = '2023, Stan Development Team' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -399,8 +399,10 @@ def emit(self, record): "DataFrame": "~pandas.DataFrame", "CmdStanMCMC": "~cmdstanpy.CmdStanMCMC", "CmdStanMLE": "~cmdstanpy.CmdStanMLE", - "CmdStanMCVB": "~cmdstanpy.CmdStanMCVB", - "CmdStanMCGQ": "~cmdstanpy.CmdStanMCGQ", + "CmdStanVB": "~cmdstanpy.CmdStanVB", + "CmdStanGQ": "~cmdstanpy.CmdStanGQ", + "CmdStanLaplace": "~cmdstanpy.CmdStanLaplace", + "CmdStanPathfinder": "~cmdstanpy.CmdStanPathfinder" } nbsphinx_allow_errors = False diff --git a/docsrc/internal_api.rst b/docsrc/internal_api.rst index 20e63582..7d88ca70 100644 --- a/docsrc/internal_api.rst +++ b/docsrc/internal_api.rst @@ -36,6 +36,7 @@ CmdStanArgs .. autoclass:: cmdstanpy.cmdstan_args.CmdStanArgs :members: + SamplerArgs =========== @@ -48,6 +49,12 @@ OptimizeArgs .. autoclass:: cmdstanpy.cmdstan_args.OptimizeArgs :members: +LaplaceArgs +=========== + +.. autoclass:: cmdstanpy.cmdstan_args.LaplaceArgs + :members: + PathfinderArgs ============== diff --git a/docsrc/users-guide/examples/MCMC Sampling.ipynb b/docsrc/users-guide/examples/MCMC Sampling.ipynb index 4808a441..1a659a20 100644 --- a/docsrc/users-guide/examples/MCMC Sampling.ipynb +++ b/docsrc/users-guide/examples/MCMC Sampling.ipynb @@ -64,6 +64,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -72,60 +73,10 @@ "\n", "CmdStanPy displays progress bars during sampling via use of package [tqdm](https://github.com/tqdm/tqdm).\n", "In order for these to display properly in a Jupyter notebook, you must have the \n", - "[ipywidgets](https://ipywidgets.readthedocs.io/en/latest/index.html) package installed,\n", - "and depending on your version of Jupyter or JupyterLab, you must enable it via command:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "usage: jupyter [-h] [--version] [--config-dir] [--data-dir] [--runtime-dir]\n", - " [--paths] [--json] [--debug]\n", - " [subcommand]\n", - "\n", - "Jupyter: Interactive Computing\n", - "\n", - "positional arguments:\n", - " subcommand the subcommand to launch\n", - "\n", - "optional arguments:\n", - " -h, --help show this help message and exit\n", - " --version show the versions of core jupyter packages and exit\n", - " --config-dir show Jupyter config dir\n", - " --data-dir show Jupyter data dir\n", - " --runtime-dir show Jupyter runtime dir\n", - " --paths show all Jupyter paths. Add --json for machine-readable\n", - " format.\n", - " --json output paths as machine-readable json\n", - " --debug output debug information about paths\n", - "\n", - "Available subcommands: dejavu events execute kernel kernelspec lab\n", - "labextension labhub migrate nbconvert run server troubleshoot trust\n", - "\n", - "Jupyter command `jupyter-nbextension` not found.\n" - ] - } - ], - "source": [ - "!jupyter nbextension enable --py widgetsnbextension" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ + "[ipywidgets](https://ipywidgets.readthedocs.io/en/latest/index.html) package installed.\n", "For more information, see the the\n", "[installation instructions](https://ipywidgets.readthedocs.io/en/latest/user_install.html#), \n", - "also [this tqdm GitHub issue](https://github.com/tqdm/tqdm/issues/394#issuecomment-384743637).\n", - "\n", - "\n", - " " + "also [this tqdm GitHub issue](https://github.com/tqdm/tqdm/issues/394#issuecomment-384743637)." ] }, { @@ -144,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -166,20 +117,20 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "17:37:25 - cmdstanpy - INFO - CmdStan start processing\n" + "16:17:55 - cmdstanpy - INFO - CmdStan start processing\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f0d740bc35774bb59e63a11e01290851", + "model_id": "9f05124c597846069d49180ff209d8b6", "version_major": 2, "version_minor": 0 }, @@ -193,7 +144,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "bf042311b4fb4994a4357832bcd2bf1d", + "model_id": "041081927b1644c6aa6d0d8b1c2195a0", "version_major": 2, "version_minor": 0 }, @@ -207,7 +158,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "62f7533de2e644a38a35e933bdfcd9e3", + "model_id": "040a168589d940cf9db02d038144de57", "version_major": 2, "version_minor": 0 }, @@ -221,7 +172,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f6b35c2c78c24aee93d088a1c956dbcc", + "model_id": "d24695e07cbe44628dd87ce656daa26a", "version_major": 2, "version_minor": 0 }, @@ -243,7 +194,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "17:37:25 - cmdstanpy - INFO - CmdStan done processing.\n" + "16:17:56 - cmdstanpy - INFO - CmdStan done processing.\n" ] }, { @@ -271,7 +222,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 7, "metadata": { "scrolled": true }, @@ -281,18 +232,18 @@ "text/plain": [ "CmdStanMCMC: model=bernoulli chains=4['method=sample', 'algorithm=hmc', 'adapt', 'engaged=1']\n", " csv_files:\n", - "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/bernoullicweszbso/bernoulli-20230922173725_1.csv\n", - "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/bernoullicweszbso/bernoulli-20230922173725_2.csv\n", - "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/bernoullicweszbso/bernoulli-20230922173725_3.csv\n", - "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/bernoullicweszbso/bernoulli-20230922173725_4.csv\n", + "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp0w4tqjfy/bernoulli1_4b9oav/bernoulli-20230925161756_1.csv\n", + "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp0w4tqjfy/bernoulli1_4b9oav/bernoulli-20230925161756_2.csv\n", + "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp0w4tqjfy/bernoulli1_4b9oav/bernoulli-20230925161756_3.csv\n", + "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp0w4tqjfy/bernoulli1_4b9oav/bernoulli-20230925161756_4.csv\n", " output_files:\n", - "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/bernoullicweszbso/bernoulli-20230922173725_0-stdout.txt\n", - "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/bernoullicweszbso/bernoulli-20230922173725_1-stdout.txt\n", - "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/bernoullicweszbso/bernoulli-20230922173725_2-stdout.txt\n", - "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/bernoullicweszbso/bernoulli-20230922173725_3-stdout.txt" + "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp0w4tqjfy/bernoulli1_4b9oav/bernoulli-20230925161756_0-stdout.txt\n", + "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp0w4tqjfy/bernoulli1_4b9oav/bernoulli-20230925161756_1-stdout.txt\n", + "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp0w4tqjfy/bernoulli1_4b9oav/bernoulli-20230925161756_2-stdout.txt\n", + "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp0w4tqjfy/bernoulli1_4b9oav/bernoulli-20230925161756_3-stdout.txt" ] }, - "execution_count": 4, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -303,7 +254,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -341,20 +292,20 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "17:37:27 - cmdstanpy - INFO - CmdStan start processing\n" + "16:17:57 - cmdstanpy - INFO - CmdStan start processing\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ee12fa3bf16845ba8cfa3a376ee328a2", + "model_id": "1aeb2ebfa927474e9805898637b15c69", "version_major": 2, "version_minor": 0 }, @@ -368,7 +319,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "503c539d798e477695a57f0c934ba0b1", + "model_id": "bccbe3c795f046c791a5e317a6b83376", "version_major": 2, "version_minor": 0 }, @@ -382,7 +333,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "90a432b9392a46a5ae57ea38606acb2f", + "model_id": "a662703931e649d3866363a1e2e5b201", "version_major": 2, "version_minor": 0 }, @@ -396,7 +347,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "bdd07b318c4246148c233ae31ce0f0c7", + "model_id": "a4cac644b61b47d8bb99aa5041022cf7", "version_major": 2, "version_minor": 0 }, @@ -418,7 +369,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "17:37:29 - cmdstanpy - INFO - CmdStan done processing.\n" + "16:17:59 - cmdstanpy - INFO - CmdStan done processing.\n" ] }, { @@ -444,7 +395,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 10, "metadata": { "tags": [] }, @@ -453,10 +404,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "17:37:31 - cmdstanpy - INFO - Chain [1] start processing\n", - "17:37:31 - cmdstanpy - INFO - Chain [1] done processing\n", - "17:37:31 - cmdstanpy - INFO - Chain [2] start processing\n", - "17:37:31 - cmdstanpy - INFO - Chain [2] done processing\n" + "16:18:00 - cmdstanpy - INFO - Chain [1] start processing\n", + "16:18:00 - cmdstanpy - INFO - Chain [1] done processing\n", + "16:18:00 - cmdstanpy - INFO - Chain [2] start processing\n", + "16:18:00 - cmdstanpy - INFO - Chain [2] done processing\n" ] }, { @@ -493,9 +444,9 @@ "Chain [1] file = bernoulli.data.json\n", "Chain [1] init = 2 (Default)\n", "Chain [1] random\n", - "Chain [1] seed = 75498\n", + "Chain [1] seed = 42783\n", "Chain [1] output\n", - "Chain [1] file = /var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/bernoulliffqp_qzu/bernoulli-20230922173731_1.csv\n", + "Chain [1] file = /var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp0w4tqjfy/bernoullibjig91zd/bernoulli-20230925161800_1.csv\n", "Chain [1] diagnostic_file = (Default)\n", "Chain [1] refresh = 100 (Default)\n", "Chain [1] sig_figs = -1 (Default)\n", @@ -566,9 +517,9 @@ "Chain [2] file = bernoulli.data.json\n", "Chain [2] init = 2 (Default)\n", "Chain [2] random\n", - "Chain [2] seed = 75498\n", + "Chain [2] seed = 42783\n", "Chain [2] output\n", - "Chain [2] file = /var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/bernoulliffqp_qzu/bernoulli-20230922173731_2.csv\n", + "Chain [2] file = /var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp0w4tqjfy/bernoullibjig91zd/bernoulli-20230925161800_2.csv\n", "Chain [2] diagnostic_file = (Default)\n", "Chain [2] refresh = 100 (Default)\n", "Chain [2] sig_figs = -1 (Default)\n", @@ -654,7 +605,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -694,7 +645,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -734,22 +685,20 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "17:37:31 - cmdstanpy - INFO - compiling stan file /Users/mitzi/github/stan-dev/cmdstanpy/docsrc/users-guide/examples/eight_schools.stan to exe file /Users/mitzi/github/stan-dev/cmdstanpy/docsrc/users-guide/examples/eight_schools\n", - "17:37:40 - cmdstanpy - INFO - compiled model executable: /Users/mitzi/github/stan-dev/cmdstanpy/docsrc/users-guide/examples/eight_schools\n", - "17:37:40 - cmdstanpy - INFO - CmdStan start processing\n" + "16:18:01 - cmdstanpy - INFO - CmdStan start processing\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "72f46b1ff85d4b0685cbc6379f655aa1", + "model_id": "ffbfef209eb84b64b01cf9c18b42514e", "version_major": 2, "version_minor": 0 }, @@ -763,7 +712,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e48bd901ace04b69a006a362cdaba14d", + "model_id": "d66ab35ce0d54be48ab0aa75da85dccf", "version_major": 2, "version_minor": 0 }, @@ -777,7 +726,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "31e4d838ecd04603b39ae9de97371bdd", + "model_id": "b138cfcf18334ba88e09e5f2debedd29", "version_major": 2, "version_minor": 0 }, @@ -791,7 +740,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "7716552fa3a943619a6be38a50d0a064", + "model_id": "89e81384e9c14e56a1cb233a8f6f0d27", "version_major": 2, "version_minor": 0 }, @@ -813,8 +762,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "17:37:41 - cmdstanpy - INFO - CmdStan done processing.\n", - "17:37:41 - cmdstanpy - WARNING - Some chains may have failed to converge.\n", + "16:18:01 - cmdstanpy - INFO - CmdStan done processing.\n", + "16:18:01 - cmdstanpy - WARNING - Some chains may have failed to converge.\n", "\tChain 1 had 10 divergent transitions (1.0%)\n", "\tChain 2 had 143 divergent transitions (14.3%)\n", "\tChain 3 had 5 divergent transitions (0.5%)\n", @@ -847,7 +796,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -876,7 +825,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 15, "metadata": { "scrolled": true }, @@ -923,7 +872,7 @@ " -17.71920\n", " -5.82214\n", " 27.72280\n", - " 40.17800\n", + " 37.87270\n", " 1.12057\n", " \n", " \n", @@ -935,7 +884,7 @@ " 6.98708\n", " 16.39460\n", " 224.43000\n", - " 325.26100\n", + " 306.59800\n", " 1.03328\n", " \n", " \n", @@ -947,7 +896,7 @@ " 9.67094\n", " 27.12670\n", " 153.45500\n", - " 222.39800\n", + " 209.63800\n", " 1.03967\n", " \n", " \n", @@ -959,7 +908,7 @@ " 6.77458\n", " 18.33640\n", " 804.49900\n", - " 1165.94000\n", + " 1099.04000\n", " 1.01501\n", " \n", " \n", @@ -971,7 +920,7 @@ " 5.20094\n", " 17.52600\n", " 1106.33000\n", - " 1603.37000\n", + " 1511.38000\n", " 1.00842\n", " \n", " \n", @@ -983,7 +932,7 @@ " 6.58401\n", " 18.50130\n", " 696.79800\n", - " 1009.85000\n", + " 951.91000\n", " 1.01756\n", " \n", " \n", @@ -995,7 +944,7 @@ " 4.04537\n", " 14.53360\n", " 1006.39000\n", - " 1458.54000\n", + " 1374.85000\n", " 1.00407\n", " \n", " \n", @@ -1007,7 +956,7 @@ " 5.30265\n", " 16.48680\n", " 1156.63000\n", - " 1676.27000\n", + " 1580.09000\n", " 1.00729\n", " \n", " \n", @@ -1019,7 +968,7 @@ " 9.17402\n", " 23.36110\n", " 127.80000\n", - " 185.21800\n", + " 174.59100\n", " 1.03588\n", " \n", " \n", @@ -1031,7 +980,7 @@ " 7.00731\n", " 21.58680\n", " 760.99400\n", - " 1102.89000\n", + " 1039.61000\n", " 1.01463\n", " \n", " \n", @@ -1043,7 +992,7 @@ " 5.21419\n", " 17.68580\n", " 107.35287\n", - " 155.58387\n", + " 146.65692\n", " 1.04858\n", " \n", " \n", @@ -1065,20 +1014,20 @@ "tau 6.60333 0.557360 5.77492 1.019850 5.21419 17.68580 \n", "\n", " N_Eff N_Eff/s R_hat \n", - "lp__ 27.72280 40.17800 1.12057 \n", - "mu 224.43000 325.26100 1.03328 \n", - "theta[1] 153.45500 222.39800 1.03967 \n", - "theta[2] 804.49900 1165.94000 1.01501 \n", - "theta[3] 1106.33000 1603.37000 1.00842 \n", - "theta[4] 696.79800 1009.85000 1.01756 \n", - "theta[5] 1006.39000 1458.54000 1.00407 \n", - "theta[6] 1156.63000 1676.27000 1.00729 \n", - "theta[7] 127.80000 185.21800 1.03588 \n", - "theta[8] 760.99400 1102.89000 1.01463 \n", - "tau 107.35287 155.58387 1.04858 " + "lp__ 27.72280 37.87270 1.12057 \n", + "mu 224.43000 306.59800 1.03328 \n", + "theta[1] 153.45500 209.63800 1.03967 \n", + "theta[2] 804.49900 1099.04000 1.01501 \n", + "theta[3] 1106.33000 1511.38000 1.00842 \n", + "theta[4] 696.79800 951.91000 1.01756 \n", + "theta[5] 1006.39000 1374.85000 1.00407 \n", + "theta[6] 1156.63000 1580.09000 1.00729 \n", + "theta[7] 127.80000 174.59100 1.03588 \n", + "theta[8] 760.99400 1039.61000 1.01463 \n", + "tau 107.35287 146.65692 1.04858 " ] }, - "execution_count": 12, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1098,14 +1047,14 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Processing csv files: /var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/eight_schools_svuyncu/eight_schools-20230922173740_1.csv, /var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/eight_schools_svuyncu/eight_schools-20230922173740_2.csv, /var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/eight_schools_svuyncu/eight_schools-20230922173740_3.csv, /var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp1kzqmyhn/eight_schools_svuyncu/eight_schools-20230922173740_4.csv\n", + "Processing csv files: /var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp0w4tqjfy/eight_schoolsz56j88wq/eight_schools-20230925161801_1.csv, /var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp0w4tqjfy/eight_schoolsz56j88wq/eight_schools-20230925161801_2.csv, /var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp0w4tqjfy/eight_schoolsz56j88wq/eight_schools-20230925161801_3.csv, /var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp0w4tqjfy/eight_schoolsz56j88wq/eight_schools-20230925161801_4.csv\n", "\n", "Checking sampler transitions treedepth.\n", "6 of 4000 (0.15%) transitions hit the maximum treedepth limit of 10, or 2^10 leapfrog steps.\n", @@ -1144,20 +1093,20 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "17:37:41 - cmdstanpy - INFO - CmdStan start processing\n" + "16:18:09 - cmdstanpy - INFO - CmdStan start processing\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "238ba9344948495c804c0252670bdc0f", + "model_id": "6135a0a7681d41e28c3f83188acb6f2e", "version_major": 2, "version_minor": 0 }, @@ -1171,7 +1120,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9e6f7f62d204450780262b047d718c19", + "model_id": "88fad8d6b2314633a96698e841380b00", "version_major": 2, "version_minor": 0 }, @@ -1185,7 +1134,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "b52eb94eb7534d8c92ba4b5549db8b01", + "model_id": "162d9eec51774f55b92d5e92da31bf1a", "version_major": 2, "version_minor": 0 }, @@ -1199,7 +1148,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "99e31e6382a5407b941c3c21fe1359df", + "model_id": "06c665ed993b4ad5ba2862bb9e444992", "version_major": 2, "version_minor": 0 }, @@ -1221,7 +1170,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "17:37:41 - cmdstanpy - INFO - CmdStan done processing.\n" + "16:18:09 - cmdstanpy - INFO - CmdStan done processing.\n" ] }, { @@ -1248,14 +1197,14 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[0.134474 0.0987819 0.209948 ... 0.211388 0.178338 0.336799 ]\n" + "[0.187637 0.167524 0.454131 ... 0.506709 0.14154 0.333167]\n" ] } ], @@ -1272,7 +1221,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -1290,19 +1239,24 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 20, "metadata": {}, "outputs": [ { - "ename": "RuntimeError", - "evalue": "Package \"xarray\" is not installed, cannot produce draws array.", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[17], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mfit\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdraws_xr\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtheta\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m)\n", - "File \u001b[0;32m~/github/stan-dev/cmdstanpy/cmdstanpy/stanfit/mcmc.py:663\u001b[0m, in \u001b[0;36mCmdStanMCMC.draws_xr\u001b[0;34m(self, vars, inc_warmup)\u001b[0m\n\u001b[1;32m 647\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 648\u001b[0m \u001b[38;5;124;03mReturns the sampler draws as a xarray Dataset.\u001b[39;00m\n\u001b[1;32m 649\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 660\u001b[0m \u001b[38;5;124;03mCmdStanGQ.draws_xr\u001b[39;00m\n\u001b[1;32m 661\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 662\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m XARRAY_INSTALLED:\n\u001b[0;32m--> 663\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 664\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPackage \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mxarray\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m is not installed, cannot produce draws array.\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 665\u001b[0m )\n\u001b[1;32m 666\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m inc_warmup \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_save_warmup:\n\u001b[1;32m 667\u001b[0m get_logger()\u001b[38;5;241m.\u001b[39mwarning(\n\u001b[1;32m 668\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDraws from warmup iterations not available,\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 669\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m must run sampler with \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msave_warmup=True\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 670\u001b[0m )\n", - "\u001b[0;31mRuntimeError\u001b[0m: Package \"xarray\" is not installed, cannot produce draws array." + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Dimensions: (draw: 1000, chain: 4)\n", + "Coordinates:\n", + " * chain (chain) int64 1 2 3 4\n", + " * draw (draw) int64 0 1 2 3 4 5 6 7 8 ... 992 993 994 995 996 997 998 999\n", + "Data variables:\n", + " theta (chain, draw) float64 0.1876 0.1675 0.4541 ... 0.5067 0.1415 0.3332\n", + "Attributes:\n", + " stan_version: 2.33.0\n", + " model: bernoulli_model\n", + " num_draws_sampling: 1000\n" ] } ], @@ -1321,7 +1275,16 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "import xarray as x" + ] + }, + { + "cell_type": "code", + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -1330,10 +1293,10 @@ "text": [ "sample as ndarray: (1000, 4, 8)\n", "first 2 draws, chain 1:\n", - "[[-7.31892 0.838317 0.951607 1. 3. 0.\n", - " 7.62054 0.134474 ]\n", - " [-7.88059 0.911954 0.951607 1. 1. 0.\n", - " 7.89482 0.0987819]]\n" + "[[-6.89001 0.981831 0.870591 1. 1. 0. 6.89017\n", + " 0.187637]\n", + " [-7.01004 0.849367 0.870591 2. 3. 0. 8.19459\n", + " 0.167524]]\n" ] } ], @@ -1343,7 +1306,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -1386,70 +1349,70 @@ " 1.0\n", " 1.0\n", " 1.0\n", - " -7.31892\n", - " 0.838317\n", - " 0.951607\n", + " -6.89001\n", + " 0.981831\n", + " 0.870591\n", + " 1.0\n", " 1.0\n", - " 3.0\n", " 0.0\n", - " 7.62054\n", - " 0.134474\n", + " 6.89017\n", + " 0.187637\n", " \n", " \n", " 1\n", " 1.0\n", " 2.0\n", " 2.0\n", - " -7.88059\n", - " 0.911954\n", - " 0.951607\n", - " 1.0\n", - " 1.0\n", + " -7.01004\n", + " 0.849367\n", + " 0.870591\n", + " 2.0\n", + " 3.0\n", " 0.0\n", - " 7.89482\n", - " 0.098782\n", + " 8.19459\n", + " 0.167524\n", " \n", " \n", " 2\n", " 1.0\n", " 3.0\n", " 3.0\n", - " -6.80359\n", - " 0.996858\n", - " 0.951607\n", - " 2.0\n", + " -7.81650\n", + " 0.825109\n", + " 0.870591\n", + " 1.0\n", " 3.0\n", " 0.0\n", - " 7.86300\n", - " 0.209948\n", + " 8.77706\n", + " 0.454131\n", " \n", " \n", " 3\n", " 1.0\n", " 4.0\n", " 4.0\n", - " -6.74873\n", - " 0.971815\n", - " 0.951607\n", + " -6.78960\n", + " 1.000000\n", + " 0.870591\n", " 2.0\n", " 3.0\n", " 0.0\n", - " 6.99950\n", - " 0.245320\n", + " 7.79596\n", + " 0.215159\n", " \n", " \n", " 4\n", " 1.0\n", " 5.0\n", " 5.0\n", - " -7.12960\n", - " 0.855823\n", - " 0.951607\n", + " -7.21778\n", + " 0.940115\n", + " 0.870591\n", " 2.0\n", " 3.0\n", " 0.0\n", - " 7.55870\n", - " 0.368130\n", + " 7.32770\n", + " 0.143557\n", " \n", " \n", "\n", @@ -1457,21 +1420,21 @@ ], "text/plain": [ " chain__ iter__ draw__ lp__ accept_stat__ stepsize__ treedepth__ \\\n", - "0 1.0 1.0 1.0 -7.31892 0.838317 0.951607 1.0 \n", - "1 1.0 2.0 2.0 -7.88059 0.911954 0.951607 1.0 \n", - "2 1.0 3.0 3.0 -6.80359 0.996858 0.951607 2.0 \n", - "3 1.0 4.0 4.0 -6.74873 0.971815 0.951607 2.0 \n", - "4 1.0 5.0 5.0 -7.12960 0.855823 0.951607 2.0 \n", + "0 1.0 1.0 1.0 -6.89001 0.981831 0.870591 1.0 \n", + "1 1.0 2.0 2.0 -7.01004 0.849367 0.870591 2.0 \n", + "2 1.0 3.0 3.0 -7.81650 0.825109 0.870591 1.0 \n", + "3 1.0 4.0 4.0 -6.78960 1.000000 0.870591 2.0 \n", + "4 1.0 5.0 5.0 -7.21778 0.940115 0.870591 2.0 \n", "\n", " n_leapfrog__ divergent__ energy__ theta \n", - "0 3.0 0.0 7.62054 0.134474 \n", - "1 1.0 0.0 7.89482 0.098782 \n", - "2 3.0 0.0 7.86300 0.209948 \n", - "3 3.0 0.0 6.99950 0.245320 \n", - "4 3.0 0.0 7.55870 0.368130 " + "0 1.0 0.0 6.89017 0.187637 \n", + "1 3.0 0.0 8.19459 0.167524 \n", + "2 3.0 0.0 8.77706 0.454131 \n", + "3 3.0 0.0 7.79596 0.215159 \n", + "4 3.0 0.0 7.32770 0.143557 " ] }, - "execution_count": 19, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -1489,9 +1452,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "name: lp__, shape: (1000, 4)\n", + "name: accept_stat__, shape: (1000, 4)\n", + "name: stepsize__, shape: (1000, 4)\n", + "name: treedepth__, shape: (1000, 4)\n", + "name: n_leapfrog__, shape: (1000, 4)\n", + "name: divergent__, shape: (1000, 4)\n", + "name: energy__, shape: (1000, 4)\n" + ] + } + ], "source": [ "for k, v in fit.method_variables().items():\n", " print(f'name: {k}, shape: {v.shape}')" @@ -1506,9 +1483,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "adapted step_size per chain\n", + "[0.870591 0.97456 1.00695 0.975628]\n", + "metric_type: diag_e\n", + "metric:\n", + "[[0.557946]\n", + " [0.469587]\n", + " [0.567582]\n", + " [0.467572]]\n" + ] + } + ], "source": [ "print(f'adapted step_size per chain\\n{fit.step_size}\\nmetric_type: {fit.metric_type}\\nmetric:\\n{fit.metric}')" ] @@ -1522,9 +1514,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sample method variables:\n", + "dict_keys(['lp__', 'accept_stat__', 'stepsize__', 'treedepth__', 'n_leapfrog__', 'divergent__', 'energy__'])\n", + "\n", + "stan model variables:\n", + "dict_keys(['theta'])\n" + ] + } + ], "source": [ "print('sample method variables:\\n{}\\n'.format(fit.metadata.method_vars.keys()))\n", "print('stan model variables:\\n{}'.format(fit.metadata.stan_vars.keys()))" @@ -1558,9 +1562,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "16:18:21 - cmdstanpy - WARNING - CmdStanModel(compile=...) is deprecated and will be removed in the next major version. The constructor will always ensure a model has a compiled executable.\n", + "If you wish to force recompilation, use force_compile=True instead.\n", + "16:18:21 - cmdstanpy - INFO - compiling stan file /Users/mitzi/github/stan-dev/cmdstanpy/docsrc/users-guide/examples/bernoulli.stan to exe file /Users/mitzi/github/stan-dev/cmdstanpy/docsrc/users-guide/examples/bernoulli\n", + "16:18:30 - cmdstanpy - INFO - compiled model executable: /Users/mitzi/github/stan-dev/cmdstanpy/docsrc/users-guide/examples/bernoulli\n" + ] + }, + { + "data": { + "text/plain": [ + "{'stan_version_major': '2',\n", + " 'stan_version_minor': '33',\n", + " 'stan_version_patch': '0',\n", + " 'STAN_THREADS': 'true',\n", + " 'STAN_MPI': 'false',\n", + " 'STAN_OPENCL': 'false',\n", + " 'STAN_NO_RANGE_CHECKS': 'false',\n", + " 'STAN_CPP_OPTIMS': 'false'}" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "model = CmdStanModel(stan_file='bernoulli.stan',\n", " cpp_options={'STAN_THREADS': 'TRUE'},\n", @@ -1591,9 +1623,94 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "16:18:30 - cmdstanpy - INFO - CmdStan start processing\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "14ffefd5499e40b982f50233584783ce", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 1 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c23ec7d12e1a45f8a01ae88133616501", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 2 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a8d8be6bda8547649f134d342615dd87", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 3 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "aabbfef36fe04a11bc27d5947afe28dc", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 4 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "16:18:30 - cmdstanpy - INFO - CmdStan done processing.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], "source": [ "fit = model.sample(data='bernoulli.data.json', parallel_chains=4)" ] @@ -1618,9 +1735,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "functions {\n", + " real partial_sum(array[] int slice_n_redcards, int start, int end,\n", + " array[] int n_games, vector rating, vector beta) {\n", + " return binomial_logit_lpmf(slice_n_redcards | n_games[start : end], beta[1]\n", + " + beta[2]\n", + " * rating[start : end]);\n", + " }\n", + "}\n", + "data {\n", + " int N;\n", + " array[N] int n_redcards;\n", + " array[N] int n_games;\n", + " vector[N] rating;\n", + " int grainsize;\n", + "}\n", + "parameters {\n", + " vector[2] beta;\n", + "}\n", + "model {\n", + " beta[1] ~ normal(0, 10);\n", + " beta[2] ~ normal(0, 1);\n", + " \n", + " target += reduce_sum(partial_sum, n_redcards, grainsize, n_games, rating,\n", + " beta);\n", + "}\n", + "\n", + "\n" + ] + } + ], "source": [ "with open('redcard_reduce_sum.stan', 'r') as fd:\n", " print(fd.read())" @@ -1635,9 +1786,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "16:18:30 - cmdstanpy - WARNING - CmdStanModel(compile=...) is deprecated and will be removed in the next major version. The constructor will always ensure a model has a compiled executable.\n", + "If you wish to force recompilation, use force_compile=True instead.\n", + "16:18:30 - cmdstanpy - INFO - compiling stan file /Users/mitzi/github/stan-dev/cmdstanpy/docsrc/users-guide/examples/redcard_reduce_sum.stan to exe file /Users/mitzi/github/stan-dev/cmdstanpy/docsrc/users-guide/examples/redcard_reduce_sum\n", + "16:18:38 - cmdstanpy - INFO - compiled model executable: /Users/mitzi/github/stan-dev/cmdstanpy/docsrc/users-guide/examples/redcard_reduce_sum\n" + ] + }, + { + "data": { + "text/plain": [ + "{'stan_version_major': '2',\n", + " 'stan_version_minor': '33',\n", + " 'stan_version_patch': '0',\n", + " 'STAN_THREADS': 'true',\n", + " 'STAN_MPI': 'false',\n", + " 'STAN_OPENCL': 'false',\n", + " 'STAN_NO_RANGE_CHECKS': 'false',\n", + " 'STAN_CPP_OPTIMS': 'false'}" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "redcard_model = CmdStanModel(stan_file='redcard_reduce_sum.stan',\n", " cpp_options={'STAN_THREADS': 'TRUE'},\n", @@ -1654,9 +1833,94 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "16:18:38 - cmdstanpy - INFO - CmdStan start processing\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "4d2a9c60d4a947159bb5bbd42a385438", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 1 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "0a5bc048a9e046b98ce91dd4a108bd4b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 2 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "dfe3e157af1b41d9a21a6fee769a0d88", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 3 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1374d6284f2e45379a536ea3b2f3b7cb", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 4 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "16:19:23 - cmdstanpy - INFO - CmdStan done processing.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], "source": [ "redcard_fit = redcard_model.sample(data='redcard.json', threads_per_chain=4)" ] @@ -1674,9 +1938,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'16'" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "os.environ['STAN_NUM_THREADS']" ] diff --git a/docsrc/users-guide/examples/Pathfinder.ipynb b/docsrc/users-guide/examples/Pathfinder.ipynb index efbd48a6..e5f3c564 100644 --- a/docsrc/users-guide/examples/Pathfinder.ipynb +++ b/docsrc/users-guide/examples/Pathfinder.ipynb @@ -44,9 +44,15 @@ "outputs": [], "source": [ "import os\n", - "from cmdstanpy.model import CmdStanModel\n", - "from cmdstanpy.utils import cmdstan_path\n", - "\n", + "from cmdstanpy.model import CmdStanModel, cmdstan_path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "bernoulli_dir = os.path.join(cmdstan_path(), 'examples', 'bernoulli')\n", "stan_file = os.path.join(bernoulli_dir, 'bernoulli.stan')\n", "data_file = os.path.join(bernoulli_dir, 'bernoulli.data.json')\n", @@ -114,7 +120,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The method [create_inits](https://mc-stan.org/cmdstanpy/api.html#cmdstanpy.CmdStanPathfinder.create_inits) returns a Python Dict containing a set of per-chain initializations for the model parameters. Each set of initializations is a random draw from the Pathfinder sample." + "### Pathfinders as initialization for the MCMC sampler\n", + "\n", + "The method [create_inits](https://mc-stan.org/cmdstanpy/api.html#cmdstanpy.CmdStanPathfinder.create_inits) returns a Python Dict containing a set of per-chain initializations for the model parameters. Each set of initializations is a random draw from the Pathfinder sample. These initializations can be used as the initial parameter values for Stan's NUTS-HMC sampler, which will reduce the number of warmup iterations needed." ] }, { @@ -126,6 +134,26 @@ "inits = pathfinder.create_inits()\n", "print(inits)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `create_inits` takes two arguments:\n", + "\n", + "* `seed` - used for random selection.\n", + "* `chains` - the number of draws to return, default is 4. This should match the number of sampler chains to run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "inits = pathfinder.create_inits(chains=3)\n", + "print(inits)" + ] } ], "metadata": { diff --git a/docsrc/users-guide/examples/VI as Sampler Inits.ipynb b/docsrc/users-guide/examples/VI as Sampler Inits.ipynb index 2e2a2bca..4223a279 100644 --- a/docsrc/users-guide/examples/VI as Sampler Inits.ipynb +++ b/docsrc/users-guide/examples/VI as Sampler Inits.ipynb @@ -6,9 +6,11 @@ "source": [ "## Using Variational Estimates to Initialize the NUTS-HMC Sampler\n", "\n", - "In this example we show how to use the parameter estimates return by Stan's variational inference algorithm\n", + "In this example we show how to use the parameter estimates return by Stan's variational inference algorithms\n", + "[pathfinder ](https://mc-stan.org/docs/cmdstan-guide/pathfinder-config.html) and \n", + "[ADVI ](https://mc-stan.org/docs/cmdstan-guide/variational-config.html) \n", "as the initial parameter values for Stan's NUTS-HMC sampler.\n", - "By default, the sampler algorithm randomly initializes all model parameters in the range uniform\\[-2, 2\\]. When the true parameter value is outside of this range, starting from the ADVI estimates will speed up and improve adaptation.\n", + "By default, the sampler algorithm randomly initializes all model parameters in the range uniform\\[-2, 2\\]. When the true parameter value is outside of this range, starting from the estimates from Pathfinder and ADVI will speed up and improve adaptation.\n", "\n", "### Model and data\n", "\n", @@ -23,9 +25,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "data {\n", + " int N;\n", + " int D;\n", + " matrix[N, D] X;\n", + " vector[N] y;\n", + "}\n", + "parameters {\n", + " vector[D] beta;\n", + " real sigma;\n", + "}\n", + "model {\n", + " // prior\n", + " target += normal_lpdf(beta | 0, 10);\n", + " target += normal_lpdf(sigma | 0, 10);\n", + " // likelihood\n", + " target += normal_lpdf(y | X * beta, sigma);\n", + "}\n", + "\n", + "\n" + ] + } + ], "source": [ "import os\n", "from cmdstanpy import CmdStanModel\n", @@ -62,9 +90,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "17:01:02 - cmdstanpy - INFO - Chain [1] start processing\n", + "17:01:02 - cmdstanpy - INFO - Chain [1] done processing\n" + ] + } + ], "source": [ "pathfinder_fit = model.pathfinder(data=data_file, seed=123)" ] @@ -83,9 +120,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'beta': array([0.996649, 0.999455, 1.00093 , 0.99873 , 1.00207 ]), 'sigma': array(0.934232)}, {'beta': array([1.00016 , 0.998764, 1.00055 , 1.00212 , 1.00047 ]), 'sigma': array(1.04441)}, {'beta': array([1.00139 , 0.997917, 1.00134 , 1.00123 , 1.00116 ]), 'sigma': array(0.946814)}, {'beta': array([0.999491, 0.999225, 1.00114 , 0.999147, 0.998943]), 'sigma': array(0.977812)}]\n" + ] + } + ], "source": [ "pathfinder_inits = pathfinder_fit.create_inits()\n", "print(pathfinder_inits)" @@ -93,9 +138,99 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "17:01:05 - cmdstanpy - INFO - CmdStan start processing\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d8a75128e05e4cf88f037897a38d0173", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 1 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3dbf5f498c5a47a889b0b5229d200ac4", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 2 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "82aa8eb3e89a4d55852aaefd0cbe856e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 3 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5b1e5ff5b1914fefa8aed58b19dff966", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "chain 4 | | 00:00 Status" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "17:01:05 - cmdstanpy - INFO - CmdStan done processing.\n", + "17:01:05 - cmdstanpy - WARNING - Non-fatal error during sampling:\n", + "Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/Users/mitzi/github/stan-dev/cmdstanpy/docsrc/users-guide/examples/blr.stan', line 16, column 2 to column 45)\n", + "Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/Users/mitzi/github/stan-dev/cmdstanpy/docsrc/users-guide/examples/blr.stan', line 16, column 2 to column 45)\n", + "Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/Users/mitzi/github/stan-dev/cmdstanpy/docsrc/users-guide/examples/blr.stan', line 16, column 2 to column 45)\n", + "Consider re-running with show_console=True if the above output is unclear!\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], "source": [ "mcmc_pathfinder_inits_fit = model.sample(\n", " data=data_file, inits=pathfinder_inits, iter_warmup=75, seed=12345\n",