Skip to content

Commit

Permalink
tested code in .rst files via notebooks, corrected accordingly
Browse files Browse the repository at this point in the history
  • Loading branch information
mitzimorris committed Oct 15, 2019
1 parent 02bfab0 commit c2aa91b
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 170 deletions.
58 changes: 56 additions & 2 deletions docs/notebooks/MCMC Sampling.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Fitting a model to data\n",
"\n",
"In this example we use the CmdStan example model\n",
"[bernoulli.stan](https://github.com/stan-dev/cmdstanpy/blob/master/test/data/bernoulli.stan)\n",
"and data file\n",
Expand Down Expand Up @@ -56,9 +58,61 @@
"\n",
"bern_fit = bernoulli_model.sample(data=bernoulli_data)\n",
"bern_fit.sample.shape\n",
"bern_fit.summary()\n",
" "
"bern_fit.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Running a data-generating model using `fixed_param=True`\n",
"\n",
"In this example we use the CmdStan example model\n",
"[bernoulli_datagen.stan](https://github.com/stan-dev/cmdstanpy/blob/master/test/data/bernoulli_datagen.stan)\n",
"to generate a simulated dataset given fixed data values."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"datagen_stan = os.path.join('..', '..', 'test', 'data', 'bernoulli_datagen.stan')\n",
"datagen_model = Model(stan_file=datagen_stan)\n",
"datagen_model.compile()\n",
"\n",
"sim_data = datagen_model.sample(fixed_param=True)\n",
"\n",
"sim_data.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Compute, plot histogram of total successes for `N` Bernoulli trials with chance of success `theta`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"drawset_pd = sim_data.get_drawset()\n",
"drawset_pd.columns\n",
"y_sims = drawset_pd.drop(columns=['lp__', 'accept_stat__'])\n",
"y_sums = y_sims.sum(axis=1)\n",
"y_sums.astype('int32').plot.hist(range(0,datagen_data['N']+1))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
7 changes: 7 additions & 0 deletions docs/notebooks/Maximum Likelihood Estimation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,13 @@
"print(mle.column_names)\n",
"print(mle.optimized_params_dict)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
161 changes: 14 additions & 147 deletions docs/notebooks/Run Generated Quantities.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -49,37 +49,9 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:cmdstanpy:compiling c++\n",
"INFO:cmdstanpy:compiled model file: /Users/mitzi/.cmdstanpy/cmdstan-2.20.0/examples/bernoulli/bernoulli\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"data { \n",
" int<lower=0> N; \n",
" int<lower=0,upper=1> y[N];\n",
"} \n",
"parameters {\n",
" real<lower=0,upper=1> theta;\n",
"} \n",
"model {\n",
" theta ~ beta(1,1);\n",
" for (n in 1:N) \n",
" y[n] ~ bernoulli(theta);\n",
"}\n",
"\n"
]
}
],
"outputs": [],
"source": [
"import os\n",
"from cmdstanpy import Model, cmdstan_path\n",
Expand All @@ -103,41 +75,9 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:cmdstanpy:compiling c++\n",
"INFO:cmdstanpy:compiled model file: /Users/mitzi/github/stan-dev/cmdstanpy/docs/notebooks/bernoulli_ppc\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"data { \n",
" int<lower=0> N; \n",
" int<lower=0,upper=1> y[N];\n",
"} \n",
"parameters {\n",
" real<lower=0,upper=1> theta;\n",
"} \n",
"model {\n",
" theta ~ beta(1,1);\n",
" y ~ bernoulli(theta);\n",
"}\n",
"generated quantities {\n",
" int y_rep[N];\n",
" for (n in 1:N)\n",
" y_rep[n] = bernoulli_rng(theta);\n",
"}\n",
"\n"
]
}
],
"outputs": [],
"source": [
"bernoulli_ppc_model = Model(stan_file='bernoulli_ppc.stan')\n",
"bernoulli_ppc_model.compile()\n",
Expand All @@ -153,24 +93,9 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:cmdstanpy:start chain 1\n",
"INFO:cmdstanpy:start chain 2\n",
"INFO:cmdstanpy:finish chain 1\n",
"INFO:cmdstanpy:finish chain 2\n",
"INFO:cmdstanpy:start chain 3\n",
"INFO:cmdstanpy:start chain 4\n",
"INFO:cmdstanpy:finish chain 3\n",
"INFO:cmdstanpy:finish chain 4\n"
]
}
],
"outputs": [],
"source": [
"# fit the model to the data\n",
"bern_data = os.path.join(bernoulli_dir, 'bernoulli.data.json')\n",
Expand All @@ -186,18 +111,9 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'N': 10, 'y': [0, 1, 0, 0, 0, 0, 0, 0, 0, 1]}\n",
"mean of y: 0.2\n"
]
}
],
"outputs": [],
"source": [
"import ujson\n",
"import statistics\n",
Expand All @@ -218,24 +134,9 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:cmdstanpy:start chain 1\n",
"INFO:cmdstanpy:start chain 2\n",
"INFO:cmdstanpy:finish chain 2\n",
"INFO:cmdstanpy:finish chain 1\n",
"INFO:cmdstanpy:start chain 3\n",
"INFO:cmdstanpy:start chain 4\n",
"INFO:cmdstanpy:finish chain 3\n",
"INFO:cmdstanpy:finish chain 4\n"
]
}
],
"outputs": [],
"source": [
"new_quantities = bernoulli_ppc_model.run_generated_quantities(data=bern_data, csv_files=bern_fit.runset.csv_files)"
]
Expand All @@ -249,52 +150,18 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('y_rep.1',\n",
" 'y_rep.2',\n",
" 'y_rep.3',\n",
" 'y_rep.4',\n",
" 'y_rep.5',\n",
" 'y_rep.6',\n",
" 'y_rep.7',\n",
" 'y_rep.8',\n",
" 'y_rep.9',\n",
" 'y_rep.10')"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"new_quantities.column_names"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "GENERATED_QUANTITIES",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-7-9ac900faf457>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnew_quantities\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgenerated_quantities\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cmdstanpy/stanfit.py\u001b[0m in \u001b[0;36mgenerated_quantities\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 543\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmemory\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlikewise\u001b[0m \u001b[0mall\u001b[0m \u001b[0mdraws\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0ma\u001b[0m \u001b[0mchain\u001b[0m \u001b[0mare\u001b[0m \u001b[0mcontiguous\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 544\u001b[0m \"\"\"\n\u001b[0;32m--> 545\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrunset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmethod\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mMethod\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGENERATED_QUANTITIES\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 546\u001b[0m raise RuntimeError(\n\u001b[1;32m 547\u001b[0m \u001b[0;34m'Bad runset method {}.'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrunset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/enum.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(cls, name)\u001b[0m\n\u001b[1;32m 344\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_member_map_\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 345\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 346\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mAttributeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 347\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 348\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__getitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mAttributeError\u001b[0m: GENERATED_QUANTITIES"
]
}
],
"outputs": [],
"source": [
"new_quantities.generated_quantities.shape"
]
Expand Down
31 changes: 15 additions & 16 deletions docs/sample.rst
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,9 @@ can be used to generate a dataset of simulated data, where each row in the datas

.. code-block::
data {
int<lower=0> N;
real<lower=0,upper=1> theta;
transformed data {
int<lower=0> N = 10;
real<lower=0,upper=1> theta = 0.35;
}
generated quantities {
int y_sim[N];
Expand All @@ -117,6 +117,7 @@ can be used to generate a dataset of simulated data, where each row in the datas
This program doesn't contain parameters or a model block, therefore
we run the sampler without ding any MCMC estimation by
specifying ``fixed_param=True``.
When ``fixed_param=True``, the ``sample`` method only runs 1 chain.
The sampler runs without updating the Markov Chain,
thus the values of all parameters and
transformed parameters are constant across all draws and
Expand All @@ -125,20 +126,18 @@ produced by RNG functions may change.

.. code-block:: python
bern_datagen_stan = os.path.join('test', 'data', 'bernoulli_datagen.stan')
bern_gen_model = Model(stan_file=bern_gen_stan)
bern_gen_model.compile()
bern_datagen_data = { 'N' : 10, 'theta' : 0.33 }
sim_data = bern_gen_model.sample(data=bern_gen_data, fixed_param=True)
datagen_stan = os.path.join('..', '..', 'test', 'data', 'bernoulli_datagen.stan')
datagen_model = Model(stan_file=datagen_stan)
datagen_model.compile()
sim_data = datagen_model.sample(fixed_param=True)
sim_data.summary()
When ``fixed_param=True``, the ``sample`` method only runs 1 chain by default.

Compute statistics on returned drawset
Compute, plot histogram of total successes for `N` Bernoulli trials with chance of success `theta`:

.. code-block:: python
sim_data_summary = sim_data.summary()
thetas_rep = sim_summary['Mean'][1:]
thetas_rep.plot.density()
sample_draw = sim_data.sample[1,0,:]
sample_draw
drawset_pd = sim_data.get_drawset()
drawset_pd.columns
y_sims = drawset_pd.drop(columns=['lp__', 'accept_stat__'])
y_sums = y_sims.sum(axis=1)
y_sums.astype('int32').plot.hist(range(0,datagen_data['N']+1))
8 changes: 3 additions & 5 deletions test/data/bernoulli_datagen.stan
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
data {
int<lower=0> N;
real<lower=0,upper=1> theta;
transformed data {
int<lower=0> N = 10;
real<lower=0,upper=1> theta = 0.35;
}
generated quantities {
int y_sim[N];
real<lower=0,upper=1> theta_rep;
for (n in 1:N)
y_sim[n] = bernoulli_rng(theta);
theta_rep = sum(y) / N;
}

0 comments on commit c2aa91b

Please sign in to comment.