tested code in .rst files via notebooks, corrected accordingly

stan-dev · Oct 15, 2019 · c2aa91b · c2aa91b
1 parent 02bfab0
commit c2aa91b
Show file tree

Hide file tree

Showing 5 changed files with 95 additions and 170 deletions.
diff --git a/docs/notebooks/MCMC Sampling.ipynb b/docs/notebooks/MCMC Sampling.ipynb
@@ -28,6 +28,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "## Fitting a model to data\n",
+    "\n",
     "In this example we use the CmdStan example model\n",
     "[bernoulli.stan](https://github.com/stan-dev/cmdstanpy/blob/master/test/data/bernoulli.stan)\n",
     "and data file\n",
@@ -56,9 +58,61 @@
     "\n",
     "bern_fit = bernoulli_model.sample(data=bernoulli_data)\n",
     "bern_fit.sample.shape\n",
-    "bern_fit.summary()\n",
-    "    "
+    "bern_fit.summary()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Running a data-generating model using `fixed_param=True`\n",
+    "\n",
+    "In this example we use the CmdStan example model\n",
+    "[bernoulli_datagen.stan](https://github.com/stan-dev/cmdstanpy/blob/master/test/data/bernoulli_datagen.stan)\n",
+    "to generate a simulated dataset given fixed data values."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "datagen_stan = os.path.join('..', '..', 'test', 'data', 'bernoulli_datagen.stan')\n",
+    "datagen_model = Model(stan_file=datagen_stan)\n",
+    "datagen_model.compile()\n",
+    "\n",
+    "sim_data = datagen_model.sample(fixed_param=True)\n",
+    "\n",
+    "sim_data.summary()"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Compute, plot histogram of total successes for `N` Bernoulli trials with chance of success `theta`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "drawset_pd = sim_data.get_drawset()\n",
+    "drawset_pd.columns\n",
+    "y_sims = drawset_pd.drop(columns=['lp__', 'accept_stat__'])\n",
+    "y_sums = y_sims.sum(axis=1)\n",
+    "y_sums.astype('int32').plot.hist(range(0,datagen_data['N']+1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {

diff --git a/docs/notebooks/Maximum Likelihood Estimation.ipynb b/docs/notebooks/Maximum Likelihood Estimation.ipynb
@@ -57,6 +57,13 @@
     "print(mle.column_names)\n",
     "print(mle.optimized_params_dict)\n"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {

diff --git a/docs/notebooks/Run Generated Quantities.ipynb b/docs/notebooks/Run Generated Quantities.ipynb
@@ -49,37 +49,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:cmdstanpy:compiling c++\n",
-      "INFO:cmdstanpy:compiled model file: /Users/mitzi/.cmdstanpy/cmdstan-2.20.0/examples/bernoulli/bernoulli\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "data { \n",
-      "  int<lower=0> N; \n",
-      "  int<lower=0,upper=1> y[N];\n",
-      "} \n",
-      "parameters {\n",
-      "  real<lower=0,upper=1> theta;\n",
-      "} \n",
-      "model {\n",
-      "  theta ~ beta(1,1);\n",
-      "  for (n in 1:N) \n",
-      "    y[n] ~ bernoulli(theta);\n",
-      "}\n",
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import os\n",
     "from cmdstanpy import Model, cmdstan_path\n",
@@ -103,41 +75,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:cmdstanpy:compiling c++\n",
-      "INFO:cmdstanpy:compiled model file: /Users/mitzi/github/stan-dev/cmdstanpy/docs/notebooks/bernoulli_ppc\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "data { \n",
-      "  int<lower=0> N; \n",
-      "  int<lower=0,upper=1> y[N];\n",
-      "} \n",
-      "parameters {\n",
-      "  real<lower=0,upper=1> theta;\n",
-      "} \n",
-      "model {\n",
-      "  theta ~ beta(1,1);\n",
-      "  y ~ bernoulli(theta);\n",
-      "}\n",
-      "generated quantities {\n",
-      "  int y_rep[N];\n",
-      "  for (n in 1:N)\n",
-      "    y_rep[n] = bernoulli_rng(theta);\n",
-      "}\n",
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "bernoulli_ppc_model = Model(stan_file='bernoulli_ppc.stan')\n",
     "bernoulli_ppc_model.compile()\n",
@@ -153,24 +93,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:cmdstanpy:start chain 1\n",
-      "INFO:cmdstanpy:start chain 2\n",
-      "INFO:cmdstanpy:finish chain 1\n",
-      "INFO:cmdstanpy:finish chain 2\n",
-      "INFO:cmdstanpy:start chain 3\n",
-      "INFO:cmdstanpy:start chain 4\n",
-      "INFO:cmdstanpy:finish chain 3\n",
-      "INFO:cmdstanpy:finish chain 4\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# fit the model to the data\n",
     "bern_data = os.path.join(bernoulli_dir, 'bernoulli.data.json')\n",
@@ -186,18 +111,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{'N': 10, 'y': [0, 1, 0, 0, 0, 0, 0, 0, 0, 1]}\n",
-      "mean of y: 0.2\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import ujson\n",
     "import statistics\n",
@@ -218,24 +134,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:cmdstanpy:start chain 1\n",
-      "INFO:cmdstanpy:start chain 2\n",
-      "INFO:cmdstanpy:finish chain 2\n",
-      "INFO:cmdstanpy:finish chain 1\n",
-      "INFO:cmdstanpy:start chain 3\n",
-      "INFO:cmdstanpy:start chain 4\n",
-      "INFO:cmdstanpy:finish chain 3\n",
-      "INFO:cmdstanpy:finish chain 4\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "new_quantities = bernoulli_ppc_model.run_generated_quantities(data=bern_data, csv_files=bern_fit.runset.csv_files)"
    ]
@@ -249,52 +150,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "('y_rep.1',\n",
-       " 'y_rep.2',\n",
-       " 'y_rep.3',\n",
-       " 'y_rep.4',\n",
-       " 'y_rep.5',\n",
-       " 'y_rep.6',\n",
-       " 'y_rep.7',\n",
-       " 'y_rep.8',\n",
-       " 'y_rep.9',\n",
-       " 'y_rep.10')"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "new_quantities.column_names"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "ename": "AttributeError",
-     "evalue": "GENERATED_QUANTITIES",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-7-9ac900faf457>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnew_quantities\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgenerated_quantities\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cmdstanpy/stanfit.py\u001b[0m in \u001b[0;36mgenerated_quantities\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    543\u001b[0m         \u001b[0;32min\u001b[0m \u001b[0mmemory\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlikewise\u001b[0m \u001b[0mall\u001b[0m \u001b[0mdraws\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0ma\u001b[0m \u001b[0mchain\u001b[0m \u001b[0mare\u001b[0m \u001b[0mcontiguous\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    544\u001b[0m         \"\"\"\n\u001b[0;32m--> 545\u001b[0;31m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrunset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmethod\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mMethod\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGENERATED_QUANTITIES\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    546\u001b[0m             raise RuntimeError(\n\u001b[1;32m    547\u001b[0m                 \u001b[0;34m'Bad runset method {}.'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrunset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/enum.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(cls, name)\u001b[0m\n\u001b[1;32m    344\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_member_map_\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    345\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 346\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mAttributeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    347\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    348\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__getitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mAttributeError\u001b[0m: GENERATED_QUANTITIES"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "new_quantities.generated_quantities.shape"
    ]

diff --git a/docs/sample.rst b/docs/sample.rst
@@ -104,9 +104,9 @@ can be used to generate a dataset of simulated data, where each row in the datas
 
 .. code-block::
 
-    data { 
-      int<lower=0> N; 
-      real<lower=0,upper=1> theta;
+    transformed data { 
+      int<lower=0> N = 10;
+      real<lower=0,upper=1> theta = 0.35;
     } 
     generated quantities {
       int y_sim[N];
@@ -117,6 +117,7 @@ can be used to generate a dataset of simulated data, where each row in the datas
 This program doesn't contain parameters or a model block, therefore
 we run the sampler without ding any MCMC estimation by
 specifying ``fixed_param=True``.
+When ``fixed_param=True``, the ``sample`` method only runs 1 chain.
 The sampler runs without updating the Markov Chain,
 thus the values of all parameters and
 transformed parameters are constant across all draws and
@@ -125,20 +126,18 @@ produced by RNG functions may change.
 
 .. code-block:: python
 
-    bern_datagen_stan = os.path.join('test', 'data', 'bernoulli_datagen.stan')
-    bern_gen_model = Model(stan_file=bern_gen_stan)
-    bern_gen_model.compile()
-    bern_datagen_data = { 'N' : 10, 'theta' : 0.33 }
-    sim_data = bern_gen_model.sample(data=bern_gen_data, fixed_param=True)
+    datagen_stan = os.path.join('..', '..', 'test', 'data', 'bernoulli_datagen.stan')
+    datagen_model = Model(stan_file=datagen_stan)
+    datagen_model.compile()
+    sim_data = datagen_model.sample(fixed_param=True)
+    sim_data.summary()
 
-When ``fixed_param=True``, the ``sample`` method only runs 1 chain by default.
-
-Compute statistics on returned drawset
+Compute, plot histogram of total successes for `N` Bernoulli trials with chance of success `theta`:
 
 .. code-block:: python
 
-    sim_data_summary = sim_data.summary()
-    thetas_rep = sim_summary['Mean'][1:]
-    thetas_rep.plot.density()
-    sample_draw = sim_data.sample[1,0,:]
-    sample_draw
+    drawset_pd = sim_data.get_drawset()
+    drawset_pd.columns
+    y_sims = drawset_pd.drop(columns=['lp__', 'accept_stat__'])
+    y_sums = y_sims.sum(axis=1)
+    y_sums.astype('int32').plot.hist(range(0,datagen_data['N']+1))
diff --git a/test/data/bernoulli_datagen.stan b/test/data/bernoulli_datagen.stan
@@ -1,11 +1,9 @@
-data { 
-  int<lower=0> N; 
-  real<lower=0,upper=1> theta;
+transformed data { 
+  int<lower=0> N = 10;
+  real<lower=0,upper=1> theta = 0.35;
 } 
 generated quantities {
   int y_sim[N];
-  real<lower=0,upper=1> theta_rep;
   for (n in 1:N)
     y_sim[n] = bernoulli_rng(theta);
-  theta_rep = sum(y) / N;
 }