pymc-devs · aloctavodia · Jun 1, 2022 · Jun 1, 2022 · Jun 1, 2022 · Jun 1, 2022
diff --git a/examples/case_studies/bayesian_ab_testing.ipynb b/examples/case_studies/bayesian_ab_testing.ipynb
diff --git a/myst_nbs/case_studies/bayesian_ab_testing.myst.md b/myst_nbs/case_studies/bayesian_ab_testing.myst.md
@@ -19,12 +19,9 @@ import arviz as az
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-import pymc3 as pm
-import pymc3.math as pmm
+import pymc as pm
 
 from scipy.stats import bernoulli, expon
-
-print(f"Running on PyMC3 v{pm.__version__}")
 ```
 
 ```{code-cell} ipython3
@@ -33,6 +30,12 @@ rng = np.random.default_rng(RANDOM_SEED)
 
 %config InlineBackend.figure_format = 'retina'
 az.style.use("arviz-darkgrid")
+
+plotting_defaults = dict(
+    bins=50,
+    kind="hist",
+    textsize=10,
+)
 ```
 
 This notebook demonstrates how to implement a Bayesian analysis of an A/B test. We implement the models discussed in VWO's [Bayesian A/B Testing Whitepaper](https://vwo.com/downloads/VWO_SmartStats_technical_whitepaper.pdf), and discuss the effect of different prior choices for these models. This notebook does _not_ discuss other related topics like how to choose a prior, early stopping, and power analysis.
@@ -76,13 +79,13 @@ $$y_A \sim \mathrm{Binomial}(n = N_A, p = \theta_A), y_B \sim \mathrm{Binomial}(
 
 With this, we can sample from the joint posterior of $\theta_A, \theta_B$. 
 
-You may have noticed that the Beta distribution is the conjugate prior for the Binomial, so we don't need MCMC sampling to estimate the posterior (the exact solution can be found in the VWO paper). We'll still demonstrate how sampling can be done with PyMC3 though, and doing this makes it easier to extend the model with different priors, dependency assumptions, etc.
+You may have noticed that the Beta distribution is the conjugate prior for the Binomial, so we don't need MCMC sampling to estimate the posterior (the exact solution can be found in the VWO paper). We'll still demonstrate how sampling can be done with PyMC though, and doing this makes it easier to extend the model with different priors, dependency assumptions, etc.
 
 Finally, remember that our outcome of interest is whether B is better than A. A common measure in practice for whether B is better than is the _relative uplift in conversion rates_, i.e. the percentage difference of $\theta_B$ over $\theta_A$:
 
 $$\mathrm{reluplift}_B = \theta_B / \theta_A - 1$$
 
-We'll implement this model setup in PyMC3 below.
+We'll implement this model setup in PyMC below.
 
 ```{code-cell} ipython3
 @dataclass
@@ -127,7 +130,7 @@ We illustrate these points with prior predictive checks.
 
 +++
 
-Note that we can pass in arbitrary values for the observed data in these prior predictive checks. PyMC3 will not use that data when sampling from the prior predictive distribution.
+Note that we can pass in arbitrary values for the observed data in these prior predictive checks. PyMC will not use that data when sampling from the prior predictive distribution.
 
 ```{code-cell} ipython3
 weak_prior = ConversionModelTwoVariant(BetaPrior(alpha=100, beta=100))
@@ -139,22 +142,22 @@ strong_prior = ConversionModelTwoVariant(BetaPrior(alpha=10000, beta=10000))
 
 ```{code-cell} ipython3
 with weak_prior.create_model(data=[BinomialData(1, 1), BinomialData(1, 1)]):
-    weak_prior_predictive = pm.sample_prior_predictive(samples=10000)
+    weak_prior_predictive = pm.sample_prior_predictive(samples=10000, return_inferencedata=False)
 ```
 
 ```{code-cell} ipython3
 with strong_prior.create_model(data=[BinomialData(1, 1), BinomialData(1, 1)]):
-    strong_prior_predictive = pm.sample_prior_predictive(samples=10000)
+    strong_prior_predictive = pm.sample_prior_predictive(samples=10000, return_inferencedata=False)
 ```
 
 ```{code-cell} ipython3
 fig, axs = plt.subplots(2, 1, figsize=(7, 7), sharex=True)
-az.plot_posterior(weak_prior_predictive["reluplift_b"], textsize=10, ax=axs[0], kind="hist")
+az.plot_posterior(weak_prior_predictive["reluplift_b"], ax=axs[0], **plotting_defaults)
 axs[0].set_title(f"B vs. A Rel Uplift Prior Predictive, {weak_prior.priors}", fontsize=10)
 axs[0].axvline(x=0, color="red")
-az.plot_posterior(strong_prior_predictive["reluplift_b"], textsize=10, ax=axs[1], kind="hist")
+az.plot_posterior(strong_prior_predictive["reluplift_b"], ax=axs[1], **plotting_defaults)
 axs[1].set_title(f"B vs. A Rel Uplift Prior Predictive, {strong_prior.priors}", fontsize=10)
-axs[1].axvline(x=0, color="red")
+axs[1].axvline(x=0, color="red");
 ```
 
 With the weak prior our 94% HDI for the relative uplift for B over A is roughly [-20%, +20%], whereas it is roughly [-2%, +2%] with the strong prior. This is effectively the "starting point" for the relative uplift distribution, and will affect how the observed conversions translate to the posterior distribution.
@@ -204,26 +207,27 @@ def run_scenario_twovariant(
     generated = generate_binomial_data(variants, true_rates, samples_per_variant)
     data = [BinomialData(**generated[v].to_dict()) for v in variants]
     with ConversionModelTwoVariant(priors=weak_prior).create_model(data):
-        trace_weak = pm.sample(draws=5000, return_inferencedata=True, cores=1, chains=2)
+        trace_weak = pm.sample(draws=5000)
     with ConversionModelTwoVariant(priors=strong_prior).create_model(data):
-        trace_strong = pm.sample(draws=5000, return_inferencedata=True, cores=1, chains=2)
+        trace_strong = pm.sample(draws=5000)
 
     true_rel_uplift = true_rates[1] / true_rates[0] - 1
 
     fig, axs = plt.subplots(2, 1, figsize=(7, 7), sharex=True)
-    az.plot_posterior(trace_weak.posterior["reluplift_b"], textsize=10, ax=axs[0], kind="hist")
+    az.plot_posterior(trace_weak.posterior["reluplift_b"], ax=axs[0], **plotting_defaults)
     axs[0].set_title(f"True Rel Uplift = {true_rel_uplift:.1%}, {weak_prior}", fontsize=10)
     axs[0].axvline(x=0, color="red")
-    az.plot_posterior(trace_strong.posterior["reluplift_b"], textsize=10, ax=axs[1], kind="hist")
+    az.plot_posterior(trace_strong.posterior["reluplift_b"], ax=axs[1], **plotting_defaults)
     axs[1].set_title(f"True Rel Uplift = {true_rel_uplift:.1%}, {strong_prior}", fontsize=10)
     axs[1].axvline(x=0, color="red")
     fig.suptitle("B vs. A Rel Uplift")
+    return trace_weak, trace_strong
 ```
 
 #### Scenario 1 - same underlying conversion rates
 
 ```{code-cell} ipython3
-run_scenario_twovariant(
+trace_weak, trace_strong = run_scenario_twovariant(
     variants=["A", "B"],
     true_rates=[0.23, 0.23],
     samples_per_variant=100000,
@@ -290,7 +294,7 @@ class ConversionModel:
                 elif comparison_method == "best_of_rest":
                     others = [p[j] for j in range(num_variants) if j != i]
                     if len(others) > 1:
-                        comparison = pmm.maximum(*others)
+                        comparison = pm.math.maximum(*others)
                     else:
                         comparison = others[0]
                 else:
@@ -310,17 +314,15 @@ def run_scenario_bernoulli(
     generated = generate_binomial_data(variants, true_rates, samples_per_variant)
     data = [BinomialData(**generated[v].to_dict()) for v in variants]
     with ConversionModel(priors).create_model(data=data, comparison_method=comparison_method):
-        trace = pm.sample(draws=5000, return_inferencedata=True, chains=2, cores=1)
+        trace = pm.sample(draws=5000)
 
     n_plots = len(variants)
     fig, axs = plt.subplots(nrows=n_plots, ncols=1, figsize=(3 * n_plots, 7), sharex=True)
     for i, variant in enumerate(variants):
         if i == 0 and comparison_method == "compare_to_control":
             axs[i].set_yticks([])
         else:
-            az.plot_posterior(
-                trace.posterior[f"reluplift_{i}"], textsize=10, ax=axs[i], kind="hist"
-            )
+            az.plot_posterior(trace.posterior[f"reluplift_{i}"], ax=axs[i], **plotting_defaults)
         axs[i].set_title(f"Rel Uplift {variant}, True Rate = {true_rates[i]:.2%}", fontsize=10)
         axs[i].axvline(x=0, color="red")
     fig.suptitle(f"Method {comparison_method}, {priors}")
@@ -451,9 +453,9 @@ class RevenueModel:
                     others_lam = [1 / lam[j] for j in range(num_variants) if j != i]
                     others_rpv = [revenue_per_visitor[j] for j in range(num_variants) if j != i]
                     if len(others_rpv) > 1:
-                        comparison_theta = pmm.maximum(*others_theta)
-                        comparison_lam = pmm.maximum(*others_lam)
-                        comparison_rpv = pmm.maximum(*others_rpv)
+                        comparison_theta = pm.math.maximum(*others_theta)
+                        comparison_lam = pm.math.maximum(*others_lam)
+                        comparison_rpv = pm.math.maximum(*others_rpv)
                     else:
                         comparison_theta = others_theta[0]
                         comparison_lam = others_lam[0]
@@ -492,14 +494,14 @@ data = [
 
 ```{code-cell} ipython3
 with RevenueModel(c_prior, mp_prior).create_model(data, "best_of_rest"):
-    revenue_prior_predictive = pm.sample_prior_predictive(samples=10000)
+    revenue_prior_predictive = pm.sample_prior_predictive(samples=10000, return_inferencedata=False)
 ```
 
 ```{code-cell} ipython3
 fig, ax = plt.subplots()
-az.plot_posterior(revenue_prior_predictive["reluplift_1"], textsize=10, ax=ax, kind="hist")
+az.plot_posterior(revenue_prior_predictive["reluplift_1"], ax=ax, **plotting_defaults)
 ax.set_title(f"Revenue Rel Uplift Prior Predictive, {c_prior}, {mp_prior}", fontsize=10)
-ax.axvline(x=0, color="red")
+ax.axvline(x=0, color="red");
 ```
 
 Similar to the model for Bernoulli conversions, the width of the prior predictive uplift distribution will depend on the strength of our priors. See the Bernoulli conversions section for a discussion of the benefits and disadvantages of using a weak vs. strong prior.
@@ -553,17 +555,15 @@ def run_scenario_value(
     with RevenueModel(conversion_rate_prior, mean_purchase_prior).create_model(
         data, comparison_method
     ):
-        trace = pm.sample(draws=5000, return_inferencedata=True, chains=2, cores=1)
+        trace = pm.sample(draws=5000, chains=2, cores=1)
 
     n_plots = len(variants)
     fig, axs = plt.subplots(nrows=n_plots, ncols=1, figsize=(3 * n_plots, 7), sharex=True)
     for i, variant in enumerate(variants):
         if i == 0 and comparison_method == "compare_to_control":
             axs[i].set_yticks([])
         else:
-            az.plot_posterior(
-                trace.posterior[f"reluplift_{i}"], textsize=10, ax=axs[i], kind="hist"
-            )
+            az.plot_posterior(trace.posterior[f"reluplift_{i}"], ax=axs[i], **plotting_defaults)
         true_rpv = true_conversion_rates[i] * true_mean_purchase[i]
         axs[i].set_title(f"Rel Uplift {variant}, True RPV = {true_rpv:.2f}", fontsize=10)
         axs[i].axvline(x=0, color="red")
@@ -611,8 +611,7 @@ scenario_value_2 = run_scenario_value(
 axs = az.plot_posterior(
     scenario_value_2,
     var_names=["theta_reluplift_1", "reciprocal_lam_reluplift_1"],
-    textsize=10,
-    kind="hist",
+    **plotting_defaults,
 )
 axs[0].set_title(f"Conversion Rate Uplift B, True Uplift = {(0.04 / 0.05 - 1):.2%}", fontsize=10)
 axs[0].axvline(x=0, color="red")
@@ -655,9 +654,9 @@ There are many other considerations to implementing a Bayesian framework to anal
 * How do we plan the length and size of A/B tests using power analysis, if we're using Bayesian models to analyse the results?
 * Outside of the conversion rates (bernoulli random variables for each visitor), many value distributions in online software cannot be fit with nice densities like Normal, Gamma, etc. How do we model these?
 
-Various textbooks and online resources dive into these areas in more detail. [Doing Bayesian Data Analysis](http://doingbayesiandataanalysis.blogspot.com/) by John Kruschke is a great resource, and has been translated to PyMC3 here: https://github.com/JWarmenhoven/DBDA-python.
+Various textbooks and online resources dive into these areas in more detail. [Doing Bayesian Data Analysis](http://doingbayesiandataanalysis.blogspot.com/) by John Kruschke is a great resource, and has been translated to PyMC here: https://github.com/JWarmenhoven/DBDA-python.
 
-We also plan to create more PyMC3 tutorials on these topics, so stay tuned!
+We also plan to create more PyMC tutorials on these topics, so stay tuned!
 
 ---
 
@@ -669,5 +668,5 @@ Author: [Cuong Duong](https://github.com/tcuongd) (2021-05-23)
 
 ```{code-cell} ipython3
 %load_ext watermark
-%watermark -n -u -v -iv -w -p theano,xarray
+%watermark -n -u -v -iv -w -p aesara,xarray
 ```