[release] Version 2.0.0
- add links to introductory articles on Bayesian data analysis
- change from `alpha` to `credible_mass`.
- add BestResults.hdi()
- clarifications in docs
treszkai committed Sep 19, 2019
2 parents b3dff61 + c3977bb; commit 2c6ec83
Showing 9 changed files with 141 additions and 64 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -42,7 +42,7 @@ The [documentation](https://best.readthedocs.io) describes the API in detail.

## Installation ##

Ensure your Python version is sufficiently up-to-date:
Ensure your Python version is sufficiently up-to-date (at least 3.5.4):

```bash
$ python --version
@@ -51,7 +51,7 @@ Python 3.5.6

Then install with Pip:
```bash
$ pip install https://github.com/treszkai/best/archive/master.tar.gz
$ pip install best
```

## Developer notes ##
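
With this release the package installs from PyPI (`pip install best`, per the README change above). As a quick smoke test of the installed package, a minimal two-group analysis could look like the sketch below; the `best.analyze_two` entry point and the toy data are assumptions for illustration and are not part of this diff.

```python
# Minimal post-install sketch; best.analyze_two and the data are assumed for illustration.
import best

drug    = [101.0, 100.0, 102.0, 104.0, 102.0,  97.0, 105.0, 105.0]
placebo = [ 99.0, 101.0, 100.0, 101.0, 102.0, 100.0,  97.0, 101.0]

best_out = best.analyze_two(drug, placebo)   # runs MCMC sampling with PyMC3
print(best_out.summary(credible_mass=0.95))  # posterior summary; keyword renamed in 2.0.0
```
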
52 changes: 32 additions & 20 deletions best/model.py
@@ -21,6 +21,11 @@
import scipy.stats as st


def check_credible_mass(credible_mass):
if not 0 < credible_mass < 1:
raise ValueError('credible_mass parameter must be between 0 and 1, non-inclusive')


class BestModel(ABC):
"""Base class for BEST models"""

@@ -43,10 +48,10 @@ def model(self) -> pm.Model:
pass

@abstractmethod
def observed_data(self, group_id):
def observed_data(self, group_id: int):
"""Return the observed data as a NumPy array
(This property is accessible primarily for internal purposes.)
(This method is accessible primarily for internal purposes.)
"""
pass

@@ -123,7 +128,7 @@ def version(self):
def model(self):
return self._model

def observed_data(self, group_id):
def observed_data(self, group_id: int):
if group_id == 1:
return self.y
else:
@@ -200,7 +205,7 @@ def version(self):
def model(self):
return self._model

def observed_data(self, group_id):
def observed_data(self, group_id: int):
if group_id == 1:
return self.y1
elif group_id == 2:
@@ -241,43 +246,50 @@ def trace(self) -> MultiTrace:
"""
return self._trace

def observed_data(self, group_id):
def observed_data(self, group_id: int):
"""Return the observed data as a NumPy array
(This method is accessible primarily for internal purposes.)
"""
return self.model.observed_data(group_id)

def summary(self, alpha=0.05):
def summary(self, credible_mass: float = 0.95):
"""Return summary statistics of the results
Parameters
----------
alpha : float
credible_mass : float
The highest posterior density intervals in the summary will cover
(1–alpha) * 100% of the probability mass.
For example, alpha=0.05 results in a 95% credible interval.
Default: 0.05.
credible_mass * 100% of the probability mass.
For example, credible_mass=0.95 results in 95% credible intervals.
Default: 0.95.
"""
return pm.summary(self.trace, alpha=alpha)
check_credible_mass(credible_mass)
return pm.summary(self.trace, alpha=(1 - credible_mass))

def hpd(self, var_name: str, alpha: float = 0.05):
"""Calculate the highest posterior density (HPD) interval
def hdi(self, var_name: str, credible_mass: float = 0.95):
"""Calculate the highest posterior density interval (HDI)
This is a 1-alpha *credible interval* which contains the
most likely values of the parameter.
This function calculates a *credible interval* which contains the
``credible_mass`` most likely values of the parameter, given the data.
Also known as an HPD interval.
Parameters
----------
var_name : str
Name of variable.
alpha : float
The HPD will cover (1–alpha) * 100% of the probability mass.
For example, alpha=0.05 results in a 95% credible interval.
Default: 0.05.
credible_mass : float
The HDI will cover credible_mass * 100% of the probability mass.
Default: 0.95, i.e. a 95% HDI.
Returns
-------
(float, float)
The endpoints of the HPD
"""
return tuple(pm.hpd(self.trace[var_name], alpha=alpha))
check_credible_mass(credible_mass)

return tuple(pm.hpd(self.trace[var_name], alpha=(1 - credible_mass)))

def posterior_prob(self, var_name: str, low: float = -np.inf, high: float = np.inf):
r"""Calculate the posterior probability that a variable is in a given interval
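
Taken together, the `best/model.py` changes above (the `alpha` to `credible_mass` rename, the `hpd()` to `hdi()` rename, and the new `check_credible_mass` validation) affect user code roughly as in the sketch below; `best.analyze_two` and the toy data are assumptions for illustration.

```python
# Sketch of the renamed interval API; best.analyze_two and the data are assumed.
import best

best_out = best.analyze_two([101., 100., 102., 104., 102.],
                            [ 99., 101., 100., 101., 102.])

# Previously: best_out.hpd('Difference of means', alpha=0.05)
# Now the same 95% interval is requested by its credible mass:
hdi_low, hdi_high = best_out.hdi('Difference of means', credible_mass=0.95)

# The summary table follows the same convention; values outside (0, 1) raise
# a ValueError via check_credible_mass().
print(best_out.summary(credible_mass=0.95))

# Posterior probability that the difference of means exceeds 0.5:
print(best_out.posterior_prob('Difference of means', low=0.5))
```
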
21 changes: 12 additions & 9 deletions best/plot.py
@@ -17,13 +17,16 @@
import numpy as np
import matplotlib.lines as mpllines
from matplotlib.transforms import blended_transform_factory
from matplotlib.ticker import LogFormatter
import scipy.stats as st

from .model import BestResults, BestResultsOne, BestResultsTwo

# Only 99.5% of the samples are displayed, to prevent the long tails
DISPLAYED_ALPHA = 0.005
# Only this ratio of the samples are displayed,
# to prevent the long tails distorting the plot.
DISPLAYED_MASS = 0.995

DEFAULT_HDI_MASS = 0.95

PRETTY_BLUE = '#89d1ea'


@@ -86,7 +89,7 @@ def plot_posterior(best_results: BestResults,
>>> plt.show()
"""
samples = best_results.trace[var_name]
samples_min, samples_max = best_results.hpd(var_name, DISPLAYED_ALPHA)
samples_min, samples_max = best_results.hdi(var_name, DISPLAYED_MASS)
samples = samples[(samples_min <= samples) * (samples <= samples_max)]

trans = blended_transform_factory(ax.transData, ax.transAxes)
@@ -118,7 +121,7 @@ def plot_posterior(best_results: BestResults,
ax.axvline(ref_val, linestyle=':')

# plot HDI
hdi_min, hdi_max = best_results.hpd(var_name, alpha=0.05)
hdi_min, hdi_max = best_results.hdi(var_name, DEFAULT_HDI_MASS)

hdi_line, = ax.plot([hdi_min, hdi_max], [0, 0],
lw=5.0, color='k')
@@ -162,7 +165,7 @@ def plot_normality_posterior(best_results, ax, bins, title):
var_name = 'Normality'
samples = best_results.trace[var_name]
norm_bins = np.logspace(np.log10(best_results.model.nu_min),
np.log10(best_results.hpd(var_name, DISPLAYED_ALPHA)[-1]),
np.log10(best_results.hdi(var_name, DISPLAYED_MASS)[-1]),
num=bins + 1)
plot_posterior(best_results,
var_name,
@@ -173,7 +176,7 @@ def plot_normality_posterior(best_results, ax, bins, title):
ax.set_xlim(2.4, norm_bins[-1] * 1.05)
ax.semilogx()
# don't use scientific notation for tick labels
tick_fmt = LogFormatter()
tick_fmt = plt.LogFormatter()
ax.xaxis.set_major_formatter(tick_fmt)
ax.xaxis.set_minor_formatter(tick_fmt)

@@ -363,8 +366,8 @@ def plot_all_two(best_results: BestResultsTwo,
posterior_std1 = trace['Group 1 SD']
posterior_std2 = trace['Group 2 SD']

std1_min, std1_max = best_results.hpd('Group 1 SD', DISPLAYED_ALPHA)
std2_min, std2_max = best_results.hpd('Group 2 SD', DISPLAYED_ALPHA)
std1_min, std1_max = best_results.hdi('Group 1 SD', DISPLAYED_MASS)
std2_min, std2_max = best_results.hdi('Group 2 SD', DISPLAYED_MASS)
std_min = min(std1_min, std2_min)
std_max = max(std1_max, std2_max)
stds = np.concatenate((posterior_std1, posterior_std2))
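
The plotting changes above mainly swap `hpd(..., alpha)` calls for `hdi(..., credible_mass)` and rename the display constants, so a plotting call keeps the same shape. A rough sketch follows; `best.analyze_two` and the `ax` keyword of `plot_posterior` are assumptions not shown in this diff.

```python
# Sketch of plotting one posterior; best.analyze_two and the ax keyword are assumed.
import matplotlib.pyplot as plt
import best
from best.plot import plot_posterior

best_out = best.analyze_two([101., 100., 102., 104., 102.],
                            [ 99., 101., 100., 101., 102.])

fig, ax = plt.subplots()
# Internally, plot_posterior now trims the displayed range with
# best_out.hdi(var_name, DISPLAYED_MASS) and marks the 95% HDI (DEFAULT_HDI_MASS).
plot_posterior(best_out, 'Difference of means', ax=ax)
plt.show()
```
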
3 changes: 2 additions & 1 deletion docs/explanations.rst
@@ -15,7 +15,8 @@ with an :ref:`additional parameter <sec-normality>` that controls the prevalence
After assigning a prior distribution to the parameters,
these distributions are updated in accordance with how well each parameter setting fits the data,
resulting in the so-called *posterior distribution*.
(The specifics of the prior are explained on the :ref:`Model history <sec-model-latest>` page.)
(The model used by this library is not exactly like the one described in (Kruschke, 2013);
the :ref:`Model history <sec-model-latest>` page gives an exact description.)

The resulting t-distributions can be plotted with the :func:`plot_data_and_prediction` function,
whose output is a figure like this:
45 changes: 36 additions & 9 deletions docs/index.rst
@@ -24,11 +24,16 @@ For example, to find the probability that the first group's mean is larger by at
>>> best_out.posterior_prob('Difference of means', low=0.5)
0.87425

The parameter estimation is described briefly in :ref:`the relevant section <brief-description>`,
The 95% highest posterior density interval (HDI) can be queried just as easily::

>>> best_out.hdi('Difference of means', 0.95)
(0.12..., 1.84...)

The parameter estimation is described briefly in :ref:`the relevant section <brief-description>` of this documentation,
or in detail in the following publication:

John K. Kruschke, 2013. Bayesian estimation supersedes the *t* test.
Journal of Experimental Psychology: General, 2013, v.142(2), pp.573-603.
Kruschke, J. K. (2013). Bayesian estimation supersedes the *t* test.
*Journal of Experimental Psychology: General* **142(2)**, pp.573-603.
(doi: `10.1037/a0029146 <https://dx.doi.org/10.1037/a0029146>`_)

The publication's `accompanying website <http://www.indiana.edu/~kruschke/BEST/>`_
@@ -44,6 +49,25 @@ while keeping the tools simple.
It is *not* intended as a comprehensive data analysis tool.
For that purpose, please refer to books like John K. Kruschke's
`Doing Bayesian Data Analysis <https://sites.google.com/site/doingbayesiandataanalysis/>`_.
Quote from chapter 1 of the book:

This book explains how to actually do Bayesian data analysis, by real people (like
you), for realistic data (like yours). The book starts at the basics, with elementary
notions of probability and programming. You do not need to already know statistics
and programming.

The following open access papers give an intuitive introduction to Bayesian data analysis:

- Kruschke, J. K. and Liddell, T. M. (2018).
The Bayesian New Statistics: Hypothesis testing, estimation, meta-analysis, and power analysis from a Bayesian perspective.
*Psychonomic Bulletin & Review* **25**, pp. 178-206.
`R code <https://osf.io/j6364/files/>`_;
see also the `Shiny App <http://www.indiana.edu/~kruschke/bayesAndFreqApp.html>`_.
(doi: `10.3758/s13423-016-1221-4 <https://doi.org/10.3758/s13423-016-1221-4>`_)
- Kruschke, J. K. and Liddell, T. M. (2018).
Bayesian data analysis for newcomers.
*Psychonomic Bulletin & Review* **25**, pp. 155-177.
(doi: `10.3758/s13423-017-1272-1 <https://doi.org/10.3758/s13423-017-1272-1>`_)

Installation
------------
@@ -53,7 +77,7 @@ and can be installed with *pip*:

.. code-block:: bash
pip install https://github.com/treszkai/best/archive/master.zip
pip install best
This command installs the following dependencies:

@@ -64,16 +88,19 @@ This command installs the following dependencies:
Get in touch
------------

If you have trouble installing or using `best`, or understanding the results, or you found an error,
If you find the documentation lacking or you have found an error,
please `open an issue <https://github.com/treszkai/best/issues>`_ at the project's GitHub page,
or open a `pull request <https://github.com/treszkai/best/pulls>`_ if you have a proposed solution.

Your feedback and contribution are welcome!

Further documentation
---------------------

.. toctree::
:maxdepth: 2
:maxdepth: 2

api
explanations
model_history
api
explanations
model_history
version_history
15 changes: 8 additions & 7 deletions docs/model_history.rst
@@ -24,6 +24,7 @@ Where :math:`\hat\mu` and :math:`\hat\sigma` are the sample mean and
sample standard deviation of all the data from the two groups.
The effect size is calculated as :math:`(\mu_1 - \mu_2) \big/ \sqrt{(\sigma_1^2 + \sigma_2^2) \,/\, 2}`.

.. _sec-model-v2:
.. _sec-model-latest:

v2
@@ -47,28 +48,28 @@ and the formula for effect size also uses :math:`\mathrm{sd}_i` instead of :math:
Setting a bound of 2.5 prevents strong outliers and extremely large standard deviations.

Another change concerns the sampling of :math:`\sigma_i`.
In the original model :math:`\sigma_i` was uniformly distributed between
:math:`\hat\sigma\, / \,1000` and :math:`1000\,\hat\sigma`,
In the original model :math:`\sigma_i \,/\, \hat\sigma` was uniformly distributed between
:math:`1 \, / \,1000` and :math:`1000`,
meaning the *prior* probability of :math:`\sigma > \hat\sigma` was 1000 times that of :math:`\sigma < \hat\sigma`,
which caused an overestimation of :math:`\sigma` with low sample sizes (around :math:`N = 5`).
To make these probabilities equal, now :math:`\log(\sigma_i)` is distributed uniformly between
:math:`\log(\hat\sigma\, / \,1000)` and :math:`\log(1000\, \hat\sigma)`.
To make these probabilities equal, now :math:`\log(\sigma_i \,/\, \hat\sigma)` is distributed uniformly between
:math:`\log(1\, / \,1000)` and :math:`\log(1000)`.
At :math:`N=25` this change in the prior does not cause a perceptible change in the posterior.

*Summary of changes*:
- Lower bound of :math:`\nu` is 2.5.
- SD is calculated as :math:`\sigma \sqrt{ \nu / (\nu - 2)}`.
- Effect size is calculated as :math:`(\mu_1 - \mu_2) \big/ \sqrt{(\mathrm{sd}_1^2 + \mathrm{sd}_2^2) \,/\, 2}`.
- :math:`\log(\sigma_i)` is uniformly distributed.
- :math:`\log(\sigma_i \,/\, \hat\sigma)` is uniformly distributed.

The model for two-group analysis is described by the following sampling statements:

.. math::
\mu_1 &\sim \text{Normal}(\hat\mu, 1000 \, \hat\sigma) \\
\mu_2 &\sim \text{Normal}(\hat\mu, 1000 \, \hat\sigma) \\
\log(\sigma_1) &\sim \text{Uniform}(\log(\hat\sigma \, / \, 1000), \log(1000\, \hat\sigma)) \\
\log(\sigma_2) &\sim \text{Uniform}(\log(\hat\sigma \, / \, 1000), \log(1000\, \hat\sigma)) \\
\log(\sigma_1 \,/\, \hat\sigma) &\sim \text{Uniform}(\log(1 \, / \, 1000), \log(1000)) \\
\log(\sigma_2 \,/\, \hat\sigma) &\sim \text{Uniform}(\log(1 \, / \, 1000), \log(1000)) \\
\nu &\sim \text{Exponential}(1\, / \, 27.5) + 2.5 \\
y_1 &\sim t_\nu(\mu_1, \sigma_1) \\
y_2 &\sim t_\nu(\mu_2, \sigma_2)
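
To make the sampling statements above concrete, here is a rough PyMC3 transcription of the v2 two-group model. This is an illustrative sketch only: the data are made up, `Exponential(1/27.5)` is read as a rate parametrization, and the variable names, pooled-SD estimate, and sampler settings are assumptions; `best/model.py` remains the authoritative implementation.

```python
# Illustrative PyMC3 transcription of the v2 sampling statements (not the library's code).
import numpy as np
import pymc3 as pm

y1 = np.array([101., 100., 102., 104., 102.])   # made-up example data
y2 = np.array([ 99., 101., 100., 101., 102.])

y_all = np.concatenate((y1, y2))
mu_hat, sigma_hat = y_all.mean(), y_all.std()    # pooled sample mean and SD

with pm.Model() as model:
    mu_1 = pm.Normal('Group 1 mean', mu=mu_hat, sd=1000 * sigma_hat)
    mu_2 = pm.Normal('Group 2 mean', mu=mu_hat, sd=1000 * sigma_hat)

    # log(sigma_i / sigma_hat) ~ Uniform(log(1/1000), log(1000))
    log_sigma_ratio_1 = pm.Uniform('log(sigma_1/sigma_hat)', np.log(1 / 1000), np.log(1000))
    log_sigma_ratio_2 = pm.Uniform('log(sigma_2/sigma_hat)', np.log(1 / 1000), np.log(1000))
    sigma_1 = sigma_hat * pm.math.exp(log_sigma_ratio_1)
    sigma_2 = sigma_hat * pm.math.exp(log_sigma_ratio_2)

    # nu ~ Exponential(1/27.5) + 2.5, so nu is bounded below by 2.5
    nu = pm.Exponential('nu - 2.5', lam=1 / 27.5) + 2.5

    pm.StudentT('Group 1 data', nu=nu, mu=mu_1, sd=sigma_1, observed=y1)
    pm.StudentT('Group 2 data', nu=nu, mu=mu_2, sd=sigma_2, observed=y2)

    trace = pm.sample(2000)   # draw posterior samples
```
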
21 changes: 21 additions & 0 deletions docs/version_history.rst
@@ -0,0 +1,21 @@
.. _ch-version-history:

.. module:: best
:noindex:

Version History
===============

1.0
---
Initial release by `Andrew Straw <https://github.com/strawlab/best>`_.
This version supported Python 2.7, Matplotlib 2.2, and PyMC.

2.0.0
-----
Changes from version 1.0:

- Upgrade to Python 3.5, PyMC3, and Matplotlib 3.0.0.
- :ref:`Model version 2 <sec-model-v2>`.
- Introduction of object-oriented API.
- Changes in plot appearance.
17 changes: 9 additions & 8 deletions setup.py
@@ -1,22 +1,23 @@
from setuptools import setup


with open("README.md", "r") as fh:
with open('README.md', 'r') as fh:
long_description = fh.read()

setup(name='best',
description='Bayesian estimation supersedes the t-test',
author='Andrew Straw and Laszlo Treszkai',
author_email='strawman@astraw.com',
author_email='laszlo.treszkai@gmail.com',
long_description=long_description,
long_description_content_type="text/markdown",
url='https://github.com/strawlab/best',
long_description_content_type='text/markdown',
url='https://github.com/treszkai/best',
version='2.0.0',
packages=['best'],
classifiers=[
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
'Programming Language :: Python :: 3',
'License :: OSI Approved :: MIT License',
'Operating System :: OS Independent',
],
license='MIT',
)
python_requires='>=3.5',
)
