Merge pull request #68 from prmiles/dev_1.8.0

Version 1.8.0
prmiles · Jun 28, 2019 · f14a39a · f14a39a
2 parents 8eb9248 + 86891d4
commit f14a39a
Show file tree

Hide file tree

Showing 26 changed files with 1,040 additions and 252 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -1,6 +1,16 @@
 Changelog
 =========
 
+v1.8.0 (June 28, 2019)
+---------
+- Added acceptance rate display feature when calling chain statistics
+- User can specify `skip` or `maxpoints` in pairwise correlation and chain panel plots in order to thin chain.
+- User can request item definitions when calling `chainstats`.
+- Resolved #51 by adding method to check if limits are incompatible.  User is provided with more descriptive error message.
+- Updated documentation for Bayesian components.
+- Added saving routines to accomodate post-processing.  A lighter version of the results dictionary is saved to json when log routines are used for chains.  This should address #55.
+- Option to return KDE objects when using `plot_density_panel`.  User can then evaluate pdf directly.  This should address #66.
+
 v1.7.1 (May 3, 2019)
 ---------------------
 .. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.2660145.svg
@@ -21,7 +31,7 @@ v1.7.0 (April 5, 2019)
 - Added original covariance matrix to results structure for reference.
 
 v1.6.0 (August 31, 2018)
-----------------------
+------------------------
 .. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.1407136.svg
    :target: https://doi.org/10.5281/zenodo.1407136
 

diff --git a/README.rst b/README.rst
@@ -37,11 +37,8 @@ Getting Started
 
 - `Tutorial notebooks <https://nbviewer.jupyter.org/github/prmiles/pymcmcstat/tree/master/tutorials/index.ipynb>`_
 - `Documentation <http://pymcmcstat.readthedocs.io/>`_
-- `Release history`_
-- `Contributing guidelines`_
-
-.. _Release history: CHANGELOG.rst
-.. _Contributing guidelines: CONTRIBUTING.rst
+- `Release history <https://github.com/prmiles/pymcmcstat/blob/master/CHANGELOG.rst>`_
+- `Contributing guidelines <https://github.com/prmiles/pymcmcstat/blob/master/CONTRIBUTING.rst>`_
 
 License
 =======

diff --git a/doc/source/conf.py b/doc/source/conf.py
@@ -38,9 +38,9 @@ def get_version():
 #with open('../../pymcmcstat/__init__.py','r') as f:
 #    version = f.read()
 release = get_version()
-#version = ''
-# The full version, including alpha/beta/rc tags
-#release = 'v1.4.0'
+# The short X.Y version
+tmp = release.split('.')
+version = str('{}.{}'.format(tmp[0], tmp[1]))
 print(release)
 
 # -- General configuration ---------------------------------------------------

diff --git a/pymcmcstat/MCMC.py b/pymcmcstat/MCMC.py
@@ -113,8 +113,14 @@ def run_simulation(self, use_previous_results=False):
         # Generate Results
         self.__generate_simulation_results()
         if self.simulation_options.save_to_json is True:
-            if self.simulation_results.basic is True:  # check that results structure has been created
-                self.simulation_results.export_simulation_results_to_json_file(results=self.simulation_results.results)
+            # check that results structure has been created
+            if self.simulation_results.basic is True:
+                if self.simulation_options.save_lightly is True:
+                    self.simulation_results.export_lightly(
+                            results=self.simulation_results.results)
+                else:
+                    self.simulation_results.export_simulation_results_to_json_file(
+                            results=self.simulation_results.results)
         self.mcmcplot = MCMCPlotting.Plot()
         self.PI = PredictionIntervals()
         self.chainstats = ChainStatistics.chainstats
@@ -173,6 +179,8 @@ def _initialize_simulation(self):
         self.model_settings._check_dependent_model_settings(self.data, self.simulation_options)
         # open and parse the parameter structure
         self.parameters._openparameterstructure(self.model_settings.nbatch)
+        # check parameter limits
+        self.parameters._check_parameter_limits()
         # check initial parameter values are inside range
         self.parameters._check_initial_values_wrt_parameter_limits()
         # add check that prior standard deviation > 0

diff --git a/pymcmcstat/ParallelMCMC.py b/pymcmcstat/ParallelMCMC.py
@@ -16,6 +16,7 @@
 import os
 import copy
 import time
+import warnings
 
 
 class ParallelMCMC:
@@ -372,7 +373,10 @@ def assign_number_of_cores(num_cores=1):
 
 
 # -------------------------
-def load_parallel_simulation_results(savedir):
+def load_parallel_simulation_results(
+        savedir, extension='h5', chainfile='chainfile',
+        sschainfile='sschainfile', s2chainfile='s2chainfile',
+        covchainfile='covchainfile'):
     '''
     Load results from parallel simulation directory json files.
 
@@ -389,6 +393,19 @@ def load_parallel_simulation_results(savedir):
         for key in pr.keys():
             if isinstance(pres[ii][key], list):
                 pres[ii][key] = np.array(pres[ii][key])
+    # check whether chains included in json file
+    if 'chain' not in pr.keys():
+        out = CP.read_in_parallel_savedir_files(
+                savedir, extension=extension, chainfile=chainfile,
+                sschainfile=sschainfile, s2chainfile=s2chainfile,
+                covchainfile=covchainfile)
+        if out[0]['chain'] == []:
+            warnings.warn('WARNING: No chains found in saved results.', UserWarning)
+    for ii, pr in enumerate(pres):
+        pr['chain'] = out[ii]['chain']
+        pr['sschain'] = out[ii]['sschain']
+        pr['s2chain'] = out[ii]['s2chain']
+        pr['covchain'] = out[ii]['covchain']
     return pres
 
 

diff --git a/pymcmcstat/__init__.py b/pymcmcstat/__init__.py
@@ -1 +1 @@
-__version__ = "1.7.1"
+__version__ = "1.8.0rc1"
diff --git a/pymcmcstat/chain/ChainProcessing.py b/pymcmcstat/chain/ChainProcessing.py
@@ -52,6 +52,87 @@ def print_log_files(savedir):
         print('--------------------------\n')
 
 
+# -------------------------
+def load_serial_simulation_results(
+        savedir, json_file=None, extension='h5', chainfile='chainfile',
+        sschainfile='sschainfile', s2chainfile='s2chainfile',
+        covchainfile='covchainfile'):
+    '''
+    Load results from serial simulation directory json files.
+
+    Lists in json files are converted to numpy arrays.
+
+    Args:
+        * **savedir** (:py:class:`str`): String indicated path to results directory.
+
+    Returns:
+        * **results** (:py:class:`dict`): Each element of list is an MCMC result dictionary.
+    '''
+    items = os.listdir(savedir)
+    for item in items:
+        if item.endswith('.json'):
+            json_file = item
+            break
+    if json_file is not None:
+        results = load_json_object(savedir + os.sep + json_file)
+    else:
+        results = {}
+    # convert lists to arrays
+    for key in results.keys():
+        if isinstance(results[key], list):
+            results[key] = np.array(results[key])
+    # check whether chains included in results dict
+    if 'chain' not in results.keys():
+        out = read_in_savedir_files(
+                savedir, extension=extension, chainfile=chainfile,
+                sschainfile=sschainfile, s2chainfile=s2chainfile,
+                covchainfile=covchainfile)
+        if out['chain'] == []:
+            warnings.warn('WARNING: No chains found in saved results.', UserWarning)
+        results['chain'] = out['chain']
+        results['sschain'] = out['sschain']
+        results['s2chain'] = out['s2chain']
+        results['covchain'] = out['covchain']
+    return results
+
+
+# -------------------------
+def load_parallel_simulation_results(
+        savedir, extension='h5', chainfile='chainfile',
+        sschainfile='sschainfile', s2chainfile='s2chainfile',
+        covchainfile='covchainfile'):
+    '''
+    Load results from parallel simulation directory json files.
+
+    Lists in json files are converted to numpy arrays.
+
+    Args:
+        * **savedir** (:py:class:`str`): String indicated path to parallel directory.
+
+    Returns:
+        * **pres** (:py:class:`list`): Each element of list is an MCMC result dictionary.
+    '''
+    pres = read_in_parallel_json_results_files(savedir)
+    for ii, pr in enumerate(pres):
+        for key in pr.keys():
+            if isinstance(pres[ii][key], list):
+                pres[ii][key] = np.array(pres[ii][key])
+    # check whether chains included in json file
+    if 'chain' not in pr.keys():
+        out = read_in_parallel_savedir_files(
+                savedir, extension=extension, chainfile=chainfile,
+                sschainfile=sschainfile, s2chainfile=s2chainfile,
+                covchainfile=covchainfile)
+        if out[0]['chain'] == []:
+            warnings.warn('WARNING: No chains found in saved results.', UserWarning)
+    for ii, pr in enumerate(pres):
+        pr['chain'] = out[ii]['chain']
+        pr['sschain'] = out[ii]['sschain']
+        pr['s2chain'] = out[ii]['s2chain']
+        pr['covchain'] = out[ii]['covchain']
+    return pres
+
+
 def read_in_savedir_files(savedir, extension='h5', chainfile='chainfile', sschainfile='sschainfile',
                           s2chainfile='s2chainfile', covchainfile='covchainfile'):
     '''
@@ -98,7 +179,8 @@ def read_in_savedir_files(savedir, extension='h5', chainfile='chainfile', sschai
 
 
 def read_in_parallel_savedir_files(parallel_dir, extension='h5', chainfile='chainfile',
-                                   sschainfile='sschainfile', s2chainfile='s2chainfile', covchainfile='covchainfile'):
+                                   sschainfile='sschainfile', s2chainfile='s2chainfile',
+                                   covchainfile='covchainfile'):
     '''
     Read in log files from directory containing results from parallel MCMC simulation.
 

diff --git a/pymcmcstat/chain/ChainStatistics.py b/pymcmcstat/chain/ChainStatistics.py
@@ -18,7 +18,7 @@
 
 
 # display chain statistics
-def chainstats(chain=None, results=None, returnstats=False):
+def chainstats(chain=None, results=None, returnstats=False, display_details=False):
     '''
     Calculate chain statistics.
 
@@ -53,6 +53,11 @@ def chainstats(chain=None, results=None, returnstats=False):
         tau, m = integrated_autocorrelation_time(chain)
         # print statistics
         print_chain_statistics(names, meanii, stdii, mcerr, tau, p)
+        if display_details is True:
+            _display_stat_defs()
+        print(30*'=')
+        # print acceptance rate
+        print_chain_acceptance_info(chain, results=results)
         # assign stats to dictionary
         stats = dict(mean=list(meanii),
                      std=list(stdii),
@@ -88,7 +93,8 @@ def print_chain_statistics(names, meanii, stdii, mcerr, tau, p):
     '''
     npar = len(names)
     # print statistics
-    print('\n---------------------')
+    print('\n')
+    print(30*'-')
     print('{:10s}: {:>10s} {:>10s} {:>10s} {:>10s} {:>10s}'.format(
             'name',
             'mean',
@@ -112,7 +118,70 @@ def print_chain_statistics(names, meanii, stdii, mcerr, tau, p):
                     mcerr[ii],
                     tau[ii],
                     p[ii]))
-    print('---------------------')
+    print(30*'-')
+
+
+# ----------------------------------------------------
+def print_chain_acceptance_info(chain, results=None):
+    '''
+    Print chain acceptance rate(s)
+
+    If results structure is provided, it will try to print
+    acceptance rates with respect to delayed rejection as well.
+
+    Example display (if results dictionary provided):
+
+    ::
+
+        ------------------------------
+        Acceptance rate information
+        ---------------
+        Results dictionary:
+        Stage 1: 3.32%
+        Stage 2: 22.60%
+        Net    : 25.92% -> 1296/5000
+        ---------------
+        Chain provided:
+        Net    : 32.10% -> 963/3000
+        ---------------
+        Note, the net acceptance rate from the results dictionary
+        may be different if you only provided a subset of the chain,
+        e.g., removed the first part for burnin-in.
+        ------------------------------
+
+    Args:
+        * **chain** (:class:`~numpy.ndarray`): Sampling chain.
+        * **results** (:py:class:`dict`): Results from MCMC simulation. \
+        Default is `None`.
+    '''
+    print('Acceptance rate information')
+    flag = False
+    if results is not None:
+        if 'iacce' in results:
+            if 'nsimu' in results:
+                nsimu = results['nsimu']
+            else:
+                nsimu = chain.shape[0]
+            print(15*'-')
+            print('Results dictionary:')
+            for ii, stage in enumerate(results['iacce']):
+                print('Stage {:d}: {:4.2f}%'.format(ii + 1, stage/nsimu * 100))
+            print('Net    : {:4.2f}% -> {:d}/{:d}'.format(
+                    results['iacce'].sum()/nsimu * 100,
+                    results['iacce'].sum(), nsimu))
+            print(15*'-')
+            flag = True
+    print('Chain provided:')
+    unique_elem = np.unique(chain[:, 0]).size
+    print('Net    : {:4.2f}% -> {:d}/{:d}'.format(
+            unique_elem/chain.shape[0] * 100,
+            unique_elem, chain.shape[0]))
+    if flag is True:
+        print(15*'-')
+        print('Note, the net acceptance rate from the results dictionary\n'
+              + 'may be different if you only provided a subset of the chain,\n'
+              + 'e.g., removed the first part for burnin-in.')
+    print(30*'-')
 
 
 # ----------------------------------------------------
@@ -410,3 +479,13 @@ def get_parameter_names(nparam, results):
         names = results['names']
         names = extend_names_to_match_nparam(names, nparam)
     return names
+
+
+def _display_stat_defs():
+    print('Definition for items displayed:')
+    print('"mean": Average value of parameter chains.')
+    print('"std": Standard deviation of parameter chains.')
+    print('"MC_err": Normalized batch mean standard deviation.')
+    print('"tau": Integrated autocorrelation time.')
+    print('"geweke": Geweke\'s convergence diagnostic.')
+    print(30*'-')