Skip to content

Commit

Permalink
Merge pull request #68 from prmiles/dev_1.8.0
Browse files Browse the repository at this point in the history
Version 1.8.0
  • Loading branch information
Paul Miles committed Jun 28, 2019
2 parents 8eb9248 + 86891d4 commit f14a39a
Show file tree
Hide file tree
Showing 26 changed files with 1,040 additions and 252 deletions.
12 changes: 11 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
Changelog
=========

v1.8.0 (June 28, 2019)
---------
- Added acceptance rate display feature when calling chain statistics
- User can specify `skip` or `maxpoints` in pairwise correlation and chain panel plots in order to thin chain.
- User can request item definitions when calling `chainstats`.
- Resolved #51 by adding method to check if limits are incompatible. User is provided with more descriptive error message.
- Updated documentation for Bayesian components.
- Added saving routines to accomodate post-processing. A lighter version of the results dictionary is saved to json when log routines are used for chains. This should address #55.
- Option to return KDE objects when using `plot_density_panel`. User can then evaluate pdf directly. This should address #66.

v1.7.1 (May 3, 2019)
---------------------
.. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.2660145.svg
Expand All @@ -21,7 +31,7 @@ v1.7.0 (April 5, 2019)
- Added original covariance matrix to results structure for reference.

v1.6.0 (August 31, 2018)
----------------------
------------------------
.. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.1407136.svg
:target: https://doi.org/10.5281/zenodo.1407136

Expand Down
7 changes: 2 additions & 5 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,8 @@ Getting Started

- `Tutorial notebooks <https://nbviewer.jupyter.org/github/prmiles/pymcmcstat/tree/master/tutorials/index.ipynb>`_
- `Documentation <http://pymcmcstat.readthedocs.io/>`_
- `Release history`_
- `Contributing guidelines`_

.. _Release history: CHANGELOG.rst
.. _Contributing guidelines: CONTRIBUTING.rst
- `Release history <https://github.com/prmiles/pymcmcstat/blob/master/CHANGELOG.rst>`_
- `Contributing guidelines <https://github.com/prmiles/pymcmcstat/blob/master/CONTRIBUTING.rst>`_

License
=======
Expand Down
6 changes: 3 additions & 3 deletions doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ def get_version():
#with open('../../pymcmcstat/__init__.py','r') as f:
# version = f.read()
release = get_version()
#version = ''
# The full version, including alpha/beta/rc tags
#release = 'v1.4.0'
# The short X.Y version
tmp = release.split('.')
version = str('{}.{}'.format(tmp[0], tmp[1]))
print(release)

# -- General configuration ---------------------------------------------------
Expand Down
12 changes: 10 additions & 2 deletions pymcmcstat/MCMC.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,14 @@ def run_simulation(self, use_previous_results=False):
# Generate Results
self.__generate_simulation_results()
if self.simulation_options.save_to_json is True:
if self.simulation_results.basic is True: # check that results structure has been created
self.simulation_results.export_simulation_results_to_json_file(results=self.simulation_results.results)
# check that results structure has been created
if self.simulation_results.basic is True:
if self.simulation_options.save_lightly is True:
self.simulation_results.export_lightly(
results=self.simulation_results.results)
else:
self.simulation_results.export_simulation_results_to_json_file(
results=self.simulation_results.results)
self.mcmcplot = MCMCPlotting.Plot()
self.PI = PredictionIntervals()
self.chainstats = ChainStatistics.chainstats
Expand Down Expand Up @@ -173,6 +179,8 @@ def _initialize_simulation(self):
self.model_settings._check_dependent_model_settings(self.data, self.simulation_options)
# open and parse the parameter structure
self.parameters._openparameterstructure(self.model_settings.nbatch)
# check parameter limits
self.parameters._check_parameter_limits()
# check initial parameter values are inside range
self.parameters._check_initial_values_wrt_parameter_limits()
# add check that prior standard deviation > 0
Expand Down
19 changes: 18 additions & 1 deletion pymcmcstat/ParallelMCMC.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import os
import copy
import time
import warnings


class ParallelMCMC:
Expand Down Expand Up @@ -372,7 +373,10 @@ def assign_number_of_cores(num_cores=1):


# -------------------------
def load_parallel_simulation_results(savedir):
def load_parallel_simulation_results(
savedir, extension='h5', chainfile='chainfile',
sschainfile='sschainfile', s2chainfile='s2chainfile',
covchainfile='covchainfile'):
'''
Load results from parallel simulation directory json files.
Expand All @@ -389,6 +393,19 @@ def load_parallel_simulation_results(savedir):
for key in pr.keys():
if isinstance(pres[ii][key], list):
pres[ii][key] = np.array(pres[ii][key])
# check whether chains included in json file
if 'chain' not in pr.keys():
out = CP.read_in_parallel_savedir_files(
savedir, extension=extension, chainfile=chainfile,
sschainfile=sschainfile, s2chainfile=s2chainfile,
covchainfile=covchainfile)
if out[0]['chain'] == []:
warnings.warn('WARNING: No chains found in saved results.', UserWarning)
for ii, pr in enumerate(pres):
pr['chain'] = out[ii]['chain']
pr['sschain'] = out[ii]['sschain']
pr['s2chain'] = out[ii]['s2chain']
pr['covchain'] = out[ii]['covchain']
return pres


Expand Down
2 changes: 1 addition & 1 deletion pymcmcstat/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.7.1"
__version__ = "1.8.0rc1"
84 changes: 83 additions & 1 deletion pymcmcstat/chain/ChainProcessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,87 @@ def print_log_files(savedir):
print('--------------------------\n')


# -------------------------
def load_serial_simulation_results(
savedir, json_file=None, extension='h5', chainfile='chainfile',
sschainfile='sschainfile', s2chainfile='s2chainfile',
covchainfile='covchainfile'):
'''
Load results from serial simulation directory json files.
Lists in json files are converted to numpy arrays.
Args:
* **savedir** (:py:class:`str`): String indicated path to results directory.
Returns:
* **results** (:py:class:`dict`): Each element of list is an MCMC result dictionary.
'''
items = os.listdir(savedir)
for item in items:
if item.endswith('.json'):
json_file = item
break
if json_file is not None:
results = load_json_object(savedir + os.sep + json_file)
else:
results = {}
# convert lists to arrays
for key in results.keys():
if isinstance(results[key], list):
results[key] = np.array(results[key])
# check whether chains included in results dict
if 'chain' not in results.keys():
out = read_in_savedir_files(
savedir, extension=extension, chainfile=chainfile,
sschainfile=sschainfile, s2chainfile=s2chainfile,
covchainfile=covchainfile)
if out['chain'] == []:
warnings.warn('WARNING: No chains found in saved results.', UserWarning)
results['chain'] = out['chain']
results['sschain'] = out['sschain']
results['s2chain'] = out['s2chain']
results['covchain'] = out['covchain']
return results


# -------------------------
def load_parallel_simulation_results(
savedir, extension='h5', chainfile='chainfile',
sschainfile='sschainfile', s2chainfile='s2chainfile',
covchainfile='covchainfile'):
'''
Load results from parallel simulation directory json files.
Lists in json files are converted to numpy arrays.
Args:
* **savedir** (:py:class:`str`): String indicated path to parallel directory.
Returns:
* **pres** (:py:class:`list`): Each element of list is an MCMC result dictionary.
'''
pres = read_in_parallel_json_results_files(savedir)
for ii, pr in enumerate(pres):
for key in pr.keys():
if isinstance(pres[ii][key], list):
pres[ii][key] = np.array(pres[ii][key])
# check whether chains included in json file
if 'chain' not in pr.keys():
out = read_in_parallel_savedir_files(
savedir, extension=extension, chainfile=chainfile,
sschainfile=sschainfile, s2chainfile=s2chainfile,
covchainfile=covchainfile)
if out[0]['chain'] == []:
warnings.warn('WARNING: No chains found in saved results.', UserWarning)
for ii, pr in enumerate(pres):
pr['chain'] = out[ii]['chain']
pr['sschain'] = out[ii]['sschain']
pr['s2chain'] = out[ii]['s2chain']
pr['covchain'] = out[ii]['covchain']
return pres


def read_in_savedir_files(savedir, extension='h5', chainfile='chainfile', sschainfile='sschainfile',
s2chainfile='s2chainfile', covchainfile='covchainfile'):
'''
Expand Down Expand Up @@ -98,7 +179,8 @@ def read_in_savedir_files(savedir, extension='h5', chainfile='chainfile', sschai


def read_in_parallel_savedir_files(parallel_dir, extension='h5', chainfile='chainfile',
sschainfile='sschainfile', s2chainfile='s2chainfile', covchainfile='covchainfile'):
sschainfile='sschainfile', s2chainfile='s2chainfile',
covchainfile='covchainfile'):
'''
Read in log files from directory containing results from parallel MCMC simulation.
Expand Down
85 changes: 82 additions & 3 deletions pymcmcstat/chain/ChainStatistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@


# display chain statistics
def chainstats(chain=None, results=None, returnstats=False):
def chainstats(chain=None, results=None, returnstats=False, display_details=False):
'''
Calculate chain statistics.
Expand Down Expand Up @@ -53,6 +53,11 @@ def chainstats(chain=None, results=None, returnstats=False):
tau, m = integrated_autocorrelation_time(chain)
# print statistics
print_chain_statistics(names, meanii, stdii, mcerr, tau, p)
if display_details is True:
_display_stat_defs()
print(30*'=')
# print acceptance rate
print_chain_acceptance_info(chain, results=results)
# assign stats to dictionary
stats = dict(mean=list(meanii),
std=list(stdii),
Expand Down Expand Up @@ -88,7 +93,8 @@ def print_chain_statistics(names, meanii, stdii, mcerr, tau, p):
'''
npar = len(names)
# print statistics
print('\n---------------------')
print('\n')
print(30*'-')
print('{:10s}: {:>10s} {:>10s} {:>10s} {:>10s} {:>10s}'.format(
'name',
'mean',
Expand All @@ -112,7 +118,70 @@ def print_chain_statistics(names, meanii, stdii, mcerr, tau, p):
mcerr[ii],
tau[ii],
p[ii]))
print('---------------------')
print(30*'-')


# ----------------------------------------------------
def print_chain_acceptance_info(chain, results=None):
'''
Print chain acceptance rate(s)
If results structure is provided, it will try to print
acceptance rates with respect to delayed rejection as well.
Example display (if results dictionary provided):
::
------------------------------
Acceptance rate information
---------------
Results dictionary:
Stage 1: 3.32%
Stage 2: 22.60%
Net : 25.92% -> 1296/5000
---------------
Chain provided:
Net : 32.10% -> 963/3000
---------------
Note, the net acceptance rate from the results dictionary
may be different if you only provided a subset of the chain,
e.g., removed the first part for burnin-in.
------------------------------
Args:
* **chain** (:class:`~numpy.ndarray`): Sampling chain.
* **results** (:py:class:`dict`): Results from MCMC simulation. \
Default is `None`.
'''
print('Acceptance rate information')
flag = False
if results is not None:
if 'iacce' in results:
if 'nsimu' in results:
nsimu = results['nsimu']
else:
nsimu = chain.shape[0]
print(15*'-')
print('Results dictionary:')
for ii, stage in enumerate(results['iacce']):
print('Stage {:d}: {:4.2f}%'.format(ii + 1, stage/nsimu * 100))
print('Net : {:4.2f}% -> {:d}/{:d}'.format(
results['iacce'].sum()/nsimu * 100,
results['iacce'].sum(), nsimu))
print(15*'-')
flag = True
print('Chain provided:')
unique_elem = np.unique(chain[:, 0]).size
print('Net : {:4.2f}% -> {:d}/{:d}'.format(
unique_elem/chain.shape[0] * 100,
unique_elem, chain.shape[0]))
if flag is True:
print(15*'-')
print('Note, the net acceptance rate from the results dictionary\n'
+ 'may be different if you only provided a subset of the chain,\n'
+ 'e.g., removed the first part for burnin-in.')
print(30*'-')


# ----------------------------------------------------
Expand Down Expand Up @@ -410,3 +479,13 @@ def get_parameter_names(nparam, results):
names = results['names']
names = extend_names_to_match_nparam(names, nparam)
return names


def _display_stat_defs():
print('Definition for items displayed:')
print('"mean": Average value of parameter chains.')
print('"std": Standard deviation of parameter chains.')
print('"MC_err": Normalized batch mean standard deviation.')
print('"tau": Integrated autocorrelation time.')
print('"geweke": Geweke\'s convergence diagnostic.')
print(30*'-')

0 comments on commit f14a39a

Please sign in to comment.