Save memory in posterior predictive (#3556)
Build the NumPy ndarrays for the posterior predictive samples lazily, skipping the intermediate step of building a very big Python list.
rpgoldman committed Aug 16, 2019
1 parent 3a2a765 commit 1642e73
Showing 2 changed files with 62 additions and 7 deletions.
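
The heart of the change is a small helper that preallocates one NumPy array per variable and writes each draw into it in place, rather than appending draws to Python lists and converting them to arrays at the end. The sketch below illustrates only that idea; `samples`, `value_shape`, and the random draws are illustrative stand-ins, not code from the commit (the real helper, `_DefaultTrace`, appears in the `pymc3/sampling.py` diff below).

import numpy as np

samples = 1000                    # number of posterior predictive draws (illustrative)
value_shape = (50,)               # shape of one draw of a single variable (illustrative)

# Preallocate the full output array once ...
trace = np.full((samples,) + value_shape, np.nan)

# ... then write each draw into its slot, with no intermediate Python list.
for idx in range(samples):
    draw = np.random.standard_normal(value_shape)  # stands in for draw_values()
    trace[idx, :] = draw
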
1 change: 1 addition & 0 deletions RELEASE-NOTES.md
@@ -15,6 +15,7 @@
- Parallelization of population steppers (`DEMetropolis`) is now set via the `cores` argument. ([#3559](https://github.com/pymc-devs/pymc3/pull/3559))
- SMC: stabilize covariance matrix [3573](https://github.com/pymc-devs/pymc3/pull/3573)
- SMC is no longer a step method of `pm.sample`; it should now be called using `pm.sample_smc` [3579](https://github.com/pymc-devs/pymc3/pull/3579)
- `sample_posterior_predictive` now preallocates the memory required for its output to improve memory usage. Addresses problems raised in this [discourse thread](https://discourse.pymc.io/t/memory-error-with-posterior-predictive-sample/2891/4).

## PyMC3 3.7 (May 29 2019)

68 changes: 61 additions & 7 deletions pymc3/sampling.py
@@ -1,6 +1,6 @@
from typing import Dict, List, Optional, TYPE_CHECKING, cast
if TYPE_CHECKING:
    from typing import Any
    from typing import Any, Tuple
from typing import Iterable as TIterable
from collections.abc import Iterable
from collections import defaultdict
@@ -1010,6 +1010,60 @@ def stop_tuning(step):
    step.stop_tuning()
    return step

class _DefaultTrace():
    '''
    This class is a utility for collecting a number of samples
    into a dictionary. Name comes from its similarity to `defaultdict` --
    entries are lazily created.

    Parameters
    ----------
    samples : int
        The number of samples that will be collected, per variable,
        into the trace.

    Attributes
    ----------
    trace_dict : Dict[str, np.ndarray]
        A dictionary constituting a trace. Should be extracted
        after a procedure has filled the `_DefaultTrace` using the
        `insert()` method.
    '''
    trace_dict = {}  # type: Dict[str, np.ndarray]
    _len = None  # type: int

    def __init__(self, samples):
        self._len = samples
        self.trace_dict = {}

    def insert(self, k: str, v, idx: int):
        '''
        Insert `v` as the value of the `idx`th sample for the variable `k`.

        Parameters
        ----------
        k : str
            Name of the variable.
        v : anything that can go into a numpy array (including a numpy array)
            The value of the `idx`th sample from variable `k`.
        idx : int
            The index of the sample we are inserting into the trace.
        '''
        if hasattr(v, 'shape'):
            value_shape = tuple(v.shape)  # type: Tuple[int, ...]
        else:
            value_shape = ()

        # initialize if necessary
        if k not in self.trace_dict:
            array_shape = (self._len,) + value_shape
            self.trace_dict[k] = np.full(array_shape, np.nan)

        # do the actual insertion
        if value_shape == ():
            self.trace_dict[k][idx] = v
        else:
            self.trace_dict[k][idx, :] = v


def sample_posterior_predictive(trace,
                                samples: Optional[int]=None,
@@ -1097,10 +1151,11 @@ def sample_posterior_predictive(trace,

    indices = np.arange(samples)

    if progressbar:
        indices = tqdm(indices, total=samples)

    ppc_trace = defaultdict(list)  # type: Dict[str, List[Any]]
    ppc_trace_t = _DefaultTrace(samples)
    try:
        for idx in indices:
            if nchain > 1:
@@ -1111,7 +1166,7 @@

                values = draw_values(vars, point=param, size=size)
                for k, v in zip(vars, values):
                    ppc_trace[k.name].append(v)
                    ppc_trace_t.insert(k.name, v, idx)

    except KeyboardInterrupt:
        pass

@@ -1120,13 +1175,12 @@
        if progressbar:
            indices.close()

    ppc_trace = ppc_trace_t.trace_dict
    if keep_size:
        for k, ary in ppc_trace.items():
            ary = np.asarray(ary)
            ppc_trace[k] = ary.reshape((nchain, len_trace, *ary.shape[1:]))
        return ppc_trace
    else:
        return {k: np.asarray(v) for k, v in ppc_trace.items()}

    return ppc_trace


def sample_ppc(*args, **kwargs):
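For context, here is a minimal, hypothetical usage sketch of the public entry point this commit changes. The model, variable names, and sample counts are illustrative and are not part of the commit; only `pm.sample_posterior_predictive` and its `samples` argument come from the code above.

import numpy as np
import pymc3 as pm

# Hypothetical model, used only to exercise the changed code path.
data = np.random.standard_normal(100)

with pm.Model():
    mu = pm.Normal('mu', mu=0., sd=1.)
    pm.Normal('obs', mu=mu, sd=1., observed=data)
    trace = pm.sample(500, tune=500)
    # The returned dict now maps each variable name to a preallocated
    # ndarray of shape (samples,) + draw shape, filled in place.
    ppc = pm.sample_posterior_predictive(trace, samples=200)

print(ppc['obs'].shape)  # (200, 100)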
