Skip to content

Commit

Permalink
fix: Delete uncacheable fits
Browse files Browse the repository at this point in the history
Delete fits which are uncacheable. Draws from a model
made without setting a random seed will never be retrieved a
second time because the random seed is unavailable. Users
who do want their fits to be cached should set a random seed.

This fix helps solve the problem of the cache growing without limit.
The cache will now grow much more slowly.

Closes #154
  • Loading branch information
riddell-stan committed Oct 11, 2020
1 parent 5609670 commit 9aa950d
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 0 deletions.
7 changes: 7 additions & 0 deletions stan/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,13 @@ def is_iteration_or_elapsed_time_logger_message(msg):
for msg in non_standard_logger_messages:
text = msg.feature[0].string_list.value[0].replace("info:", " ")
io.error(f"<info>{text}</info>\n")

# clean up after ourselves when fit is uncacheable (no random seed)
if self.random_seed is None:
async with aiohttp.request("DELETE", f"http://{host}:{port}/v1/{fit_name}") as resp:
if resp.status not in {200, 202, 204}:
raise RuntimeError((await resp.json())["message"])

return stan.fit.Fit(
stan_outputs,
num_chains,
Expand Down
44 changes: 44 additions & 0 deletions tests/test_fit_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Tests related to cached fits."""
import shutil
import pathlib
import random

import stan
import httpstan.cache

# Minimal Stan program shared by the tests below: a single real parameter
# ``y`` with a standard normal distribution.
program_code = "parameters {real y;} model {y ~ normal(0,1);}"


def cache_path():
    """Return the parent of httpstan's directory for a placeholder model.

    The model name ``"models/abcdef"`` is only a stand-in used to ask
    httpstan for a model directory; presumably the parent of that directory
    is where all cached models live (confirm against ``httpstan.cache``).
    """
    placeholder_model_dir = httpstan.cache.model_directory("models/abcdef")
    return pathlib.Path(placeholder_model_dir).parent


def test_fit_cache():
    """Test that a fit with a random seed set is cached.

    The first ``sample()`` call on a model built with a seed must grow the
    cache directory; a second ``sample()`` call on the same model must leave
    the cache size unchanged.
    """

    def cache_size():
        # Total bytes of the regular files below the cache directory.
        # ``shutil.disk_usage`` is deliberately not used here: it reports
        # usage of the whole filesystem containing the path, so unrelated
        # disk activity would make the comparisons below unreliable.
        return sum(p.stat().st_size for p in cache_path().rglob("*") if p.is_file())

    cache_size_before = cache_size()
    # this fit is cacheable
    random_seed = random.randrange(1, 2 ** 16)
    normal_posterior = stan.build(program_code, random_seed=random_seed)
    normal_posterior.sample()
    cache_size_after = cache_size()
    assert cache_size_after > cache_size_before

    # fit is now in cache
    cache_size_before = cache_size()
    normal_posterior.sample()
    cache_size_after = cache_size()
    assert cache_size_before == cache_size_after


def test_fit_cache_uncacheable():
    """Test that a fit made without a random seed is not kept in the cache."""

    def cache_size():
        # Total bytes of the regular files below the cache directory.
        # ``shutil.disk_usage`` is deliberately not used here: it reports
        # usage of the whole filesystem containing the path, so unrelated
        # disk activity would make the comparison below unreliable.
        return sum(p.stat().st_size for p in cache_path().rglob("*") if p.is_file())

    cache_size_before = cache_size()
    # this fit is NOT cacheable, should not be saved
    normal_posterior = stan.build(program_code)
    normal_posterior.sample()
    cache_size_after = cache_size()
    # keep the original 4096 byte tolerance for small bookkeeping leftovers
    assert abs(cache_size_before - cache_size_after) <= 4096

0 comments on commit 9aa950d

Please sign in to comment.