Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable configuration of local store size limits #11777

Merged
merged 4 commits into from Mar 23, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/python/pants/engine/internals/native_engine.pyi
Expand Up @@ -99,13 +99,13 @@ def scheduler_create(
tasks: PyTasks,
types: PyTypes,
build_root: str,
local_store_dir: str,
local_execution_root_dir: str,
named_caches_dir: str,
ca_certs_path: str | None,
ignore_patterns: Sequence[str],
use_gitignore: bool,
remoting_options: PyRemotingOptions,
local_store_options: PyLocalStoreOptions,
exec_strategy_opts: PyExecutionStrategyOptions,
) -> PyScheduler: ...
def scheduler_execute(
Expand Down Expand Up @@ -183,6 +183,9 @@ class PyNailgunClient:
# Type stub for the native engine's `PyRemotingOptions` class. The constructor is declared to
# accept arbitrary keyword arguments, so individual option names are not checked by this stub.
class PyRemotingOptions:
    def __init__(self, **kwargs: Any) -> None: ...

# Type stub for the native engine's `PyLocalStoreOptions` class, which is the type of the
# `local_store_options` parameter of `scheduler_create`. The constructor is declared to accept
# arbitrary keyword arguments, so individual option names are not checked by this stub.
class PyLocalStoreOptions:
    def __init__(self, **kwargs: Any) -> None: ...

# Type stub for the native engine's `PyScheduler` class, the declared return type of
# `scheduler_create`. No members are declared on it here.
class PyScheduler:
    pass

Expand Down
20 changes: 16 additions & 4 deletions src/python/pants/engine/internals/scheduler.py
Expand Up @@ -37,6 +37,7 @@
PyExecutionRequest,
PyExecutionStrategyOptions,
PyExecutor,
PyLocalStoreOptions,
PyRemotingOptions,
PyScheduler,
PySession,
Expand All @@ -56,7 +57,11 @@
)
from pants.engine.rules import Rule, RuleIndex, TaskRule
from pants.engine.unions import UnionMembership, union
from pants.option.global_options import ExecutionOptions
from pants.option.global_options import (
LOCAL_STORE_LEASE_TIME_SECS,
ExecutionOptions,
LocalStoreOptions,
)
from pants.util.contextutil import temporary_file_path
from pants.util.logging import LogLevel
from pants.util.strutil import pluralize
Expand Down Expand Up @@ -100,13 +105,13 @@ def __init__(
ignore_patterns: List[str],
use_gitignore: bool,
build_root: str,
local_store_dir: str,
local_execution_root_dir: str,
named_caches_dir: str,
ca_certs_path: Optional[str],
rules: Iterable[Rule],
union_membership: UnionMembership,
execution_options: ExecutionOptions,
local_store_options: LocalStoreOptions,
executor: PyExecutor,
include_trace_on_error: bool = True,
visualize_to_dir: Optional[str] = None,
Expand All @@ -116,13 +121,13 @@ def __init__(
:param ignore_patterns: A list of gitignore-style file patterns for pants to ignore.
:param use_gitignore: If set, pay attention to .gitignore files.
:param build_root: The build root as a string.
:param local_store_dir: The directory to use for storing the engine's LMDB store in.
:param local_execution_root_dir: The directory to use for local execution sandboxes.
:param named_caches_dir: The directory to use as the root for named mutable caches.
:param ca_certs_path: Path to pem file for custom CA, if needed.
:param rules: A set of Rules which is used to compute values in the graph.
:param union_membership: All the registered and normalized union rules.
:param execution_options: Execution options for (remote) processes.
:param local_store_options: Options for the engine's LMDB store(s).
:param include_trace_on_error: Include the trace through the graph upon encountering errors.
:param validate_reachability: True to assert that all rules in an otherwise successfully
constructed rule graph are reachable: if a graph cannot be successfully constructed, it
Expand Down Expand Up @@ -176,6 +181,13 @@ def __init__(
execution_headers=tuple(execution_options.remote_execution_headers.items()),
execution_overall_deadline_secs=execution_options.remote_execution_overall_deadline_secs,
)
py_local_store_options = PyLocalStoreOptions(
store_dir=local_store_options.store_dir,
process_cache_max_size_bytes=local_store_options.processes_max_size_bytes,
files_max_size_bytes=local_store_options.files_max_size_bytes,
directories_max_size_bytes=local_store_options.directories_max_size_bytes,
lease_time_millis=LOCAL_STORE_LEASE_TIME_SECS * 1000,
)
exec_stategy_opts = PyExecutionStrategyOptions(
local_parallelism=execution_options.process_execution_local_parallelism,
remote_parallelism=execution_options.process_execution_remote_parallelism,
Expand All @@ -190,13 +202,13 @@ def __init__(
tasks,
types,
build_root,
local_store_dir,
local_execution_root_dir,
named_caches_dir,
ca_certs_path,
ignore_patterns,
use_gitignore,
remoting_options,
py_local_store_options,
exec_stategy_opts,
)

Expand Down
5 changes: 2 additions & 3 deletions src/python/pants/engine/internals/scheduler_test_base.py
Expand Up @@ -6,7 +6,7 @@
from pants.engine.internals.native_engine import PyExecutor
from pants.engine.internals.scheduler import Scheduler, SchedulerSession
from pants.engine.unions import UnionMembership
from pants.option.global_options import DEFAULT_EXECUTION_OPTIONS
from pants.option.global_options import DEFAULT_EXECUTION_OPTIONS, DEFAULT_LOCAL_STORE_OPTIONS
from pants.util.contextutil import temporary_file_path
from pants.util.dirutil import safe_mkdtemp, safe_rmtree

Expand Down Expand Up @@ -35,21 +35,20 @@ def mk_scheduler(
build_root = os.path.join(work_dir, "build_root")
os.makedirs(build_root)

local_store_dir = os.path.realpath(safe_mkdtemp())
local_execution_root_dir = os.path.realpath(safe_mkdtemp())
named_caches_dir = os.path.realpath(safe_mkdtemp())
scheduler = Scheduler(
ignore_patterns=[],
use_gitignore=False,
build_root=build_root,
local_store_dir=local_store_dir,
local_execution_root_dir=local_execution_root_dir,
named_caches_dir=named_caches_dir,
ca_certs_path=None,
rules=rules,
union_membership=UnionMembership({}),
executor=self._executor,
execution_options=DEFAULT_EXECUTION_OPTIONS,
local_store_options=DEFAULT_LOCAL_STORE_OPTIONS,
include_trace_on_error=include_trace_on_error,
)
return scheduler.new_session(
Expand Down
4 changes: 2 additions & 2 deletions src/python/pants/engine/rules_test.py
Expand Up @@ -30,7 +30,7 @@
rule,
)
from pants.engine.unions import UnionMembership
from pants.option.global_options import DEFAULT_EXECUTION_OPTIONS
from pants.option.global_options import DEFAULT_EXECUTION_OPTIONS, DEFAULT_LOCAL_STORE_OPTIONS
from pants.testutil.rule_runner import MockGet, run_rule_with_mocks
from pants.util.enums import match
from pants.util.logging import LogLevel
Expand All @@ -44,14 +44,14 @@ def create_scheduler(rules, validate=True):
ignore_patterns=[],
use_gitignore=False,
build_root=str(Path.cwd()),
local_store_dir="./.pants.d/lmdb_store",
local_execution_root_dir="./.pants.d",
named_caches_dir="./.pants.d/named_caches",
ca_certs_path=None,
rules=rules,
union_membership=UnionMembership({}),
executor=_EXECUTOR,
execution_options=DEFAULT_EXECUTION_OPTIONS,
local_store_options=DEFAULT_LOCAL_STORE_OPTIONS,
validate_reachability=validate,
)

Expand Down
14 changes: 10 additions & 4 deletions src/python/pants/init/engine_initializer.py
Expand Up @@ -30,7 +30,12 @@
from pants.engine.target import RegisteredTargetTypes
from pants.engine.unions import UnionMembership
from pants.init import specs_calculator
from pants.option.global_options import DEFAULT_EXECUTION_OPTIONS, ExecutionOptions, GlobalOptions
from pants.option.global_options import (
DEFAULT_EXECUTION_OPTIONS,
ExecutionOptions,
GlobalOptions,
LocalStoreOptions,
)
from pants.option.options_bootstrapper import OptionsBootstrapper
from pants.option.subsystem import Subsystem
from pants.util.ordered_set import FrozenOrderedSet
Expand Down Expand Up @@ -175,13 +180,14 @@ def setup_graph(
assert bootstrap_options is not None
executor = executor or GlobalOptions.create_py_executor(bootstrap_options)
execution_options = ExecutionOptions.from_options(options, env, local_only=local_only)
local_store_options = LocalStoreOptions.from_options(bootstrap_options)
return EngineInitializer.setup_graph_extended(
build_configuration,
execution_options,
executor=executor,
pants_ignore_patterns=GlobalOptions.compute_pants_ignore(build_root, bootstrap_options),
use_gitignore=bootstrap_options.pants_ignore_use_gitignore,
local_store_dir=bootstrap_options.local_store_dir,
local_store_options=local_store_options,
local_execution_root_dir=bootstrap_options.local_execution_root_dir,
named_caches_dir=bootstrap_options.named_caches_dir,
ca_certs_path=bootstrap_options.ca_certs_path,
Expand All @@ -198,7 +204,7 @@ def setup_graph_extended(
executor: PyExecutor,
pants_ignore_patterns: List[str],
use_gitignore: bool,
local_store_dir: str,
local_store_options: LocalStoreOptions,
local_execution_root_dir: str,
named_caches_dir: str,
ca_certs_path: Optional[str] = None,
Expand Down Expand Up @@ -280,14 +286,14 @@ def ensure_optional_absolute_path(v: Optional[str]) -> Optional[str]:
ignore_patterns=pants_ignore_patterns,
use_gitignore=use_gitignore,
build_root=build_root,
local_store_dir=ensure_absolute_path(local_store_dir),
local_execution_root_dir=ensure_absolute_path(local_execution_root_dir),
named_caches_dir=ensure_absolute_path(named_caches_dir),
ca_certs_path=ensure_optional_absolute_path(ca_certs_path),
rules=rules,
union_membership=union_membership,
executor=executor,
execution_options=execution_options,
local_store_options=local_store_options,
include_trace_on_error=include_trace_on_error,
visualize_to_dir=native_engine_visualize_to,
)
Expand Down
99 changes: 97 additions & 2 deletions src/python/pants/option/global_options.py
Expand Up @@ -251,11 +251,59 @@ def from_options(
)


@dataclass(frozen=True)
class LocalStoreOptions:
    """Immutable bundle of the options that configure the local store.

    TODO: Move these options onto a Subsystem once "bootstrap" Subsystems are supported (i.e.,
    once Subsystems can be consumed before the Scheduler has been created).
    """

    # Directory that the local store lives in.
    store_dir: str
    # Size caps, in bytes, one per store.
    processes_max_size_bytes: int
    files_max_size_bytes: int
    directories_max_size_bytes: int

    def target_total_size_bytes(self) -> int:
        """Return the combined size that garbage collection tries to shrink the stores to.

        Each `max_size` value caps the total size of a single store. The "target" is what
        garbage collection aims for on every run: one tenth (rounded down) of the sum of
        those caps.

        NB: The target is not configurable today, although that may be worth adding later.
        """
        per_store_caps = (
            self.processes_max_size_bytes,
            self.files_max_size_bytes,
            self.directories_max_size_bytes,
        )
        return sum(per_store_caps) // 10
Comment on lines +281 to +295
Copy link
Sponsor Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because this is calculated from the new, higher defaults, the concrete target also ended up at a higher value.


@classmethod
def from_options(cls, options: OptionValueContainer) -> LocalStoreOptions:
    """Build an instance from parsed option values.

    The store directory is resolved to an absolute path and rendered with forward slashes;
    the three size limits are copied from the corresponding `local_store_*` options as-is.
    """
    resolved_store_dir = Path(options.local_store_dir).resolve()
    return cls(
        store_dir=resolved_store_dir.as_posix(),
        processes_max_size_bytes=options.local_store_processes_max_size_bytes,
        files_max_size_bytes=options.local_store_files_max_size_bytes,
        directories_max_size_bytes=options.local_store_directories_max_size_bytes,
    )


# Use the number of CPUs reported by `os.sched_getaffinity` where the platform provides it,
# otherwise `os.cpu_count()`; fall back to 2 if the chosen call yields nothing usable.
if hasattr(os, "sched_getaffinity"):
    _CPU_COUNT = len(os.sched_getaffinity(0)) or 2
else:
    _CPU_COUNT = os.cpu_count() or 2


# How long (in seconds) leases are acquired for in the local store: two hours. The value is
# configured on the Python side to ease interaction with the StoreGCService, which needs to
# be aware of it.
LOCAL_STORE_LEASE_TIME_SECS = 60 * 60 * 2


# Decimal (power-of-ten) byte multiples.
MEGABYTES = 10 ** 6
GIGABYTES = MEGABYTES * 1000


DEFAULT_EXECUTION_OPTIONS = ExecutionOptions(
# Remote execution strategy.
remote_execution=False,
Expand Down Expand Up @@ -285,6 +333,13 @@ def from_options(
remote_execution_overall_deadline_secs=60 * 60, # one hour
)

# Default local store settings: 4 GB each for the process and directory stores, 128 GB for the
# file store, all kept in `lmdb_store` under the directory returned by `get_pants_cachedir()`.
DEFAULT_LOCAL_STORE_OPTIONS = LocalStoreOptions(
    store_dir=os.path.join(get_pants_cachedir(), "lmdb_store"),
    directories_max_size_bytes=4 * GIGABYTES,
    files_max_size_bytes=128 * GIGABYTES,
    processes_max_size_bytes=4 * GIGABYTES,
)


class GlobalOptions(Subsystem):
options_scope = GLOBAL_SCOPE
Expand Down Expand Up @@ -701,12 +756,13 @@ def register_bootstrap_options(cls, register):
),
)

local_store_dir_flag = "--local-store-dir"
cache_instructions = (
"The path may be absolute or relative. If the directory is within the build root, be "
"sure to include it in `--pants-ignore`."
)
register(
"--local-store-dir",
local_store_dir_flag,
advanced=True,
help=(
f"Directory to use for the local file store, which stores the results of "
Expand All @@ -715,7 +771,46 @@ def register_bootstrap_options(cls, register):
# This default is also hard-coded into the engine's rust code in
# fs::Store::default_path so that tools using a Store outside of pants
# are likely to be able to use the same storage location.
default=os.path.join(get_pants_cachedir(), "lmdb_store"),
default=DEFAULT_LOCAL_STORE_OPTIONS.store_dir,
)
register(
"--local-store-processes-max-size-bytes",
type=int,
advanced=True,
help=(
"The maximum size in bytes of the local store containing process cache entries. "
f"Stored below `{local_store_dir_flag}`."
),
default=DEFAULT_LOCAL_STORE_OPTIONS.processes_max_size_bytes,
)
register(
"--local-store-files-max-size-bytes",
type=int,
advanced=True,
help=(
"The maximum size in bytes of the local store containing files. "
f"Stored below `{local_store_dir_flag}`."
"\n\n"
"NB: This size value bounds the total size of all files, but (due to sharding of the "
"store on disk) it also bounds the per-file size to (VALUE / 16)."
stuhood marked this conversation as resolved.
Show resolved Hide resolved
"\n\n"
"This value doesn't reflect space allocated on disk, or RAM allocated (it "
"may be reflected in VIRT but not RSS). However, the default is lower than you "
"might otherwise choose because macOS creates core dumps that include MMAP'd "
"pages, and setting this too high might cause core dumps to use an unreasonable "
"amount of disk if they are enabled."
),
default=DEFAULT_LOCAL_STORE_OPTIONS.files_max_size_bytes,
)
register(
"--local-store-directories-max-size-bytes",
type=int,
advanced=True,
help=(
"The maximum size in bytes of the local store containing directories. "
f"Stored below `{local_store_dir_flag}`."
),
default=DEFAULT_LOCAL_STORE_OPTIONS.directories_max_size_bytes,
)
Comment on lines +771 to 809
Copy link
Sponsor Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One potential alternative to exposing all three size options would be to expose a single option and then bake in a fixed ratio for dividing it among the three stores.

register(
"--named-caches-dir",
Expand Down
7 changes: 5 additions & 2 deletions src/python/pants/pantsd/pants_daemon.py
Expand Up @@ -21,7 +21,7 @@
from pants.init.engine_initializer import GraphScheduler
from pants.init.logging import initialize_stdio, pants_log_path
from pants.init.util import init_workdir
from pants.option.global_options import GlobalOptions
from pants.option.global_options import GlobalOptions, LocalStoreOptions
from pants.option.option_value_container import OptionValueContainer
from pants.option.options import Options
from pants.option.options_bootstrapper import OptionsBootstrapper
Expand Down Expand Up @@ -102,7 +102,10 @@ def _setup_services(
max_memory_usage_in_bytes=bootstrap_options.pantsd_max_memory_usage,
)

store_gc_service = StoreGCService(graph_scheduler.scheduler)
store_gc_service = StoreGCService(
graph_scheduler.scheduler,
local_store_options=LocalStoreOptions.from_options(bootstrap_options),
)
return PantsServices(services=(scheduler_service, store_gc_service))

def __init__(
Expand Down