Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid capturing Snapshots for previously digested codegen outputs #7241

@@ -3,10 +3,12 @@

'elements.proto', # Order matters here.
# NB: Order matters for these two paths, so we set `ordered_sources=True` below.
@@ -32,6 +32,7 @@ def __init__(self,
:param string service_writer: the name of the class to pass as the --service_writer option to
@@ -43,6 +44,9 @@ def __init__(self,
doubt, specify com.squareup.wire.SimpleServiceWriter
:param list enum_options: list of enums to pass to as the --enum-enum_options option, # optional
:param boolean no_options: boolean that determines if --no_options flag is passed
:param boolean ordered_sources: boolean that declares whether the sources argument represents
literal ordered sources to be passed directly to the compiler. If false, no ordering is
guaranteed for the sources passed to an individual compiler invoke.

if not service_writer and service_writer_options:
@@ -59,6 +63,7 @@ def __init__(self,
'registry_class': PrimitiveField(registry_class or None),
'enum_options': PrimitiveField(enum_options or []),
'no_options': PrimitiveField(no_options or False),
'ordered_sources': PrimitiveField(ordered_sources or False),

super(JavaWireLibrary, self).__init__(payload=payload, **kwargs)
@@ -13,10 +13,12 @@
from pants.backend.jvm.targets.java_library import JavaLibrary
from pants.backend.jvm.tasks.nailgun_task import NailgunTaskBase
from pants.base.build_environment import get_buildroot
from pants.base.exceptions import TaskError
from pants.base.exceptions import TargetDefinitionException, TaskError
from pants.base.workunit import WorkUnitLabel
from import JarDependency
from pants.source.filespec import globs_matches
from pants.task.simple_codegen_task import SimpleCodegenTask
from pants.util.dirutil import fast_relpath

logger = logging.getLogger(__name__)
@@ -61,24 +63,47 @@ def synthetic_target_extra_dependencies(self, target, target_workdir):
wire_runtime_deps_spec = self.get_options().javadeps
return self.resolve_deps([wire_runtime_deps_spec])

def format_args_for_target(self, target, target_workdir):
"""Calculate the arguments to pass to the command line for a single target."""
sources = OrderedSet(target.sources_relative_to_buildroot())

def _compute_sources(self, target):
relative_sources = OrderedSet()
source_roots = set()
for source in sources:
source_roots = OrderedSet()

def capture_and_relativize_to_source_root(source):
source_root = self.context.source_roots.find_by_path(source)
if not source_root:
source_root = self.context.source_roots.find(target)
relative_source = os.path.relpath(source, source_root.path)
return fast_relpath(source, source_root.path)

if target.payload.get_field_value('ordered_sources'):

This comment has been minimized.

Copy link

illicitonion Feb 15, 2019


It feels like we should probably not have this option, and just treat wire as always having ordered_sources=True; this just feels like a footgun.

This comment has been minimized.

Copy link

stuhood Feb 15, 2019

Author Member

The issue with that is that it will fail for something like globs('*'), where we cannot apply ordering.

This comment has been minimized.

Copy link

stuhood Feb 15, 2019

Author Member

My hope is that most consumers of this tool use it in ways that don't care about ordering. If I'm wrong there, can reevaluate.

# Re-match the filespecs against the sources in order to apply them in the literal order
# they were specified in.
filespec = target.globs_relative_to_buildroot()
excludes = filespec.get('excludes', [])
for filespec in filespec.get('globs', []):
sources = [s for s in target.sources_relative_to_buildroot()
if globs_matches([s], [filespec], excludes)]
if len(sources) != 1:
raise TargetDefinitionException(
'With `ordered_sources=True`, expected one match for each file literal, '
'but got: {} for literal `{}`.'.format(sources, filespec)
# Otherwise, use the default (unspecified) snapshot ordering.
for source in target.sources_relative_to_buildroot():
return relative_sources, source_roots

def format_args_for_target(self, target, target_workdir):
"""Calculate the arguments to pass to the command line for a single target."""

args = ['--java_out={0}'.format(target_workdir)]

# Add all params in payload to args

relative_sources, source_roots = self._compute_sources(target)

if target.payload.get_field_value('no_options'):

@@ -40,7 +40,7 @@ def console_output(self, targets):
input_snapshots = tuple(
target.sources_snapshot(scheduler=self.context._scheduler) for target in targets
input_files = {f.path for snapshot in input_snapshots for f in snapshot.files}
input_files = {f for snapshot in input_snapshots for f in snapshot.files}

# TODO: Work out a nice library-like utility for writing an argfile, as this will be common.
with temporary_dir() as tmpdir:
@@ -209,8 +209,8 @@ def _execute_hermetic_compile(self, cmd, ctx):
# Assume no extra .class files to grab. We'll fix up that case soon.
# Drop the source_root from the file path.
# Assumes `-d .` has been put in the command.
os.path.relpath(f.path.replace('.java', '.class'),
for f in input_snapshot.files if f.path.endswith('.java')
os.path.relpath(f.replace('.java', '.class'),
for f in input_snapshot.files if f.endswith('.java')
exec_process_request = ExecuteProcessRequest(
@@ -33,8 +33,7 @@
from import JarDependency
from pants.reporting.reporting_utils import items_to_report_element
from pants.util.contextutil import Timer
from pants.util.dirutil import (fast_relpath, fast_relpath_optional, maybe_read_file,
safe_file_dump, safe_mkdir)
from pants.util.dirutil import fast_relpath, fast_relpath_optional, safe_mkdir
from pants.util.memo import memoized_property

@@ -60,22 +59,6 @@ def stdout_contents(wu):

def write_digest(output_dir, digest):
payload='{}:{}'.format(digest.fingerprint, digest.serialized_bytes_length))

def load_digest(output_dir):
read_file = maybe_read_file('{}.digest'.format(output_dir), binary_mode=False)
if read_file:
fingerprint, length = read_file.split(':')
return Digest(fingerprint, int(length))
return None

def _create_desandboxify_fn(possible_path_patterns):
# Takes a collection of possible canonical prefixes, and returns a function that
# if it finds a matching prefix, strips the path prior to the prefix and returns it
@@ -132,7 +115,7 @@ def __init__(self, *args, **kwargs):

def implementation_version(cls):
return super(RscCompile, cls).implementation_version() + [('RscCompile', 171)]
return super(RscCompile, cls).implementation_version() + [('RscCompile', 172)]

def register_options(cls, register):
@@ -218,7 +201,7 @@ def pathglob_for(filename):
def to_classpath_entries(paths, scheduler):
# list of path ->
# list of (path, optional<digest>) ->
path_and_digests = [(p, load_digest(os.path.dirname(p))) for p in paths]
path_and_digests = [(p, Digest.load(os.path.dirname(p))) for p in paths]
# partition: list of path, list of tuples
paths_without_digests = [p for (p, d) in path_and_digests if not d]
if paths_without_digests:
@@ -825,7 +808,7 @@ def _runtool_hermetic(self, main, tool_name, args, distribution, tgt=None, input
raise TaskError(res.stderr)

if output_dir:
write_digest(output_dir, res.output_directory_digest)
# NB the first element here is the root to materialize into, not the dir to snapshot
@@ -64,7 +64,7 @@ def run_python_test(transitive_hydrated_target, pytest):
# pex27, where it should be hermetically provided in some way.
output_pytest_requirements_pex_filename = 'pytest-with-requirements.pex'
requirements_pex_argv = [
'--python', python_binary,
'-e', 'pytest:main',
'-o', output_pytest_requirements_pex_filename,
@@ -219,7 +219,7 @@ def addresses_from_address_families(address_mapper, specs):
# Capture a Snapshot covering all paths for these Specs, then group by directory.
snapshot = yield Get(Snapshot, PathGlobs, _spec_to_globs(address_mapper, specs))
dirnames = {dirname(f.stat.path) for f in snapshot.files}
dirnames = {dirname(f) for f in snapshot.files}
address_families = yield [Get(AddressFamily, Dir(d)) for d in dirnames]
address_family_by_directory = {af.namespace: af for af in address_families}

@@ -4,14 +4,16 @@

from __future__ import absolute_import, division, print_function, unicode_literals

import os

from future.utils import binary_type, text_type

from pants.base.project_tree import Dir, File
from pants.engine.objects import Collection
from pants.engine.rules import RootRule
from pants.option.custom_types import GlobExpansionConjunction
from pants.option.global_options import GlobMatchErrorBehavior
from pants.util.objects import datatype
from pants.util.dirutil import maybe_read_file, safe_delete, safe_file_dump
from pants.util.objects import Exactly, datatype

class FileContent(datatype([('path', text_type), ('content', binary_type)])):
@@ -62,10 +64,6 @@ def __new__(cls, include, exclude=(), glob_match_error_behavior=None, conjunctio
conjunction=GlobExpansionConjunction.create(conjunction, none_is_default=True))

class PathGlobsAndRoot(datatype([('path_globs', PathGlobs), ('root', text_type)])):

class Digest(datatype([('fingerprint', text_type), ('serialized_bytes_length', int)])):
"""A Digest is a content-digest fingerprint, and a length of underlying content.
@@ -84,6 +82,33 @@ class Digest(datatype([('fingerprint', text_type), ('serialized_bytes_length', i

def _path(cls, directory):
return '{}.digest'.format(directory.rstrip(os.sep))

def clear(cls, directory):
"""Clear any existing Digest file adjacent to the given directory."""

def load(cls, directory):
"""Load a Digest from a `.digest` file adjacent to the given directory.
:return: A Digest, or None if the Digest did not exist.
read_file = maybe_read_file(cls._path(directory), binary_mode=False)
if read_file:
fingerprint, length = read_file.split(':')
return Digest(fingerprint, int(length))
return None

def dump(self, directory):
"""Dump this Digest object adjacent to the given directory."""
payload = '{}:{}'.format(self.fingerprint, self.serialized_bytes_length)
safe_file_dump(self._path(directory), payload=payload, mode='w')

def __repr__(self):
return '''Digest(fingerprint={}, serialized_bytes_length={})'''.format(
@@ -94,8 +119,25 @@ def __str__(self):
return repr(self)

class Snapshot(datatype([('directory_digest', Digest), ('path_stats', tuple)])):
"""A Snapshot is a collection of Files and Dirs fingerprinted by their names/content.
class PathGlobsAndRoot(datatype([
('path_globs', PathGlobs),
('root', text_type),
('digest_hint', Exactly(Digest, type(None))),
"""A set of PathGlobs to capture relative to some root (which may exist outside of the buildroot).
If the `digest_hint` is set, it must be the Digest that we would expect to get if we were to
expand and Digest the globs. The hint is an optimization that allows for bypassing filesystem
operations in cases where the expected Digest is known, and the content for the Digest is already

def __new__(cls, path_globs, root, digest_hint=None):
return super(PathGlobsAndRoot, cls).__new__(cls, path_globs, root, digest_hint)

class Snapshot(datatype([('directory_digest', Digest), ('files', tuple), ('dirs', tuple)])):
"""A Snapshot is a collection of file paths and dir paths fingerprinted by their names/content.
Snapshots are used to make it easier to isolate process execution by fixing the contents
of the files being operated on and easing their movement to and from isolated execution
@@ -106,22 +148,6 @@ class Snapshot(datatype([('directory_digest', Digest), ('path_stats', tuple)])):
def is_empty(self):
return self == EMPTY_SNAPSHOT

def dirs(self):
return [p for p in self.path_stats if type(p.stat) == Dir]

def dir_stats(self):
return [p.stat for p in self.dirs]

def files(self):
return [p for p in self.path_stats if type(p.stat) == File]

def file_stats(self):
return [p.stat for p in self.files]

class MergedDirectories(datatype([('directories', tuple)])):
@@ -150,7 +176,8 @@ class UrlToFetch(datatype([('url', text_type), ('digest', Digest)])):


@@ -690,10 +690,6 @@ def new_scheduler(self,
@@ -722,10 +718,6 @@ def tc(constraint):
# TypeConstraints.
@@ -14,8 +14,7 @@
from pants.base.project_tree import Dir, File, Link
from pants.build_graph.address import Address
from pants.engine.fs import (Digest, DirectoryToMaterialize, FileContent, FilesContent,
MergedDirectories, Path, PathGlobs, PathGlobsAndRoot, Snapshot,
MergedDirectories, PathGlobs, PathGlobsAndRoot, Snapshot, UrlToFetch)
from pants.engine.isolated_process import ExecuteProcessRequest, FallibleExecuteProcessResult
from pants.engine.native import Function, TypeConstraint, TypeId
from pants.engine.nodes import Return, Throw
@@ -101,10 +100,6 @@ def __init__(
@@ -9,6 +9,9 @@

def glob_to_regex(pattern):
"""Given a glob pattern, return an equivalent regex expression.
TODO: Replace with implementation in ``. See
:param string glob: The glob pattern. "**" matches 0 or more dirs recursively.
"*" only matches patterns in a single dir.
:returns: A regex string that matches same paths as the input glob does.
@@ -99,8 +99,10 @@ def files(self):

def files_relative_to_buildroot(self):
fds = self._snapshot.path_stats if self._include_dirs else self._snapshot.files
return tuple(fd.path for fd in fds)
res = self._snapshot.files
if self._include_dirs:
res += self._snapshot.dirs
return res

def files_hash(self):
Oops, something went wrong.
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.