Skip to content

Commit

Permalink
Fix PytestRun to handle multiple source roots. (#5400)
Browse files Browse the repository at this point in the history
Although an attempt was made to handle multiple source roots when
mapping python source paths for `py.test` and `coverage`, there were
ambiguities in the `coverage` mapping in particular leading to the
inability to run tests and collect coverage for code under test across
all python source roots. The python task pipeline is amended to produce
a source pex per source root, allowing `PytestRun` in turn to be source
root aware in its execution of coverage reports and a multi-source-root
test is added to ensure this case is handled.

In addition, resource targets are duplicated to all source pexes
containing python code that needs access to the resources. Previously,
unrelated resources could also be added to the source pexes (e.g., a
Java resource); this is fixed as well.

Fixes #5314
Fixes #5401
  • Loading branch information
jsirois committed Jan 28, 2018
1 parent ef15589 commit 1a4dcfc
Show file tree
Hide file tree
Showing 7 changed files with 441 additions and 302 deletions.
89 changes: 79 additions & 10 deletions src/python/pants/backend/python/tasks/gather_sources.py
Expand Up @@ -6,13 +6,14 @@
unicode_literals, with_statement)

import os
from collections import OrderedDict

from pex.interpreter import PythonInterpreter
from pex.pex import PEX
from pex.pex_builder import PEXBuilder

from pants.backend.python.tasks.pex_build_util import (dump_sources, has_python_sources,
has_resources)
has_resources, is_python_target)
from pants.invalidation.cache_manager import VersionedTargetSet
from pants.task.task import Task
from pants.util.dirutil import safe_concurrent_creation
Expand All @@ -21,32 +22,100 @@
class GatherSources(Task):
"""Gather local Python sources.
Creates an (unzipped) PEX on disk containing the local Python sources.
This PEX can be merged with a requirements PEX to create a unified Python environment
Creates one or more (unzipped) PEXs on disk containing the local Python sources.
These PEXes can be merged with a requirements PEX to create a unified Python environment
for running the relevant python code.
"""
PYTHON_SOURCES = 'python_sources'

class PythonSources(object):
  """A mapping of unzipped source PEXs by the targets whose sources the PEXs contain."""

  class UnmappedTargetError(Exception):
    """Indicates that no python source pex could be found for a given target."""

  def __init__(self, pex_by_target_base):
    # Maps source root (`target_base`) -> unzipped source PEX covering targets under that root.
    self._pex_by_target_base = pex_by_target_base

  def for_target(self, target):
    """Return the unzipped PEX containing the given target's sources.

    :returns: An unzipped PEX containing at least the given target's sources.
    :rtype: :class:`pex.pex.PEX`
    :raises: :class:`GatherSources.PythonSources.UnmappedTargetError` if no pex containing the
             given target's sources could be found.
    """
    pex = self._pex_by_target_base.get(target.target_base)
    if pex is None:
      # Name the offending target in the error to ease debugging of mis-mapped source roots.
      raise self.UnmappedTargetError('No source pex could be found for target {}.'.format(target))
    return pex

  def all(self):
    """Return all the unzipped source PEXs needed for this round."""
    return self._pex_by_target_base.values()

@classmethod
def implementation_version(cls):
  # Bumped to 4 when the task moved from a single source pex to one pex per source root.
  return super(GatherSources, cls).implementation_version() + [('GatherSources', 4)]

@classmethod
def product_types(cls):
  # The product is a PythonSources mapping of source pexes keyed by source root.
  return [cls.PythonSources]

@classmethod
def prepare(cls, options, round_manager):
  # Require an interpreter selection and any generated python code before gathering sources.
  round_manager.require_data(PythonInterpreter)
  round_manager.require_data('python') # For codegen.

def execute(self):
  """Build one unzipped source pex per source root and register the PythonSources product."""
  interpreter = self.context.products.get_data(PythonInterpreter)

  pex_by_target_base = OrderedDict()  # Preserve ~PYTHONPATH ordering over pexes.
  for target_base, targets in self._iter_targets_by_base():
    with self.invalidated(targets) as invalidation_check:
      pex = self._get_pex_for_versioned_targets(interpreter, invalidation_check.all_vts)
      pex_by_target_base[target_base] = pex
  self.context.products.register_data(self.PythonSources, self.PythonSources(pex_by_target_base))

def _iter_targets_by_base(self):
  """Return (source root, targets under that root) pairs for this round's python code.

  Resources targets are duplicated into the entry of every source root whose python targets
  (transitively) depend on them.
  """
  # N.B: Files and Resources targets belong with the consuming (dependee) targets so that those
  # targets can be ensured of access to the files in their PEX chroot. This means a given Files
  # or Resources target could be embedded in multiple pexes.

  context = self.context
  python_target_addresses = [p.address for p in context.targets(predicate=is_python_target)]

  targets_by_base = OrderedDict()  # Preserve ~PYTHONPATH ordering over source roots.
  resource_targets = set()

  def collect_source_targets(target):
    if has_python_sources(target):
      targets_by_base.setdefault(target.target_base, set()).add(target)
    elif has_resources(target):
      resource_targets.add(target)

  build_graph = context.build_graph
  build_graph.walk_transitive_dependency_graph(addresses=python_target_addresses,
                                               work=collect_source_targets)

  for resource_target in resource_targets:
    dependees = build_graph.transitive_dependees_of_addresses([resource_target.address])
    for targets in targets_by_base.values():
      # N.B.: This can add the resource to too many pexes. A canonical example is
      # test -> lib -> resource where test and lib have separate source roots. In this case
      # the resource is added to both the test pex and the lib pex and it's only needed in the
      # lib pex. The upshot is we allow python code access to undeclared (ie: indirect)
      # resource dependencies which is no worse than historical precedent, but could be
      # improved with a more complex algorithm.
      if not targets.isdisjoint(dependees):
        targets.add(resource_target)

  return targets_by_base.items()

def _get_pex_for_versioned_targets(self, interpreter, versioned_targets):
if versioned_targets:
Expand Down
8 changes: 6 additions & 2 deletions src/python/pants/backend/python/tasks/pex_build_util.py
Expand Up @@ -23,11 +23,15 @@
from pants.python.python_repos import PythonRepos


def is_python_target(tgt):
  """Return `True` if the given target is a "new" python-pipeline python target."""
  # We'd like to take all PythonTarget subclasses, but currently PythonThriftLibrary and
  # PythonAntlrLibrary extend PythonTarget, and until we fix that (which we can't do until
  # we remove the old python pipeline entirely) we want to ignore those target types here.
  return isinstance(tgt, (PythonLibrary, PythonTests, PythonBinary))


def has_python_sources(tgt):
  """Return `True` if the given target is a python target that owns sources."""
  return is_python_target(tgt) and tgt.has_sources()


def has_resources(tgt):
Expand Down
110 changes: 64 additions & 46 deletions src/python/pants/backend/python/tasks/pytest_run.py
Expand Up @@ -11,11 +11,13 @@
import time
import traceback
import uuid
from collections import OrderedDict
from contextlib import contextmanager
from textwrap import dedent

from six import StringIO
from six.moves import configparser
from twitter.common.collections import OrderedSet

from pants.backend.python.targets.python_tests import PythonTests
from pants.backend.python.tasks.gather_sources import GatherSources
Expand Down Expand Up @@ -192,28 +194,23 @@ def _debug(self):
return self.get_options().level == 'debug'

def _generate_coverage_config(self, source_mappings):
# For the benefit of macos testing, add the 'real' path the directory as an equivalent.
def add_realpath(path):
realpath = os.path.realpath(path)
if realpath != canonical and realpath not in alternates:
realpaths.add(realpath)

cp = configparser.SafeConfigParser()
cp.readfp(StringIO(self.DEFAULT_COVERAGE_CONFIG))

# We use the source_mappings to setup the `combine` coverage command to transform paths in
# coverage data files into canonical form.
# See the "[paths]" entry here: http://nedbatchelder.com/code/coverage/config.html for details.
cp.add_section('paths')
for canonical, alternates in source_mappings.items():
for canonical, alternate in source_mappings.items():
key = canonical.replace(os.sep, '.')
realpaths = set()
add_realpath(canonical)
for path in alternates:
add_realpath(path)
cp.set('paths',
key,
self._format_string_list([canonical] + list(alternates) + list(realpaths)))

# For the benefit of macos testing, add the 'real' paths as equivalents.
paths = OrderedSet([canonical,
alternate,
os.path.realpath(canonical),
os.path.realpath(alternate)])

cp.set('paths', key, self._format_string_list(paths))

# See the debug options here: http://nedbatchelder.com/code/coverage/cmd.html#cmd-run-debug
if self._debug:
Expand Down Expand Up @@ -250,14 +247,15 @@ def _maybe_emit_coverage_data(self, workdirs, targets, pex):
yield []
return

pex_src_root = os.path.relpath(self._source_chroot_path, get_buildroot())
def pex_src_root(tgt):
return os.path.relpath(self._source_chroot_path([tgt]), get_buildroot())

source_mappings = {}
for target in targets:
libs = (tgt for tgt in target.closure()
if tgt.has_sources('.py') and not isinstance(tgt, PythonTests))
for lib in libs:
source_mappings[lib.target_base] = [pex_src_root]
source_mappings[lib.target_base] = pex_src_root(lib)

def ensure_trailing_sep(path):
return path if path.endswith(os.path.sep) else path + os.path.sep
Expand Down Expand Up @@ -287,13 +285,13 @@ def compute_coverage_sources(tgt):
rel_source = os.path.relpath(source, get_buildroot())
rel_source = ensure_trailing_sep(rel_source)
found_target_base = False
for target_base in source_mappings:
for target_base, pex_root in source_mappings.items():
prefix = ensure_trailing_sep(target_base)
if rel_source.startswith(prefix):
# ... rel_source will match on prefix=src/python/ ...
suffix = rel_source[len(prefix):]
# ... suffix will equal foo/bar ...
coverage_sources.append(os.path.join(pex_src_root, suffix))
coverage_sources.append(os.path.join(pex_root, suffix))
found_target_base = True
# ... and we end up appending <pex_src_root>/foo/bar to the coverage_sources.
break
Expand All @@ -312,8 +310,11 @@ def compute_coverage_sources(tgt):
env = {
'PEX_MODULE': 'coverage.cmdline:main'
}
def pex_run(arguments):
return self._pex_run(pex, workunit_name='coverage', args=arguments, env=env)
def coverage_run(subcommand, arguments):
return self._pex_run(pex,
workunit_name='coverage-{}'.format(subcommand),
args=[subcommand] + arguments,
env=env)

# On failures or timeouts, the .coverage file won't be written.
if not os.path.exists('.coverage'):
Expand All @@ -323,13 +324,15 @@ def pex_run(arguments):
# This swaps the /tmp pex chroot source paths for the local original source paths
# the pex was generated from and which the user understands.
shutil.move('.coverage', '.coverage.raw')
pex_run(['combine', '--rcfile', coverage_rc])
pex_run(['report', '-i', '--rcfile', coverage_rc])
# N.B.: This transforms the contents of .coverage.raw and moves it back into .coverage.
coverage_run('combine', ['--rcfile', coverage_rc])

coverage_run('report', ['-i', '--rcfile', coverage_rc])

coverage_workdir = workdirs.coverage_path
pex_run(['html', '-i', '--rcfile', coverage_rc, '-d', coverage_workdir])
coverage_run('html', ['-i', '--rcfile', coverage_rc, '-d', coverage_workdir])
coverage_xml = os.path.join(coverage_workdir, 'coverage.xml')
pex_run(['xml', '-i', '--rcfile', coverage_rc, '-o', coverage_xml])
coverage_run('xml', ['-i', '--rcfile', coverage_rc, '-o', coverage_xml])

def _get_shard_conftest_content(self):
shard_spec = self.get_options().test_shard
Expand Down Expand Up @@ -386,7 +389,7 @@ def _get_conftest_content(self, sources_map):
import pytest
# Map from source path relative to chroot -> source path relative to buildroot.
_SOURCES_MAP = {}
_SOURCES_MAP = {!r}
@pytest.hookimpl(hookwrapper=True)
def pytest_runtest_protocol(item, nextitem):
Expand All @@ -400,7 +403,7 @@ def pytest_runtest_protocol(item, nextitem):
yield
finally:
item._nodeid = real_nodeid
""".format(sources_map))
""".format(dict(sources_map)))
# Add in the sharding conftest, if any.
shard_conftest_content = self._get_shard_conftest_content()
return (console_output_conftest_content + shard_conftest_content).encode('utf8')
Expand Down Expand Up @@ -463,7 +466,7 @@ def _do_run_tests_with_args(self, pex, args):
return PytestResult.exception()

def _map_relsrc_to_targets(self, targets):
pex_src_root = os.path.relpath(self._source_chroot_path, get_buildroot())
pex_src_root = os.path.relpath(self._source_chroot_path(targets), get_buildroot())
# First map chrooted sources back to their targets.
relsrc_to_target = {os.path.join(pex_src_root, src): target for target in targets
for src in target.sources_relative_to_source_root()}
Expand Down Expand Up @@ -505,7 +508,15 @@ def _iter_partitions(self, targets):
# TODO(John Sirois): Consume `py.test` pexes matched to the partitioning in effect after
# https://github.com/pantsbuild/pants/pull/4638 lands.
if self.get_options().fast:
yield tuple(targets)
targets_by_target_base = OrderedDict()
for target in targets:
targets_for_base = targets_by_target_base.get(target.target_base)
if targets_for_base is None:
targets_for_base = []
targets_by_target_base[target.target_base] = targets_for_base
targets_for_base.append(target)
for targets in targets_by_target_base.values():
yield tuple(targets)
else:
for target in targets:
yield (target,)
Expand Down Expand Up @@ -685,10 +696,10 @@ def _run_pytest(self, workdirs, targets):
if self._run_in_chroot:
path_func = lambda rel_src: rel_src
else:
source_chroot = os.path.relpath(self._source_chroot_path, get_buildroot())
source_chroot = os.path.relpath(self._source_chroot_path(targets), get_buildroot())
path_func = lambda rel_src: os.path.join(source_chroot, rel_src)

sources_map = {} # Path from chroot -> Path from buildroot.
sources_map = OrderedDict() # Path from chroot -> Path from buildroot.
for t in targets:
for p in t.sources_relative_to_source_root():
sources_map[path_func(p)] = os.path.join(t.target_base, p)
Expand Down Expand Up @@ -726,7 +737,8 @@ def _run_pytest(self, workdirs, targets):
if os.path.exists(junitxml_path):
os.unlink(junitxml_path)

result = self._do_run_tests_with_args(pex, args)
with self._maybe_run_in_chroot(targets):
result = self._do_run_tests_with_args(pex, args)

# There was a problem prior to test execution preventing junit xml file creation so just let
# the failure result bubble.
Expand All @@ -748,9 +760,16 @@ def parse_error_handler(parse_error):

return result.with_failed_targets(failed_targets)

@memoized_property
def _source_chroot_path(self):
return self.context.products.get_data(GatherSources.PYTHON_SOURCES).path()
def _source_chroot_path(self, targets):
  """Return the path of the unzipped source pex containing the given targets' sources.

  The given targets are expected to all live under a single source root.
  """
  target_bases = {target.target_base for target in targets}
  assert len(target_bases) <= 1, ('Expected targets to live in the same source root, given '
                                  'targets living under the following source roots: {}'
                                  .format(', '.join(sorted(target_bases))))
  python_sources = self.context.products.get_data(GatherSources.PythonSources)
  return python_sources.for_target(targets[0]).path()

def _pex_run(self, pex, workunit_name, args, env):
with self.context.new_workunit(name=workunit_name,
Expand All @@ -764,21 +783,20 @@ def _run_in_chroot(self):
return self.get_options().chroot

@contextmanager
def _maybe_run_in_chroot(self, targets):
  """Run the wrapped block from within the given targets' source chroot when chrooting is enabled.

  Otherwise run it from the current working directory.
  """
  if self._run_in_chroot:
    with pushd(self._source_chroot_path(targets)):
      yield
  else:
    yield

def _spawn(self, pex, workunit, args, setsid=False, env=None):
  """Spawn the given pex without blocking, wiring its output streams to the given workunit.

  :returns: A handle wrapping the spawned subprocess.
  """
  env = env or {}
  process = pex.run(args,
                    with_chroot=False,  # We handle chrooting ourselves.
                    blocking=False,
                    setsid=setsid,
                    env=env,
                    stdout=workunit.output('stdout'),
                    stderr=workunit.output('stderr'))
  return SubprocessProcessHandler(process)

0 comments on commit 1a4dcfc

Please sign in to comment.