Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use jdeps to run dep-usage task #8093

Merged
merged 5 commits into from
Jul 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 103 additions & 81 deletions src/python/pants/backend/jvm/tasks/analysis_extraction.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
# Copyright 2014 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import io
import json
import os
from collections import defaultdict
import re
import subprocess
from contextlib import contextmanager

from pants.backend.jvm.subsystems.dependency_context import DependencyContext
from pants.backend.jvm.subsystems.zinc import Zinc
from pants.backend.jvm.tasks.nailgun_task import NailgunTask
from pants.base.build_environment import get_buildroot
from pants.base.exceptions import TaskError
from pants.base.workunit import WorkUnitLabel
from pants.goal.products import MultipleRootedProducts
from pants.base.deprecated import deprecated_conditional
from pants.java.distribution.distribution import DistributionLocator
from pants.java.executor import SubprocessExecutor
from pants.util.contextutil import temporary_dir
from pants.util.memo import memoized_property


Expand All @@ -33,12 +36,11 @@ def register_options(cls, register):
@classmethod
def prepare(cls, options, round_manager):
    """Declare the products this task consumes.

    Only `runtime_classpath` is required now: the jdeps-based implementation
    reads compiled artifacts from the classpath instead of zinc analysis files.
    """
    super().prepare(options, round_manager)
    round_manager.require_data('runtime_classpath')

@classmethod
def product_types(cls):
    """Products this task can produce.

    `product_deps_by_target` is the jdeps-backed product; `classes_by_source`
    and `product_deps_by_src` remain listed only for deprecation (see
    `execute`, which warns when they are required).
    """
    return ['product_deps_by_target', 'classes_by_source', 'product_deps_by_src']

def _create_products_if_should_run(self):
"""If this task should run, initialize empty products that it will populate.
Expand All @@ -47,106 +49,126 @@ def _create_products_if_should_run(self):
"""

should_run = False
if self.context.products.is_required_data('classes_by_source'):
if self.context.products.is_required_data('product_deps_by_target'):
should_run = True
make_products = lambda: defaultdict(MultipleRootedProducts)
self.context.products.safe_create_data('classes_by_source', make_products)

if self.context.products.is_required_data('product_deps_by_src'):
should_run = True
self.context.products.safe_create_data('product_deps_by_src', dict)
self.context.products.safe_create_data('product_deps_by_target', dict)
return should_run

@memoized_property
def _zinc(self):
    """Memoized Zinc instance built from the global Zinc.Factory."""
    factory = Zinc.Factory.global_instance()
    return factory.create(self.context.products, self.get_options().execution_strategy)

def _summary_json_file(self, vt):
return os.path.join(vt.results_dir, 'summary.json')
def _jdeps_output_json(self, vt):
return os.path.join(vt.results_dir, 'jdeps_output.json')

@memoized_property
def _analysis_by_runtime_entry(self):
zinc_analysis = self.context.products.get_data('zinc_analysis')
return {cp_entry: analysis_file for _, cp_entry, analysis_file in zinc_analysis.values()}
@contextmanager
def aliased_classpaths(self, classpaths):
    """Create unique names for each classpath entry as symlinks in
    a temporary directory. returns: dict[str -> classpath entry]
    which maps string paths of symlinks to classpaths.

    ClasspathEntries generally point to a .jar of
    the .class files generated for java_library targets.
    These jars all have the same basename, z.jar, which
    confuses the `jdeps` tool. Jdeps expects unique, and
    descriptive, basenames for jars. When all basenames are
    the same the deps collide in the jdeps output, some
    .class files can't be found and the summary output
    is not complete.
    """
    with temporary_dir() as tempdir:
        aliases = {}
        for i, cp in enumerate(classpaths):
            # Directories keep a bare numeric alias; files get a `.jar` suffix
            # so jdeps treats them as jars.
            alias = os.path.join(tempdir, f"{i}.jar" if not os.path.isdir(cp) else f"{i}")
            os.symlink(cp, alias)
            aliases[alias] = cp
        yield aliases

def execute(self):
    """Run jdeps over each invalid target and register per-target dep products."""
    deprecated_conditional(
        lambda: self.context.products.is_required_data('classes_by_source'),
        '1.20.0.dev2',
        'The `classes_by_source` product depends on internal compiler details and is no longer produced.'
    )
    deprecated_conditional(
        lambda: self.context.products.is_required_data('product_deps_by_src'),
        '1.20.0.dev2',
        'The `product_deps_by_src` product depends on internal compiler details and is no longer produced. '
        'For similar functionality consume `product_deps_by_target`.'
    )

    # If none of our computed products are necessary, return immediately.
    if not self._create_products_if_should_run():
        return

    classpath_product = self.context.products.get_data('runtime_classpath')
    product_deps_by_target = self.context.products.get_data('product_deps_by_target')

    fingerprint_strategy = DependencyContext.global_instance().create_fingerprint_strategy(
        classpath_product)

    # classpath fingerprint strategy only works on targets with a classpath.
    targets = [target for target in self.context.targets() if hasattr(target, 'strict_deps')]
    with self.invalidated(targets,
                          fingerprint_strategy=fingerprint_strategy,
                          invalidate_dependents=True) as invalidation_check:
        for vt in invalidation_check.all_vts:
            # A list of class paths to the artifacts created by the target we are computing deps for.
            target_artifact_classpaths = [path for _, path in classpath_product.get_for_target(vt.target)]
            potential_deps_classpaths = self._zinc.compile_classpath('runtime_classpath', vt.target)

            jdeps_output_json = self._jdeps_output_json(vt)
            if not vt.valid:
                # Only invalid targets need a fresh jdeps run; valid ones reuse
                # the cached jdeps_output.json in their results dir.
                self._run_jdeps_analysis(
                    vt.target,
                    target_artifact_classpaths,
                    potential_deps_classpaths,
                    jdeps_output_json
                )
            self._register_products(vt.target,
                                    jdeps_output_json,
                                    product_deps_by_target)

@memoized_property
def _jdeps_summary_line_regex(self):
    """Compiled pattern for a jdeps `-summary` line of the form `<source> -> <dep>`."""
    summary_pattern = r"^.+\s->\s(.+)$"
    return re.compile(summary_pattern)

def _run_jdeps_analysis(self, target, target_artifact_classpaths, potential_deps_classpaths, jdeps_output_json):
    """Run jdeps against the target's artifacts and cache the used deps as JSON.

    :param target: the target whose artifacts are analyzed.
    :param target_artifact_classpaths: classpath entries produced *by* this target.
    :param potential_deps_classpaths: classpath entries this target may depend on;
      these are symlink-aliased (see `aliased_classpaths`) so jdeps sees unique
      basenames.
    :param jdeps_output_json: path where the JSON list of used classpath entries
      is written (written even when there are no artifacts, as an empty list).
    """
    with self.aliased_classpaths(potential_deps_classpaths) as classpaths_by_alias:
        with open(jdeps_output_json, 'w') as f:
            if len(target_artifact_classpaths):
                jdeps_stdout, jdeps_stderr = self._spawn_jdeps_command(
                    target, target_artifact_classpaths, classpaths_by_alias.keys()
                ).communicate()
                deps_classpaths = set()
                for line in io.StringIO(jdeps_stdout.decode('utf-8')):
                    match = self._jdeps_summary_line_regex.fullmatch(line.strip())
                    if match is None:
                        # jdeps also emits lines that are not `a -> b` summary
                        # lines (e.g. "not found" notes); skip them instead of
                        # crashing on a None match.
                        continue
                    dep = match.group(1)
                    # Map the symlink alias back to the real classpath entry;
                    # fall back to the raw value for non-aliased deps (e.g. JDK).
                    deps_classpaths.add(classpaths_by_alias.get(dep, dep))
            else:
                deps_classpaths = []
            json.dump(list(deps_classpaths), f)

def _spawn_jdeps_command(self, target, target_artifact_classpaths, potential_deps_classpaths):
    """Spawn `jdeps -summary` over the target's artifacts; returns the subprocess.

    Stdout is piped so the caller can parse the summary lines; jdeps' Main is
    loaded from the JDK's tools.jar.
    """
    jdk = DistributionLocator.cached(jdk=True)
    tool_classpath = jdk.find_libs(['tools.jar'])

    args = [
        "-summary",
        # str.join takes any iterable directly; no generator needed.
        '-classpath', ":".join(potential_deps_classpaths),
    ] + target_artifact_classpaths

    java_executor = SubprocessExecutor(jdk)
    return java_executor.spawn(classpath=tool_classpath,
                               main='com.sun.tools.jdeps.Main',
                               jvm_options=self.get_options().jvm_options,
                               args=args,
                               stdout=subprocess.PIPE)

def _register_products(self,
target,
target_cp_entry,
summary_json_file,
classes_by_source,
product_deps_by_src):
summary_json = self._parse_summary_json(summary_json_file)

# Register a mapping between sources and classfiles (if requested).
if classes_by_source is not None:
buildroot = get_buildroot()
for abs_src, classes in summary_json['products'].items():
source = os.path.relpath(os.path.normpath(abs_src), buildroot)
classes = [os.path.normpath(c) for c in classes]
classes_by_source[source].add_abs_paths(target_cp_entry, classes)

# Register classfile product dependencies (if requested).
if product_deps_by_src is not None:
product_deps_by_src[target] = {
os.path.normpath(src): [os.path.normpath(dep) for dep in deps]
for src, deps in summary_json['dependencies'].items()
}

def _parse_summary_json(self, summary_json_file):
with open(summary_json_file, 'r') as f:
return json.load(f, encoding='utf-8')
jdeps_output_json,
product_deps_by_target):
with open(jdeps_output_json) as f:
product_deps_by_target[target] = json.load(f)
56 changes: 31 additions & 25 deletions src/python/pants/backend/jvm/tasks/jvm_dependency_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,8 @@ def _skip(options):
def prepare(cls, options, round_manager):
super().prepare(options, round_manager)
if not cls._skip(options):
round_manager.require_data('product_deps_by_src')
round_manager.require_data('product_deps_by_target')
round_manager.require_data('runtime_classpath')
round_manager.require_data('zinc_analysis')

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
Expand Down Expand Up @@ -102,15 +101,15 @@ def execute(self):
fingerprint_strategy = DependencyContext.global_instance().create_fingerprint_strategy(
classpath_product)

targets = list(self.context.products.get_data('zinc_analysis').keys())
targets = [target for target in self.context.targets() if hasattr(target, 'strict_deps')]

with self.invalidated(targets,
fingerprint_strategy=fingerprint_strategy,
invalidate_dependents=True) as invalidation_check:
for vt in invalidation_check.invalid_vts:
product_deps_by_src = self.context.products.get_data('product_deps_by_src').get(vt.target)
if product_deps_by_src is not None:
self.check(vt.target, product_deps_by_src)
product_deps_for_target = self.context.products.get_data('product_deps_by_target').get(vt.target)
if product_deps_for_target is not None:
self.check(vt.target, product_deps_for_target)

def check(self, src_tgt, actual_deps):
"""Check for missing deps.
Expand All @@ -130,16 +129,19 @@ def shorten(path): # Make the output easier to read.

def filter_whitelisted(missing_deps):
# Removing any targets that exist in the whitelist from the list of dependency issues.
return [(tgt_pair, evidence) for (tgt_pair, evidence) in missing_deps
if tgt_pair[0].address not in self._target_whitelist]
return [
(tgt_pair, evidence)
for (tgt_pair, evidence) in missing_deps
if tgt_pair[0].address not in self._target_whitelist
]

missing_direct_tgt_deps = filter_whitelisted(missing_direct_tgt_deps)

if self._check_missing_direct_deps and missing_direct_tgt_deps:
log_fn = (self.context.log.error if self._check_missing_direct_deps == 'fatal'
else self.context.log.warn)
for (tgt_pair, evidence) in missing_direct_tgt_deps:
evidence_str = '\n'.join([' {} uses {}'.format(shorten(e[0]), shorten(e[1]))
evidence_str = '\n'.join([' {} uses {}'.format(e[0].address.spec, shorten(e[1]))
for e in evidence])
log_fn('Missing direct BUILD dependency {} -> {} because:\n{}'
.format(tgt_pair[0].address.spec, tgt_pair[1].address.spec, evidence_str))
Expand Down Expand Up @@ -180,6 +182,7 @@ def _compute_missing_deps(self, src_tgt, actual_deps):
All paths in the input and output are absolute.
"""
analyzer = self._analyzer

def must_be_explicit_dep(dep):
# We don't require explicit deps on the java runtime, so we shouldn't consider that
# a missing dep.
Expand All @@ -204,22 +207,23 @@ def target_or_java_dep_in_targets(target, targets):
missing_direct_tgt_deps_map = defaultdict(list) # The same, but for direct deps.

targets_by_file = analyzer.targets_by_file(self.context.targets())
buildroot = get_buildroot()
abs_srcs = [os.path.join(buildroot, src) for src in src_tgt.sources_relative_to_buildroot()]
for src in abs_srcs:
for actual_dep in filter(must_be_explicit_dep, actual_deps.get(src, [])):
actual_dep_tgts = targets_by_file.get(actual_dep)
# actual_dep_tgts is usually a singleton. If it's not, we only need one of these
# to be in our declared deps to be OK.
if actual_dep_tgts is None:
missing_file_deps.add((src_tgt, actual_dep))
elif not target_or_java_dep_in_targets(src_tgt, actual_dep_tgts):
# Obviously intra-target deps are fine.
canonical_actual_dep_tgt = next(iter(actual_dep_tgts))
if canonical_actual_dep_tgt not in src_tgt.dependencies:
# The canonical dep is the only one a direct dependency makes sense on.
missing_direct_tgt_deps_map[(src_tgt, canonical_actual_dep_tgt)].append(
(src, actual_dep))
for actual_dep in filter(must_be_explicit_dep, actual_deps):
actual_dep_tgts = targets_by_file.get(actual_dep)
# actual_dep_tgts is usually a singleton. If it's not, we only need one of these
# to be in our declared deps to be OK.
if actual_dep_tgts is None:
missing_file_deps.add((src_tgt, actual_dep))
elif not target_or_java_dep_in_targets(src_tgt, actual_dep_tgts):
# Obviously intra-target deps are fine.
canonical_actual_dep_tgt = next(iter(actual_dep_tgts))
if canonical_actual_dep_tgt not in src_tgt.dependencies:
# The canonical dep is the only one a direct dependency makes sense on.
# TODO get rid of src usage here. we dont have a way to map class
# files back to source files when using jdeps. I think we can get away without
# listing the src file directly and just list the target which has the transient
# dep
missing_direct_tgt_deps_map[(src_tgt, canonical_actual_dep_tgt)].append(
(src_tgt, actual_dep))

return (list(missing_file_deps),
list(missing_direct_tgt_deps_map.items()))
Expand Down Expand Up @@ -254,6 +258,8 @@ def _compute_unnecessary_deps(self, target, actual_deps):
"""
# Flatten the product deps of this target.
product_deps = set()
# TODO update actual deps will just be a list, not a dict when switching to
# product_deps_by_target_product.
for dep_entries in actual_deps.values():
product_deps.update(dep_entries)

Expand Down
Loading