Skip to content

Commit

Permalink
Unrevert #8093 and fix jdeps parsing. (#8125)
Browse files Browse the repository at this point in the history
### Problem

Previous jdeps PR (#8093) was reverted due to a bug where regex parsing of jdeps output failed.

### Solution

Fix the root cause: incorrect arguments were passed to jdeps when targets had no deps with classpaths. Also add an extra layer of safety checking when matching individual lines of the summary output.

### Result

dep-usage task functions more reliably.
  • Loading branch information
Henry Fuller authored and stuhood committed Jul 30, 2019
1 parent ea100e2 commit 9004bdd
Show file tree
Hide file tree
Showing 5 changed files with 182 additions and 145 deletions.
190 changes: 108 additions & 82 deletions src/python/pants/backend/jvm/tasks/analysis_extraction.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
# Copyright 2014 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import io
import json
import os
from collections import defaultdict
import re
import subprocess
from contextlib import contextmanager

from pants.backend.jvm.subsystems.dependency_context import DependencyContext
from pants.backend.jvm.subsystems.zinc import Zinc
from pants.backend.jvm.tasks.nailgun_task import NailgunTask
from pants.base.build_environment import get_buildroot
from pants.base.exceptions import TaskError
from pants.base.workunit import WorkUnitLabel
from pants.goal.products import MultipleRootedProducts
from pants.base.deprecated import deprecated_conditional
from pants.java.distribution.distribution import DistributionLocator
from pants.java.executor import SubprocessExecutor
from pants.util.contextutil import temporary_dir
from pants.util.memo import memoized_property


Expand All @@ -33,12 +36,11 @@ def register_options(cls, register):
@classmethod
def prepare(cls, options, round_manager):
super().prepare(options, round_manager)
round_manager.require_data('zinc_analysis')
round_manager.require_data('runtime_classpath')

@classmethod
def product_types(cls):
return ['classes_by_source', 'product_deps_by_src']
return ['product_deps_by_target', 'classes_by_source', 'product_deps_by_src']

def _create_products_if_should_run(self):
"""If this task should run, initialize empty products that it will populate.
Expand All @@ -47,106 +49,130 @@ def _create_products_if_should_run(self):
"""

should_run = False
if self.context.products.is_required_data('classes_by_source'):
if self.context.products.is_required_data('product_deps_by_target'):
should_run = True
make_products = lambda: defaultdict(MultipleRootedProducts)
self.context.products.safe_create_data('classes_by_source', make_products)

if self.context.products.is_required_data('product_deps_by_src'):
should_run = True
self.context.products.safe_create_data('product_deps_by_src', dict)
self.context.products.safe_create_data('product_deps_by_target', dict)
return should_run

@memoized_property
def _zinc(self):
  """A Zinc instance configured from this task's products and execution strategy."""
  factory = Zinc.Factory.global_instance()
  return factory.create(self.context.products, self.get_options().execution_strategy)

def _summary_json_file(self, vt):
return os.path.join(vt.results_dir, 'summary.json')
def _jdeps_output_json(self, vt):
return os.path.join(vt.results_dir, 'jdeps_output.json')

@memoized_property
def _analysis_by_runtime_entry(self):
  """Map each runtime classpath entry to its zinc analysis file."""
  mapping = {}
  for _, cp_entry, analysis_file in self.context.products.get_data('zinc_analysis').values():
    mapping[cp_entry] = analysis_file
  return mapping
@contextmanager
def aliased_classpaths(self, classpaths):
  """Expose each classpath entry under a unique symlinked basename.

  jdeps keys its summary output on jar basenames, and the jars built for
  java_library targets all share the same basename (z.jar). Identical
  basenames collide in the jdeps output, so some .class files cannot be
  found and the summary is incomplete. Symlinking each entry in a
  temporary directory as ``<index>.jar`` (or ``<index>`` for directories)
  gives jdeps unique, descriptive names.

  Yields a dict mapping symlink path -> real classpath entry.
  """
  with temporary_dir() as tmp:
    alias_map = {}
    for index, entry in enumerate(classpaths):
      basename = str(index) if os.path.isdir(entry) else f"{index}.jar"
      link = os.path.join(tmp, basename)
      os.symlink(entry, link)
      alias_map[link] = entry
    yield alias_map

def execute(self):
# If none of our computed products are necessary, return immediately.
deprecated_conditional(
lambda: self.context.products.is_required_data('classes_by_source'),
'1.20.0.dev2',
'The `classes_by_source` product depends on internal compiler details and is no longer produced.'
)
deprecated_conditional(
lambda: self.context.products.is_required_data('product_deps_by_src'),
'1.20.0.dev2',
'The `product_deps_by_src` product depends on internal compiler details and is no longer produced. '
'For similar functionality consume `product_deps_by_target`.'
)

if not self._create_products_if_should_run():
return

zinc_analysis = self.context.products.get_data('zinc_analysis')
classpath_product = self.context.products.get_data('runtime_classpath')
classes_by_source = self.context.products.get_data('classes_by_source')
product_deps_by_src = self.context.products.get_data('product_deps_by_src')
product_deps_by_target = self.context.products.get_data('product_deps_by_target')

fingerprint_strategy = DependencyContext.global_instance().create_fingerprint_strategy(
classpath_product)

targets = list(zinc_analysis.keys())
# classpath fingerprint strategy only works on targets with a classpath.
targets = [target for target in self.context.targets() if hasattr(target, 'strict_deps')]
with self.invalidated(targets,
fingerprint_strategy=fingerprint_strategy,
invalidate_dependents=True) as invalidation_check:
# Extract and parse products for any relevant targets.
for vt in invalidation_check.all_vts:
summary_json_file = self._summary_json_file(vt)
cp_entry, _, analysis_file = zinc_analysis[vt.target]
# A list of class paths to the artifacts created by the target we are computing deps for.
target_artifact_classpaths = [path for _, path in classpath_product.get_for_target(vt.target)]
potential_deps_classpaths = self._zinc.compile_classpath('runtime_classpath', vt.target)

jdeps_output_json = self._jdeps_output_json(vt)
if not vt.valid:
self._extract_analysis(vt.target, analysis_file, summary_json_file)
self._run_jdeps_analysis(
vt.target,
target_artifact_classpaths,
potential_deps_classpaths,
jdeps_output_json
)
self._register_products(vt.target,
cp_entry,
summary_json_file,
classes_by_source,
product_deps_by_src)

def _extract_analysis(self, target, analysis_file, summary_json_file):
target_classpath = self._zinc.compile_classpath('runtime_classpath', target)
analysis_by_cp_entry = self._analysis_by_runtime_entry
upstream_analysis = list(self._upstream_analysis(target_classpath, analysis_by_cp_entry))
args = [
'-summary-json', summary_json_file,
'-analysis-cache', analysis_file,
'-classpath', ':'.join(target_classpath),
'-analysis-map', ','.join('{}:{}'.format(k, v) for k, v in upstream_analysis),
]

result = self.runjava(classpath=self._zinc.extractor,
main=Zinc.ZINC_EXTRACT_MAIN,
args=args,
workunit_name=Zinc.ZINC_EXTRACTOR_TOOL_NAME,
workunit_labels=[WorkUnitLabel.MULTITOOL])
if result != 0:
raise TaskError('Failed to parse analysis for {}'.format(target.address.spec),
exit_code=result)

def _upstream_analysis(self, target_classpath, analysis_by_cp_entry):
for entry in target_classpath:
analysis_file = analysis_by_cp_entry.get(entry)
if analysis_file is not None:
yield entry, analysis_file
jdeps_output_json,
product_deps_by_target)

@memoized_property
def _jdeps_summary_line_regex(self):
  """Pattern for one `jdeps -summary` line of the form `<source> -> <dep>`."""
  pattern = r"^.+\s->\s(.+)$"
  return re.compile(pattern)

def _run_jdeps_analysis(self, target, target_artifact_classpaths, potential_deps_classpaths, jdeps_output_json):
  """Run jdeps over the target's artifacts and write the dep classpaths found as a JSON list.

  :param target: the target under analysis.
  :param target_artifact_classpaths: classpath entries of the artifacts built from `target`.
  :param potential_deps_classpaths: classpath entries the target may depend on.
  :param jdeps_output_json: path of the JSON file to write (a list of classpath entries).
  """
  with self.aliased_classpaths(potential_deps_classpaths) as classpaths_by_alias:
    with open(jdeps_output_json, 'w') as f:
      # Unified empty-result type: previously the no-artifact branch used a
      # list while the jdeps branch used a set; both serialize identically.
      deps_classpaths = set()
      # Only invoke jdeps when the target actually produced artifacts
      # (idiomatic truthiness instead of `if len(...)`).
      if target_artifact_classpaths:
        # stderr is not piped, so communicate() returns None for it.
        jdeps_stdout, _ = self._spawn_jdeps_command(
          target, target_artifact_classpaths, classpaths_by_alias.keys()
        ).communicate()
        for line in io.StringIO(jdeps_stdout.decode('utf-8')):
          # Safety check: only consume summary lines of the form `X -> Y`;
          # anything else jdeps prints is ignored.
          match = self._jdeps_summary_line_regex.fullmatch(line.strip())
          if match is not None:
            dep_name = match.group(1)
            # Translate an aliased symlink back to the real classpath entry;
            # names that were not aliased pass through unchanged.
            deps_classpaths.add(classpaths_by_alias.get(dep_name, dep_name))
      json.dump(list(deps_classpaths), f)

def _spawn_jdeps_command(self, target, target_artifact_classpaths, potential_deps_classpaths):
  """Spawn a `jdeps -summary` subprocess for the target's artifacts.

  :param target: the target under analysis (not used to build the command line here —
    presumably kept for signature symmetry; TODO confirm).
  :param target_artifact_classpaths: classpath entries passed to jdeps as inputs.
  :param potential_deps_classpaths: candidate dep entries joined into `-classpath`.
  :returns: the spawned process, with stdout piped for the caller to read.
  """
  jdk = DistributionLocator.cached(jdk=True)
  tool_classpath = jdk.find_libs(['tools.jar'])
  # join() accepts the iterable directly; the redundant generator is dropped.
  potential_deps_classpath = ":".join(potential_deps_classpaths)

  args = ["-summary"]
  # Omit -classpath entirely when there are no candidate deps: passing an
  # empty value here is what broke jdeps for targets with no dep classpaths.
  if potential_deps_classpath:
    args.extend(['-classpath', potential_deps_classpath])

  args.extend(target_artifact_classpaths)

  java_executor = SubprocessExecutor(jdk)
  return java_executor.spawn(classpath=tool_classpath,
                             main='com.sun.tools.jdeps.Main',
                             jvm_options=self.get_options().jvm_options,
                             args=args,
                             stdout=subprocess.PIPE)

def _register_products(self,
                       target,
                       target_cp_entry,
                       summary_json_file,
                       classes_by_source,
                       product_deps_by_src):
  """Populate requested products for *target* from its parsed analysis summary."""
  summary_json = self._parse_summary_json(summary_json_file)

  # Source -> classfile mapping (only when the product was requested).
  if classes_by_source is not None:
    buildroot = get_buildroot()
    for abs_src, classes in summary_json['products'].items():
      rel_src = os.path.relpath(os.path.normpath(abs_src), buildroot)
      normalized_classes = [os.path.normpath(c) for c in classes]
      classes_by_source[rel_src].add_abs_paths(target_cp_entry, normalized_classes)

  # Per-source classfile dependencies (only when requested).
  if product_deps_by_src is not None:
    deps_by_src = {}
    for src, deps in summary_json['dependencies'].items():
      deps_by_src[os.path.normpath(src)] = [os.path.normpath(dep) for dep in deps]
    product_deps_by_src[target] = deps_by_src

def _parse_summary_json(self, summary_json_file):
with open(summary_json_file, 'r') as f:
return json.load(f, encoding='utf-8')
jdeps_output_json,
product_deps_by_target):
with open(jdeps_output_json) as f:
product_deps_by_target[target] = json.load(f)
56 changes: 31 additions & 25 deletions src/python/pants/backend/jvm/tasks/jvm_dependency_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,8 @@ def _skip(options):
def prepare(cls, options, round_manager):
super().prepare(options, round_manager)
if not cls._skip(options):
round_manager.require_data('product_deps_by_src')
round_manager.require_data('product_deps_by_target')
round_manager.require_data('runtime_classpath')
round_manager.require_data('zinc_analysis')

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
Expand Down Expand Up @@ -102,15 +101,15 @@ def execute(self):
fingerprint_strategy = DependencyContext.global_instance().create_fingerprint_strategy(
classpath_product)

targets = list(self.context.products.get_data('zinc_analysis').keys())
targets = [target for target in self.context.targets() if hasattr(target, 'strict_deps')]

with self.invalidated(targets,
fingerprint_strategy=fingerprint_strategy,
invalidate_dependents=True) as invalidation_check:
for vt in invalidation_check.invalid_vts:
product_deps_by_src = self.context.products.get_data('product_deps_by_src').get(vt.target)
if product_deps_by_src is not None:
self.check(vt.target, product_deps_by_src)
product_deps_for_target = self.context.products.get_data('product_deps_by_target').get(vt.target)
if product_deps_for_target is not None:
self.check(vt.target, product_deps_for_target)

def check(self, src_tgt, actual_deps):
"""Check for missing deps.
Expand All @@ -130,16 +129,19 @@ def shorten(path): # Make the output easier to read.

def filter_whitelisted(missing_deps):
# Removing any targets that exist in the whitelist from the list of dependency issues.
return [(tgt_pair, evidence) for (tgt_pair, evidence) in missing_deps
if tgt_pair[0].address not in self._target_whitelist]
return [
(tgt_pair, evidence)
for (tgt_pair, evidence) in missing_deps
if tgt_pair[0].address not in self._target_whitelist
]

missing_direct_tgt_deps = filter_whitelisted(missing_direct_tgt_deps)

if self._check_missing_direct_deps and missing_direct_tgt_deps:
log_fn = (self.context.log.error if self._check_missing_direct_deps == 'fatal'
else self.context.log.warn)
for (tgt_pair, evidence) in missing_direct_tgt_deps:
evidence_str = '\n'.join([' {} uses {}'.format(shorten(e[0]), shorten(e[1]))
evidence_str = '\n'.join([' {} uses {}'.format(e[0].address.spec, shorten(e[1]))
for e in evidence])
log_fn('Missing direct BUILD dependency {} -> {} because:\n{}'
.format(tgt_pair[0].address.spec, tgt_pair[1].address.spec, evidence_str))
Expand Down Expand Up @@ -180,6 +182,7 @@ def _compute_missing_deps(self, src_tgt, actual_deps):
All paths in the input and output are absolute.
"""
analyzer = self._analyzer

def must_be_explicit_dep(dep):
# We don't require explicit deps on the java runtime, so we shouldn't consider that
# a missing dep.
Expand All @@ -204,22 +207,23 @@ def target_or_java_dep_in_targets(target, targets):
missing_direct_tgt_deps_map = defaultdict(list) # The same, but for direct deps.

targets_by_file = analyzer.targets_by_file(self.context.targets())
buildroot = get_buildroot()
abs_srcs = [os.path.join(buildroot, src) for src in src_tgt.sources_relative_to_buildroot()]
for src in abs_srcs:
for actual_dep in filter(must_be_explicit_dep, actual_deps.get(src, [])):
actual_dep_tgts = targets_by_file.get(actual_dep)
# actual_dep_tgts is usually a singleton. If it's not, we only need one of these
# to be in our declared deps to be OK.
if actual_dep_tgts is None:
missing_file_deps.add((src_tgt, actual_dep))
elif not target_or_java_dep_in_targets(src_tgt, actual_dep_tgts):
# Obviously intra-target deps are fine.
canonical_actual_dep_tgt = next(iter(actual_dep_tgts))
if canonical_actual_dep_tgt not in src_tgt.dependencies:
# The canonical dep is the only one a direct dependency makes sense on.
missing_direct_tgt_deps_map[(src_tgt, canonical_actual_dep_tgt)].append(
(src, actual_dep))
for actual_dep in filter(must_be_explicit_dep, actual_deps):
actual_dep_tgts = targets_by_file.get(actual_dep)
# actual_dep_tgts is usually a singleton. If it's not, we only need one of these
# to be in our declared deps to be OK.
if actual_dep_tgts is None:
missing_file_deps.add((src_tgt, actual_dep))
elif not target_or_java_dep_in_targets(src_tgt, actual_dep_tgts):
# Obviously intra-target deps are fine.
canonical_actual_dep_tgt = next(iter(actual_dep_tgts))
if canonical_actual_dep_tgt not in src_tgt.dependencies:
# The canonical dep is the only one a direct dependency makes sense on.
# TODO: get rid of src usage here. We don't have a way to map class
# files back to source files when using jdeps. I think we can get away without
# listing the src file directly and just list the target which has the transitive
# dep.
missing_direct_tgt_deps_map[(src_tgt, canonical_actual_dep_tgt)].append(
(src_tgt, actual_dep))

return (list(missing_file_deps),
list(missing_direct_tgt_deps_map.items()))
Expand Down Expand Up @@ -254,6 +258,8 @@ def _compute_unnecessary_deps(self, target, actual_deps):
"""
# Flatten the product deps of this target.
product_deps = set()
# TODO: once we switch to the product_deps_by_target product, actual_deps
# will be just a list, not a dict.
for dep_entries in actual_deps.values():
product_deps.update(dep_entries)

Expand Down
Loading

0 comments on commit 9004bdd

Please sign in to comment.