Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix memory leak in ./pants changed #5011

44 changes: 11 additions & 33 deletions src/python/pants/backend/jvm/subsystems/java.py
Expand Up @@ -21,6 +21,9 @@ class Java(JvmToolMixin, ZincLanguageMixin, Subsystem):
"""
options_scope = 'java'

_javac_tool_name = 'javac'
_default_javac_spec = '//:{}'.format(_javac_tool_name)

@classmethod
def register_options(cls, register):
super(Java, cls).register_options(register)
Expand All @@ -31,7 +34,7 @@ def register_options(cls, register):
# Javac plugins can access basically all of the compiler internals, so we don't shade anything.
# Hence the unspecified main= argument. This tool is optional, hence the empty classpath list.
cls.register_jvm_tool(register,
'javac',
cls._javac_tool_name,
classpath=[],
help='Java compiler to use. If unspecified, we use the compiler '
'embedded in the Java distribution we run on.')
Expand All @@ -42,40 +45,17 @@ def register_options(cls, register):
fingerprint=True)

def injectables(self, build_graph):
# N.B. This method would normally utilize `injectables_spec_for_key(key)` to get at
# static specs, but due to the need to check the buildgraph before determining whether
# the javac spec is valid we must handle the injectables here without poking the
# `injectables_spec_mapping` property.
javac_spec = self._javac_spec
if not javac_spec:
self._javac_exists = False
else:
javac_address = Address.parse(javac_spec)
self._javac_exists = True if build_graph.contains_address(javac_address) else False

toolsjar_spec = self._tools_jar_spec
if toolsjar_spec:
synthetic_address = Address.parse(toolsjar_spec)
if not build_graph.contains_address(synthetic_address):
build_graph.inject_synthetic_target(synthetic_address, ToolsJar)

@property
def javac_specs(self):
if not self._javac_spec:
return []
assert self._javac_exists is not None, (
'cannot access javac_specs until injectables is called'
)
return [self._javac_spec] if self._javac_exists else []
# If a javac target has been defined on disk, use it: otherwise, inject a ToolsJar
# target to provide the dependency.
javac_address = Address.parse(self._javac_spec)
if not build_graph.contains_address(javac_address):
build_graph.inject_synthetic_target(javac_address, ToolsJar)

@property
def injectables_spec_mapping(self):
return {
'plugin': self._plugin_dependency_specs,
# If no javac library is specified, this maps to None. The caller must handle
# this case by defaulting to the JDK's tools.jar.
'javac': self.javac_specs,
'tools.jar': [self._tools_jar_spec]
'javac': [self._javac_spec],
}

@classmethod
Expand All @@ -93,12 +73,10 @@ def __init__(self, *args, **kwargs):
opts = self.get_options()
# TODO: These checks are a continuation of the hack that allows tests to pass without
# caring about this subsystem.
self._javac_spec = getattr(opts, 'javac', None)
self._javac_exists = None
self._javac_spec = getattr(opts, 'javac', self._default_javac_spec)
self._plugin_dependency_specs = [
Address.parse(spec).spec for spec in getattr(opts, 'compiler_plugin_deps', [])
]
self._tools_jar_spec = '//:tools-jar-synthetic'

def javac_classpath(self, products):
return self.tool_classpath_from_products(products, 'javac', self.options_scope)
5 changes: 1 addition & 4 deletions src/python/pants/backend/jvm/targets/javac_plugin.py
Expand Up @@ -34,7 +34,4 @@ def compute_dependency_specs(cls, kwargs=None, payload=None):
for spec in super(JavacPlugin, cls).compute_dependency_specs(kwargs, payload):
yield spec

yield (
Java.global_instance().injectables_spec_for_key('javac') or
Java.global_instance().injectables_spec_for_key('tools.jar')
)
yield Java.global_instance().injectables_spec_for_key('javac')
92 changes: 80 additions & 12 deletions src/python/pants/engine/legacy/change_calculator.py
Expand Up @@ -5,17 +5,87 @@
from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
unicode_literals, with_statement)

import itertools
import logging
from collections import defaultdict

from pants.base.specs import DescendantAddresses
from pants.engine.legacy.graph import LegacyBuildGraph
from pants.build_graph.address import Address
from pants.engine.legacy.graph import HydratedTargets, target_types_from_symbol_table
from pants.engine.legacy.source_mapper import EngineSourceMapper
from pants.scm.change_calculator import ChangeCalculator


logger = logging.getLogger(__name__)


class _HydratedTargetDependentGraph(object):
"""A graph for walking dependent addresses of HydratedTarget objects.

This avoids/imitates constructing a v1 BuildGraph object, because that codepath results
in many references held in mutable global state (ie, memory leaks).

The long term goal is to deprecate the `changed` goal in favor of sufficiently good cache
hit rates, such that rather than running:

./pants --changed-parent=master test

...you would always be able to run:

./pants test ::

...and have it complete in a similar amount of time by hitting relevant caches.
"""

@classmethod
def from_iterable(cls, target_types, iterable):
"""Create a new HydratedTargetDependentGraph from an iterable of HydratedTarget instances."""
inst = cls(target_types)
for hydrated_target in iterable:
inst.inject_target(hydrated_target)
return inst

def __init__(self, target_types):
self._dependent_address_map = defaultdict(set)
self._target_types = target_types

def inject_target(self, hydrated_target):
"""Inject a target, respecting all sources of dependencies."""
target_cls = self._target_types[hydrated_target.adaptor.type_alias]

declared_deps = hydrated_target.dependencies
implicit_deps = (Address.parse(s) for s in target_cls.compute_dependency_specs(kwargs=hydrated_target.adaptor.kwargs()))

for dep in itertools.chain(declared_deps, implicit_deps):
self._dependent_address_map[dep].add(hydrated_target.adaptor.address)

def dependents_of_addresses(self, addresses):
"""Given an iterable of addresses, yield all of those addresses dependents."""
seen = set(addresses)
for address in addresses:
for dependent_address in self._dependent_address_map[address]:
if dependent_address not in seen:
seen.add(dependent_address)
yield dependent_address

def transitive_dependents_of_addresses(self, addresses):
"""Given an iterable of addresses, yield all of those addresses dependents, transitively."""
addresses_to_visit = set(addresses)
while 1:
dependents = set(self.dependents_of_addresses(addresses))
# If we've exhausted all dependencies or visited all remaining nodes, break.
if (not dependents) or dependents.issubset(addresses_to_visit):
break
addresses = dependents.difference(addresses_to_visit)
addresses_to_visit.update(dependents)

transitive_set = itertools.chain(
*(self._dependent_address_map[address] for address in addresses_to_visit)
)
for dep in transitive_set:
yield dep

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seems like it's possible for this to emit duplicate entries. would it make sense to dedupe either here or in the callsite below (in the if changed_request.include_dependees == 'direct' block)?


class EngineChangeCalculator(ChangeCalculator):
"""A ChangeCalculator variant that uses the v2 engine for source mapping."""

Expand Down Expand Up @@ -46,20 +116,18 @@ def iter_changed_target_addresses(self, changed_request):
return

# For dependee finding, we need to parse all build files.
graph = LegacyBuildGraph.create(self._scheduler, self._symbol_table)
for _ in graph.inject_specs_closure([DescendantAddresses('')]):
pass
product_iter = (t
for targets in self._scheduler.product_request(HydratedTargets, [DescendantAddresses('')])
for t in targets.dependencies)
graph = _HydratedTargetDependentGraph.from_iterable(target_types_from_symbol_table(self._symbol_table),
product_iter)

if changed_request.include_dependees == 'direct':
emitted = set()
for address in changed_addresses:
for dependee in graph.dependents_of(address):
if dependee not in emitted:
emitted.add(dependee)
yield dependee
for address in graph.dependents_of_addresses(changed_addresses):
yield address
elif changed_request.include_dependees == 'transitive':
for target in graph.transitive_dependees_of_addresses(changed_addresses):
yield target.address
for address in graph.transitive_dependents_of_addresses(changed_addresses):
yield address

def changed_target_addresses(self, changed_request):
return list(self.iter_changed_target_addresses(changed_request))
21 changes: 11 additions & 10 deletions src/python/pants/engine/legacy/graph.py
Expand Up @@ -33,6 +33,16 @@
logger = logging.getLogger(__name__)


def target_types_from_symbol_table(symbol_table):
"""Given a LegacySymbolTable, return the concrete target types constructed for each alias."""
aliases = symbol_table.aliases()
target_types = dict(aliases.target_types)
for alias, factory in aliases.target_macro_factories.items():
target_type, = factory.target_types
target_types[alias] = target_type
return target_types


class _DestWrapper(datatype('DestWrapper', ['target_types'])):
"""A wrapper for dest field of RemoteSources target.

Expand All @@ -52,7 +62,7 @@ class InvalidCommandLineSpecError(AddressLookupError):
@classmethod
def create(cls, scheduler, symbol_table):
"""Construct a graph given a Scheduler, Engine, and a SymbolTable class."""
return cls(scheduler, cls._get_target_types(symbol_table))
return cls(scheduler, target_types_from_symbol_table(symbol_table))

def __init__(self, scheduler, target_types):
"""Construct a graph given a Scheduler, Engine, and a SymbolTable class.
Expand All @@ -69,15 +79,6 @@ def clone_new(self):
"""Returns a new BuildGraph instance of the same type and with the same __init__ params."""
return LegacyBuildGraph(self._scheduler, self._target_types)

@staticmethod
def _get_target_types(symbol_table):
aliases = symbol_table.aliases()
target_types = dict(aliases.target_types)
for alias, factory in aliases.target_macro_factories.items():
target_type, = factory.target_types
target_types[alias] = target_type
return target_types

def _index(self, roots):
"""Index from the given roots into the storage provided by the base class.

Expand Down