Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cloc uses v2 process execution #5840

Merged
merged 4 commits into from
May 18, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 57 additions & 48 deletions src/python/pants/backend/graph_info/tasks/cloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
import os

from pants.backend.graph_info.subsystems.cloc_binary import ClocBinary
from pants.base.build_environment import get_buildroot
from pants.base.exceptions import TaskError
from pants.base.workunit import WorkUnitLabel
from pants.engine.fs import FilesContent, PathGlobs, PathGlobsAndRoot, Snapshot
from pants.engine.isolated_process import ExecuteProcessRequest
from pants.task.console_task import ConsoleTask
from pants.util.contextutil import temporary_dir
from pants.util.process_handler import subprocess


class CountLinesOfCode(ConsoleTask):
Expand All @@ -32,59 +32,68 @@ def register_options(cls, register):
register('--ignored', type=bool, fingerprint=True,
help='Show information about files ignored by cloc.')

def _get_cloc_script(self):
return ClocBinary.global_instance().select(self.context)

def console_output(self, targets):
if not self.get_options().transitive:
targets = self.context.target_roots

buildroot = get_buildroot()
# TODO: Work out a nice library-like utility for writing an argfile, as this will be common.
with temporary_dir() as tmpdir:
# Write the paths of all files we want cloc to process to the so-called 'list file'.
# TODO: 1) list_file, report_file and ignored_file should be relative files within the
# execution "chroot", 2) list_file should be part of an input files Snapshot, and
# 3) report_file and ignored_file should be part of an output files Snapshot, when we have
# that capability.
list_file = os.path.join(tmpdir, 'list_file')
list_file = os.path.join(tmpdir, 'input_files_list')
input_files = set()
with open(list_file, 'w') as list_file_out:
for target in targets:
for source in target.sources_relative_to_buildroot():
list_file_out.write(os.path.join(buildroot, source))
input_files.add(source)
list_file_out.write(source)
list_file_out.write(b'\n')
list_file_snapshot = self.context._scheduler.capture_snapshots((
PathGlobsAndRoot(
PathGlobs(('input_files_list',), ()),
str(tmpdir),
),
))[0]

cloc_path, cloc_snapshot = ClocBinary.global_instance().hackily_snapshot(self.context)

# TODO: This should use an input file snapshot which should be provided on the Target object,
# rather than hackily re-snapshotting each of the input files.
# See https://github.com/pantsbuild/pants/issues/5762
input_pathglobs = PathGlobs(tuple(input_files), ())
input_snapshot = self.context._scheduler.product_request(Snapshot, [input_pathglobs])[0]

directory_digest = self.context._scheduler.merge_directories((
Copy link
Sponsor Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could refer to #5502 here and mention moving more of this into @rules.

cloc_snapshot.directory_digest,
input_snapshot.directory_digest,
list_file_snapshot.directory_digest,
))

cmd = (
'/usr/bin/perl',
cloc_path,
'--skip-uniqueness',
'--ignored=ignored',
'--list-file=input_files_list',
'--report-file=report',
)

# The cloc script reaches into $PATH to look up perl. Let's assume it's in /usr/bin.
req = ExecuteProcessRequest(cmd, (), directory_digest, ('ignored', 'report'))
exec_result = self.context.execute_process_synchronously(req, 'cloc', (WorkUnitLabel.TOOL,))

# TODO: Remove this check when https://github.com/pantsbuild/pants/issues/5719 is resolved.
if exec_result.exit_code != 0:
raise TaskError('{} ... exited non-zero ({}).'.format(' '.join(cmd), exec_result.exit_code))

files_content_tuple = self.context._scheduler.product_request(
FilesContent,
[exec_result.output_directory_digest]
)[0].dependencies

files_content = {fc.path: fc.content for fc in files_content_tuple}
for line in files_content['report'].split('\n'):
yield line

report_file = os.path.join(tmpdir, 'report_file')
ignored_file = os.path.join(tmpdir, 'ignored')

# TODO: Look at how to make BinaryUtil support Snapshots - such as adding an intrinsic to do
# network fetch directly into a Snapshot.
# See http://cloc.sourceforge.net/#options for cloc cmd-line options.
cmd = (
self._get_cloc_script(),
'--skip-uniqueness',
'--ignored={}'.format(ignored_file),
'--list-file={}'.format(list_file),
'--report-file={}'.format(report_file)
)
with self.context.new_workunit(
name='cloc',
labels=[WorkUnitLabel.TOOL],
cmd=' '.join(cmd)) as workunit:
exit_code = subprocess.call(
cmd,
stdout=workunit.output('stdout'),
stderr=workunit.output('stderr')
)

if exit_code != 0:
raise TaskError('{} ... exited non-zero ({}).'.format(' '.join(cmd), exit_code))

with open(report_file, 'r') as report_file_in:
for line in report_file_in.read().split('\n'):
yield line

if self.get_options().ignored:
yield 'Ignored the following files:'
with open(ignored_file, 'r') as ignored_file_in:
for line in ignored_file_in.read().split('\n'):
yield line
if self.get_options().ignored:
yield 'Ignored the following files:'
for line in files_content['ignored'].split('\n'):
yield line
2 changes: 2 additions & 0 deletions src/python/pants/bin/engine_initializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pants.base.specs import Specs
from pants.engine.build_files import create_graph_rules
from pants.engine.fs import create_fs_rules
from pants.engine.isolated_process import create_process_rules
from pants.engine.legacy.address_mapper import LegacyAddressMapper
from pants.engine.legacy.graph import (LegacyBuildGraph, TransitiveHydratedTargets,
create_legacy_graph_tasks)
Expand Down Expand Up @@ -188,6 +189,7 @@ def setup_legacy_graph(pants_ignore_patterns,
rules = (
create_legacy_graph_tasks(symbol_table) +
create_fs_rules() +
create_process_rules() +
create_graph_rules(address_mapper, symbol_table) +
rules
)
Expand Down
2 changes: 2 additions & 0 deletions src/python/pants/binaries/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ python_library(
'src/python/pants/base:build_environment',
'src/python/pants/base:deprecated',
'src/python/pants/base:exceptions',
'src/python/pants/engine:fs',
'src/python/pants/engine:isolated_process',
'src/python/pants/fs',
'src/python/pants/net',
'src/python/pants/option',
Expand Down
2 changes: 1 addition & 1 deletion src/python/pants/binaries/binary_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def hackily_snapshot(self, context):
snapshot = context._scheduler.capture_snapshots((
PathGlobsAndRoot(
PathGlobs((script_relpath,), ()),
bootstrapdir,
str(bootstrapdir),
),
))[0]
return (script_relpath, snapshot)
Expand Down
8 changes: 8 additions & 0 deletions src/python/pants/engine/isolated_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import logging

from pants.engine.fs import EMPTY_SNAPSHOT, DirectoryDigest
from pants.engine.rules import RootRule
from pants.util.objects import TypeCheckError, datatype


Expand Down Expand Up @@ -50,3 +51,10 @@ def _verify_env_is_dict(cls, env):

class ExecuteProcessResult(datatype(['stdout', 'stderr', 'exit_code', 'output_directory_digest'])):
    """Outcome of one process execution.

    Carries four fields: `stdout`, `stderr`, `exit_code` and `output_directory_digest`.
    """


def create_process_rules():
    """Creates the rules needed to request process execution from the engine.

    Registers ExecuteProcessRequest as a root rule.
    NOTE(review): presumably this is what lets callers hand an ExecuteProcessRequest
    directly to the scheduler as the subject of a product request -- confirm against
    the engine's RootRule semantics.

    :return: A list containing the single RootRule for ExecuteProcessRequest.
    """
    return [
        RootRule(ExecuteProcessRequest),
    ]
21 changes: 21 additions & 0 deletions src/python/pants/goal/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from pants.base.worker_pool import SubprocPool
from pants.base.workunit import WorkUnitLabel
from pants.build_graph.target import Target
from pants.engine.isolated_process import ExecuteProcessResult
from pants.goal.products import Products
from pants.goal.workspace import ScmWorkspace
from pants.process.lock import OwnerPrintingInterProcessFileLock
Expand Down Expand Up @@ -377,3 +378,23 @@ def scan(self, root=None):
for address in self.address_mapper.scan_addresses(root):
build_graph.inject_address_closure(address)
return build_graph

def execute_process_synchronously(self, execute_process_request, name, labels):
    """Executes a process (possibly remotely) inside a workunit and returns its result.

    The request is passed to the scheduler as a product request for an
    ExecuteProcessResult, and the result's stdout and stderr are written to the
    workunit's "stdout" and "stderr" outputs.

    :param execute_process_request: The ExecuteProcessRequest to run.
    :param name: A descriptive name representing the process being executed.
    :param labels: A tuple of WorkUnitLabels.
    :return: An ExecuteProcessResult with information about the execution.

    Note that this is an unstable, experimental API, which is subject to change with no notice.
    """
    # The workunit records the command line as a single space-joined string.
    command_line = ' '.join(execute_process_request.argv)
    with self.new_workunit(name=name, labels=labels, cmd=command_line) as workunit:
        products = self._scheduler.product_request(
            ExecuteProcessResult,
            [execute_process_request],
        )
        # One subject was requested, so the first product is the one we want.
        process_result = products[0]
        workunit.output("stdout").write(process_result.stdout)
        workunit.output("stderr").write(process_result.stderr)
        return process_result
10 changes: 10 additions & 0 deletions tests/python/pants_test/backend/graph_info/tasks/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,20 @@ python_tests(
'src/python/pants/backend/jvm/targets:java',
'src/python/pants/backend/python/targets:python',
'src/python/pants/base:build_environment',
'tests/python/pants_test/engine:scheduler_test_base',
'tests/python/pants_test/tasks:task_test_base',
]
)

python_tests(
name='cloc_integration',
sources=['test_cloc_integration.py'],
dependencies=[
'tests/python/pants_test:int-test',
],
tags = {'integration'},
)

python_tests(
name = 'dependees',
sources = ['test_dependees.py'],
Expand Down
36 changes: 30 additions & 6 deletions tests/python/pants_test/backend/graph_info/tasks/test_cloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,14 @@
from pants.backend.graph_info.tasks.cloc import CountLinesOfCode
from pants.backend.jvm.targets.java_library import JavaLibrary
from pants.backend.python.targets.python_library import PythonLibrary
from pants.base.build_environment import get_buildroot
from pants.base.file_system_project_tree import FileSystemProjectTree
from pants.engine.fs import create_fs_rules
from pants.engine.isolated_process import create_process_rules
from pants_test.engine.scheduler_test_base import SchedulerTestBase
from pants_test.tasks.task_test_base import ConsoleTaskTestBase


class ClocTest(ConsoleTaskTestBase):
class ClocTest(ConsoleTaskTestBase, SchedulerTestBase):
@classmethod
def task_type(cls):
return CountLinesOfCode
Expand Down Expand Up @@ -40,11 +43,21 @@ def assert_counts(res, lang, files, blank, comment, code):
return
self.fail('Found no output line for {}'.format(lang))

res = self.execute_console_task(targets=[py_tgt, java_tgt], options={'transitive': True})
scheduler = self.mk_configured_scheduler()

res = self.execute_console_task(
targets=[py_tgt, java_tgt],
options={'transitive': True},
scheduler=scheduler,
)
assert_counts(res, 'Python', files=3, blank=2, comment=3, code=3)
assert_counts(res, 'Java', files=1, blank=0, comment=1, code=1)

res = self.execute_console_task(targets=[py_tgt, java_tgt], options={'transitive': False})
res = self.execute_console_task(
targets=[py_tgt, java_tgt],
options={'transitive': False},
scheduler=scheduler,
)
assert_counts(res, 'Python', files=2, blank=2, comment=3, code=2)
assert_counts(res, 'Java', files=1, blank=0, comment=1, code=1)

Expand All @@ -53,7 +66,18 @@ def test_ignored(self):
self.create_file('src/py/foo/foo.py', 'print("some code")')
self.create_file('src/py/foo/empty.py', '')

res = self.execute_console_task(targets=[py_tgt], options={'ignored': True})
res = self.execute_console_task(
targets=[py_tgt],
options={'ignored': True},
scheduler=self.mk_configured_scheduler(),
)
self.assertEquals(['Ignored the following files:',
'{}/src/py/foo/empty.py: zero sized file'.format(get_buildroot())],
'src/py/foo/empty.py: zero sized file'],
filter(None, res)[-2:])

def mk_configured_scheduler(self):
    """Builds a scheduler for these tests.

    The scheduler gets the filesystem rules plus the process-execution rules, a
    project tree rooted at the test build root, and the test's pants workdir.
    """
    scheduler_rules = create_fs_rules() + create_process_rules()
    tree = FileSystemProjectTree(self.build_root)
    return self.mk_scheduler(
        rules=scheduler_rules,
        project_tree=tree,
        work_dir=self.pants_workdir,
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# coding=utf-8
# Copyright 2018 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
unicode_literals, with_statement)

from pants_test.pants_run_integration_test import PantsRunIntegrationTest


class ClocIntegrationTest(PantsRunIntegrationTest):
# Integration test: runs the `cloc` goal through a full pants invocation and checks its output.
def test_cloc(self):
# Run `cloc` against a single test project target; the run must succeed.
pants_run = self.run_pants([
'cloc',
'testprojects/src/python/python_targets:test_library',
])
self.assert_success(pants_run)
# Strip out the header which is non-deterministic because it has speed information in it.
stdout = str('\n'.join(pants_run.stdout_data.split('\n')[1:]))
# Everything after the first line must match the expected report table exactly.
self.assertEquals(stdout, str("""-------------------------------------------------------------------------------
Language files blank comment code
-------------------------------------------------------------------------------
Python 1 3 3 4
-------------------------------------------------------------------------------

"""))
9 changes: 7 additions & 2 deletions tests/python/pants_test/tasks/task_test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def execute_task(self, targets=None, options=None):
return output.getvalue()

def execute_console_task(self, targets=None, extra_targets=None, options=None,
passthru_args=None, workspace=None):
passthru_args=None, workspace=None, scheduler=None):
"""Creates a new task and executes it with the given config, command line args and targets.

:API: public
Expand All @@ -214,7 +214,12 @@ def execute_console_task(self, targets=None, extra_targets=None, options=None,
"""
options = options or {}
self.set_options(**options)
context = self.context(target_roots=targets, passthru_args=passthru_args, workspace=workspace)
context = self.context(
target_roots=targets,
passthru_args=passthru_args,
workspace=workspace,
scheduler=scheduler
)
return self.execute_console_task_given_context(context, extra_targets=extra_targets)

def execute_console_task_given_context(self, context, extra_targets=None):
Expand Down