Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't complain about unowned string imports #14179

Merged
merged 4 commits into from Jan 18, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -17,11 +17,15 @@
from pants.util.logging import LogLevel


class ParsedPythonImports(FrozenDict[str, int]):
"""All the discovered imports from a Python source file mapped to the first line they appear.
@dataclass(frozen=True)
class ParsedPythonImportInfo:
lineno: int
string: bool
thejcannon marked this conversation as resolved.
Show resolved Hide resolved


May include string imports if the request specified to include them.
"""
class ParsedPythonImports(FrozenDict[str, ParsedPythonImportInfo]):
"""All the discovered imports from a Python source file mapped to the first line they appear and
whether they are a string import."""


@dataclass(frozen=True)
Expand Down Expand Up @@ -68,8 +72,10 @@ async def parse_python_imports(request: ParsePythonImportsRequest) -> ParsedPyth
)
# See above for where we explicitly encoded as utf8. Even though utf8 is the
# default for decode(), we make that explicit here for emphasis.
process_output = process_result.stdout.decode("utf8")
return ParsedPythonImports(json.loads(process_output) if process_output else {})
process_output = process_result.stdout.decode("utf8") or "{}"
return ParsedPythonImports(
{imp: ParsedPythonImportInfo(**info) for imp, info in json.loads(process_output).items()}
thejcannon marked this conversation as resolved.
Show resolved Hide resolved
)


def rules():
Expand Down
Expand Up @@ -42,7 +42,7 @@ def assert_imports_parsed(
rule_runner: RuleRunner,
content: str,
*,
expected: dict[str, int],
expected: dict[str, tuple[int, bool]],
thejcannon marked this conversation as resolved.
Show resolved Hide resolved
filename: str = "project/foo.py",
constraints: str = ">=3.6",
string_imports: bool = True,
Expand All @@ -67,7 +67,9 @@ def assert_imports_parsed(
)
],
)
assert dict(imports) == expected
assert {
module_name: (info.lineno, info.string) for module_name, info in imports.items()
} == expected


def test_normal_imports(rule_runner: RuleRunner) -> None:
Expand Down Expand Up @@ -125,23 +127,23 @@ def test_normal_imports(rule_runner: RuleRunner) -> None:
rule_runner,
content,
expected={
"__future__.print_function": 1,
"os": 3,
"os.path": 5,
"typing.TYPE_CHECKING": 6,
"requests": 8,
"demo": 10,
"project.demo.Demo": 11,
"project.demo.OriginalName": 12,
"multiline_import1.not_ignored1": 16,
"multiline_import1.not_ignored2": 23,
"multiline_import2.not_ignored": 26,
"project.circular_dep.CircularDep": 29,
"subprocess": 32,
"subprocess23": 34,
"pkg_resources": 36,
"not_ignored_but_looks_like_it_could_be": 39,
"also_not_ignored_but_looks_like_it_could_be": 45,
"__future__.print_function": (1, False),
"os": (3, False),
"os.path": (5, False),
"typing.TYPE_CHECKING": (6, False),
"requests": (8, False),
"demo": (10, False),
"project.demo.Demo": (11, False),
"project.demo.OriginalName": (12, False),
"multiline_import1.not_ignored1": (16, False),
"multiline_import1.not_ignored2": (23, False),
"multiline_import2.not_ignored": (26, False),
"project.circular_dep.CircularDep": (29, False),
"subprocess": (32, False),
"subprocess23": (34, False),
"pkg_resources": (36, False),
"not_ignored_but_looks_like_it_could_be": (39, False),
"also_not_ignored_but_looks_like_it_could_be": (45, False),
},
)

Expand All @@ -165,12 +167,12 @@ def test_relative_imports(rule_runner: RuleRunner, basename: str) -> None:
content,
filename=f"project/util/{basename}",
expected={
"project.util.sibling": 1,
"project.util.sibling.Nibling": 2,
"project.util.subdir.child.Child": 3,
"project.parent.Parent": 4,
"project.parent.Parent1": 6,
"project.parent.Guardian": 7,
"project.util.sibling": (1, False),
"project.util.sibling.Nibling": (2, False),
"project.util.subdir.child.Child": (3, False),
"project.parent.Parent": (4, False),
"project.parent.Parent1": (6, False),
"project.parent.Guardian": (7, False),
},
)

Expand Down Expand Up @@ -210,24 +212,30 @@ def test_imports_from_strings(rule_runner: RuleRunner, min_dots: int) -> None:
)

potentially_valid = {
"a.b": 3,
"a.Foo": 4,
"a.b.d": 5,
"a.b2.d": 6,
"a.b.c.Foo": 7,
"a.b.c.d.Foo": 8,
"a.b.c.d.FooBar": 9,
"a.b.c.d.e.f.g.Baz": 10,
"a.b_c.d._bar": 11,
"a.b2.c.D": 12,
"a.b.c_狗": 13,
"a.b": (3, True),
"a.Foo": (4, True),
"a.b.d": (5, True),
"a.b2.d": (6, True),
"a.b.c.Foo": (7, True),
"a.b.c.d.Foo": (8, True),
"a.b.c.d.FooBar": (9, True),
"a.b.c.d.e.f.g.Baz": (10, True),
"a.b_c.d._bar": (11, True),
"a.b2.c.D": (12, True),
"a.b.c_狗": (13, True),
}
expected = {sym: line for sym, line in potentially_valid.items() if sym.count(".") >= min_dots}

assert_imports_parsed(rule_runner, content, expected=expected, string_imports_min_dots=min_dots)
assert_imports_parsed(rule_runner, content, string_imports=False, expected={})


def test_real_import_beats_string_import(rule_runner: RuleRunner) -> None:
assert_imports_parsed(
rule_runner, "import one.two.three; 'one.two.three'", expected={"one.two.three": (1, False)}
)


def test_gracefully_handle_syntax_errors(rule_runner: RuleRunner) -> None:
assert_imports_parsed(rule_runner, "x =", expected={})

Expand Down Expand Up @@ -261,13 +269,13 @@ def test_works_with_python2(rule_runner: RuleRunner) -> None:
content,
constraints="==2.7.*",
expected={
"demo": 4,
"project.demo.Demo": 5,
"pkg_resources": 7,
"treat.as.a.regular.import.not.a.string.import": 8,
"dep.from.bytes": 10,
"dep.from.str": 11,
"dep.from.str_狗": 12,
"demo": (4, False),
"project.demo.Demo": (5, False),
"pkg_resources": (7, False),
"treat.as.a.regular.import.not.a.string.import": (8, False),
"dep.from.bytes": (10, True),
"dep.from.str": (11, True),
"dep.from.str_狗": (12, True),
},
)

Expand All @@ -294,11 +302,11 @@ def test_works_with_python38(rule_runner: RuleRunner) -> None:
content,
constraints=">=3.8",
expected={
"demo": 5,
"project.demo.Demo": 6,
"pkg_resources": 8,
"treat.as.a.regular.import.not.a.string.import": 9,
"dep.from.str": 11,
"demo": (5, False),
"project.demo.Demo": (6, False),
"pkg_resources": (8, False),
"treat.as.a.regular.import.not.a.string.import": (9, False),
"dep.from.str": (11, True),
},
)

Expand Down Expand Up @@ -327,10 +335,10 @@ def test_works_with_python39(rule_runner: RuleRunner) -> None:
content,
constraints=">=3.9",
expected={
"demo": 7,
"project.demo.Demo": 8,
"pkg_resources": 10,
"treat.as.a.regular.import.not.a.string.import": 11,
"dep.from.str": 13,
"demo": (7, False),
"project.demo.Demo": (8, False),
"pkg_resources": (10, False),
"treat.as.a.regular.import.not.a.string.import": (11, False),
"dep.from.str": (13, True),
},
)
15 changes: 9 additions & 6 deletions src/python/pants/backend/python/dependency_inference/rules.py
Expand Up @@ -172,7 +172,7 @@ async def infer_python_dependencies_via_imports(
return InferredDependencies([])

wrapped_tgt = await Get(WrappedTarget, Address, request.sources_field.address)
explicitly_provided_deps, detected_imports = await MultiGet(
explicitly_provided_deps, parsed_imports = await MultiGet(
Get(ExplicitlyProvidedDependencies, DependenciesRequest(wrapped_tgt.target[Dependencies])),
Get(
ParsedPythonImports,
Expand All @@ -186,14 +186,13 @@ async def infer_python_dependencies_via_imports(
)

owners_per_import = await MultiGet(
Get(PythonModuleOwners, PythonModule(imported_module))
for imported_module in detected_imports.keys()
Get(PythonModuleOwners, PythonModule(imported_module)) for imported_module in parsed_imports
)

merged_result: set[Address] = set()
unowned_imports: set[str] = set()
address = wrapped_tgt.target.address
for owners, imp in zip(owners_per_import, detected_imports):
for owners, imp in zip(owners_per_import, parsed_imports):
merged_result.update(owners.unambiguous)
explicitly_provided_deps.maybe_warn_of_ambiguous_dependency_inference(
owners.ambiguous,
Expand All @@ -205,13 +204,17 @@ async def infer_python_dependencies_via_imports(
if maybe_disambiguated:
merged_result.add(maybe_disambiguated)

if not owners.unambiguous and imp.split(".")[0] not in DEFAULT_UNOWNED_DEPENDENCIES:
if (
not owners.unambiguous
and imp.split(".")[0] not in DEFAULT_UNOWNED_DEPENDENCIES
and not parsed_imports[imp].string
):
unowned_imports.add(imp)

unowned_dependency_behavior = python_infer_subsystem.unowned_dependency_behavior
if unowned_imports and unowned_dependency_behavior is not UnownedDependencyUsage.DoNothing:
unowned_imports_with_lines = [
f"{module_name} ({request.sources_field.file_path}:{detected_imports[module_name]})"
f"{module_name} ({request.sources_field.file_path}:{parsed_imports[module_name].lineno})"
for module_name in sorted(unowned_imports)
]
raise_error = unowned_dependency_behavior is UnownedDependencyUsage.RaiseError
Expand Down
Expand Up @@ -274,7 +274,12 @@ def test_infer_python_strict(caplog) -> None:

rule_runner.write_files(
{
"src/python/cheesey.py": "import venezuelan_beaver_cheese",
"src/python/cheesey.py": dedent(
"""\
import venezuelan_beaver_cheese
"japanese.sage.derby"
"""
),
"src/python/BUILD": "python_sources()",
}
)
Expand All @@ -286,6 +291,7 @@ def run_dep_inference(
rule_runner.set_options(
[
f"--python-infer-unowned-dependency-behavior={unowned_dependency_behavior}",
"--python-infer-string-imports",
"--source-root-patterns=src/python",
],
env_inherit={"PATH", "PYENV_ROOT", "HOME"},
Expand All @@ -301,6 +307,7 @@ def run_dep_inference(
assert len(caplog.records) == 1
assert "The following imports in src/python/cheesey.py have no owners:" in caplog.text
assert " * venezuelan_beaver_cheese (src/python/cheesey.py:1)" in caplog.text
assert "japanese.sage.derby" not in caplog.text

# Now test with "error"
caplog.clear()
Expand All @@ -311,6 +318,7 @@ def run_dep_inference(
assert len(caplog.records) == 2 # one for the error being raised and one for our message
assert "The following imports in src/python/cheesey.py have no owners:" in caplog.text
assert " * venezuelan_beaver_cheese (src/python/cheesey.py:1)" in caplog.text
assert "japanese.sage.derby" not in caplog.text

caplog.clear()

Expand Down
Expand Up @@ -31,12 +31,15 @@ class AstVisitor(ast.NodeVisitor):
def __init__(self, package_parts, contents):
self._package_parts = package_parts
self._contents_lines = contents.decode(errors="ignore").splitlines()

# Each of these maps module_name to first lineno of occurance
# N.B. use `setdefault` when adding imports
self.imports = {} # maps module_name to first lineno of occurance
self.definite_imports = {}
thejcannon marked this conversation as resolved.
Show resolved Hide resolved
self.string_imports = {}

def maybe_add_string_import(self, node, s):
if STRING_IMPORT_REGEX.match(s):
self.imports.setdefault(s, node.lineno)
if os.environ["STRING_IMPORTS"] == "y" and STRING_IMPORT_REGEX.match(s):
self.string_imports.setdefault(s, node.lineno)

@staticmethod
def _is_pragma_ignored(line):
Expand Down Expand Up @@ -67,7 +70,9 @@ def consume_until(string):
token = next(token_iter)

if not self._is_pragma_ignored(token[4]):
self.imports.setdefault(import_prefix + alias.name, token[3][0] + node.lineno - 1)
self.definite_imports.setdefault(
import_prefix + alias.name, token[3][0] + node.lineno - 1
)
if alias.asname and token[1] != alias.asname:
consume_until(alias.asname)

Expand Down Expand Up @@ -100,40 +105,23 @@ def visit_Call(self, node):
if name is not None:
lineno = node.args[0].lineno
if not self._is_pragma_ignored(self._contents_lines[lineno - 1]):
self.imports.setdefault(name, lineno)
self.definite_imports.setdefault(name, lineno)
return

self.generic_visit(node)

# For Python 2.7, and Python3 < 3.8
def visit_Str(self, node):
try:
val = node.s.decode("utf8") if isinstance(node.s, bytes) else node.s
self.maybe_add_string_import(node, val)
except UnicodeError:
pass

if os.environ["STRING_IMPORTS"] == "y":
# String handling changes a bit depending on Python version. We dynamically add the appropriate
# logic.
if sys.version_info[0:2] == (2, 7):

def visit_Str(self, node):
try:
val = node.s.decode("utf8") if isinstance(node.s, bytes) else node.s
self.maybe_add_string_import(node, val)
except UnicodeError:
pass

setattr(AstVisitor, "visit_Str", visit_Str)

elif sys.version_info[0:2] < (3, 8):

def visit_Str(self, node):
self.maybe_add_string_import(node, node.s)

setattr(AstVisitor, "visit_Str", visit_Str)

else:

def visit_Constant(self, node):
if isinstance(node.value, str):
self.maybe_add_string_import(node, node.value)

setattr(AstVisitor, "visit_Constant", visit_Constant)
thejcannon marked this conversation as resolved.
Show resolved Hide resolved
# For Python 3.8+
def visit_Constant(self, node):
if isinstance(node.value, str):
self.maybe_add_string_import(node, node.value)


def main(filename):
Expand All @@ -151,8 +139,20 @@ def main(filename):
# We have to be careful to set the encoding explicitly and write raw bytes ourselves.
# See below for where we explicitly decode.
buffer = sys.stdout if sys.version_info[0:2] == (2, 7) else sys.stdout.buffer
output = json.dumps(visitor.imports)
buffer.write(output.encode("utf8"))

# N.B. Start with string and `update` with definitive so definite "wins"
result = {
module_name: {"lineno": lineno, "string": True}
for module_name, lineno in visitor.string_imports.items()
}
result.update(
{
module_name: {"lineno": lineno, "string": False}
for module_name, lineno in visitor.definite_imports.items()
}
)

buffer.write(json.dumps(result).encode("utf8"))


if __name__ == "__main__":
Expand Down