pantsbuild · Eric-Arellano · Jan 18, 2022 · Jan 15, 2022 · Jan 15, 2022 · Jan 18, 2022
diff --git a/src/python/pants/backend/python/dependency_inference/parse_python_imports.py b/src/python/pants/backend/python/dependency_inference/parse_python_imports.py
@@ -17,11 +17,15 @@
 from pants.util.logging import LogLevel
 
 
-class ParsedPythonImports(FrozenDict[str, int]):
-    """All the discovered imports from a Python source file mapped to the first line they appear.
+@dataclass(frozen=True)
+class ParsedPythonImportInfo:
+    lineno: int
+    string: bool
+
 
-    May include string imports if the request specified to include them.
-    """
+class ParsedPythonImports(FrozenDict[str, ParsedPythonImportInfo]):
+    """All the discovered imports from a Python source file mapped to the first line they appear and
+    whether they are a string import."""
 
 
 @dataclass(frozen=True)
@@ -68,8 +72,10 @@ async def parse_python_imports(request: ParsePythonImportsRequest) -> ParsedPyth
     )
     # See above for where we explicitly encoded as utf8. Even though utf8 is the
     # default for decode(), we make that explicit here for emphasis.
-    process_output = process_result.stdout.decode("utf8")
-    return ParsedPythonImports(json.loads(process_output) if process_output else {})
+    process_output = process_result.stdout.decode("utf8") or "{}"
+    return ParsedPythonImports(
+        {imp: ParsedPythonImportInfo(**info) for imp, info in json.loads(process_output).items()}
+    )
 
 
 def rules():

diff --git a/src/python/pants/backend/python/dependency_inference/parse_python_imports_test.py b/src/python/pants/backend/python/dependency_inference/parse_python_imports_test.py
@@ -42,7 +42,7 @@ def assert_imports_parsed(
     rule_runner: RuleRunner,
     content: str,
     *,
-    expected: dict[str, int],
+    expected: dict[str, tuple[int, bool]],
     filename: str = "project/foo.py",
     constraints: str = ">=3.6",
     string_imports: bool = True,
@@ -67,7 +67,9 @@ def assert_imports_parsed(
             )
         ],
     )
-    assert dict(imports) == expected
+    assert {
+        module_name: (info.lineno, info.string) for module_name, info in imports.items()
+    } == expected
 
 
 def test_normal_imports(rule_runner: RuleRunner) -> None:
@@ -125,23 +127,23 @@ def test_normal_imports(rule_runner: RuleRunner) -> None:
         rule_runner,
         content,
         expected={
-            "__future__.print_function": 1,
-            "os": 3,
-            "os.path": 5,
-            "typing.TYPE_CHECKING": 6,
-            "requests": 8,
-            "demo": 10,
-            "project.demo.Demo": 11,
-            "project.demo.OriginalName": 12,
-            "multiline_import1.not_ignored1": 16,
-            "multiline_import1.not_ignored2": 23,
-            "multiline_import2.not_ignored": 26,
-            "project.circular_dep.CircularDep": 29,
-            "subprocess": 32,
-            "subprocess23": 34,
-            "pkg_resources": 36,
-            "not_ignored_but_looks_like_it_could_be": 39,
-            "also_not_ignored_but_looks_like_it_could_be": 45,
+            "__future__.print_function": (1, False),
+            "os": (3, False),
+            "os.path": (5, False),
+            "typing.TYPE_CHECKING": (6, False),
+            "requests": (8, False),
+            "demo": (10, False),
+            "project.demo.Demo": (11, False),
+            "project.demo.OriginalName": (12, False),
+            "multiline_import1.not_ignored1": (16, False),
+            "multiline_import1.not_ignored2": (23, False),
+            "multiline_import2.not_ignored": (26, False),
+            "project.circular_dep.CircularDep": (29, False),
+            "subprocess": (32, False),
+            "subprocess23": (34, False),
+            "pkg_resources": (36, False),
+            "not_ignored_but_looks_like_it_could_be": (39, False),
+            "also_not_ignored_but_looks_like_it_could_be": (45, False),
         },
     )
 
@@ -165,12 +167,12 @@ def test_relative_imports(rule_runner: RuleRunner, basename: str) -> None:
         content,
         filename=f"project/util/{basename}",
         expected={
-            "project.util.sibling": 1,
-            "project.util.sibling.Nibling": 2,
-            "project.util.subdir.child.Child": 3,
-            "project.parent.Parent": 4,
-            "project.parent.Parent1": 6,
-            "project.parent.Guardian": 7,
+            "project.util.sibling": (1, False),
+            "project.util.sibling.Nibling": (2, False),
+            "project.util.subdir.child.Child": (3, False),
+            "project.parent.Parent": (4, False),
+            "project.parent.Parent1": (6, False),
+            "project.parent.Guardian": (7, False),
         },
     )
 
@@ -210,24 +212,30 @@ def test_imports_from_strings(rule_runner: RuleRunner, min_dots: int) -> None:
     )
 
     potentially_valid = {
-        "a.b": 3,
-        "a.Foo": 4,
-        "a.b.d": 5,
-        "a.b2.d": 6,
-        "a.b.c.Foo": 7,
-        "a.b.c.d.Foo": 8,
-        "a.b.c.d.FooBar": 9,
-        "a.b.c.d.e.f.g.Baz": 10,
-        "a.b_c.d._bar": 11,
-        "a.b2.c.D": 12,
-        "a.b.c_狗": 13,
+        "a.b": (3, True),
+        "a.Foo": (4, True),
+        "a.b.d": (5, True),
+        "a.b2.d": (6, True),
+        "a.b.c.Foo": (7, True),
+        "a.b.c.d.Foo": (8, True),
+        "a.b.c.d.FooBar": (9, True),
+        "a.b.c.d.e.f.g.Baz": (10, True),
+        "a.b_c.d._bar": (11, True),
+        "a.b2.c.D": (12, True),
+        "a.b.c_狗": (13, True),
     }
     expected = {sym: line for sym, line in potentially_valid.items() if sym.count(".") >= min_dots}
 
     assert_imports_parsed(rule_runner, content, expected=expected, string_imports_min_dots=min_dots)
     assert_imports_parsed(rule_runner, content, string_imports=False, expected={})
 
 
+def test_real_import_beats_string_import(rule_runner: RuleRunner) -> None:
+    assert_imports_parsed(
+        rule_runner, "import one.two.three; 'one.two.three'", expected={"one.two.three": (1, False)}
+    )
+
+
 def test_gracefully_handle_syntax_errors(rule_runner: RuleRunner) -> None:
     assert_imports_parsed(rule_runner, "x =", expected={})
 
@@ -261,13 +269,13 @@ def test_works_with_python2(rule_runner: RuleRunner) -> None:
         content,
         constraints="==2.7.*",
         expected={
-            "demo": 4,
-            "project.demo.Demo": 5,
-            "pkg_resources": 7,
-            "treat.as.a.regular.import.not.a.string.import": 8,
-            "dep.from.bytes": 10,
-            "dep.from.str": 11,
-            "dep.from.str_狗": 12,
+            "demo": (4, False),
+            "project.demo.Demo": (5, False),
+            "pkg_resources": (7, False),
+            "treat.as.a.regular.import.not.a.string.import": (8, False),
+            "dep.from.bytes": (10, True),
+            "dep.from.str": (11, True),
+            "dep.from.str_狗": (12, True),
         },
     )
 
@@ -294,11 +302,11 @@ def test_works_with_python38(rule_runner: RuleRunner) -> None:
         content,
         constraints=">=3.8",
         expected={
-            "demo": 5,
-            "project.demo.Demo": 6,
-            "pkg_resources": 8,
-            "treat.as.a.regular.import.not.a.string.import": 9,
-            "dep.from.str": 11,
+            "demo": (5, False),
+            "project.demo.Demo": (6, False),
+            "pkg_resources": (8, False),
+            "treat.as.a.regular.import.not.a.string.import": (9, False),
+            "dep.from.str": (11, True),
         },
     )
 
@@ -327,10 +335,10 @@ def test_works_with_python39(rule_runner: RuleRunner) -> None:
         content,
         constraints=">=3.9",
         expected={
-            "demo": 7,
-            "project.demo.Demo": 8,
-            "pkg_resources": 10,
-            "treat.as.a.regular.import.not.a.string.import": 11,
-            "dep.from.str": 13,
+            "demo": (7, False),
+            "project.demo.Demo": (8, False),
+            "pkg_resources": (10, False),
+            "treat.as.a.regular.import.not.a.string.import": (11, False),
+            "dep.from.str": (13, True),
         },
     )
diff --git a/src/python/pants/backend/python/dependency_inference/rules.py b/src/python/pants/backend/python/dependency_inference/rules.py
@@ -172,7 +172,7 @@ async def infer_python_dependencies_via_imports(
         return InferredDependencies([])
 
     wrapped_tgt = await Get(WrappedTarget, Address, request.sources_field.address)
-    explicitly_provided_deps, detected_imports = await MultiGet(
+    explicitly_provided_deps, parsed_imports = await MultiGet(
         Get(ExplicitlyProvidedDependencies, DependenciesRequest(wrapped_tgt.target[Dependencies])),
         Get(
             ParsedPythonImports,
@@ -186,14 +186,13 @@ async def infer_python_dependencies_via_imports(
     )
 
     owners_per_import = await MultiGet(
-        Get(PythonModuleOwners, PythonModule(imported_module))
-        for imported_module in detected_imports.keys()
+        Get(PythonModuleOwners, PythonModule(imported_module)) for imported_module in parsed_imports
     )
 
     merged_result: set[Address] = set()
     unowned_imports: set[str] = set()
     address = wrapped_tgt.target.address
-    for owners, imp in zip(owners_per_import, detected_imports):
+    for owners, imp in zip(owners_per_import, parsed_imports):
         merged_result.update(owners.unambiguous)
         explicitly_provided_deps.maybe_warn_of_ambiguous_dependency_inference(
             owners.ambiguous,
@@ -205,13 +204,17 @@ async def infer_python_dependencies_via_imports(
         if maybe_disambiguated:
             merged_result.add(maybe_disambiguated)
 
-        if not owners.unambiguous and imp.split(".")[0] not in DEFAULT_UNOWNED_DEPENDENCIES:
+        if (
+            not owners.unambiguous
+            and imp.split(".")[0] not in DEFAULT_UNOWNED_DEPENDENCIES
+            and not parsed_imports[imp].string
+        ):
             unowned_imports.add(imp)
 
     unowned_dependency_behavior = python_infer_subsystem.unowned_dependency_behavior
     if unowned_imports and unowned_dependency_behavior is not UnownedDependencyUsage.DoNothing:
         unowned_imports_with_lines = [
-            f"{module_name} ({request.sources_field.file_path}:{detected_imports[module_name]})"
+            f"{module_name} ({request.sources_field.file_path}:{parsed_imports[module_name].lineno})"
             for module_name in sorted(unowned_imports)
         ]
         raise_error = unowned_dependency_behavior is UnownedDependencyUsage.RaiseError

diff --git a/src/python/pants/backend/python/dependency_inference/rules_test.py b/src/python/pants/backend/python/dependency_inference/rules_test.py
@@ -274,7 +274,12 @@ def test_infer_python_strict(caplog) -> None:
 
     rule_runner.write_files(
         {
-            "src/python/cheesey.py": "import venezuelan_beaver_cheese",
+            "src/python/cheesey.py": dedent(
+                """\
+                    import venezuelan_beaver_cheese
+                    "japanese.sage.derby"
+                """
+            ),
             "src/python/BUILD": "python_sources()",
         }
     )
@@ -286,6 +291,7 @@ def run_dep_inference(
         rule_runner.set_options(
             [
                 f"--python-infer-unowned-dependency-behavior={unowned_dependency_behavior}",
+                "--python-infer-string-imports",
                 "--source-root-patterns=src/python",
             ],
             env_inherit={"PATH", "PYENV_ROOT", "HOME"},
@@ -301,6 +307,7 @@ def run_dep_inference(
     assert len(caplog.records) == 1
     assert "The following imports in src/python/cheesey.py have no owners:" in caplog.text
     assert "  * venezuelan_beaver_cheese (src/python/cheesey.py:1)" in caplog.text
+    assert "japanese.sage.derby" not in caplog.text
 
     # Now test with "error"
     caplog.clear()
@@ -311,6 +318,7 @@ def run_dep_inference(
     assert len(caplog.records) == 2  # one for the error being raised and one for our message
     assert "The following imports in src/python/cheesey.py have no owners:" in caplog.text
     assert "  * venezuelan_beaver_cheese (src/python/cheesey.py:1)" in caplog.text
+    assert "japanese.sage.derby" not in caplog.text
 
     caplog.clear()
 

diff --git a/src/python/pants/backend/python/dependency_inference/scripts/import_parser.py b/src/python/pants/backend/python/dependency_inference/scripts/import_parser.py
@@ -31,12 +31,15 @@ class AstVisitor(ast.NodeVisitor):
     def __init__(self, package_parts, contents):
         self._package_parts = package_parts
         self._contents_lines = contents.decode(errors="ignore").splitlines()
+
+        # Each of these maps module_name to first lineno of occurance
         # N.B. use `setdefault` when adding imports
-        self.imports = {}  # maps module_name to first lineno of occurance
+        self.definite_imports = {}
+        self.string_imports = {}
 
     def maybe_add_string_import(self, node, s):
-        if STRING_IMPORT_REGEX.match(s):
-            self.imports.setdefault(s, node.lineno)
+        if os.environ["STRING_IMPORTS"] == "y" and STRING_IMPORT_REGEX.match(s):
+            self.string_imports.setdefault(s, node.lineno)
 
     @staticmethod
     def _is_pragma_ignored(line):
@@ -67,7 +70,9 @@ def consume_until(string):
                 token = next(token_iter)
 
             if not self._is_pragma_ignored(token[4]):
-                self.imports.setdefault(import_prefix + alias.name, token[3][0] + node.lineno - 1)
+                self.definite_imports.setdefault(
+                    import_prefix + alias.name, token[3][0] + node.lineno - 1
+                )
             if alias.asname and token[1] != alias.asname:
                 consume_until(alias.asname)
 
@@ -100,40 +105,23 @@ def visit_Call(self, node):
             if name is not None:
                 lineno = node.args[0].lineno
                 if not self._is_pragma_ignored(self._contents_lines[lineno - 1]):
-                    self.imports.setdefault(name, lineno)
+                    self.definite_imports.setdefault(name, lineno)
                 return
 
         self.generic_visit(node)
 
+    # For Python 2.7, and Python3 < 3.8
+    def visit_Str(self, node):
+        try:
+            val = node.s.decode("utf8") if isinstance(node.s, bytes) else node.s
+            self.maybe_add_string_import(node, val)
+        except UnicodeError:
+            pass
 
-if os.environ["STRING_IMPORTS"] == "y":
-    # String handling changes a bit depending on Python version. We dynamically add the appropriate
-    # logic.
-    if sys.version_info[0:2] == (2, 7):
-
-        def visit_Str(self, node):
-            try:
-                val = node.s.decode("utf8") if isinstance(node.s, bytes) else node.s
-                self.maybe_add_string_import(node, val)
-            except UnicodeError:
-                pass
-
-        setattr(AstVisitor, "visit_Str", visit_Str)
-
-    elif sys.version_info[0:2] < (3, 8):
-
-        def visit_Str(self, node):
-            self.maybe_add_string_import(node, node.s)
-
-        setattr(AstVisitor, "visit_Str", visit_Str)
-
-    else:
-
-        def visit_Constant(self, node):
-            if isinstance(node.value, str):
-                self.maybe_add_string_import(node, node.value)
-
-        setattr(AstVisitor, "visit_Constant", visit_Constant)
+    # For Python 3.8+
+    def visit_Constant(self, node):
+        if isinstance(node.value, str):
+            self.maybe_add_string_import(node, node.value)
 
 
 def main(filename):
@@ -151,8 +139,20 @@ def main(filename):
     # We have to be careful to set the encoding explicitly and write raw bytes ourselves.
     # See below for where we explicitly decode.
     buffer = sys.stdout if sys.version_info[0:2] == (2, 7) else sys.stdout.buffer
-    output = json.dumps(visitor.imports)
-    buffer.write(output.encode("utf8"))
+
+    # N.B. Start with string and `update` with definitive so definite "wins"
+    result = {
+        module_name: {"lineno": lineno, "string": True}
+        for module_name, lineno in visitor.string_imports.items()
+    }
+    result.update(
+        {
+            module_name: {"lineno": lineno, "string": False}
+            for module_name, lineno in visitor.definite_imports.items()
+        }
+    )
+
+    buffer.write(json.dumps(result).encode("utf8"))
 
 
 if __name__ == "__main__":