Skip to content

Commit

Permalink
Add support for .pyi type stubs (including for first-party) (#10759)
Browse files Browse the repository at this point in the history
There are two use cases for writing `.pyi` type stubs:

1) Writing bindings for third party libraries.
2) Writing stubs for your own code because you prefer it over using type hints/comments.

This PR adds support for both use cases. We allow Python targets to now include `.pyi` files, and update the default globs for `python_library` and `python_tests` to include them. 

We also teach dependency inference about `.pyi` files. For third party reqs, we infer a dep on both the req and the `.pyi` file. For first party stubs, we infer a dep on both the `.py` and `.pyi` files.

Most of our Python functionality works correctly without changes for `.pyi` files, but we have to teach Pytest to not try running over `.pyi` files if they show up in `python_tests` targets, just like we special case `conftest.py`. We also update how we call MyPy; if there is a `.pyi` file and `.py` file, the `.pyi` file takes precedence; otherwise, MyPy would error.

[ci skip-rust]
[ci skip-build-wheels]
  • Loading branch information
Eric-Arellano committed Sep 28, 2020
1 parent 5f46286 commit f90b982
Show file tree
Hide file tree
Showing 18 changed files with 483 additions and 184 deletions.
112 changes: 74 additions & 38 deletions src/python/pants/backend/python/dependency_inference/module_mapper.py
@@ -1,9 +1,10 @@
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from collections import defaultdict
from dataclasses import dataclass
from pathlib import PurePath
from typing import Dict, Optional, Set
from typing import DefaultDict, Dict, List, Optional, Set, Tuple, cast

from pants.backend.python.target_types import (
ModuleMappingField,
Expand All @@ -14,6 +15,7 @@
from pants.core.util_rules.source_files import SourceFilesRequest
from pants.core.util_rules.stripped_source_files import StrippedSourceFiles
from pants.engine.addresses import Address
from pants.engine.collection import Collection
from pants.engine.rules import Get, MultiGet, collect_rules, rule
from pants.engine.target import Targets
from pants.util.frozendict import FrozenDict
Expand All @@ -36,30 +38,33 @@ def create_from_stripped_path(cls, path: PurePath) -> "PythonModule":
class FirstPartyModuleToAddressMapping:
"""A mapping of module names to owning addresses.
Most of the addresses will refer to generated subtargets. If a module's owning target has more
than one source file, we will generate a new subtarget that only owns the specific module/file.
For example, if the original target owned 4 source files, there will be 4 generated subtargets,
one per each file. All of the metadata will be copied, except for the `sources` field and
Address.
All mapped addresses will be file addresses, aka generated subtargets. That is, each target
will own no more than one single source file. Its metadata will be copied from the original
base target.
If there are >1 original owning targets for a module, no targets will be recorded for that
module.
If there are >1 original owning targets that refer to the same module—such as `//:a` and `//:b` both owning module
`foo`—then we will not add any of the targets to the mapping because there is ambiguity. (We make an exception if
one target is an implementation (.py file) and the other is a type stub (.pyi file).
"""

mapping: FrozenDict[str, Address]
# The mapping should either have 1 or 2 addresses per module, depending on if there is a type
# stub.
mapping: FrozenDict[str, Tuple[Address, ...]]

def address_for_module(self, module: str) -> Optional[Address]:
target = self.mapping.get(module)
if target is not None:
return target
def addresses_for_module(self, module: str) -> Tuple[Address, ...]:
targets = self.mapping.get(module)
if targets:
return targets
# If the module is not found, try the parent, if any. This is to accommodate `from`
# imports, where we don't care about the specific symbol, but only the module. For example,
# with `from typing import List`, we only care about `typing`.
# Unlike with third party modules, we do not look past the direct parent.
# with `from my_project.app import App`, we only care about the `my_project.app` part.
#
# We do not look past the direct parent, as this could cause multiple ambiguous owners to be resolved. This
# contrasts with the third-party module mapping, which will try every ancestor.
if "." not in module:
return None
return ()
parent_module = module.rsplit(".", maxsplit=1)[0]
return self.mapping.get(parent_module)
return self.mapping.get(parent_module, ())


@rule(desc="Creating map of first party targets to Python modules", level=LogLevel.DEBUG)
Expand All @@ -71,20 +76,36 @@ async def map_first_party_modules_to_addresses() -> FirstPartyModuleToAddressMap
for tgt in candidate_targets
)

modules_to_addresses: Dict[str, Address] = {}
modules_with_multiple_owners: Set[str] = set()
modules_to_addresses: DefaultDict[str, List[Address]] = defaultdict(list)
modules_with_multiple_implementations: Set[str] = set()
for tgt, stripped_sources in zip(candidate_targets, stripped_sources_per_explicit_target):
for stripped_f in stripped_sources.snapshot.files:
module = PythonModule.create_from_stripped_path(PurePath(stripped_f)).module
if module in modules_to_addresses:
modules_with_multiple_owners.add(module)
# We check if one of the targets is an implementation (.py file) and the other is a type stub (.pyi
# file), which we allow. Otherwise, we have ambiguity.
either_targets_are_type_stubs = len(modules_to_addresses[module]) == 1 and (
tgt.address.filename.endswith(".pyi")
or modules_to_addresses[module][0].filename.endswith(".pyi")
)
if either_targets_are_type_stubs:
modules_to_addresses[module].append(tgt.address)
else:
modules_with_multiple_implementations.add(module)
else:
modules_to_addresses[module] = tgt.address
modules_to_addresses[module].append(tgt.address)

# Remove modules with ambiguous owners.
for module in modules_with_multiple_owners:
for module in modules_with_multiple_implementations:
modules_to_addresses.pop(module)
return FirstPartyModuleToAddressMapping(FrozenDict(sorted(modules_to_addresses.items())))
return FirstPartyModuleToAddressMapping(
FrozenDict(
{
module: tuple(sorted(addresses))
for module, addresses in sorted(modules_to_addresses.items())
}
)
)


@dataclass(frozen=True)
Expand All @@ -95,7 +116,7 @@ def address_for_module(self, module: str) -> Optional[Address]:
target = self.mapping.get(module)
if target is not None:
return target
# If the module is not found, try the parent module, if any. For example,
# If the module is not found, recursively try the ancestor modules, if any. For example,
# pants.task.task.Task -> pants.task.task -> pants.task -> pants
if "." not in module:
return None
Expand Down Expand Up @@ -128,30 +149,45 @@ async def map_third_party_modules_to_addresses() -> ThirdPartyModuleToAddressMap
return ThirdPartyModuleToAddressMapping(FrozenDict(sorted(modules_to_addresses.items())))


@dataclass(frozen=True)
class PythonModuleOwner:
"""The target that owns a Python module.
class PythonModuleOwners(Collection[Address]):
"""The target(s) that own a Python module.
If >1 target own the same module, the `address` field should be set to `None` to avoid
ambiguity.
If >1 targets own the same module, and they're implementations (vs .pyi type stubs), the
collection should be empty. The collection should never be > 2.
"""

address: Optional[Address]


@rule
async def map_module_to_address(
module: PythonModule,
first_party_mapping: FirstPartyModuleToAddressMapping,
third_party_mapping: ThirdPartyModuleToAddressMapping,
) -> PythonModuleOwner:
) -> PythonModuleOwners:
third_party_address = third_party_mapping.address_for_module(module.module)
if third_party_address:
return PythonModuleOwner(third_party_address)
first_party_address = first_party_mapping.address_for_module(module.module)
if first_party_address:
return PythonModuleOwner(first_party_address)
return PythonModuleOwner(address=None)
first_party_addresses = first_party_mapping.addresses_for_module(module.module)

# It's possible for a user to write type stubs (`.pyi` files) for their third-party dependencies. We check if that
# happened, but we're strict in validating that there is only a single third party address and a single first-party
# address referring to a `.pyi` file; otherwise, we have ambiguous implementations, so no-op.
third_party_resolved_only = third_party_address and not first_party_addresses
third_party_resolved_with_type_stub = (
third_party_address
and len(first_party_addresses) == 1
and first_party_addresses[0].filename.endswith(".pyi")
)

if third_party_resolved_only:
return PythonModuleOwners([cast(Address, third_party_address)])
if third_party_resolved_with_type_stub:
return PythonModuleOwners([cast(Address, third_party_address), first_party_addresses[0]])
# Else, we have ambiguity between the third-party and first-party addresses.
if third_party_address and first_party_addresses:
return PythonModuleOwners()

# We're done with looking at third-party addresses, and now solely look at first-party.
if first_party_addresses:
return PythonModuleOwners(first_party_addresses)
return PythonModuleOwners()


def rules():
Expand Down
@@ -1,16 +1,16 @@
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from pathlib import PurePath
from pathlib import Path, PurePath
from textwrap import dedent
from typing import Optional
from typing import List

import pytest

from pants.backend.python.dependency_inference.module_mapper import (
FirstPartyModuleToAddressMapping,
PythonModule,
PythonModuleOwner,
PythonModuleOwners,
ThirdPartyModuleToAddressMapping,
)
from pants.backend.python.dependency_inference.module_mapper import rules as module_mapper_rules
Expand Down Expand Up @@ -38,19 +38,19 @@ def test_create_module_from_path(stripped_path: PurePath, expected: str) -> None


def test_first_party_modules_mapping() -> None:
util_addr = Address.parse("src/python/util:strutil")
test_addr = Address.parse("tests/python/project_test:test")
util_addr = Address("src/python/util", relative_file_path="strutil.py")
test_addr = Address("tests/python/project_test", relative_file_path="test.py")
mapping = FirstPartyModuleToAddressMapping(
FrozenDict({"util.strutil": util_addr, "project_test.test": test_addr})
FrozenDict({"util.strutil": (util_addr,), "project_test.test": (test_addr,)})
)
assert mapping.address_for_module("util.strutil") == util_addr
assert mapping.address_for_module("util.strutil.ensure_text") == util_addr
assert mapping.address_for_module("util") is None
assert mapping.address_for_module("project_test.test") == test_addr
assert mapping.address_for_module("project_test.test.TestDemo") == test_addr
assert mapping.address_for_module("project_test.test.TestDemo.method") is None
assert mapping.address_for_module("project_test") is None
assert mapping.address_for_module("project.test") is None
assert mapping.addresses_for_module("util.strutil") == (util_addr,)
assert mapping.addresses_for_module("util.strutil.ensure_text") == (util_addr,)
assert not mapping.addresses_for_module("util")
assert mapping.addresses_for_module("project_test.test") == (test_addr,)
assert mapping.addresses_for_module("project_test.test.TestDemo") == (test_addr,)
assert not mapping.addresses_for_module("project_test.test.TestDemo.method")
assert not mapping.addresses_for_module("project_test")
assert not mapping.addresses_for_module("project.test")


def test_third_party_modules_mapping() -> None:
Expand All @@ -75,7 +75,7 @@ def rule_runner() -> RuleRunner:
*module_mapper_rules(),
QueryRule(FirstPartyModuleToAddressMapping, ()),
QueryRule(ThirdPartyModuleToAddressMapping, ()),
QueryRule(PythonModuleOwner, (PythonModule,)),
QueryRule(PythonModuleOwners, (PythonModule,)),
],
target_types=[PythonLibrary, PythonRequirementLibrary],
)
Expand All @@ -97,23 +97,25 @@ def test_map_first_party_modules_to_addresses(rule_runner: RuleRunner) -> None:
# not generate subtargets.
rule_runner.create_file("tests/python/project_test/demo_test/__init__.py")
rule_runner.add_to_build_file("tests/python/project_test/demo_test", "python_library()")
# A module with both an implementation and a type stub.
rule_runner.create_files("src/python/stubs", ["stub.py", "stub.pyi"])
rule_runner.add_to_build_file("src/python/stubs", "python_library()")

result = rule_runner.request(FirstPartyModuleToAddressMapping, [])
assert result.mapping == FrozenDict(
{
"project.util.dirutil": Address(
"src/python/project/util",
relative_file_path="dirutil.py",
target_name="util",
"project.util.dirutil": (
Address("src/python/project/util", relative_file_path="dirutil.py"),
),
"project.util.tarutil": (
Address("src/python/project/util", relative_file_path="tarutil.py"),
),
"project.util.tarutil": Address(
"src/python/project/util",
relative_file_path="tarutil.py",
target_name="util",
"project_test.demo_test": (
Address("tests/python/project_test/demo_test", relative_file_path="__init__.py"),
),
"project_test.demo_test": Address(
"tests/python/project_test/demo_test",
relative_file_path="__init__.py",
target_name="demo_test",
"stubs.stub": (
Address("src/python/stubs", relative_file_path="stub.py"),
Address("src/python/stubs", relative_file_path="stub.pyi"),
),
}
)
Expand Down Expand Up @@ -164,8 +166,8 @@ def test_map_third_party_modules_to_addresses(rule_runner: RuleRunner) -> None:
def test_map_module_to_address(rule_runner: RuleRunner) -> None:
rule_runner.set_options(["--source-root-patterns=['source_root1', 'source_root2', '/']"])

def get_owner(module: str) -> Optional[Address]:
return rule_runner.request(PythonModuleOwner, [PythonModule(module)]).address
def get_owners(module: str) -> List[Address]:
return list(rule_runner.request(PythonModuleOwners, [PythonModule(module)]))

# First check that we can map 3rd-party modules.
rule_runner.add_to_build_file(
Expand All @@ -180,33 +182,56 @@ def get_owner(module: str) -> Optional[Address]:
"""
),
)
assert get_owner("colors.red") == Address.parse("3rdparty/python:ansicolors")
assert get_owners("colors.red") == [Address("3rdparty/python", target_name="ansicolors")]

# Now test that we can handle first-party type stubs that go along with that third party
# requirement. Note that `colors.pyi` is at the top-level of the source root so that it strips
# to the module `colors`.
rule_runner.create_file("source_root1/colors.pyi")
rule_runner.add_to_build_file("source_root1", "python_library()")
assert get_owners("colors.red") == [
Address("3rdparty/python", target_name="ansicolors"),
Address("source_root1", relative_file_path="colors.pyi"),
]

# But don't allow a first-party implementation with the same module name.
Path(rule_runner.build_root, "source_root1/colors.pyi").unlink()
rule_runner.create_file("source_root1/colors.py")
assert not get_owners("colors.red")

# Check a first party module using a module path.
rule_runner.create_file("source_root1/project/app.py")
rule_runner.create_file("source_root1/project/file2.py")
rule_runner.add_to_build_file("source_root1/project", "python_library()")
assert get_owner("project.app") == Address(
"source_root1/project", relative_file_path="app.py", target_name="project"
)
assert get_owners("project.app") == [
Address("source_root1/project", relative_file_path="app.py")
]

# Now check with a type stub.
rule_runner.create_file("source_root1/project/app.pyi")
assert get_owners("project.app") == [
Address("source_root1/project", relative_file_path="app.py"),
Address("source_root1/project", relative_file_path="app.pyi"),
]

# Check a package path
rule_runner.create_file("source_root2/project/subdir/__init__.py")
rule_runner.add_to_build_file("source_root2/project/subdir", "python_library()")
assert get_owner("project.subdir") == Address(
"source_root2/project/subdir",
relative_file_path="__init__.py",
target_name="subdir",
)

# Test a module with no owner (stdlib). This also sanity checks that we can handle when
assert get_owners("project.subdir") == [
Address(
"source_root2/project/subdir",
relative_file_path="__init__.py",
)
]

# Test a module with no owner (stdlib). This also smoke tests that we can handle when
# there is no parent module.
assert get_owner("typing") is None
assert not get_owners("typing")

# Test a module with a single owner with a top-level source root of ".". Also confirm we
# can handle when the module includes a symbol (like a class name) at the end.
rule_runner.create_file("script.py")
rule_runner.add_to_build_file("", "python_library(name='script')")
assert get_owner("script.Demo") == Address(
"", relative_file_path="script.py", target_name="script"
)
assert get_owners("script.Demo") == [
Address("", relative_file_path="script.py", target_name="script")
]

0 comments on commit f90b982

Please sign in to comment.