-
-
Notifications
You must be signed in to change notification settings - Fork 608
/
util.py
230 lines (196 loc) · 9.8 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
# Copyright 2019 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).
import io
import os
from collections import abc, defaultdict
from typing import Dict, Iterable, List, Set, Tuple, cast
from pkg_resources import Requirement
from pants.backend.python.target_types import PythonSources
from pants.core.target_types import ResourcesSources
from pants.core.util_rules.strip_source_roots import SourceRootStrippedSources
from pants.engine.fs import DigestContents
from pants.engine.target import Target
from pants.source.source_root import SourceRootError
from pants.util.strutil import ensure_text
# Convenient type alias for the pair (package name, data files in the package).
PackageDatum = Tuple[str, Tuple[str, ...]]
def parse_interpreter_constraint(constraint: str) -> Requirement:
    """Turn an interpreter constraint string into a `Requirement`.

    A full constraint names the interpreter, e.g. `CPython>=2.7,<3`. Shorthand such as `>=3.7`
    is accepted too: when the string does not parse as written, it is parsed again with
    `CPython` prepended, i.e. as `CPython>=3.7`. See Pex's interpreter.py's
    `parse_requirement()`.
    """
    try:
        return Requirement.parse(constraint)
    except ValueError:
        # Not a requirement on its own, so treat it as a bare version specifier for CPython.
        return Requirement.parse(f"CPython{constraint}")
# Distutils does not support unicode strings in setup.py, so we must explicitly convert to binary
# strings as pants uses unicode_literals. A natural and prior technique was to use `pprint.pformat`,
# but that embeds u's in the string itself during conversion. For that reason we roll our own
# literal pretty-printer here.
#
# Note that we must still keep this code, even though Pants only runs with Python 3, because
# the created product may still be run by Python 2.
#
# For more information, see http://bugs.python.org/issue13943.
def distutils_repr(obj):
    """Compute a string repr suitable for use in generated setup.py files.

    The value is rendered as Python source text:

    * `str` and `bytes` become un-prefixed quoted literals (no `u` or `b`); text containing
      the platform line separator is triple-quoted, everything else is single-quoted.
    * Mappings become `{...}`, mutable sequences `[...]`, non-empty sets `{...}`, empty sets
      `set()`, and any other iterable `(...)`.
    * Anything else is rendered with `repr()`.

    Containers are written one element per line with a trailing comma, indented four spaces
    per nesting level.

    :param obj: The value to render.
    :return: The source text for `obj`.
    """
    output = io.StringIO()
    linesep = os.linesep

    def _write(data):
        output.write(ensure_text(data))

    def _write_repr(o, indent=False, level=0):
        # `pad` is the indentation of the current nesting level; nested items use `level + 1`.
        pad = " " * 4 * level
        if indent:
            _write(pad)
        level += 1

        if isinstance(o, (bytes, str)):
            # The py2 repr of str (unicode) is `u'...'` and we don't want the `u` prefix; likewise,
            # the py3 repr of bytes is `b'...'` and we don't want the `b` prefix so we hand-roll a
            # repr here.
            o_txt = ensure_text(o)
            # Escape backslashes before quotes: otherwise `\n`, `\t`, ... in the text would be
            # re-interpreted as escape sequences when setup.py is parsed, and a trailing
            # backslash would swallow the closing quote.
            escaped = o_txt.replace("\\", "\\\\")
            if linesep in o_txt:
                # Escape every double quote, not just `"""` runs: text ending in `"` or `""`
                # would otherwise merge with the closing delimiter.
                _write('"""{}"""'.format(escaped.replace('"', '\\"')))
            else:
                _write("'{}'".format(escaped.replace("'", "\\'")))
        elif isinstance(o, abc.Mapping):
            _write("{" + linesep)
            for k, v in o.items():
                _write_repr(k, indent=True, level=level)
                _write(": ")
                _write_repr(v, indent=False, level=level)
                _write("," + linesep)
            _write(pad + "}")
        elif isinstance(o, abc.Set) and not o:
            # An empty pair of braces is a dict literal, so an empty set needs the call form.
            _write("set()")
        elif isinstance(o, abc.Iterable):
            if isinstance(o, abc.MutableSequence):
                open_collection, close_collection = "[]"
            elif isinstance(o, abc.Set):
                open_collection, close_collection = "{}"
            else:
                open_collection, close_collection = "()"

            _write(open_collection + linesep)
            for i in o:
                _write_repr(i, indent=True, level=level)
                # The trailing comma also keeps a one-element `(...)` a tuple.
                _write("," + linesep)
            _write(pad + close_collection)
        else:
            _write(repr(o))  # Numbers and bools.

    _write_repr(obj)
    return output.getvalue()
def find_packages(
    *,
    tgts_and_stripped_srcs: Iterable[Tuple[Target, SourceRootStrippedSources]],
    init_py_digest_contents: DigestContents,
    py2: bool,
) -> Tuple[Tuple[str, ...], Tuple[str, ...], Tuple[PackageDatum, ...]]:
    """Analyze the package structure for the given sources.

    Returns a tuple (packages, namespace_packages, package_data), suitable for use as setup()
    kwargs. `packages` and `namespace_packages` are sorted, as are the files of each
    `package_data` entry.

    :param tgts_and_stripped_srcs: Pairs of a target and its source-root-stripped sources.
    :param init_py_digest_contents: `__init__.py` files (path and content). Each one implies a
        package, and its content is inspected for a pkg_resources-style namespace declaration.
    :param py2: If True, only a directory holding an `__init__.py` counts as a package;
        otherwise every directory holding a file of a `PythonSources` target does.
    :raises SourceRootError: If the files of a `ResourcesSources` target are not under exactly
        one source root.
    """
    # The pairs are traversed twice below (first for packages, then for package data), so
    # materialize them once. A one-shot iterator would otherwise be exhausted by the first
    # pass and the second pass would silently find no package data.
    tgts_and_stripped_srcs = tuple(tgts_and_stripped_srcs)

    # Find all packages implied by the sources.
    packages: Set[str] = set()
    package_data: Dict[str, List[str]] = defaultdict(list)
    for tgt, stripped_srcs in tgts_and_stripped_srcs:
        if tgt.has_field(PythonSources):
            for file in stripped_srcs.snapshot.files:
                # Python 2: An __init__.py file denotes a package.
                # Python 3: Any directory containing python source files is a package.
                if not py2 or os.path.basename(file) == "__init__.py":
                    packages.add(os.path.dirname(file).replace(os.path.sep, "."))

    # Add any packages implied by ancestor __init__.py files.
    # Note that init_py_contents includes all __init__.py files, not just ancestors, but
    # that's fine - the others will already have been found in tgts_and_stripped_srcs above.
    for file_content in init_py_digest_contents:
        packages.add(os.path.dirname(file_content.path).replace(os.path.sep, "."))

    # Now find all package_data.
    for tgt, stripped_srcs in tgts_and_stripped_srcs:
        if tgt.has_field(ResourcesSources):
            source_roots = list(stripped_srcs.root_to_relfiles.keys())
            if len(source_roots) != 1:
                # A single target is expected to be under a single source root, so this
                # should never happen, but we check to be sure.
                raise SourceRootError(
                    f"Expected a single source root for target "
                    f"{tgt.address.spec} but found: {', '.join(source_roots)}. "
                )
            source_root = source_roots[0]
            resource_dir_relpath = os.path.relpath(tgt.address.spec_path, source_root)
            # Find the closest enclosing package, if any. Resources will be loaded relative to that.
            package: str = resource_dir_relpath.replace(os.path.sep, ".")
            while package and package not in packages:
                # Drop the last dotted component; ends with "" when no ancestor is a package.
                package = package.rpartition(".")[0]
            # If resource is not in a package, ignore it. There's no principled way to load it anyway.
            if package:
                package_dir_relpath = package.replace(".", os.path.sep)
                package_data[package].extend(
                    os.path.relpath(file, package_dir_relpath)
                    for file in stripped_srcs.snapshot.files
                )

    # See which packages are pkg_resources-style namespace packages.
    # Note that implicit PEP 420 namespace packages and pkgutil-style namespace packages
    # should *not* be listed in the setup namespace_packages kwarg. That's for pkg_resources-style
    # namespace packages only. See https://github.com/pypa/sample-namespace-packages/.
    namespace_packages: Set[str] = set()
    init_py_by_path: Dict[str, bytes] = {ipc.path: ipc.content for ipc in init_py_digest_contents}
    for pkg in packages:
        path = os.path.join(pkg.replace(".", os.path.sep), "__init__.py")
        if path in init_py_by_path and declares_pkg_resources_namespace_package(
            init_py_by_path[path].decode()
        ):
            namespace_packages.add(pkg)

    return (
        tuple(sorted(packages)),
        tuple(sorted(namespace_packages)),
        tuple((pkg, tuple(sorted(files))) for pkg, files in package_data.items()),
    )
def declares_pkg_resources_namespace_package(python_src: str) -> bool:
    """Given .py file contents, determine if it declares a pkg_resources-style namespace package.

    Detects pkg_resources-style namespaces, i.e. any call to a function or attribute named
    `declare_namespace` whose only positional argument is the bare name `__name__`. See here
    for details: https://packaging.python.org/guides/packaging-namespace-packages/.

    Note: Accepted namespace package decls are valid Python syntax in all Python versions,
    so this code can, e.g., detect namespace packages in Python 2 code while running on Python 3.

    :param python_src: The text of a Python source file.
    :return: True if such a call appears anywhere in the file; False otherwise, including
        when the text cannot be parsed at all.
    """
    import ast

    def is_name(node: ast.AST, name: str) -> bool:
        return isinstance(node, ast.Name) and node.id == name

    def is_call_to(node: ast.AST, func_name: str) -> bool:
        if not isinstance(node, ast.Call):
            return False
        func = node.func
        # Matches both `x.func_name(...)` and a bare `func_name(...)`.
        return (isinstance(func, ast.Attribute) and func.attr == func_name) or is_name(
            func, func_name
        )

    def has_args(call_node: ast.Call, required_arg_ids: Tuple[str, ...]) -> bool:
        # The positional args must be exactly the required bare names, in order.
        args = call_node.args
        return len(args) == len(required_arg_ids) and all(
            is_name(arg, required_id) for arg, required_id in zip(args, required_arg_ids)
        )

    try:
        python_src_ast = ast.parse(python_src)
    except (SyntaxError, ValueError):
        # The namespace package incantations we check for are valid code in all Python versions.
        # So if the code isn't parseable we know it isn't a valid namespace package.
        # ValueError is what some Python versions raise for source containing null bytes.
        return False

    # Note that these checks are slightly heuristic. It is possible to construct adversarial code
    # that would defeat them. But the only consequence would be an incorrect namespace_packages list
    # in setup.py, and we're assuming our users aren't trying to shoot themselves in the foot.
    #
    # pkg_resources-style namespace, e.g.,
    #   __import__('pkg_resources').declare_namespace(__name__).
    return any(
        is_call_to(ast_node, "declare_namespace")
        and has_args(cast(ast.Call, ast_node), ("__name__",))
        for ast_node in ast.walk(python_src_ast)
    )
def is_python2(compatibilities_or_constraints: Iterable[str]) -> bool:
    """Checks if we should assume python2 code.

    Each entry is parsed with `parse_interpreter_constraint()`. Python 2 is assumed as soon as
    one entry's version specifier admits any Python 2.7 release (2.7.0 through 2.7.18).

    :param compatibilities_or_constraints: Interpreter constraint strings, e.g.
        `CPython>=2.7,<3` or the shorthand `>=3.7`.
    :return: True if any constraint admits a 2.7.x release; False otherwise, including when
        no constraints are given.
    """
    # The last python 2.7 version was 2.7.18, so the patch numbers to probe are 0..18
    # inclusive (hence the exclusive upper bound of 19).
    python_27_versions = tuple(f"2.7.{patch}" for patch in range(0, 19))
    for constraint in compatibilities_or_constraints:
        req = parse_interpreter_constraint(constraint)
        if any(req.specifier.contains(version) for version in python_27_versions):
            # At least one constraint admits Python 2.7, so assume Python 2.
            return True
    return False