From d339019866d70af32c759a03a745d1ba00b4b10d Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Sat, 18 Aug 2018 02:13:05 +0800 Subject: [PATCH] Fetch Requires-Python and implement its cache --- src/passa/caches.py | 138 +++++++------------------------------- src/passa/dependencies.py | 35 ++++++---- src/passa/locking.py | 4 +- src/passa/markers.py | 4 +- src/passa/providers.py | 11 ++- 5 files changed, 62 insertions(+), 130 deletions(-) diff --git a/src/passa/caches.py b/src/passa/caches.py index f232e7b..9414cdc 100644 --- a/src/passa/caches.py +++ b/src/passa/caches.py @@ -6,11 +6,10 @@ import sys import appdirs -import requests -import packaging.requirements import pip_shims +import requests -from .utils import get_pinned_version +from .utils import get_pinned_version, mkdir_p CACHE_DIR = os.environ.get("PASSA_CACHE_DIR", appdirs.user_cache_dir("passa")) @@ -131,72 +130,9 @@ def _read_cache_file(cache_file_path): return doc['dependencies'] -def _build_table(values, key=None, keyval=None, unique=False, use_lists=False): - """ - Builds a dict-based lookup table (index) elegantly. - - Supports building normal and unique lookup tables. For example: - - >>> assert lookup_table( - ... ['foo', 'bar', 'baz', 'qux', 'quux'], lambda s: s[0]) == { - ... 'b': {'bar', 'baz'}, - ... 'f': {'foo'}, - ... 'q': {'quux', 'qux'} - ... } - - For key functions that uniquely identify values, set unique=True: - - >>> assert lookup_table( - ... ['foo', 'bar', 'baz', 'qux', 'quux'], lambda s: s[0], - ... unique=True) == { - ... 'b': 'baz', - ... 'f': 'foo', - ... 'q': 'quux' - ... } - - The values of the resulting lookup table will be values, not sets. - - For extra power, you can even change the values while building up the LUT. - To do so, use the `keyval` function instead of the `key` arg: - - >>> assert lookup_table( - ... ['foo', 'bar', 'baz', 'qux', 'quux'], - ... keyval=lambda s: (s[0], s[1:])) == { - ... 'b': {'ar', 'az'}, - ... 'f': {'oo'}, - ... 'q': {'uux', 'ux'} - ... } - - """ - if keyval is None: - if key is None: - keyval = (lambda v: v) - else: - keyval = (lambda v: (key(v), v)) - - if unique: - return dict(keyval(v) for v in values) - - lut = {} - for value in values: - k, v = keyval(value) - try: - s = lut[k] - except KeyError: - if use_lists: - s = lut[k] = list() - else: - s = lut[k] = set() - if use_lists: - s.append(v) - else: - s.add(v) - return dict(lut) - +class _JSONCache(object): + """A persistent cache backed by a JSON file. -class DependencyCache(object): - """ - Creates a new persistent dependency cache for the current Python version. The cache file is written to the appropriate user cache dir for the current platform, i.e. @@ -204,20 +140,22 @@ class DependencyCache(object): Where X.Y indicates the Python version. """ - def __init__(self, cache_dir=CACHE_DIR): - if not os.path.isdir(cache_dir): - os.makedirs(cache_dir) - py_version = '.'.join(str(digit) for digit in sys.version_info[:2]) - cache_filename = 'depcache-py{}.json'.format(py_version) + filename_format = None + def __init__(self, cache_dir=CACHE_DIR): + mkdir_p(cache_dir) + python_version = ".".join(str(digit) for digit in sys.version_info[:2]) + cache_filename = self.filename_format.format( + python_version=python_version, + ) self._cache_file = os.path.join(cache_dir, cache_filename) self._cache = None @property def cache(self): - """ - The dictionary that is the actual in-memory cache. This property - lazily loads the cache from disk. + """The dictionary that is the actual in-memory cache. + + This property lazily loads the cache from disk. """ if self._cache is None: self.read_cache() @@ -247,14 +185,16 @@ def as_cache_key(self, ireq): return name, "{}{}".format(version, extras_string) def read_cache(self): - """Reads the cached contents into memory.""" + """Reads the cached contents into memory. + """ if os.path.exists(self._cache_file): self._cache = _read_cache_file(self._cache_file) else: self._cache = {} def write_cache(self): - """Writes the cache to disk as JSON.""" + """Writes the cache to disk as JSON. + """ doc = { '__format__': 1, 'dependencies': self._cache, @@ -292,40 +232,14 @@ def get(self, ireq, default=None): pkgname, pkgversion_and_extras = self.as_cache_key(ireq) return self.cache.get(pkgname, {}).get(pkgversion_and_extras, default) - def reverse_dependencies(self, ireqs): - """ - Returns a lookup table of reverse dependencies for all the given ireqs. - - Since this is all static, it only works if the dependency cache - contains the complete data, otherwise you end up with a partial view. - This is typically no problem if you use this function after the entire - dependency tree is resolved. - """ - ireqs_as_cache_values = [self.as_cache_key(ireq) for ireq in ireqs] - return self._reverse_dependencies(ireqs_as_cache_values) - - def _reverse_dependencies(self, cache_keys): - """Returns a lookup table of reverse dependencies for all given keys. - - Example input:: - - [('pep8', '1.5.7'), - ('flake8', '2.4.0'), - ('mccabe', '0.3'), - ('pyflakes', '0.8.1')] - Example output:: +class DependencyCache(_JSONCache): + """Cache the dependency of cancidates. + """ + filename_format = "depcache-py{python_version}.json" - {'pep8': ['flake8'], - 'flake8': [], - 'mccabe': ['flake8'], - 'pyflakes': ['flake8']} - """ - # First, collect all dependencies into a sequence of (parent, child) - # tuples, like `[('flake8', 'pep8'), ('flake8', 'mccabe'), ...]`. - return _build_table( - (_key_from_req(packaging.requirements.Requirement(dep_name)), name) - for name, version_and_extras in cache_keys - for dep_name in self.cache[name][version_and_extras] - ) +class RequiresPythonCache(_JSONCache): + """Cache a candidate's Requires-Python information. + """ + filename_format = "pyreqcache-py{python_version}.json" diff --git a/src/passa/dependencies.py b/src/passa/dependencies.py index c1a7add..a2dbc8a 100644 --- a/src/passa/dependencies.py +++ b/src/passa/dependencies.py @@ -3,6 +3,7 @@ import sys import distlib.wheel +import packaging.specifiers import packaging.utils import packaging.version import requests @@ -10,12 +11,13 @@ import six from ._pip import build_wheel -from .caches import DependencyCache +from .caches import DependencyCache, RequiresPythonCache from .markers import contains_extra, get_contained_extras, get_without_extra from .utils import is_pinned DEPENDENCY_CACHE = DependencyCache() +REQUIRES_PYTHON_CACHE = RequiresPythonCache() def _cached(f, **kwargs): @@ -24,7 +26,9 @@ def _cached(f, **kwargs): def wrapped(ireq): result = f(ireq, **kwargs) if result is not None and is_pinned(ireq): - DEPENDENCY_CACHE[ireq] = result + deps, requires_python = result + DEPENDENCY_CACHE[ireq] = deps + REQUIRES_PYTHON_CACHE[ireq] = requires_python return result return wrapped @@ -42,20 +46,22 @@ def _is_cache_broken(line, parent_name): def _get_dependencies_from_cache(ireq): """Retrieves dependencies for the requirement from the dependency cache. """ - if os.environ.get("PASSA_IGNORE_DEPENDENCY_CACHE"): + if os.environ.get("PASSA_IGNORE_LOCAL_CACHE"): return if ireq.editable: return try: - cached = DEPENDENCY_CACHE[ireq] + deps = DEPENDENCY_CACHE[ireq] + pyrq = REQUIRES_PYTHON_CACHE[ireq] except KeyError: return # Preserving sanity: Run through the cache and make sure every entry if # valid. If this fails, something is wrong with the cache. Drop it. try: + packaging.specifiers.SpecifierSet(pyrq) ireq_name = packaging.utils.canonicalize_name(ireq.name) - if any(_is_cache_broken(line, ireq_name) for line in cached): + if any(_is_cache_broken(line, ireq_name) for line in deps): broken = True else: broken = False @@ -65,9 +71,10 @@ def _get_dependencies_from_cache(ireq): if broken: print("dropping broken cache for {0}".format(ireq.name)) del DEPENDENCY_CACHE[ireq] + del REQUIRES_PYTHON_CACHE[ireq] return - return cached + return deps, pyrq def _get_dependencies_from_json_url(url, session): @@ -75,6 +82,7 @@ def _get_dependencies_from_json_url(url, session): response.raise_for_status() info = response.json()["info"] + requires_python = info["requires_python"] or "" try: requirement_lines = info["requires_dist"] except KeyError: @@ -83,7 +91,7 @@ def _get_dependencies_from_json_url(url, session): # The JSON API return null for empty requirements, for some reason, so we # can't just pass it into the comprehension. if not requirement_lines: - return [] + return [], requires_python dependencies = [ dep_req.as_line(include_hashes=False) for dep_req in ( @@ -92,7 +100,7 @@ def _get_dependencies_from_json_url(url, session): ) if not contains_extra(dep_req.markers) ] - return dependencies + return dependencies, requires_python def _get_dependencies_from_json(ireq, sources): @@ -189,7 +197,8 @@ def _get_dependencies_from_pip(ireq, sources): wheel = distlib.wheel.Wheel(wheel_path) extras = ireq.extras or () requirements = _read_requirements(wheel, extras) - return requirements + requires_python = getattr(wheel.metadata, "requires_python", None) + return requirements, requires_python or "" def get_dependencies(requirement, sources): @@ -208,12 +217,14 @@ def get_dependencies(requirement, sources): last_exc = None for getter in getters: try: - deps = getter(ireq) + result = getter(ireq) except Exception as e: last_exc = sys.exc_info() continue - if deps is not None: - return [requirementslib.Requirement.from_line(d) for d in deps] + if result is not None: + deps, pyreq = result + reqs = [requirementslib.Requirement.from_line(d) for d in deps] + return reqs, pyreq if last_exc: six.reraise(*last_exc) raise RuntimeError("failed to get dependencies for {}".format( diff --git a/src/passa/locking.py b/src/passa/locking.py index 55ea945..7b199a0 100644 --- a/src/passa/locking.py +++ b/src/passa/locking.py @@ -38,8 +38,8 @@ def resolve_requirements(requirements, sources, allow_pre): r.hashes = get_hashes(hash_cache, r) set_markers( - state.mapping, traces, - requirements, provider.fetched_dependencies, + state.mapping, traces, requirements, + provider.fetched_dependencies, provider.requires_pythons, ) return state, traces diff --git a/src/passa/markers.py b/src/passa/markers.py index 1d84584..7c85268 100644 --- a/src/passa/markers.py +++ b/src/passa/markers.py @@ -112,7 +112,7 @@ def _calculate_markersets_mapping(requirements, candidates, traces): return all_markersets -def set_markers(candidates, traces, requirements, dependencies): +def set_markers(candidates, traces, requirements, dependencies, pythons): """Add markers to candidates based on the dependency tree. :param candidates: A key-candidate mapping. Candidates in the mapping will @@ -123,6 +123,8 @@ def set_markers(candidates, traces, requirements, dependencies): provided to be resolved. :param dependencies: A key-collection mapping containing what dependencies each candidate in `candidates` requested. + :param pythons: A key-str mapping containing Requires-Python information + of each candidate. Keys in mappings and entries in the trace are identifiers of a package, as implemented by the `identify` method of the resolver's provider. diff --git a/src/passa/providers.py b/src/passa/providers.py index 9df62ef..02fef10 100644 --- a/src/passa/providers.py +++ b/src/passa/providers.py @@ -28,12 +28,13 @@ def __init__(self, root_requirements, sources, allow_prereleases): self.allow_prereleases = bool(allow_prereleases) self.invalid_candidates = set() - # Remember dependencies of each pinned candidate. The resolver calls + # Remember requirements of each pinned candidate. The resolver calls # `get_dependencies()` only when it wants to repin, so the last time # the dependencies we got when it is last called on a package, are # the set used by the resolver. We use this later to trace how a given # dependency is specified by a package. self.fetched_dependencies = {} + self.requires_pythons = {} def identify(self, dependency): return identify_requirment(dependency) @@ -76,7 +77,9 @@ def is_satisfied_by(self, requirement, candidate): def get_dependencies(self, candidate): sources = _filter_sources(candidate, self.sources) try: - dependencies = get_dependencies(candidate, sources=sources) + dependencies, requires_python = get_dependencies( + candidate, sources=sources, + ) except Exception as e: if os.environ.get("PASSA_NO_SUPPRESS_EXCEPTIONS"): raise @@ -84,7 +87,9 @@ def get_dependencies(self, candidate): candidate.as_line(), e, )) return [] - self.fetched_dependencies[self.identify(candidate)] = { + candidate_key = self.identify(candidate) + self.fetched_dependencies[candidate_key] = { self.identify(r): r for r in dependencies } + self.requires_pythons[candidate_key] = requires_python return dependencies