Skip to content

Commit

Permalink
Typing for spack checksum code paths (spack#42183)
Browse files Browse the repository at this point in the history
  • Loading branch information
haampie committed Jan 19, 2024
1 parent 75e96b8 commit edc8a5f
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 85 deletions.
31 changes: 15 additions & 16 deletions lib/spack/spack/cmd/checksum.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import re
import sys
from typing import Dict, Optional

import llnl.string
import llnl.util.lang
Expand All @@ -25,7 +26,7 @@
)
from spack.util.editor import editor
from spack.util.format import get_version_lines
from spack.version import Version
from spack.version import StandardVersion, Version

description = "checksum available versions of a package"
section = "packaging"
Expand Down Expand Up @@ -89,32 +90,30 @@ def checksum(parser, args):
spec = spack.spec.Spec(args.package)

# Get the package we're going to generate checksums for
pkg = spack.repo.PATH.get_pkg_class(spec.name)(spec)
pkg: PackageBase = spack.repo.PATH.get_pkg_class(spec.name)(spec)

# Skip manually downloaded packages
if pkg.manual_download:
raise ManualDownloadRequiredError(pkg.download_instr)

versions = [Version(v) for v in args.versions]
versions = [StandardVersion.from_string(v) for v in args.versions]

# Define placeholder for remote versions.
# This'll help reduce redundant work if we need to check for the existence
# of remote versions more than once.
remote_versions = None
# Define placeholder for remote versions. This'll help reduce redundant work if we need to
# check for the existence of remote versions more than once.
remote_versions: Optional[Dict[StandardVersion, str]] = None

# Add latest version if requested
if args.latest:
remote_versions = pkg.fetch_remote_versions(args.jobs)
remote_versions = pkg.fetch_remote_versions(concurrency=args.jobs)
if len(remote_versions) > 0:
latest_version = sorted(remote_versions.keys(), reverse=True)[0]
versions.append(latest_version)
versions.append(max(remote_versions.keys()))

# Add preferred version if requested
# Add preferred version if requested (todo: exclude git versions)
if args.preferred:
versions.append(preferred_version(pkg))

# Store a dict of the form version -> URL
url_dict = {}
url_dict: Dict[StandardVersion, str] = {}

for version in versions:
if deprecated_version(pkg, version):
Expand All @@ -124,16 +123,16 @@ def checksum(parser, args):
if url is not None:
url_dict[version] = url
continue
# if we get here, it's because no valid url was provided by the package
# do expensive fallback to try to recover
# If we get here, it's because no valid url was provided by the package. Do expensive
# fallback to try to recover
if remote_versions is None:
remote_versions = pkg.fetch_remote_versions(args.jobs)
remote_versions = pkg.fetch_remote_versions(concurrency=args.jobs)
if version in remote_versions:
url_dict[version] = remote_versions[version]

if len(versions) <= 0:
if remote_versions is None:
remote_versions = pkg.fetch_remote_versions(args.jobs)
remote_versions = pkg.fetch_remote_versions(concurrency=args.jobs)
url_dict = remote_versions

# A spidered URL can differ from the package.py *computed* URL, pointing to different tarballs.
Expand Down
81 changes: 37 additions & 44 deletions lib/spack/spack/package_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
from spack.stage import DIYStage, ResourceStage, Stage, StageComposite, compute_stage_name
from spack.util.executable import ProcessError, which
from spack.util.package_hash import package_hash
from spack.version import GitVersion, StandardVersion, Version
from spack.version import GitVersion, StandardVersion

FLAG_HANDLER_RETURN_TYPE = Tuple[
Optional[Iterable[str]], Optional[Iterable[str]], Optional[Iterable[str]]
Expand All @@ -94,29 +94,26 @@
spack_times_log = "install_times.json"


def deprecated_version(pkg, version):
"""Return True if the version is deprecated, False otherwise.
def deprecated_version(pkg: "PackageBase", version: Union[str, StandardVersion]) -> bool:
"""Return True iff the version is deprecated.
Arguments:
pkg (PackageBase): The package whose version is to be checked.
version (str or spack.version.StandardVersion): The version being checked
pkg: The package whose version is to be checked.
version: The version being checked
"""
if not isinstance(version, StandardVersion):
version = Version(version)
version = StandardVersion.from_string(version)

for k, v in pkg.versions.items():
if version == k and v.get("deprecated", False):
return True
details = pkg.versions.get(version)
return details is not None and details.get("deprecated", False)

return False


def preferred_version(pkg):
def preferred_version(pkg: "PackageBase"):
"""
Returns a sorted list of the preferred versions of the package.
Arguments:
pkg (PackageBase): The package whose versions are to be assessed.
pkg: The package whose versions are to be assessed.
"""
# Here we sort first on the fact that a version is marked
# as preferred in the package, then on the fact that the
Expand Down Expand Up @@ -903,22 +900,16 @@ def version(self):

@classmethod
@memoized
def version_urls(cls):
"""OrderedDict of explicitly defined URLs for versions of this package.
def version_urls(cls) -> Dict[StandardVersion, str]:
"""Dict of explicitly defined URLs for versions of this package.
Return:
An OrderedDict (version -> URL) different versions of this
package, sorted by version.
A dict mapping version to url, ordered by version.
A version's URL only appears in the result if it has an
explicitly defined ``url`` argument. So, this list may be empty
if a package only defines ``url`` at the top level.
A version's URL only appears in the result if it has an explicitly defined ``url``
argument. So, this list may be empty if a package only defines ``url`` at the top level.
"""
version_urls = collections.OrderedDict()
for v, args in sorted(cls.versions.items()):
if "url" in args:
version_urls[v] = args["url"]
return version_urls
return {v: args["url"] for v, args in sorted(cls.versions.items()) if "url" in args}

def nearest_url(self, version):
"""Finds the URL with the "closest" version to ``version``.
Expand Down Expand Up @@ -961,46 +952,47 @@ def update_external_dependencies(self, extendee_spec=None):
"""
pass

def all_urls_for_version(self, version):
def all_urls_for_version(self, version: StandardVersion) -> List[str]:
"""Return all URLs derived from version_urls(), url, urls, and
list_url (if it contains a version) in a package in that order.
Args:
version (spack.version.Version): the version for which a URL is sought
version: the version for which a URL is sought
"""
uf = None
if type(self).url_for_version != PackageBase.url_for_version:
uf = self.url_for_version
return self._implement_all_urls_for_version(version, uf)

def _implement_all_urls_for_version(self, version, custom_url_for_version=None):
if not isinstance(version, StandardVersion):
version = Version(version)
def _implement_all_urls_for_version(
self,
version: Union[str, StandardVersion],
custom_url_for_version: Optional[Callable[[StandardVersion], Optional[str]]] = None,
) -> List[str]:
version = StandardVersion.from_string(version) if isinstance(version, str) else version

urls = []
urls: List[str] = []

# If we have a specific URL for this version, don't extrapolate.
version_urls = self.version_urls()
if version in version_urls:
urls.append(version_urls[version])
url = self.version_urls().get(version)
if url:
urls.append(url)

# if there is a custom url_for_version, use it
if custom_url_for_version is not None:
u = custom_url_for_version(version)
if u not in urls and u is not None:
if u is not None and u not in urls:
urls.append(u)

def sub_and_add(u):
def sub_and_add(u: Optional[str]) -> None:
if u is None:
return
# skip the url if there is no version to replace
try:
spack.url.parse_version(u)
except spack.url.UndetectableVersionError:
return
nu = spack.url.substitute_version(u, self.url_version(version))

urls.append(nu)
urls.append(spack.url.substitute_version(u, self.url_version(version)))

# If no specific URL, use the default, class-level URL
sub_and_add(getattr(self, "url", None))
Expand Down Expand Up @@ -2358,15 +2350,14 @@ def format_doc(cls, **kwargs):
return results.getvalue()

@property
def all_urls(self):
def all_urls(self) -> List[str]:
"""A list of all URLs in a package.
Check both class-level and version-specific URLs.
Returns:
list: a list of URLs
Returns a list of URLs
"""
urls = []
urls: List[str] = []
if hasattr(self, "url") and self.url:
urls.append(self.url)

Expand All @@ -2379,7 +2370,9 @@ def all_urls(self):
urls.append(args["url"])
return urls

def fetch_remote_versions(self, concurrency=None):
def fetch_remote_versions(
self, concurrency: Optional[int] = None
) -> Dict[StandardVersion, str]:
"""Find remote versions of this package.
Uses ``list_url`` and any other URLs listed in the package file.
Expand Down
46 changes: 23 additions & 23 deletions lib/spack/spack/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import os
import pathlib
import re
from typing import Any, Dict, Optional, Sequence, Union

import llnl.url
from llnl.path import convert_to_posix_path
Expand Down Expand Up @@ -284,20 +285,19 @@ def parse_version_offset(path):
raise UndetectableVersionError(original_path)


def parse_version(path):
def parse_version(path: str) -> spack.version.StandardVersion:
"""Try to extract a version string from a filename or URL.
Args:
path (str): The filename or URL for the package
path: The filename or URL for the package
Returns:
spack.version.Version: The version of the package
Returns: The version of the package
Raises:
UndetectableVersionError: If the URL does not match any regexes
"""
version, start, length, i, regex = parse_version_offset(path)
return spack.version.Version(version)
return spack.version.StandardVersion.from_string(version)


def parse_name_offset(path, v=None):
Expand Down Expand Up @@ -604,31 +604,31 @@ def color_url(path, **kwargs):


def find_versions_of_archive(
archive_urls, list_url=None, list_depth=0, concurrency=32, reference_package=None
):
archive_urls: Union[str, Sequence[str]],
list_url: Optional[str] = None,
list_depth: int = 0,
concurrency: Optional[int] = 32,
reference_package: Optional[Any] = None,
) -> Dict[spack.version.StandardVersion, str]:
"""Scrape web pages for new versions of a tarball. This function prefers URLs in the
following order: links found on the scraped page that match a url generated by the
reference package, found and in the archive_urls list, found and derived from those
in the archive_urls list, and if none are found for a version then the item in the
archive_urls list is included for the version.
Args:
archive_urls (str or list or tuple): URL or sequence of URLs for
different versions of a package. Typically these are just the
tarballs from the package file itself. By default, this searches
the parent directories of archives.
list_url (str or None): URL for a listing of archives.
Spack will scrape these pages for download links that look
like the archive URL.
list_depth (int): max depth to follow links on list_url pages.
Defaults to 0.
concurrency (int): maximum number of concurrent requests
reference_package (spack.package_base.PackageBase or None): a spack package
used as a reference for url detection. Uses the url_for_version
method on the package to produce reference urls which, if found,
are preferred.
archive_urls: URL or sequence of URLs for different versions of a package. Typically these
are just the tarballs from the package file itself. By default, this searches the
parent directories of archives.
list_url: URL for a listing of archives. Spack will scrape these pages for download links
that look like the archive URL.
list_depth: max depth to follow links on list_url pages. Defaults to 0.
concurrency: maximum number of concurrent requests
reference_package: a spack package used as a reference for url detection. Uses the
url_for_version method on the package to produce reference urls which, if found, are
preferred.
"""
if not isinstance(archive_urls, (list, tuple)):
if isinstance(archive_urls, str):
archive_urls = [archive_urls]

# Generate a list of list_urls based on archive urls and any
Expand Down Expand Up @@ -689,7 +689,7 @@ def find_versions_of_archive(
# Build a dict version -> URL from any links that match the wildcards.
# Walk through archive_url links first.
# Any conflicting versions will be overwritten by the list_url links.
versions = {}
versions: Dict[spack.version.StandardVersion, str] = {}
matched = set()
for url in sorted(links):
url = convert_to_posix_path(url)
Expand Down
6 changes: 4 additions & 2 deletions lib/spack/spack/util/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import urllib.parse
from html.parser import HTMLParser
from pathlib import Path, PurePosixPath
from typing import IO, Dict, List, Optional, Set, Union
from typing import IO, Dict, Iterable, List, Optional, Set, Union
from urllib.error import HTTPError, URLError
from urllib.request import HTTPSHandler, Request, build_opener

Expand Down Expand Up @@ -554,7 +554,9 @@ def list_url(url, recursive=False):
return gcs.get_all_blobs(recursive=recursive)


def spider(root_urls: Union[str, List[str]], depth: int = 0, concurrency: Optional[int] = None):
def spider(
root_urls: Union[str, Iterable[str]], depth: int = 0, concurrency: Optional[int] = None
):
"""Get web pages from root URLs.
If depth is specified (e.g., depth=2), then this will also follow up to <depth> levels
Expand Down

0 comments on commit edc8a5f

Please sign in to comment.