From fbf2ca977956f066629b73ef8180b7cc4e5331ed Mon Sep 17 00:00:00 2001 From: Benjy Weinberger Date: Fri, 25 Sep 2020 11:49:34 -0700 Subject: [PATCH] Delete binaries/ and net/. They are no longer used. [ci skip-rust] [ci skip-build-wheels] --- src/python/pants/binaries/BUILD | 4 - src/python/pants/binaries/binary_util.py | 584 ------------------ src/python/pants/net/BUILD | 4 - src/python/pants/net/__init__.py | 0 src/python/pants/net/http/BUILD | 4 - src/python/pants/net/http/__init__.py | 0 src/python/pants/net/http/fetcher.py | 406 ------------ src/python/pants/option/global_options.py | 10 +- src/python/pants/util/osutil.py | 45 +- src/python/pants/util/osutil_test.py | 46 +- tests/python/pants_test/net/BUILD | 4 - tests/python/pants_test/net/__init__.py | 0 tests/python/pants_test/net/http/BUILD | 6 - tests/python/pants_test/net/http/__init__.py | 0 .../pants_test/net/http/test_fetcher.py | 373 ----------- 15 files changed, 10 insertions(+), 1476 deletions(-) delete mode 100644 src/python/pants/binaries/BUILD delete mode 100644 src/python/pants/binaries/binary_util.py delete mode 100644 src/python/pants/net/BUILD delete mode 100644 src/python/pants/net/__init__.py delete mode 100644 src/python/pants/net/http/BUILD delete mode 100644 src/python/pants/net/http/__init__.py delete mode 100644 src/python/pants/net/http/fetcher.py delete mode 100644 tests/python/pants_test/net/BUILD delete mode 100644 tests/python/pants_test/net/__init__.py delete mode 100644 tests/python/pants_test/net/http/BUILD delete mode 100644 tests/python/pants_test/net/http/__init__.py delete mode 100644 tests/python/pants_test/net/http/test_fetcher.py diff --git a/src/python/pants/binaries/BUILD b/src/python/pants/binaries/BUILD deleted file mode 100644 index df627b22271..00000000000 --- a/src/python/pants/binaries/BUILD +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright 2015 Pants project contributors (see CONTRIBUTORS.md). -# Licensed under the Apache License, Version 2.0 (see LICENSE). - -python_library() diff --git a/src/python/pants/binaries/binary_util.py b/src/python/pants/binaries/binary_util.py deleted file mode 100644 index a2811c12765..00000000000 --- a/src/python/pants/binaries/binary_util.py +++ /dev/null @@ -1,584 +0,0 @@ -# Copyright 2015 Pants project contributors (see CONTRIBUTORS.md). -# Licensed under the Apache License, Version 2.0 (see LICENSE). - -import argparse -import logging -import os -import posixpath -import shutil -import sys -from abc import ABC, abstractmethod -from contextlib import contextmanager -from dataclasses import dataclass -from functools import reduce -from typing import Any, List, Optional, Tuple, cast - -from pants.base.build_environment import get_buildroot -from pants.engine.rules import collect_rules, rule -from pants.fs.archive import archiver_for_path -from pants.net.http.fetcher import Fetcher -from pants.option.global_options import GlobalOptions -from pants.option.options_bootstrapper import OptionsBootstrapper -from pants.option.subsystem import Subsystem -from pants.util.contextutil import temporary_file -from pants.util.dirutil import chmod_plus_x, safe_concurrent_creation, safe_open -from pants.util.memo import memoized_classproperty, memoized_method, memoized_property -from pants.util.ordered_set import OrderedSet -from pants.util.osutil import ( - SUPPORTED_PLATFORM_NORMALIZED_NAMES, - get_closest_mac_host_platform_pair, -) - -logger = logging.getLogger(__name__) - - -# TODO: This file is now only used by download_binary.sh, which is only used by -# build-support/bin/native/cargo.sh. Find another solution for bootstrapping -# native tools (cmake, go, protoc) and get rid of this file. - - -@dataclass(frozen=True) -class HostPlatform: - """Describes a platform to resolve binaries for. Determines the binary's location on disk. - - :class:`BinaryToolUrlGenerator` instances receive this to generate download urls. - """ - - os_name: Optional[str] - arch_or_version: Optional[str] - - @memoized_classproperty - def empty(cls): - return cls(None, None) - - def binary_path_components(self): - """These strings are used as consecutive components of the path where a binary is fetched. - - This is also used in generating urls from --binaries-baseurls in PantsHosted. - """ - return [self.os_name, self.arch_or_version] - - -class BinaryToolUrlGenerator(ABC): - """Encapsulates the selection of urls to download for some binary tool. - - :API: public - - :class:`BinaryTool` subclasses can return an instance of a class mixing this in to - get_external_url_generator(self) to download their file or archive from some specified url or set - of urls. - """ - - @abstractmethod - def generate_urls(self, version, host_platform) -> List[str]: - """Return a list of urls to download some binary tool from given a version and platform. - - Each url is tried in order to resolve the binary -- if the list of urls is empty, or downloading - from each of the urls fails, Pants will raise an exception when the binary tool is fetched which - should describe why the urls failed to work. - - :param str version: version string for the requested binary (e.g. '2.0.1'). - :param host_platform: description of the platform to fetch binaries for. - :type host_platform: :class:`HostPlatform` - :returns: a list of urls to download the binary tool from. - :rtype: list - """ - pass - - -class PantsHosted(BinaryToolUrlGenerator): - """Given a binary request and --binaries-baseurls, generate urls to download the binary from. - - This url generator is used if get_external_url_generator(self) is not overridden by a BinaryTool - subclass, or if --allow-external-binary-tool-downloads is False. - - NB: "pants-hosted" is referring to the organization of the urls being specific to pants. It also - happens that most binaries are downloaded from S3 hosting at binaries.pantsbuild.org by default -- - but setting --binaries-baseurls to anything else will only download binaries from the baseurls - given, not from binaries.pantsbuild.org. - """ - - class NoBaseUrlsError(ValueError): - pass - - def __init__(self, binary_request, baseurls): - super().__init__() - self._binary_request = binary_request - - if not baseurls: - raise self.NoBaseUrlsError( - "Error constructing pants-hosted urls for the {} binary: no baseurls were provided.".format( - binary_request.name - ) - ) - self._baseurls = baseurls - - def generate_urls(self, version, host_platform): - """Append the file's download path to each of --binaries-baseurls. - - This assumes that the urls in --binaries-baseurls point somewhere that mirrors Pants's - organization of the downloaded binaries on disk. Each url is tried in order until a request - succeeds. - """ - binary_path = self._binary_request.get_download_path(host_platform) - return [posixpath.join(baseurl, binary_path) for baseurl in self._baseurls] - - -# TODO: Deprecate passing in an explicit supportdir? Seems like we should be able to -# organize our binary hosting so that it's not needed. It's also used to calculate the binary -# download location, though. -@dataclass(frozen=True) -class BinaryRequest: - """Describes a request for a binary to download.""" - - supportdir: Any - version: Any - name: Any - platform_dependent: Any - external_url_generator: Optional[Any] - archiver: Optional[Any] - - def _full_name(self): - if self.archiver: - return "{}.{}".format(self.name, self.archiver.extension) - return self.name - - def get_download_path(self, host_platform): - binary_path_components = [self.supportdir] - if self.platform_dependent: - # TODO(John Sirois): finish doc of the path structure expected under base_path. - binary_path_components.extend(host_platform.binary_path_components()) - binary_path_components.extend([self.version, self._full_name()]) - return os.path.join(*binary_path_components) - - -@dataclass(frozen=True) -class BinaryFetchRequest: - """Describes a request to download a file.""" - - download_path: Any - urls: Tuple - - def __post_init__(self): - if not self.urls: - raise self.NoDownloadUrlsError(f"No urls were provided to {self.__name__}: {self!r}.") - - @memoized_property - def file_name(self): - return os.path.basename(self.download_path) - - class NoDownloadUrlsError(ValueError): - pass - - -class BinaryToolFetcher: - @classmethod - def _default_http_fetcher(cls): - """Return a fetcher that resolves local file paths against the build root. - - Currently this is used everywhere except in testing. - """ - return Fetcher(get_buildroot()) - - def __init__(self, bootstrap_dir, timeout_secs, fetcher=None, ignore_cached_download=False): - """ - :param str bootstrap_dir: The root directory where Pants downloads binaries to. - :param int timeout_secs: The number of seconds to wait before timing out on a request for some - url. - :param fetcher: object to fetch urls with, overridden in testing. - :type fetcher: :class:`pants.net.http.fetcher.Fetcher` - :param bool ignore_cached_download: whether to fetch a binary even if it already exists on disk. - """ - self._bootstrap_dir = bootstrap_dir - self._timeout_secs = timeout_secs - self._fetcher = fetcher or self._default_http_fetcher() - self._ignore_cached_download = ignore_cached_download - - class BinaryNotFound(Exception): - def __init__(self, name, accumulated_errors): - super().__init__( - "Failed to fetch {name} binary from any source: ({error_msgs})".format( - name=name, error_msgs=", ".join(accumulated_errors) - ) - ) - - @contextmanager - def _select_binary_stream(self, name, urls): - """Download a file from a list of urls, yielding a stream after downloading the file. - - URLs are tried in order until they succeed. - - :raises: :class:`BinaryToolFetcher.BinaryNotFound` if requests to all the given urls fail. - """ - downloaded_successfully = False - accumulated_errors = [] - for url in OrderedSet(urls): # De-dup URLS: we only want to try each URL once. - logger.info( - "Attempting to fetch {name} binary from: {url} ...".format(name=name, url=url) - ) - try: - with temporary_file() as dest: - logger.debug( - "in BinaryToolFetcher: url={}, timeout_secs={}".format( - url, self._timeout_secs - ) - ) - self._fetcher.download( - url, - listener=Fetcher.ProgressListener(), - path_or_fd=dest, - timeout_secs=self._timeout_secs, - ) - logger.info("Fetched {name} binary from: {url} .".format(name=name, url=url)) - downloaded_successfully = True - dest.seek(0) - yield dest - break - except (IOError, Fetcher.Error, ValueError) as e: - accumulated_errors.append( - "Failed to fetch binary from {url}: {error}".format(url=url, error=e) - ) - if not downloaded_successfully: - raise self.BinaryNotFound(name, accumulated_errors) - - def _do_fetch(self, download_path, file_name, urls): - with safe_concurrent_creation(download_path) as downloadpath: - with self._select_binary_stream(file_name, urls) as binary_tool_stream: - with safe_open(downloadpath, "wb") as bootstrapped_binary: - shutil.copyfileobj(binary_tool_stream, bootstrapped_binary) - - def fetch_binary(self, fetch_request): - """Fulfill a binary fetch request.""" - bootstrap_dir = os.path.realpath(os.path.expanduser(self._bootstrap_dir)) - bootstrapped_binary_path = os.path.join(bootstrap_dir, fetch_request.download_path) - logger.debug("bootstrapped_binary_path: {}".format(bootstrapped_binary_path)) - file_name = fetch_request.file_name - urls = fetch_request.urls - - if self._ignore_cached_download or not os.path.exists(bootstrapped_binary_path): - self._do_fetch(bootstrapped_binary_path, file_name, urls) - - logger.debug( - "Selected {binary} binary bootstrapped to: {path}".format( - binary=file_name, path=bootstrapped_binary_path - ) - ) - return bootstrapped_binary_path - - -class BinaryUtil: - """Wraps utility methods for finding binary executables.""" - - class Factory(Subsystem): - """ - :API: public - """ - - # N.B. `BinaryUtil` sources all of its options from bootstrap options, so that - # `BinaryUtil` instances can be created prior to `Subsystem` bootstrapping. So - # this options scope is unused, but required to remain a `Subsystem`. - options_scope = "binaries" - - @classmethod - def create(cls) -> "BinaryUtil": - # NB: create is a class method to ~force binary fetch location to be global. - return cast(BinaryUtil, cls._create_for_cls(BinaryUtil)) - - @classmethod - def _create_for_cls(cls, binary_util_cls): - # NB: We read global bootstrap options, but through our own scoped options instance. - options = cls.global_instance().options - binary_tool_fetcher = BinaryToolFetcher( - bootstrap_dir=options.pants_bootstrapdir, - timeout_secs=options.binaries_fetch_timeout_secs, - ) - return binary_util_cls( - baseurls=options.binaries_baseurls, - binary_tool_fetcher=binary_tool_fetcher, - path_by_id=options.binaries_path_by_id, - allow_external_binary_tool_downloads=options.allow_external_binary_tool_downloads, - ) - - class MissingMachineInfo(Exception): - """Indicates that pants was unable to map this machine's OS to a binary path prefix.""" - - pass - - class NoBaseUrlsError(Exception): - """Indicates that no URLs were specified in pants.toml.""" - - pass - - class BinaryResolutionError(Exception): - """Raised to wrap other exceptions raised in the select() method to provide context.""" - - def __init__(self, binary_request, base_exception): - super().__init__( - "Error resolving binary request {}: {}".format(binary_request, base_exception), - base_exception, - ) - - def __init__( - self, - baseurls, - binary_tool_fetcher, - path_by_id=None, - allow_external_binary_tool_downloads=True, - uname_func=None, - ): - """Creates a BinaryUtil with the given settings to define binary lookup behavior. - - This constructor is primarily used for testing. Production code will usually initialize - an instance using the BinaryUtil.Factory.create() method. - - :param baseurls: URL prefixes which represent repositories of binaries. - :type baseurls: list of string - :param int timeout_secs: Timeout in seconds for url reads. - :param string bootstrapdir: Directory to use for caching binaries. Uses this directory to - search for binaries in, or download binaries to if needed. - :param dict path_by_id: Additional mapping from (sysname, id) -> (os, arch) for tool - directory naming - :param bool allow_external_binary_tool_downloads: If False, use --binaries-baseurls to download - all binaries, regardless of whether an - external_url_generator field is provided. - :param function uname_func: method to use to emulate os.uname() in testing - """ - self._baseurls = baseurls - self._binary_tool_fetcher = binary_tool_fetcher - - self._path_by_id = SUPPORTED_PLATFORM_NORMALIZED_NAMES.copy() - if path_by_id: - self._path_by_id.update((tuple(k), tuple(v)) for k, v in path_by_id.items()) - - self._allow_external_binary_tool_downloads = allow_external_binary_tool_downloads - self._uname_func = uname_func or os.uname - - _ID_BY_OS = { - "darwin": lambda release, machine: ("darwin", release.split(".")[0]), - "linux": lambda release, machine: ("linux", machine), - } - - # TODO: we create a HostPlatform in this class instead of in the constructor because we don't want - # to fail until a binary is requested. The HostPlatform should be a parameter that gets lazily - # resolved by the v2 engine. - @memoized_method - def host_platform(self, uname=None): - uname_result = uname if uname else self._uname_func() - sysname, _, release, _, machine = uname_result - os_id_key = sysname.lower() - try: - os_id_fun = self._ID_BY_OS[os_id_key] - os_id_tuple = os_id_fun(release, machine) - except KeyError: - # TODO: test this! - raise self.MissingMachineInfo( - "Pants could not resolve binaries for the current host: platform '{}' was not recognized. " - "Recognized platforms are: [{}].".format( - os_id_key, ", ".join(sorted(self._ID_BY_OS.keys())) - ) - ) - try: - os_name, arch_or_version = self._path_by_id[os_id_tuple] - return HostPlatform(os_name, arch_or_version) - except KeyError: - # In the case of MacOS, arch_or_version represents a version, and newer releases - # can run binaries built for older releases. - # It's better to allow that as a fallback, than for Pants to be broken on each new version - # of MacOS until we get around to adding binaries for that new version, and modifying config - # appropriately. - # If some future version of MacOS cannot run binaries built for a previous - # release, then we're no worse off than we were before (except that the error will be - # less obvious), and we can fix it by pushing appropriate binaries and modifying - # SUPPORTED_PLATFORM_NORMALIZED_NAMES appropriately. This is only likely to happen with a - # major architecture change, so we'll have plenty of warning. - if os_id_tuple[0] == "darwin": - os_name, version = get_closest_mac_host_platform_pair(os_id_tuple[1]) - if os_name is not None and version is not None: - return HostPlatform(os_name, version) - # We fail early here because we need the host_platform to identify where to download - # binaries to. - raise self.MissingMachineInfo( - "Pants could not resolve binaries for the current host. Update --binaries-path-by-id to " - "find binaries for the current host platform {}.\n" - "--binaries-path-by-id was: {}.".format(os_id_tuple, self._path_by_id) - ) - - def _get_download_path(self, binary_request): - return binary_request.get_download_path(self.host_platform()) - - def get_url_generator(self, binary_request): - - external_url_generator = binary_request.external_url_generator - - logger.debug( - "self._allow_external_binary_tool_downloads: {}".format( - self._allow_external_binary_tool_downloads - ) - ) - logger.debug("external_url_generator: {}".format(external_url_generator)) - - if external_url_generator and self._allow_external_binary_tool_downloads: - url_generator = external_url_generator - else: - if not self._baseurls: - raise self.NoBaseUrlsError("--binaries-baseurls is empty.") - url_generator = PantsHosted(binary_request=binary_request, baseurls=self._baseurls) - - return url_generator - - def _get_urls(self, url_generator, binary_request): - return url_generator.generate_urls(binary_request.version, self.host_platform()) - - def select(self, binary_request): - """Fetches a file, unpacking it if necessary.""" - - logger.debug("binary_request: {!r}".format(binary_request)) - - try: - download_path = self._get_download_path(binary_request) - except self.MissingMachineInfo as e: - raise self.BinaryResolutionError(binary_request, e) - - try: - url_generator = self.get_url_generator(binary_request) - except self.NoBaseUrlsError as e: - raise self.BinaryResolutionError(binary_request, e) - - urls = self._get_urls(url_generator, binary_request) - if not isinstance(urls, list): - # TODO: add test for this error! - raise self.BinaryResolutionError( - binary_request, TypeError("urls must be a list: was '{}'.".format(urls)) - ) - fetch_request = BinaryFetchRequest(download_path=download_path, urls=tuple(urls)) - - logger.debug("fetch_request: {!r}".format(fetch_request)) - - try: - downloaded_file = self._binary_tool_fetcher.fetch_binary(fetch_request) - except BinaryToolFetcher.BinaryNotFound as e: - raise self.BinaryResolutionError(binary_request, e) - - # NB: we mark the downloaded file executable if it is not an archive. - archiver = binary_request.archiver - if archiver is None: - chmod_plus_x(downloaded_file) - return downloaded_file - - download_dir = os.path.dirname(downloaded_file) - # Use the 'name' given in the request as the directory name to extract to. - unpacked_dirname = os.path.join(download_dir, binary_request.name) - if not os.path.isdir(unpacked_dirname): - logger.info("Extracting {} to {} .".format(downloaded_file, unpacked_dirname)) - archiver.extract(downloaded_file, unpacked_dirname, concurrency_safe=True) - return unpacked_dirname - - def _make_deprecated_binary_request(self, supportdir, version, name): - return BinaryRequest( - supportdir=supportdir, - version=version, - name=name, - platform_dependent=True, - external_url_generator=None, - archiver=None, - ) - - def select_binary(self, supportdir, version, name): - binary_request = self._make_deprecated_binary_request(supportdir, version, name) - return self.select(binary_request) - - def _make_deprecated_script_request(self, supportdir, version, name): - return BinaryRequest( - supportdir=supportdir, - version=version, - name=name, - platform_dependent=False, - external_url_generator=None, - archiver=None, - ) - - def select_script(self, supportdir, version, name): - binary_request = self._make_deprecated_script_request(supportdir, version, name) - return self.select(binary_request) - - -def _create_bootstrap_binary_arg_parser(): - parser = argparse.ArgumentParser( - description="""\ -Helper for download_binary.sh to use BinaryUtil to download the appropriate binaries. - -Downloads the specified binary at the specified version if it's not already present. - -Outputs an absolute path to the binary, whether fetched or already present, to stdout. - -If the file ends in ".tar.gz", untars the file and outputs the directory to which the files were -untar'd. Otherwise, makes the file executable. - -If a binary tool with the requested name, version, and filename does not exist, the -script will exit with an error and print a message to stderr. - -See binary_util.py for more information. -""" - ) - parser.add_argument( - "util_name", help="Subdirectory for the requested tool in the pants hosted binary schema." - ) - parser.add_argument("version", help="Version of the requested binary tool to download.") - parser.add_argument( - "filename", - nargs="?", - default=None, - help="Filename to download. Defaults to the value provided for `util_name`.", - ) - return parser - - -def select(argv): - # Parse positional arguments to the script. - args = _create_bootstrap_binary_arg_parser().parse_args(argv[1:]) - # Resolve bootstrap options with a fake empty command line. - options_bootstrapper = OptionsBootstrapper.create( - env=os.environ, args=[argv[0]], allow_pantsrc=True - ) - subsystems = (GlobalOptions, BinaryUtil.Factory) - known_scope_infos = reduce(set.union, (ss.known_scope_infos() for ss in subsystems), set()) - options = options_bootstrapper.get_full_options(known_scope_infos) - # Initialize Subsystems. - Subsystem.set_options(options) - - # If the filename provided ends in a known archive extension (such as ".tar.gz"), then we get the - # appropriate Archiver to pass to BinaryUtil. - archiver_for_current_binary = None - filename = args.filename or args.util_name - try: - archiver_for_current_binary = archiver_for_path(filename) - # BinaryRequest requires the `name` field to be provided without an extension, as it appends the - # archiver's extension if one is provided, so we have to remove it here. - filename = filename[: -(len(archiver_for_current_binary.extension) + 1)] - except ValueError: - pass - - binary_util = BinaryUtil.Factory.create() - binary_request = BinaryRequest( - supportdir="bin/{}".format(args.util_name), - version=args.version, - name=filename, - platform_dependent=True, - external_url_generator=None, - archiver=archiver_for_current_binary, - ) - - return binary_util.select(binary_request) - - -if __name__ == "__main__": - print(select(sys.argv)) - - -@rule -def provide_binary_util() -> BinaryUtil: - return BinaryUtil.Factory.create() - - -def rules(): - return collect_rules() diff --git a/src/python/pants/net/BUILD b/src/python/pants/net/BUILD deleted file mode 100644 index e255db96c8d..00000000000 --- a/src/python/pants/net/BUILD +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md). -# Licensed under the Apache License, Version 2.0 (see LICENSE). - -python_library() diff --git a/src/python/pants/net/__init__.py b/src/python/pants/net/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/src/python/pants/net/http/BUILD b/src/python/pants/net/http/BUILD deleted file mode 100644 index e255db96c8d..00000000000 --- a/src/python/pants/net/http/BUILD +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md). -# Licensed under the Apache License, Version 2.0 (see LICENSE). - -python_library() diff --git a/src/python/pants/net/http/__init__.py b/src/python/pants/net/http/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/src/python/pants/net/http/fetcher.py b/src/python/pants/net/http/fetcher.py deleted file mode 100644 index 5821a95194c..00000000000 --- a/src/python/pants/net/http/fetcher.py +++ /dev/null @@ -1,406 +0,0 @@ -# Copyright 2014 Pants project contributors (see CONTRIBUTORS.md). -# Licensed under the Apache License, Version 2.0 (see LICENSE). - -import hashlib -import os -import re -import sys -import tempfile -import time -from abc import ABC, abstractmethod -from contextlib import closing, contextmanager - -import requests - -from pants.util.dirutil import safe_open -from pants.util.strutil import strip_prefix - - -class Fetcher: - """A streaming URL fetcher that supports listeners.""" - - class Error(Exception): - """Indicates an error fetching an URL.""" - - class TransientError(Error): - """Indicates a fetch error for an operation that may reasonably be retried. - - For example a connection error or fetch timeout are both considered transient. - """ - - class PermanentError(Error): - """Indicates a fetch error that is likely permanent. - - Retrying operations that raise these errors is unlikely to succeed. For example, an HTTP - 404 response code is considered a permanent error. - """ - - def __init__(self, value=None, response_code=None): - super(Fetcher.PermanentError, self).__init__(value) - if response_code and not isinstance(response_code, int): - raise ValueError("response_code must be an integer, got {}".format(response_code)) - self._response_code = response_code - - @property - def response_code(self): - """The HTTP response code of the failed request. - - May be None it the request failed before receiving a server response. - """ - return self._response_code - - class Listener: - """A listener callback interface for HTTP GET requests made by a Fetcher.""" - - def status(self, code, content_length=None): - """Called when the response headers are received before data starts streaming. - - :param int code: the HTTP response code - :param int content_length: the response Content-Length if known, otherwise None - """ - - def recv_chunk(self, data): - """Called as each chunk of data is received from the streaming response. - - :param data: a byte string containing the next chunk of response data - """ - - def finished(self): - """Called when the response has been fully read.""" - - def wrap(self, listener=None): - """Returns a Listener that wraps both the given listener and this listener, calling each - in turn for each callback method.""" - if not listener: - return self - - class Wrapper(Fetcher.Listener): - def status(wrapper, code, content_length=None): - listener.status(code, content_length=content_length) - self.status(code, content_length=content_length) - - def recv_chunk(wrapper, data): - listener.recv_chunk(data) - self.recv_chunk(data) - - def finished(wrapper): - listener.finished() - self.finished() - - return Wrapper() - - class DownloadListener(Listener): - """A Listener that writes all received data to a file like object.""" - - def __init__(self, fh): - """Creates a DownloadListener that writes to the given open file handle. - - The file handle is not closed. - - :param fh: a file handle open for writing - """ - if not fh or not hasattr(fh, "write"): - raise ValueError("fh must be an open file handle, given {}".format(fh)) - self._fh = fh - - def recv_chunk(self, data): - self._fh.write(data) - - class ChecksumListener(Listener): - """A Listener that checksums the data received.""" - - def __init__(self, digest=None): - """Creates a ChecksumListener with the given hashlib digest or else an MD5 digest if - none is supplied. - - :param digest: the digest to use to checksum the received data, MDS by default - """ - self.digest = digest or hashlib.md5() - self._checksum = None - - def recv_chunk(self, data): - self.digest.update(data) - - def finished(self): - self._checksum = self.digest.hexdigest() - - @property - def checksum(self): - """Returns the hex digest of the received data. - - Its not valid to access this property before the listener is finished. - - :rtype: string - :raises: ValueError if accessed before this listener is finished - """ - if self._checksum is None: - raise ValueError( - "The checksum cannot be accessed before this listener is finished." - ) - return self._checksum - - class ProgressListener(Listener): - """A Listener that logs progress to a stream.""" - - def __init__(self, width=None, chunk_size_bytes=None, stream=None): - """Creates a ProgressListener that logs progress for known size items with a progress - bar of the given width in characters and otherwise logs a progress indicator every - chunk_size. - - :param int width: the width of the progress bar for known size downloads, 50 by default. - :param chunk_size_bytes: The size of data chunks to note progress for, 10 KB by default. - :param stream: A stream to write progress information to; `sys.stderr` by default. - :type stream: :class:`io.RawIOBase` - """ - self._width = width or 50 - if not isinstance(self._width, int): - raise ValueError("The width must be an integer, given {}".format(self._width)) - self._chunk_size_bytes = chunk_size_bytes or 10 * 1024 - self._stream = stream or sys.stderr.buffer - self._start = time.time() - - def status(self, code, content_length=None): - self.size = content_length - - if content_length: - self.download_size = int(content_length / 1024) - self.chunk_size = content_length / self._width - else: - self.chunk_size = self._chunk_size_bytes - - self.chunks = 0 - self.read = 0 - - def recv_chunk(self, data): - self.read += len(data) - chunk_count = int(self.read / self.chunk_size) - if chunk_count > self.chunks: - self.chunks = chunk_count - if self.size: - self._stream.write(b"\r") - self._stream.write(f"{int(self.read * 1.0 / self.size * 100):3}% ".encode()) - self._stream.write(b"." * self.chunks) - if self.size: - size_width = len(str(self.download_size)) - downloaded = int(self.read / 1024) - self._stream.write( - f"{' ' * (self._width - self.chunks)} {str(downloaded).rjust(size_width)} KB".encode() - ) - self._stream.flush() - - def finished(self): - if self.chunks > 0: - self._stream.write(f" {time.time() - self._start:.3f}s\n".encode()) - self._stream.flush() - - def __init__(self, root_dir, requests_api=None): - """Creates a Fetcher that uses the given requests api object. - - By default uses the requests module, but can be any object conforming to the requests api like - a requests Session object. - - :param root_dir: The root directory to find relative local `file://` url paths against. - :param requests_api: An optional requests api-like object. - """ - self._root_dir = root_dir - self._requests = requests_api or requests - - class _Response(ABC): - """Abstracts a fetch response.""" - - @property - @abstractmethod - def status_code(self): - """The HTTP status code for the fetch. - - :rtype: int - """ - - @property - @abstractmethod - def size(self): - """The size of the fetched file in bytes if known; otherwise, `None`. - - :rtype: int - :raises :class:`Fetcher.Error` if there is a problem determining the file size. - """ - - @abstractmethod - def iter_content(self, chunk_size_bytes): - """Return an iterator over the content of the fetched file's bytes. - - :rtype: :class:`collections.Iterator` over byte chunks. - :raises :class:`Fetcher.Error` if there is a problem determining the file size. - """ - - @abstractmethod - def close(self): - """Close the underlying fetched file stream.""" - - class _RequestsResponse(_Response): - _TRANSIENT_EXCEPTION_TYPES = (requests.ConnectionError, requests.Timeout) - - @classmethod - def as_fetcher_error(cls, url, e): - exception_factory = ( - Fetcher.TransientError - if isinstance(e, cls._TRANSIENT_EXCEPTION_TYPES) - else Fetcher.PermanentError - ) - return exception_factory("Problem GETing data from {}: {}".format(url, e)) - - def __init__(self, url, resp): - self._url = url - self._resp = resp - - @property - def headers(self): - return self._resp.headers - - @property - def status_code(self): - return self._resp.status_code - - @property - def size(self): - size = self._resp.headers.get("content-length") - return int(size) if size else None - - def iter_content(self, chunk_size_bytes): - try: - return self._resp.iter_content(chunk_size=chunk_size_bytes) - except requests.RequestException as e: - raise self.as_fetcher_error(self._url, e) - - def close(self): - self._resp.close() - - class _LocalFileResponse(_Response): - def __init__(self, fp): - self._fp = fp - - @property - def status_code(self): - return requests.codes.ok - - @property - def size(self): - try: - stat = os.fstat(self._fp.fileno()) - return stat.st_size - except OSError as e: - raise Fetcher.PermanentError( - "Problem stating {} for its size: {}".format(self._fp.name, e) - ) - - def iter_content(self, chunk_size_bytes): - while True: - try: - data = self._fp.read(chunk_size_bytes) - except IOError as e: - raise Fetcher.PermanentError( - "Problem reading chunk from {}: {}".format(self._fp.name, e) - ) - if not data: - break - yield data - - def close(self): - self._fp.close() - - def _as_local_file_path(self, url): - path = re.sub(r"^//", "", strip_prefix(url, "file:")) - if path.startswith("/"): - return path - elif url.startswith("file:"): - return os.path.join(self._root_dir, path) - else: - return None - - def _fetch(self, url, timeout_secs=None): - path = self._as_local_file_path(url) - if path: - try: - fp = open(path, "rb") - return self._LocalFileResponse(fp) - except IOError as e: - raise self.PermanentError("Problem reading data from {}: {}".format(path, e)) - else: - try: - resp = self._requests.get( - url, stream=True, timeout=timeout_secs, allow_redirects=True - ) - return self._RequestsResponse(url, resp) - except requests.RequestException as e: - raise self._RequestsResponse.as_fetcher_error(url, e) - - def fetch(self, url, listener, chunk_size_bytes=None, timeout_secs=None): - """Fetches data from the given URL notifying listener of all lifecycle events. - - :param string url: the url to GET data from - :param listener: the listener to notify of all download lifecycle events - :param chunk_size_bytes: the chunk size to use for buffering data, 10 KB by default - :param timeout_secs: the maximum time to wait for data to be available, 1 second by default - :raises: Fetcher.Error if there was a problem fetching all data from the given url - """ - if not isinstance(listener, self.Listener): - raise ValueError("listener must be a Listener instance, given {}".format(listener)) - - chunk_size_bytes = chunk_size_bytes or 10 * 1024 - timeout_secs = timeout_secs or 1.0 - - with closing(self._fetch(url, timeout_secs=timeout_secs)) as resp: - if resp.status_code != requests.codes.ok: - listener.status(resp.status_code) - raise self.PermanentError( - "Fetch of {} failed with status code {}".format(url, resp.status_code), - response_code=resp.status_code, - ) - listener.status(resp.status_code, content_length=resp.size) - - content_encoding = ( - resp.headers.get("content-encoding") if hasattr(resp, "headers") else None - ) - compressed_transfer = content_encoding == "gzip" or content_encoding == "deflate" - - read_bytes = 0 - for data in resp.iter_content(chunk_size_bytes=chunk_size_bytes): - listener.recv_chunk(data) - read_bytes += len(data) - - if resp.size and read_bytes != resp.size and not compressed_transfer: - raise self.Error("Expected {} bytes, read {}".format(resp.size, read_bytes)) - - listener.finished() - - def download( - self, url, listener=None, path_or_fd=None, chunk_size_bytes=None, timeout_secs=None - ): - """Downloads data from the given URL. - - By default data is downloaded to a temporary file. - - :param string url: the url to GET data from - :param listener: an optional listener to notify of all download lifecycle events - :param path_or_fd: an optional file path or open file descriptor to write data to - :param chunk_size_bytes: the chunk size to use for buffering data - :param timeout_secs: the maximum time to wait for data to be available - :returns: the path to the file data was downloaded to. - :raises: Fetcher.Error if there was a problem downloading all data from the given url. - """ - - @contextmanager - def download_fp(_path_or_fd): - if _path_or_fd and not isinstance(_path_or_fd, str): - yield _path_or_fd, _path_or_fd.name - else: - if not _path_or_fd: - fd, _path_or_fd = tempfile.mkstemp() - os.close(fd) - with safe_open(_path_or_fd, "wb") as fp: - yield fp, _path_or_fd - - with download_fp(path_or_fd) as (fp, path): - listener = self.DownloadListener(fp).wrap(listener) - self.fetch(url, listener, chunk_size_bytes=chunk_size_bytes, timeout_secs=timeout_secs) - return path diff --git a/src/python/pants/option/global_options.py b/src/python/pants/option/global_options.py index 9b5937671fd..21b37a1abeb 100644 --- a/src/python/pants/option/global_options.py +++ b/src/python/pants/option/global_options.py @@ -575,8 +575,6 @@ def register_bootstrap_options(cls, register): ) # BinaryUtil options. - # TODO: Nuke these once we get rid of src/python/pants/binaries/binary_util.py - # (see there for what that will take). register( "--binaries-baseurls", type=list, @@ -584,6 +582,8 @@ def register_bootstrap_options(cls, register): default=["https://binaries.pantsbuild.org"], help="List of URLs from which binary tools are downloaded. URLs are " "searched in order until the requested path is found.", + removal_version="2.1.0.dev0", + removal_hint="This option has no effect", ) register( "--binaries-fetch-timeout-secs", @@ -592,6 +592,8 @@ def register_bootstrap_options(cls, register): advanced=True, help="Timeout in seconds for URL reads when fetching binary tools from the " "repos specified by --baseurls.", + removal_version="2.1.0.dev0", + removal_hint="This option has no effect", ) register( "--binaries-path-by-id", @@ -602,6 +604,8 @@ def register_bootstrap_options(cls, register): "(sysname, id) -> (os, arch), e.g. {('darwin', '15'): ('mac', '10.11'), " "('linux', 'arm32'): ('linux', 'arm32')}." ), + removal_version="2.1.0.dev0", + removal_hint="This option has no effect", ) register( "--allow-external-binary-tool-downloads", @@ -612,6 +616,8 @@ def register_bootstrap_options(cls, register): "generated from --binaries-baseurls, even if the tool has an external url " "generator. This can be necessary if using Pants in an environment which cannot " "contact the wider Internet.", + removal_version="2.1.0.dev0", + removal_hint="This option has no effect", ) # Pants Daemon options. diff --git a/src/python/pants/util/osutil.py b/src/python/pants/util/osutil.py index 6f8b7992be6..a1ea5d8470f 100644 --- a/src/python/pants/util/osutil.py +++ b/src/python/pants/util/osutil.py @@ -6,7 +6,7 @@ import os import posix from functools import reduce -from typing import Dict, List, Optional, Set, Tuple +from typing import Optional, Set logger = logging.getLogger(__name__) @@ -48,10 +48,6 @@ def get_normalized_os_name() -> str: return normalize_os_name(get_os_name()) -def all_normalized_os_names() -> List[str]: - return list(OS_ALIASES.keys()) - - def known_os_names() -> Set[str]: return reduce(set.union, OS_ALIASES.values()) @@ -78,42 +74,3 @@ def safe_kill(pid: Pid, signum: int) -> None: raise ValueError(f"Invalid signal number {signum}: {e}", e) else: raise - - -# TODO: use this as the default value for the global --binaries-path-by-id option! -# pantsd testing fails saying no run trackers were created when I tried to do this. -SUPPORTED_PLATFORM_NORMALIZED_NAMES = { - ("linux", "x86_64"): ("linux", "x86_64"), - ("linux", "amd64"): ("linux", "x86_64"), - ("linux", "i386"): ("linux", "i386"), - ("linux", "i686"): ("linux", "i386"), - ("darwin", "9"): ("mac", "10.5"), - ("darwin", "10"): ("mac", "10.6"), - ("darwin", "11"): ("mac", "10.7"), - ("darwin", "12"): ("mac", "10.8"), - ("darwin", "13"): ("mac", "10.9"), - ("darwin", "14"): ("mac", "10.10"), - ("darwin", "15"): ("mac", "10.11"), - ("darwin", "16"): ("mac", "10.12"), - ("darwin", "17"): ("mac", "10.13"), -} - - -def get_closest_mac_host_platform_pair( - darwin_version_upper_bound: Optional[str] = None, - platform_name_map: Dict[Tuple[str, str], Tuple[str, str]] = SUPPORTED_PLATFORM_NORMALIZED_NAMES, -) -> Tuple[Optional[str], Optional[str]]: - """Return the (host, platform) pair for the highest known darwin version less than the bound.""" - darwin_versions = [int(x[1]) for x in platform_name_map if x[0] == "darwin"] - - if darwin_version_upper_bound is not None: - bounded_darwin_versions = [ - v for v in darwin_versions if v <= int(darwin_version_upper_bound) - ] - else: - bounded_darwin_versions = darwin_versions - - if not bounded_darwin_versions: - return None, None - max_darwin_version = str(max(bounded_darwin_versions)) - return platform_name_map[("darwin", max_darwin_version)] diff --git a/src/python/pants/util/osutil_test.py b/src/python/pants/util/osutil_test.py index b611da35d1e..8741cf087c6 100644 --- a/src/python/pants/util/osutil_test.py +++ b/src/python/pants/util/osutil_test.py @@ -1,14 +1,7 @@ # Copyright 2014 Pants project contributors (see CONTRIBUTORS.md). # Licensed under the Apache License, Version 2.0 (see LICENSE). -from typing import Optional - -from pants.util.osutil import ( - OS_ALIASES, - get_closest_mac_host_platform_pair, - known_os_names, - normalize_os_name, -) +from pants.util.osutil import OS_ALIASES, known_os_names, normalize_os_name def test_alias_normalization() -> None: @@ -33,40 +26,3 @@ def test_warnings_on_unknown_names(caplog) -> None: normalize_os_name(name) assert len(caplog.records) == 1 assert "Unknown operating system name" in caplog.text - - -def test_get_closest_mac_host_platform_pair() -> None: - # Note the gaps in darwin versions. - platform_name_map = { - ("linux", "x86_64"): ("linux", "x86_64"), - ("linux", "amd64"): ("linux", "x86_64"), - ("darwin", "10"): ("mac", "10.6"), - ("darwin", "13"): ("mac", "10.9"), - ("darwin", "14"): ("mac", "10.10"), - ("darwin", "16"): ("mac", "10.12"), - ("darwin", "17"): ("mac", "10.13"), - } - - def get_macos_version(darwin_version: Optional[str]) -> Optional[str]: - host, version = get_closest_mac_host_platform_pair( - darwin_version, platform_name_map=platform_name_map - ) - if host is not None: - assert "mac" == host - return version - - assert "10.13" == get_macos_version("19") - assert "10.13" == get_macos_version("18") - assert "10.13" == get_macos_version("17") - assert "10.12" == get_macos_version("16") - assert "10.10" == get_macos_version("15") - assert "10.10" == get_macos_version("14") - assert "10.9" == get_macos_version("13") - assert "10.6" == get_macos_version("12") - assert "10.6" == get_macos_version("11") - assert "10.6" == get_macos_version("10") - assert get_macos_version("9") is None - - # When a version bound of `None` is provided, it should select the most recent OSX platform - # available. - assert "10.13" == get_macos_version(None) diff --git a/tests/python/pants_test/net/BUILD b/tests/python/pants_test/net/BUILD deleted file mode 100644 index e255db96c8d..00000000000 --- a/tests/python/pants_test/net/BUILD +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md). -# Licensed under the Apache License, Version 2.0 (see LICENSE). - -python_library() diff --git a/tests/python/pants_test/net/__init__.py b/tests/python/pants_test/net/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/python/pants_test/net/http/BUILD b/tests/python/pants_test/net/http/BUILD deleted file mode 100644 index 7688de5db37..00000000000 --- a/tests/python/pants_test/net/http/BUILD +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md). -# Licensed under the Apache License, Version 2.0 (see LICENSE). - -python_tests(name='tests') - -python_library() diff --git a/tests/python/pants_test/net/http/__init__.py b/tests/python/pants_test/net/http/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/python/pants_test/net/http/test_fetcher.py b/tests/python/pants_test/net/http/test_fetcher.py deleted file mode 100644 index 37197391eb0..00000000000 --- a/tests/python/pants_test/net/http/test_fetcher.py +++ /dev/null @@ -1,373 +0,0 @@ -# Copyright 2014 Pants project contributors (see CONTRIBUTORS.md). -# Licensed under the Apache License, Version 2.0 (see LICENSE). - -import hashlib -import http.server -import os -import socketserver -import unittest -import unittest.mock -from contextlib import closing, contextmanager -from functools import reduce -from io import BytesIO -from threading import Thread - -import requests - -from pants.net.http.fetcher import Fetcher -from pants.util.contextutil import temporary_dir, temporary_file -from pants.util.dirutil import safe_open, touch - - -class FetcherTest(unittest.TestCase): - def setUp(self): - self.requests = unittest.mock.Mock(spec=requests.Session) - self.response = unittest.mock.Mock(spec=requests.Response) - self.fetcher = Fetcher("/unused/root/dir", requests_api=self.requests) - self.listener = unittest.mock.create_autospec(Fetcher.Listener, spec_set=True) - - def status_call(self, status_code, content_length=None): - return unittest.mock.call.status(status_code, content_length=content_length) - - def ok_call(self, chunks): - return self.status_call(200, content_length=sum(len(c) for c in chunks)) - - def assert_listener_calls(self, expected_listener_calls, chunks, expect_finished=True): - expected_listener_calls.extend(unittest.mock.call.recv_chunk(chunk) for chunk in chunks) - if expect_finished: - expected_listener_calls.append(unittest.mock.call.finished()) - self.assertEqual(expected_listener_calls, self.listener.method_calls) - - def assert_local_file_fetch(self, url_prefix=""): - chunks = [b"0123456789", b"a"] - with temporary_file() as fp: - for chunk in chunks: - fp.write(chunk) - fp.close() - - self.fetcher.fetch(url_prefix + fp.name, self.listener, chunk_size_bytes=10) - - self.assert_listener_calls([self.ok_call(chunks)], chunks) - self.requests.assert_not_called() - - def test_file_path(self): - self.assert_local_file_fetch() - - def test_file_scheme(self): - self.assert_local_file_fetch("file:") - - def assert_local_file_fetch_relative(self, url, *rel_path): - expected_contents = b"proof" - with temporary_dir() as root_dir: - with safe_open(os.path.join(root_dir, *rel_path), "wb") as fp: - fp.write(expected_contents) - with temporary_file() as download_fp: - Fetcher(root_dir).download(url, path_or_fd=download_fp) - download_fp.close() - with open(download_fp.name, "rb") as fp: - self.assertEqual(expected_contents, fp.read()) - - def test_file_scheme_double_slash_relative(self): - self.assert_local_file_fetch_relative("file://relative/path", "relative", "path") - - def test_file_scheme_embedded_double_slash(self): - self.assert_local_file_fetch_relative("file://a//strange//path", "a", "strange", "path") - - def test_file_scheme_triple_slash(self): - self.assert_local_file_fetch("file://") - - def test_file_dne(self): - with temporary_dir() as base: - with self.assertRaises(self.fetcher.PermanentError): - self.fetcher.fetch(os.path.join(base, "dne"), self.listener) - - def test_file_no_perms(self): - with temporary_dir() as base: - no_perms = os.path.join(base, "dne") - touch(no_perms) - os.chmod(no_perms, 0) - self.assertTrue(os.path.exists(no_perms)) - with self.assertRaises(self.fetcher.PermanentError): - self.fetcher.fetch(no_perms, self.listener) - - @contextmanager - def expect_get(self, url, chunk_size_bytes, timeout_secs, chunks=None, listener=True): - chunks = chunks or [b"0123456789", b"a"] - size = sum(len(c) for c in chunks) - - self.requests.get.return_value = self.response - self.response.status_code = 200 - self.response.headers = {"content-length": str(size)} - self.response.iter_content.return_value = chunks - - yield chunks, [self.ok_call(chunks)] if listener else [] - - self.requests.get.expect_called_once_with( - url, allow_redirects=True, stream=True, timeout=timeout_secs - ) - self.response.iter_content.expect_called_once_with(chunk_size=chunk_size_bytes) - - def test_get(self): - with self.expect_get("http://bar", chunk_size_bytes=1024, timeout_secs=60) as ( - chunks, - expected_listener_calls, - ): - - self.fetcher.fetch("http://bar", self.listener, chunk_size_bytes=1024, timeout_secs=60) - - self.assert_listener_calls(expected_listener_calls, chunks) - self.response.close.expect_called_once_with() - - def test_checksum_listener(self): - digest = unittest.mock.Mock(spec=hashlib.md5()) - digest.hexdigest.return_value = "42" - checksum_listener = Fetcher.ChecksumListener(digest=digest) - - with self.expect_get("http://baz", chunk_size_bytes=1, timeout_secs=37) as ( - chunks, - expected_listener_calls, - ): - - self.fetcher.fetch( - "http://baz", - checksum_listener.wrap(self.listener), - chunk_size_bytes=1, - timeout_secs=37, - ) - - self.assertEqual("42", checksum_listener.checksum) - - def expected_digest_calls(): - for chunk in chunks: - yield unittest.mock.call.update(chunk) - yield unittest.mock.call.hexdigest() - - self.assertEqual(list(expected_digest_calls()), digest.method_calls) - - self.assert_listener_calls(expected_listener_calls, chunks) - self.response.close.assert_called_once_with() - - def concat_chunks(self, chunks): - return reduce(lambda acc, c: acc + c, chunks, b"") - - def test_download_listener(self): - with self.expect_get("http://foo", chunk_size_bytes=1048576, timeout_secs=3600) as ( - chunks, - expected_listener_calls, - ): - - with closing(BytesIO()) as fp: - self.fetcher.fetch( - "http://foo", - Fetcher.DownloadListener(fp).wrap(self.listener), - chunk_size_bytes=1024 * 1024, - timeout_secs=60 * 60, - ) - - downloaded = self.concat_chunks(chunks) - self.assertEqual(downloaded, fp.getvalue()) - - self.assert_listener_calls(expected_listener_calls, chunks) - self.response.close.assert_called_once_with() - - def test_size_mismatch(self): - self.requests.get.return_value = self.response - self.response.status_code = 200 - self.response.headers = {"content-length": "11"} - chunks = ["a", "b"] - self.response.iter_content.return_value = chunks - - with self.assertRaises(self.fetcher.Error): - self.fetcher.fetch("http://foo", self.listener, chunk_size_bytes=1024, timeout_secs=60) - - self.requests.get.assert_called_once_with( - "http://foo", allow_redirects=True, stream=True, timeout=60 - ) - self.response.iter_content.assert_called_once_with(chunk_size=1024) - self.assert_listener_calls( - [self.status_call(200, content_length=11)], chunks, expect_finished=False - ) - self.response.close.assert_called_once_with() - - def test_get_error_transient(self): - self.requests.get.side_effect = requests.ConnectionError - - with self.assertRaises(self.fetcher.TransientError): - self.fetcher.fetch("http://foo", self.listener, chunk_size_bytes=1024, timeout_secs=60) - - self.requests.get.assert_called_once_with( - "http://foo", allow_redirects=True, stream=True, timeout=60 - ) - - def test_get_error_permanent(self): - self.requests.get.side_effect = requests.TooManyRedirects - - with self.assertRaises(self.fetcher.PermanentError) as e: - self.fetcher.fetch("http://foo", self.listener, chunk_size_bytes=1024, timeout_secs=60) - - self.assertTrue(e.exception.response_code is None) - self.requests.get.assert_called_once_with( - "http://foo", allow_redirects=True, stream=True, timeout=60 - ) - - def test_http_error(self): - self.requests.get.return_value = self.response - self.response.status_code = 404 - - with self.assertRaises(self.fetcher.PermanentError) as e: - self.fetcher.fetch("http://foo", self.listener, chunk_size_bytes=1024, timeout_secs=60) - - self.assertEqual(404, e.exception.response_code) - self.requests.get.expect_called_once_with( - "http://foo", allow_redirects=True, stream=True, timeout=60 - ) - self.listener.status.expect_called_once_with(404) - self.response.close.expect_called_once_with() - - def test_iter_content_error(self): - self.requests.get.return_value = self.response - self.response.status_code = 200 - self.response.headers = {} - self.response.iter_content.side_effect = requests.Timeout - - with self.assertRaises(self.fetcher.TransientError): - self.fetcher.fetch("http://foo", self.listener, chunk_size_bytes=1024, timeout_secs=60) - - self.requests.get.expect_called_once_with( - "http://foo", allow_redirects=True, stream=True, timeout=60 - ) - self.response.iter_content.expect_called_once_with(chunk_size=1024) - self.listener.status.expect_called_once_with(200, content_length=None) - self.response.close.expect_called_once_with() - - def expect_download(self, path_or_fd=None): - with self.expect_get("http://1", chunk_size_bytes=13, timeout_secs=13, listener=False) as ( - chunks, - expected_listener_calls, - ): - - path = self.fetcher.download( - "http://1", path_or_fd=path_or_fd, chunk_size_bytes=13, timeout_secs=13 - ) - - self.response.close.expect_called_once_with() - downloaded = self.concat_chunks(chunks) - return downloaded, path - - def test_download(self): - downloaded, path = self.expect_download() - try: - with open(path, "rb") as fp: - self.assertEqual(downloaded, fp.read()) - finally: - os.unlink(path) - - def test_download_fd(self): - with temporary_file() as fd: - downloaded, path = self.expect_download(path_or_fd=fd) - self.assertEqual(path, fd.name) - fd.close() - with open(path, "rb") as fp: - self.assertEqual(downloaded, fp.read()) - - def test_download_path(self): - with temporary_file() as fd: - fd.close() - downloaded, path = self.expect_download(path_or_fd=fd.name) - self.assertEqual(path, fd.name) - with open(path, "rb") as fp: - self.assertEqual(downloaded, fp.read()) - - @unittest.mock.patch("time.time") - def test_progress_listener(self, timer): - timer.side_effect = [0, 1.137] - - stream = BytesIO() - progress_listener = Fetcher.ProgressListener(width=5, chunk_size_bytes=1, stream=stream) - - with self.expect_get( - "http://baz", chunk_size_bytes=1, timeout_secs=37, chunks=[[1]] * 1024 - ) as (chunks, expected_listener_calls): - - self.fetcher.fetch( - "http://baz", - progress_listener.wrap(self.listener), - chunk_size_bytes=1, - timeout_secs=37, - ) - - self.assert_listener_calls(expected_listener_calls, chunks) - - # We just test the last progress line which should indicate a 100% complete download. - # We control progress bar width (5 dots), size (1KB) and total time downloading (fake 1.137s). - self.assertEqual("100% ..... 1 KB 1.137s\n", stream.getvalue().decode().split("\r")[-1]) - - -class FetcherRedirectTest(unittest.TestCase): - # NB(Eric Ayers): Using class variables like this seems horrible, but I can't figure out a better - # to pass state between the test and the RedirectHTTPHandler class because it gets - # re-instantiated on every request. - _URL = None - _URL2_ACCESSED = False - _URL1_ACCESSED = False - - # A trivial HTTP server that serves up a redirect from /url2 --> /url1 and some hard-coded - # responses in the HTTP message body. - class RedirectHTTPHandler(http.server.BaseHTTPRequestHandler): - def __init__(self, request, client_address, server): - # The base class implements GET and HEAD. - # Old-style class, so we must invoke __init__ this way. - http.server.BaseHTTPRequestHandler.__init__(self, request, client_address, server) - - def do_GET(self): - if self.path.endswith("url2"): - self.send_response(302) - redirect_url = f"{FetcherRedirectTest._URL}/url1" - self.send_header("Location", redirect_url) - self.end_headers() - self.wfile.write(f"redirecting you to {redirect_url}".encode()) - FetcherRedirectTest._URL2_ACCESSED = True - elif self.path.endswith("url1"): - self.send_response(200) - self.end_headers() - self.wfile.write(b"returned from redirect") - FetcherRedirectTest._URL1_ACCESSED = True - else: - self.send_response(404) - self.end_headers() - - @contextmanager - def setup_server(self): - httpd = None - httpd_thread = None - try: - handler = self.RedirectHTTPHandler - httpd = socketserver.TCPServer(("localhost", 0), handler) - port = httpd.server_address[1] - httpd_thread = Thread(target=httpd.serve_forever) - httpd_thread.start() - yield f"http://localhost:{port}" - finally: - if httpd: - httpd.shutdown() - if httpd_thread: - httpd_thread.join() - - def test_download_redirect(self): - """Make sure that a server that returns a redirect is actually followed. - - Test with a real HTTP server that redirects from one URL to another. - """ - - fetcher = Fetcher("/unused/root/dir") - with self.setup_server() as base_url: - self._URL = base_url - self.assertFalse(self._URL2_ACCESSED) - self.assertFalse(self._URL1_ACCESSED) - - path = fetcher.download(base_url + "/url2") - self.assertTrue(self._URL2_ACCESSED) - self.assertTrue(self._URL1_ACCESSED) - - with open(path) as fp: - self.assertIn(fp.read(), ["returned from redirect\n", "returned from redirect\r\n"])