diff --git a/continuous_integration/environment.yaml b/continuous_integration/environment.yaml index 5c6a06ebff..f00ee0b8e2 100644 --- a/continuous_integration/environment.yaml +++ b/continuous_integration/environment.yaml @@ -39,6 +39,7 @@ dependencies: - fsspec - pylibtiff - python-geotiepoints + - pooch - pip - pip: - trollsift diff --git a/doc/rtd_environment.yml b/doc/rtd_environment.yml index 95f905d4df..a95a5e497c 100644 --- a/doc/rtd_environment.yml +++ b/doc/rtd_environment.yml @@ -10,6 +10,7 @@ dependencies: - graphviz - numpy - pillow + - pooch - pyresample - setuptools - setuptools_scm diff --git a/doc/source/conf.py b/doc/source/conf.py index 760fbfc117..7d9e849ad8 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -276,4 +276,5 @@ def __getattr__(cls, name): 'xarray': ('https://xarray.pydata.org/en/stable', None), 'rasterio': ('https://rasterio.readthedocs.io/en/latest', None), 'donfig': ('https://donfig.readthedocs.io/en/latest', None), + 'pooch': ('https://www.fatiando.org/pooch/latest/', None), } diff --git a/doc/source/config.rst b/doc/source/config.rst index 727b7a3f8b..b578055ace 100644 --- a/doc/source/config.rst +++ b/doc/source/config.rst @@ -133,6 +133,8 @@ merging of configuration files, they are merged in reverse order. This means "base" configuration paths should be at the end of the list and custom/user paths should be at the beginning of the list. +.. _data_dir_setting: + Data Directory ^^^^^^^^^^^^^^ @@ -146,6 +148,21 @@ defaults to a different path depending on your operating system following the `appdirs `_ "user data dir". +.. _download_aux_setting: + +Download Auxiliary Data +^^^^^^^^^^^^^^^^^^^^^^^ + +* **Environment variable**: ``SATPY_DOWNLOAD_AUX`` +* **YAML/Config Key**: ``download_aux`` +* **Default**: True + +Whether to allow downloading of auxiliary files for certain Satpy operations. +See :doc:`dev_guide/aux_data` for more information. 
If ``True`` then Satpy +will download and cache any necessary data files to :ref:`data_dir_setting` +when needed. If ``False`` then pre-downloaded files will be used, but any +other files will not be downloaded or checked for validity. + .. _component_configuration: Component Configuration diff --git a/doc/source/dev_guide/aux_data.rst b/doc/source/dev_guide/aux_data.rst new file mode 100644 index 0000000000..5e53aa1703 --- /dev/null +++ b/doc/source/dev_guide/aux_data.rst @@ -0,0 +1,122 @@ +Auxiliary Data Download +======================= + +Sometimes Satpy components need some extra data files to get their work +done properly. These include files like Look Up Tables (LUTs), coefficients, +or Earth model data (ex. elevations). This includes any file that would be too +large to be included in the Satpy python package; anything bigger than a small +text file. To help with this, Satpy includes utilities for downloading and +caching these files only when your component is used. This saves the user from +wasting time and disk space downloading files they may never use. +This functionality is made possible thanks to the +`Pooch library <https://www.fatiando.org/pooch/latest/>`_. + +Downloaded files are stored in the directory configured by +:ref:`data_dir_setting`. + +Adding download functionality +----------------------------- + +The utility functions for data downloading include a two step process: + +1. **Registering**: Tell Satpy what files might need to be downloaded and used + later. +2. **Retrieving**: Ask Satpy to download and store the files locally. + +Registering +^^^^^^^^^^^ + +Registering a file for downloading tells Satpy the remote URL for the file, +and an optional hash. The hash is used to verify a successful download. +Registering can also include a ``filename`` to tell Satpy what to name the +file when it is downloaded. If not provided it will be determined from the URL. +Once registered, Satpy can be told to retrieve the file (see below) by using a +"cache key". 
Cache keys follow the general scheme of +``<component_type>/<filename>`` (ex. ``readers/README.rst``). + +Satpy includes a low-level function and a high-level Mixin class for +registering files. The higher level class is recommended for any Satpy +component like readers, writers, and compositors. The lower-level +:func:`~satpy.aux_download.register_file` function can be used for any other +use case. + +The :class:`~satpy.aux_download.DataDownloadMixin` class is automatically included +in the :class:`~satpy.readers.yaml_reader.FileYAMLReader` and +:class:`~satpy.writers.Writer` base classes. For any other component (like +a compositor) you should include it as another parent class: + +.. code-block:: python + + from satpy.aux_download import DataDownloadMixin + from satpy.composites import GenericCompositor + + class MyCompositor(GenericCompositor, DataDownloadMixin): + """Compositor that uses downloaded files.""" + + def __init__(self, name, url=None, known_hash=None, **kwargs): + super().__init__(name, **kwargs) + data_files = [{'url': url, 'known_hash': known_hash}] + self.register_data_files(data_files) + +However your code registers files, to be consistent it must do it during +initialization so that :func:`~satpy.aux_download.find_registerable_files` can find them. +If your component isn't a reader, writer, or compositor then this function +will need to be updated to find and load your registered files. See +:ref:`offline_aux_downloads` below for more information. + +As mentioned, the mixin class is included in the base reader and writer class. +To register files in these cases, include a ``data_files`` section in your +YAML configuration file. For readers this would go under the ``reader`` +section and for writers the ``writer`` section. This parameter is a list +of dictionaries including a ``url``, ``known_hash``, and optional +``filename``. For example:: + + reader: + name: abi_l1b + short_name: ABI L1b + long_name: GOES-R ABI Level 1b + ... other metadata ... 
+ data_files: + - url: "https://example.com/my_data_file.dat" + - url: "https://raw.githubusercontent.com/pytroll/satpy/master/README.rst" + known_hash: "sha256:5891286b63e7745de08c4b0ac204ad44cfdb9ab770309debaba90308305fa759" + - url: "https://raw.githubusercontent.com/pytroll/satpy/master/RELEASING.md" + filename: "satpy_releasing.md" + known_hash: null + +See the :class:`~satpy.aux_download.DataDownloadMixin` for more information. + +Retrieving +^^^^^^^^^^ + +Files that have been registered (see above) can be retrieved by calling the +:func:`~satpy.aux_download.retrieve` function. This function expects a single +argument: the cache key. Cache keys are returned by registering functions, but +can also be pre-determined by following the scheme +``<component_type>/<filename>`` (ex. ``readers/README.rst``). +Retrieving a file will download it to local disk if needed and then return +the local pathname. Data is stored locally in the :ref:`data_dir_setting`. +It is up to the caller to then open the file. + +.. _offline_aux_downloads: + +Offline Downloads +----------------- + +To assist with operational environments, Satpy includes a +:func:`~satpy.aux_download.retrieve_all` function that will try to find all +files that Satpy components may need to download in the future and download +them to the current directory specified by :ref:`data_dir_setting`. +This function allows you to specify a list of ``readers``, ``writers``, or +``composite_sensors`` to limit what components are checked for files to +download. + +The ``retrieve_all`` function is also available through a command line script +called ``satpy_retrieve_all_aux_data``. Run the following for usage information. + +.. code-block:: bash + + satpy_retrieve_all_aux_data --help + +To make sure that no additional files are downloaded when running Satpy see +:ref:`download_aux_setting`. 
def glob_config(pattern, search_dirs=None):
    """Yield every existing path that matches ``pattern`` in the config locations.

    The glob ``pattern`` is first expanded into one candidate pattern per
    configuration search location by ``config_search_paths`` (existence is
    deliberately not checked at that stage), and each candidate is then
    globbed lazily.

    Args:
        pattern (str): Glob pattern, relative to a configuration directory.
        search_dirs (list or None): Passed through to ``config_search_paths``
            as its ``search_dirs`` argument.

    Yields:
        str: Each path matched by the pattern in any search location.

    Note:
        This method does not check the configuration "base" directory if the
        pattern includes a subdirectory. This is done for performance since
        this is usually used to find *all* configs for a certain component.

    """
    candidates = config_search_paths(pattern, search_dirs=search_dirs,
                                     check_exists=False)
    for candidate in candidates:
        yield from glob.iglob(candidate)
This will be prepended to the + filename when storing the data in the cache. + known_hash (str): Hash used to verify the file is downloaded correctly. + See https://www.fatiando.org/pooch/v1.3.0/beginner.html#hashes + for more information. If not provided then the file is not checked. + + Returns: + Cache key that can be used to retrieve the file later. The cache key + consists of the ``component_type`` and provided ``filename``. This + should be passed to :func:`satpy.aux_download_retrieve` when the + file will be used. + + """ + fname = _generate_filename(filename, component_type) + _FILE_REGISTRY[fname] = known_hash + _FILE_URLS[fname] = url + return fname + + +def _generate_filename(filename, component_type): + if filename is None: + return None + path = filename + if component_type: + path = '/'.join([component_type, path]) + return path + + +def _retrieve_offline(data_dir, cache_key): + logger.debug('Downloading auxiliary files is turned off, will check ' + 'local files.') + local_file = os.path.join(data_dir, *cache_key.split('/')) + if not os.path.isfile(local_file): + raise RuntimeError("Satpy 'download_aux' setting is False meaning " + "no new files will be downloaded and the local " + "file '{}' does not exist.".format(local_file)) + return local_file + + +def retrieve(cache_key, pooch_kwargs=None): + """Download and cache the file associated with the provided ``cache_key``. + + Cache location is controlled by the config ``data_dir`` key. See + :ref:`data_dir_setting` for more information. + + Args: + cache_key (str): Cache key returned by + :func:`~satpy.aux_download.register_file`. + pooch_kwargs (dict or None): Extra keyword arguments to pass to + :meth:`pooch.Pooch.fetch`. + + Returns: + Local path of the cached file. 
def _retrieve_all_with_pooch(pooch_kwargs):
    """Fetch every currently registered file into the configured data directory.

    Args:
        pooch_kwargs (dict or None): Extra keyword arguments forwarded to each
            ``fetch`` call. ``None`` means no extra arguments.

    """
    fetch_kwargs = {} if pooch_kwargs is None else pooch_kwargs
    data_dir = satpy.config.get('data_dir')
    # The data directory is passed twice: once as the cache location and once
    # as the default download location. Per-file URLs come from ``_FILE_URLS``.
    fetcher = pooch.create(data_dir, data_dir, registry=_FILE_REGISTRY,
                           urls=_FILE_URLS)
    for cache_key in _FILE_REGISTRY:
        logger.info("Downloading extra data file '%s'...", cache_key)
        fetcher.fetch(cache_key, **fetch_kwargs)
def find_registerable_files(readers=None, writers=None,
                            composite_sensors=None):
    """Load Satpy components so the files they register become known.

    Compositors are loaded first, then readers, then writers.

    Args:
        readers (list or None): Limit searching to these readers. If not
            specified or ``None`` then all readers are searched. If an
            empty list then no readers are searched.
        writers (list or None): Limit searching to these writers. If not
            specified or ``None`` then all writers are searched. If an
            empty list then no writers are searched.
        composite_sensors (list or None): Limit searching to composite
            configuration files for these sensors. If ``None`` then all sensor
            configs will be searched. If an empty list then no composites
            will be searched.

    Returns:
        Sorted list of every cache key present in the file registry after
        the components have been loaded.

    """
    search_plan = (
        (_find_registerable_files_compositors, composite_sensors),
        (_find_registerable_files_readers, readers),
        (_find_registerable_files_writers, writers),
    )
    for finder, selection in search_plan:
        finder(selection)
    return sorted(_FILE_REGISTRY)
def _find_registerable_files_readers(readers=None):
    """Instantiate the requested readers so that they register their files.

    Args:
        readers (list or None): Reader names handed to ``configs_for_reader``.

    """
    import yaml
    from satpy.readers import configs_for_reader, load_reader

    # Best effort: a reader whose module cannot be imported or whose YAML is
    # invalid is skipped so it does not block the remaining readers.
    skippable_errors = (ModuleNotFoundError, yaml.YAMLError)
    for config_files in configs_for_reader(reader=readers):
        try:
            load_reader(config_files)
        except skippable_errors:
            continue
+ + The below code is shown as an example:: + + from satpy.readers.yaml_reader import AbstractYAMLReader + from satpy.aux_download import DataDownloadMixin + + class MyReader(AbstractYAMLReader, DataDownloadMixin): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.register_data_files() + + This class expects data files to be configured in either a + ``self.info['data_files']`` (standard for readers/writers) or + ``self.config['data_files']`` list. The ``data_files`` item + itself is a list of dictionaries. This information can also be + passed directly to ``register_data_files`` for more complex cases. + In YAML, for a reader, this might look like this:: + + reader: + name: abi_l1b + short_name: ABI L1b + long_name: GOES-R ABI Level 1b + ... other metadata ... + data_files: + - url: "https://example.com/my_data_file.dat" + - url: "https://raw.githubusercontent.com/pytroll/satpy/master/README.rst" + known_hash: "sha256:5891286b63e7745de08c4b0ac204ad44cfdb9ab770309debaba90308305fa759" + - url: "https://raw.githubusercontent.com/pytroll/satpy/master/RELEASING.md" + filename: "satpy_releasing.md" + + In this example we register three files that might be downloaded. + If ``known_hash`` is not provided or None (null in YAML) then the data + file will not be checked for validity when downloaded. See + :func:`~satpy.aux_download.register_file` for more information. You can + optionally specify ``filename`` to define the in-cache name when this file + is downloaded. This can be useful in cases when the filename can not be + easily determined from the URL. + + When it comes time to needing the file, you can retrieve the local path + by calling ``satpy.aux_download.retrieve(cache_key)`` with the + "cache key" generated during registration. These keys will be in the + format: ``<component_type>/<filename>``. For a + reader this would be ``readers/satpy_releasing.md``. 
+ + This Mixin is not the only way to register and download files for a + Satpy component, but is the most generic and flexible. Feel free to + use the :func:`~satpy.aux_download.register_file` and + :func:`~satpy.aux_download.retrieve` functions directly. + However, :meth:`~satpy.aux_download.find_registerable_files` must also + be updated to support your component (if files are not register during + initialization). + + """ + + DATA_FILE_COMPONENTS = { + 'reader': 'readers', + 'writer': 'writers', + 'composit': 'composites', + } + + @property + def _data_file_component_type(self): + cls_name = self.__class__.__name__.lower() + for cls_name_sub, comp_type in self.DATA_FILE_COMPONENTS.items(): + if cls_name_sub in cls_name: + return comp_type + return 'other' + + def register_data_files(self, data_files=None): + """Register a series of files that may be downloaded later. + + See :class:`~satpy.aux_download.DataDownloadMixin` for more + information on the assumptions and structure of the data file + configuration dictionary. 
def retrieve_all_cmd(argv=None):
    """Call 'retrieve_all' function from console script 'satpy_retrieve_all_aux_data'.

    Args:
        argv (list or None): Command line arguments to parse. ``None``
            (the default) makes ``argparse`` read ``sys.argv[1:]``, which is
            what the console script entry point relies on.

    For ``--composite-sensors``, ``--readers`` and ``--writers``, omitting the
    flag leaves the value as ``None`` while giving the flag with no values
    produces an empty list. Both are passed unchanged to ``retrieve_all``.

    """
    import argparse
    parser = argparse.ArgumentParser(description="Download auxiliary data files used by Satpy.")
    parser.add_argument('--data-dir',
                        help="Override 'SATPY_DATA_DIR' for destination of "
                             "downloaded files. This does NOT change the "
                             "directory Satpy will look at when searching "
                             "for files outside of this script.")
    parser.add_argument('--composite-sensors', nargs="*",
                        help="Limit loaded composites for the specified "
                             "sensors. If specified with no arguments, "
                             "no composite files will be downloaded.")
    parser.add_argument('--readers', nargs="*",
                        help="Limit searching to these readers. If specified "
                             "with no arguments, no reader files will be "
                             "downloaded.")
    parser.add_argument('--writers', nargs="*",
                        help="Limit searching to these writers. If specified "
                             "with no arguments, no writer files will be "
                             "downloaded.")
    args = parser.parse_args(argv)

    logging.basicConfig(level=logging.INFO)

    if args.data_dir is None:
        args.data_dir = satpy.config.get('data_dir')

    # The key must be spelled exactly 'data_dir': that is the key the download
    # helpers read. A misspelled key would be stored separately and the
    # --data-dir override would be silently ignored.
    with satpy.config.set(data_dir=args.data_dir):
        retrieve_all(readers=args.readers, writers=args.writers,
                     composite_sensors=args.composite_sensors)
If + ``url`` is provided and ``filename`` is not then the + ``filename`` will be guessed from the ``url``. + If ``url`` is not provided, then it is assumed ``filename`` + refers to a local file with an absolute path. + Environment variables are expanded. + url (str): URL to remote file. When the composite is created the + file will be downloaded and cached in Satpy's ``data_dir``. + Environment variables are expanded. + known_hash (str or None): Hash of the remote file used to verify + a successful download. If not provided then the download will + not be verified. See :func:`satpy.aux_download.register_file` + for more information. area (str): Name of area definition for the image. Optional - for images with built-in area definitions (geotiff) + for images with built-in area definitions (geotiff). """ - if filename is None: - raise ValueError("No image configured for static image compositor") - self.filename = os.path.expandvars(filename) + filename, url = self._get_cache_filename_and_url(filename, url) + self._cache_filename = filename + self._url = url + self._known_hash = known_hash self.area = None if area is not None: from satpy.resample import get_area_def self.area = get_area_def(area) super(StaticImageCompositor, self).__init__(name, **kwargs) + cache_keys = self.register_data_files([]) + self._cache_key = cache_keys[0] + + @staticmethod + def _get_cache_filename_and_url(filename, url): + if filename is not None: + filename = os.path.expanduser(os.path.expandvars(filename)) + if url is not None: + url = os.path.expandvars(url) + if filename is None: + filename = os.path.basename(url) + if url is None and (filename is None or not os.path.isabs(filename)): + raise ValueError("StaticImageCompositor needs a remote 'url' " + "or absolute path to 'filename'.") + return filename, url + + def register_data_files(self, data_files): + """Tell Satpy about files we may want to download.""" + if os.path.isabs(self._cache_filename): + return [None] + return 
super().register_data_files([{ + 'url': self._url, + 'known_hash': self._known_hash, + 'filename': self._cache_filename, + }]) + + def _retrieve_data_file(self): + from satpy.aux_download import retrieve + if os.path.isabs(self._cache_filename): + return self._cache_filename + return retrieve(self._cache_key) def __call__(self, *args, **kwargs): """Call the compositor.""" from satpy import Scene - # Check if filename exists, if not then try from SATPY_ANCPATH - if not os.path.isfile(self.filename): - tmp_filename = os.path.join(satpy.config.get('data_dir'), self.filename) - if os.path.isfile(tmp_filename): - self.filename = tmp_filename - scn = Scene(reader='generic_image', filenames=[self.filename]) + local_file = self._retrieve_data_file() + scn = Scene(reader='generic_image', filenames=[local_file]) scn.load(['image']) img = scn['image'] # use compositor parameters as extra metadata diff --git a/satpy/composites/config_loader.py b/satpy/composites/config_loader.py index 0ba48c7560..a0ae52a722 100644 --- a/satpy/composites/config_loader.py +++ b/satpy/composites/config_loader.py @@ -24,7 +24,8 @@ from yaml import UnsafeLoader from satpy import DatasetDict, DataQuery, DataID -from satpy._config import get_entry_points_config_dirs, config_search_paths +from satpy._config import (get_entry_points_config_dirs, config_search_paths, + glob_config) from satpy.utils import recursive_dict_update from satpy.dataset.dataid import minimal_default_keys_config @@ -175,6 +176,19 @@ def __init__(self): # sensor -> { dict of DataID key information } self._sensor_dataid_keys = {} + @classmethod + def all_composite_sensors(cls): + """Get all sensor names from available composite configs.""" + paths = get_entry_points_config_dirs('satpy.composites') + composite_configs = glob_config( + os.path.join("composites", "*.yaml"), + search_dirs=paths) + yaml_names = set([os.path.splitext(os.path.basename(fn))[0] + for fn in composite_configs]) + non_sensor_yamls = ('visir',) + sensor_names 
= [x for x in yaml_names if x not in non_sensor_yamls] + return sensor_names + def load_sensor_composites(self, sensor_name): """Load all compositor configs for the provided sensor.""" config_filename = sensor_name + ".yaml" diff --git a/satpy/etc/composites/visir.yaml b/satpy/etc/composites/visir.yaml index 812e22fc89..b00cfe8dac 100644 --- a/satpy/etc/composites/visir.yaml +++ b/satpy/etc/composites/visir.yaml @@ -419,9 +419,11 @@ composites: _night_background: compositor: !!python/name:satpy.composites.StaticImageCompositor standard_name: night_background - filename: BlackMarble_2016_01deg_geo.tif + url: "https://neo.sci.gsfc.nasa.gov/archive/blackmarble/2016/global/BlackMarble_2016_01deg_geo.tif" + known_hash: "sha256:146c116962677ae113d9233374715686737ff97141a77cc5da69a9451315a685" # optional _night_background_hires: compositor: !!python/name:satpy.composites.StaticImageCompositor standard_name: night_background_hires - filename: BlackMarble_2016_3km_geo.tif + url: "https://neo.sci.gsfc.nasa.gov/archive/blackmarble/2016/global/BlackMarble_2016_3km_geo.tif" + known_hash: "sha256:e915ef2a20d84e2a59e1547d3ad564463ad4bcf22bfa02e0e0b8ed1cd722e9c0" # optional diff --git a/satpy/readers/yaml_reader.py b/satpy/readers/yaml_reader.py index bfa064b292..6733599c4c 100644 --- a/satpy/readers/yaml_reader.py +++ b/satpy/readers/yaml_reader.py @@ -42,6 +42,7 @@ from satpy.utils import recursive_dict_update from satpy.dataset import DataQuery, DataID, get_key from satpy.dataset.dataid import get_keys_from_config, default_id_keys_config, default_co_keys_config +from satpy.aux_download import DataDownloadMixin from satpy import DatasetDict from satpy.resample import add_crs_xy_coords from trollsift.parser import globify, parse @@ -329,7 +330,7 @@ def load_ds_ids_from_config(self): return ids -class FileYAMLReader(AbstractYAMLReader): +class FileYAMLReader(AbstractYAMLReader, DataDownloadMixin): """Primary reader base class that is configured by a YAML file. 
This class uses the idea of per-file "file handler" objects to read file @@ -354,6 +355,7 @@ def __init__(self, self.filter_filenames = self.info.get('filter_filenames', filter_filenames) self.filter_parameters = filter_parameters or {} self.coords_cache = WeakValueDictionary() + self.register_data_files() @property def sensor_names(self): diff --git a/satpy/tests/test_composites.py b/satpy/tests/test_composites.py index 9926ea67a8..9a6dea5b02 100644 --- a/satpy/tests/test_composites.py +++ b/satpy/tests/test_composites.py @@ -721,13 +721,13 @@ def test_concat_datasets(self): num_bands = len(res.bands) self.assertEqual(num_bands, 1) self.assertEqual(res.shape[0], num_bands) - self.assertTrue(res.bands[0] == 'L') + self.assertEqual(res.bands[0], 'L') res = self.comp._concat_datasets([self.all_valid, self.all_valid], 'LA') num_bands = len(res.bands) self.assertEqual(num_bands, 2) self.assertEqual(res.shape[0], num_bands) - self.assertTrue(res.bands[0] == 'L') - self.assertTrue(res.bands[1] == 'A') + self.assertEqual(res.bands[0], 'L') + self.assertEqual(res.bands[1], 'A') self.assertRaises(IncompatibleAreas, self.comp._concat_datasets, [self.all_valid, self.wrong_shape], 'LA') @@ -742,10 +742,10 @@ def test_get_sensors(self): dset2 = self.first_invalid dset2.attrs['sensor'] = 'bar' res = self.comp._get_sensors([dset1, dset2]) - self.assertTrue('foo' in res) - self.assertTrue('bar' in res) + self.assertIn('foo', res) + self.assertIn('bar', res) self.assertEqual(len(res), 2) - self.assertTrue(isinstance(res, set)) + self.assertIsInstance(res, set) @mock.patch('satpy.composites.GenericCompositor._get_sensors') @mock.patch('satpy.composites.combine_metadata') @@ -790,11 +790,11 @@ def test_call(self): res = self.comp([self.all_valid, self.first_invalid], **attrs) # Verify attributes self.assertEqual(res.attrs.get('sensor'), 'foo') - self.assertTrue('foo' in res.attrs) + self.assertIn('foo', res.attrs) self.assertEqual(res.attrs.get('foo'), 'bar') - 
self.assertTrue('units' not in res.attrs) - self.assertTrue('calibration' not in res.attrs) - self.assertTrue('modifiers' not in res.attrs) + self.assertNotIn('units', res.attrs) + self.assertNotIn('calibration', res.attrs) + self.assertNotIn('modifiers', res.attrs) self.assertIsNone(res.attrs['wavelength']) self.assertEqual(res.attrs['mode'], 'LA') self.assertEqual(res.attrs['resolution'], 333) @@ -888,24 +888,31 @@ def test_init(self, get_area_def): # No filename given raises ValueError with self.assertRaises(ValueError): - comp = StaticImageCompositor("name") + StaticImageCompositor("name") + + # No absolute filename and no URL + with self.assertRaises(ValueError): + StaticImageCompositor("name", filename="foo.tif") # No area defined - comp = StaticImageCompositor("name", filename="foo.tif") - self.assertEqual(comp.filename, "foo.tif") + comp = StaticImageCompositor("name", filename="/foo.tif") + self.assertEqual(comp._cache_filename, "/foo.tif") self.assertIsNone(comp.area) # Area defined get_area_def.return_value = "bar" - comp = StaticImageCompositor("name", filename="foo.tif", area="euro4") - self.assertEqual(comp.filename, "foo.tif") + comp = StaticImageCompositor("name", filename="/foo.tif", area="euro4") + self.assertEqual(comp._cache_filename, "/foo.tif") self.assertEqual(comp.area, "bar") get_area_def.assert_called_once_with("euro4") + @mock.patch('satpy.aux_download.retrieve') + @mock.patch('satpy.aux_download.register_file') @mock.patch('satpy.Scene') - def test_call(self, Scene): # noqa + def test_call(self, Scene, register, retrieve): # noqa """Test the static compositing.""" from satpy.composites import StaticImageCompositor + remote_tif = "http://example.com/foo.tif" class MockScene(dict): def load(self, arg): @@ -916,31 +923,48 @@ def load(self, arg): scn = MockScene() scn['image'] = img Scene.return_value = scn - comp = StaticImageCompositor("name", filename="foo.tif", area="euro4") + # absolute path to local file + comp = 
StaticImageCompositor("name", filename="/foo.tif", area="euro4") + res = comp() + Scene.assert_called_once_with(reader='generic_image', + filenames=['/foo.tif']) + register.assert_not_called() + retrieve.assert_not_called() + self.assertIn("start_time", res.attrs) + self.assertIn("end_time", res.attrs) + self.assertIsNone(res.attrs['sensor']) + self.assertNotIn('modifiers', res.attrs) + self.assertNotIn('calibration', res.attrs) + + # remote file with local cached version + Scene.reset_mock() + register.return_value = "data_dir/foo.tif" + retrieve.return_value = "data_dir/foo.tif" + comp = StaticImageCompositor("name", url=remote_tif, area="euro4") res = comp() Scene.assert_called_once_with(reader='generic_image', - filenames=[comp.filename]) - self.assertTrue("start_time" in res.attrs) - self.assertTrue("end_time" in res.attrs) + filenames=['data_dir/foo.tif']) + self.assertIn("start_time", res.attrs) + self.assertIn("end_time", res.attrs) self.assertIsNone(res.attrs['sensor']) - self.assertTrue('modifiers' not in res.attrs) - self.assertTrue('calibration' not in res.attrs) + self.assertNotIn('modifiers', res.attrs) + self.assertNotIn('calibration', res.attrs) # Non-georeferenced image, no area given img.attrs.pop('area') - comp = StaticImageCompositor("name", filename="foo.tif") + comp = StaticImageCompositor("name", filename="/foo.tif") with self.assertRaises(AttributeError): - res = comp() + comp() # Non-georeferenced image, area given - comp = StaticImageCompositor("name", filename="foo.tif", area='euro4') + comp = StaticImageCompositor("name", filename="/foo.tif", area='euro4') res = comp() self.assertEqual(res.attrs['area'].area_id, 'euro4') # Filename contains environment variable os.environ["TEST_IMAGE_PATH"] = "/path/to/image" comp = StaticImageCompositor("name", filename="${TEST_IMAGE_PATH}/foo.tif", area='euro4') - self.assertEqual(comp.filename, "/path/to/image/foo.tif") + self.assertEqual(comp._cache_filename, "/path/to/image/foo.tif") def 
_enhance2dataset(dataset, convert_p=False): @@ -1158,7 +1182,7 @@ def test_call(self): with dask.config.set(scheduler=CustomScheduler(max_computes=0)): comp = MaskingCompositor("name", conditions=conditions_v1) res = comp([data, ct_data]) - self.assertTrue(res.mode == 'LA') + self.assertEqual(res.mode, 'LA') np.testing.assert_allclose(res.sel(bands='L'), reference_data) np.testing.assert_allclose(res.sel(bands='A'), reference_alpha) @@ -1166,7 +1190,7 @@ def test_call(self): with dask.config.set(scheduler=CustomScheduler(max_computes=0)): comp = MaskingCompositor("name", conditions=conditions_v2) res = comp([data, ct_data]) - self.assertTrue(res.mode == 'LA') + self.assertEqual(res.mode, 'LA') np.testing.assert_allclose(res.sel(bands='L'), reference_data) np.testing.assert_allclose(res.sel(bands='A'), reference_alpha) @@ -1175,7 +1199,7 @@ def test_call(self): with dask.config.set(scheduler=CustomScheduler(max_computes=0)): comp = MaskingCompositor("name", conditions=conditions_v2) res = comp([data, ct_data]) - self.assertTrue(res.mode == 'LA') + self.assertEqual(res.mode, 'LA') np.testing.assert_allclose(res.sel(bands='L'), reference_data) np.testing.assert_allclose(res.sel(bands='A'), reference_alpha) @@ -1185,7 +1209,7 @@ def test_call(self): with dask.config.set(scheduler=CustomScheduler(max_computes=0)): comp = MaskingCompositor("name", conditions=conditions_v3) res = comp([data, ct_data_v3]) - self.assertTrue(res.mode == 'LA') + self.assertEqual(res.mode, 'LA') np.testing.assert_allclose(res.sel(bands='L'), reference_data_v3) np.testing.assert_allclose(res.sel(bands='A'), reference_alpha_v3) @@ -1210,7 +1234,7 @@ def test_call(self): with dask.config.set(scheduler=CustomScheduler(max_computes=0)): comp = MaskingCompositor("name", conditions=conditions_v1) res = comp([data, ct_data]) - self.assertTrue(res.mode == 'RGBA') + self.assertEqual(res.mode, 'RGBA') np.testing.assert_allclose(res.sel(bands='R'), data.sel(bands='R').where(ct_data > 1)) 
np.testing.assert_allclose(res.sel(bands='G'), @@ -1229,7 +1253,7 @@ def test_call(self): with dask.config.set(scheduler=CustomScheduler(max_computes=0)): comp = MaskingCompositor("name", conditions=conditions_v2) res = comp([data, ct_data]) - self.assertTrue(res.mode == 'RGBA') + self.assertEqual(res.mode, 'RGBA') np.testing.assert_allclose(res.sel(bands='R'), data.sel(bands='R').where(ct_data > 1)) np.testing.assert_allclose(res.sel(bands='G'), diff --git a/satpy/tests/test_data_download.py b/satpy/tests/test_data_download.py new file mode 100644 index 0000000000..9e0ceb8d46 --- /dev/null +++ b/satpy/tests/test_data_download.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2021 Satpy developers +# +# This file is part of satpy. +# +# satpy is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# satpy is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# satpy. If not, see . 
+"""Test for ancillary data downloading.""" + +from unittest import mock + +import pytest +import yaml + +pooch = pytest.importorskip("pooch") + +README_URL = "https://raw.githubusercontent.com/pytroll/satpy/master/README.rst" + + +def _setup_custom_composite_config(base_dir): + from satpy.composites import StaticImageCompositor + composite_config = base_dir.mkdir("composites").join("visir.yaml") + with open(composite_config, 'w') as comp_file: + yaml.dump({ + "sensor_name": "visir", + "composites": { + "test_static": { + "compositor": StaticImageCompositor, + "url": README_URL, + "known_hash": None, + }, + }, + }, comp_file) + + +def _setup_custom_reader_config(base_dir): + reader_config = base_dir.mkdir("readers").join("fake.yaml") + with open(reader_config, 'wt') as comp_file: + # abstract base classes can't be converted so we do raw string + comp_file.write(""" +reader: + name: "fake" + reader: !!python/name:satpy.readers.yaml_reader.FileYAMLReader + data_files: + - url: {} + known_hash: null + - url: {} + filename: "README2.rst" + known_hash: null +file_types: {{}} +""".format(README_URL, README_URL)) + + +def _setup_custom_writer_config(base_dir): + writer_config = base_dir.mkdir("writers").join("fake.yaml") + with open(writer_config, 'wt') as comp_file: + # abstract base classes can't be converted so we do raw string + comp_file.write(""" +writer: + name: "fake" + writer: !!python/name:satpy.writers.Writer + data_files: + - url: {} + known_hash: null + - url: {} + filename: "README2.rst" + known_hash: null +""".format(README_URL, README_URL)) + + +def _get_reader_find_conditions(readers, found_files): + r_cond1 = 'readers/README.rst' in found_files + r_cond2 = 'readers/README2.rst' in found_files + if readers is not None and not readers: + r_cond1 = not r_cond1 + r_cond2 = not r_cond2 + return r_cond1, r_cond2 + + +def _get_writer_find_conditions(writers, found_files): + w_cond1 = 'writers/README.rst' in found_files + w_cond2 = 'writers/README2.rst' in 
found_files + if writers is not None and not writers: + w_cond1 = not w_cond1 + w_cond2 = not w_cond2 + return w_cond1, w_cond2 + + +def _get_comp_find_conditions(comp_sensors, found_files): + comp_cond = 'composites/README.rst' in found_files + if comp_sensors is not None and not comp_sensors: + comp_cond = not comp_cond + return comp_cond + + +class TestDataDownload: + """Test basic data downloading functionality.""" + + @pytest.fixture(autouse=True) + def _setup_custom_configs(self, tmpdir): + _setup_custom_composite_config(tmpdir) + _setup_custom_reader_config(tmpdir) + _setup_custom_writer_config(tmpdir) + self.tmpdir = tmpdir + + @pytest.mark.parametrize('comp_sensors', [[], None, ['visir']]) + @pytest.mark.parametrize('writers', [[], None, ['fake']]) + @pytest.mark.parametrize('readers', [[], None, ['fake']]) + def test_find_registerable(self, readers, writers, comp_sensors): + """Test that find_registerable finds some things.""" + import satpy + from satpy.aux_download import find_registerable_files + with satpy.config.set(config_path=[self.tmpdir]), \ + mock.patch('satpy.aux_download._FILE_REGISTRY', {}): + found_files = find_registerable_files( + readers=readers, writers=writers, + composite_sensors=comp_sensors, + ) + + r_cond1, r_cond2 = _get_reader_find_conditions(readers, found_files) + assert r_cond1 + assert r_cond2 + w_cond1, w_cond2 = _get_writer_find_conditions(writers, found_files) + assert w_cond1 + assert w_cond2 + comp_cond = _get_comp_find_conditions(comp_sensors, found_files) + assert comp_cond + + def test_limited_find_registerable(self): + """Test that find_registerable doesn't find anything when limited.""" + import satpy + from satpy.aux_download import find_registerable_files + file_registry = {} + with satpy.config.set(config_path=[self.tmpdir]), \ + mock.patch('satpy.aux_download._FILE_REGISTRY', file_registry): + found_files = find_registerable_files( + readers=[], writers=[], composite_sensors=[], + ) + assert not found_files + + 
def test_retrieve(self): + """Test retrieving a single file.""" + import satpy + from satpy.aux_download import find_registerable_files, retrieve + file_registry = {} + with satpy.config.set(config_path=[self.tmpdir], data_dir=str(self.tmpdir)), \ + mock.patch('satpy.aux_download._FILE_REGISTRY', file_registry): + comp_file = 'composites/README.rst' + found_files = find_registerable_files() + assert comp_file in found_files + assert not self.tmpdir.join(comp_file).exists() + retrieve(comp_file) + assert self.tmpdir.join(comp_file).exists() + + def test_offline_retrieve(self): + """Test retrieving a single file when offline.""" + import satpy + from satpy.aux_download import find_registerable_files, retrieve + file_registry = {} + with satpy.config.set(config_path=[self.tmpdir], data_dir=str(self.tmpdir), download_aux=True), \ + mock.patch('satpy.aux_download._FILE_REGISTRY', file_registry): + comp_file = 'composites/README.rst' + found_files = find_registerable_files() + assert comp_file in found_files + + # the file doesn't exist, we can't download it + assert not self.tmpdir.join(comp_file).exists() + with satpy.config.set(download_aux=False): + pytest.raises(RuntimeError, retrieve, comp_file) + + # allow downloading and get it + retrieve(comp_file) + assert self.tmpdir.join(comp_file).exists() + + # turn off downloading and make sure we get local file + with satpy.config.set(download_aux=False): + local_file = retrieve(comp_file) + assert local_file + + def test_offline_retrieve_all(self): + """Test registering and retrieving all files fails when offline.""" + import satpy + from satpy.aux_download import retrieve_all + with satpy.config.set(config_path=[self.tmpdir], data_dir=str(self.tmpdir), download_aux=False): + pytest.raises(RuntimeError, retrieve_all) + + def test_retrieve_all(self): + """Test registering and retrieving all files.""" + import satpy + from satpy.aux_download import retrieve_all + file_registry = {} + file_urls = {} + with 
satpy.config.set(config_path=[self.tmpdir], data_dir=str(self.tmpdir)), \ + mock.patch('satpy.aux_download._FILE_REGISTRY', file_registry), \ + mock.patch('satpy.aux_download._FILE_URLS', file_urls), \ + mock.patch('satpy.aux_download.find_registerable_files'): + comp_file = 'composites/README.rst' + file_registry[comp_file] = None + file_urls[comp_file] = README_URL + assert not self.tmpdir.join(comp_file).exists() + retrieve_all() + assert self.tmpdir.join(comp_file).exists() diff --git a/satpy/writers/__init__.py b/satpy/writers/__init__.py index 3240421dcc..81357347f5 100644 --- a/satpy/writers/__init__.py +++ b/satpy/writers/__init__.py @@ -39,6 +39,7 @@ from satpy import CHUNK_SIZE from satpy.plugin_base import Plugin from satpy.resample import get_area_def +from satpy.aux_download import DataDownloadMixin from trollsift import parser @@ -543,7 +544,7 @@ def compute_writer_results(results): target.close() -class Writer(Plugin): +class Writer(Plugin, DataDownloadMixin): """Base Writer class for all other writers. A minimal writer subclass should implement the `save_dataset` method. 
@@ -595,6 +596,7 @@ def __init__(self, name=None, filename=None, base_dir=None, **kwargs): raise ValueError("Writer 'name' not provided") self.filename_parser = self.create_filename_parser(base_dir) + self.register_data_files() @classmethod def separate_init_kwargs(cls, kwargs): diff --git a/setup.py b/setup.py index 2fb77fd546..2df95f03b9 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,8 @@ requires = ['numpy >=1.13', 'pillow', 'pyresample >=1.11.0', 'trollsift', 'trollimage >1.10.1', 'pykdtree', 'pyyaml', 'xarray >=0.10.1, !=0.13.0', - 'dask[array] >=0.17.1', 'pyproj', 'zarr', 'donfig', 'appdirs'] + 'dask[array] >=0.17.1', 'pyproj', 'zarr', 'donfig', 'appdirs', + 'pooch'] test_requires = ['behave', 'h5py', 'netCDF4', 'pyhdf', 'imageio', 'libtiff', 'rasterio', 'geoviews', 'trollimage', 'fsspec'] @@ -102,6 +103,13 @@ def _config_data_files(base_dirs, extensions=(".cfg", )): return data_files +entry_points = { + 'console_scripts': [ + 'satpy_retrieve_all_aux_data=satpy.aux_download:retrieve_all_cmd', + ], +} + + NAME = 'satpy' with open('README.rst', 'r') as readme: README = readme.read() @@ -138,4 +146,5 @@ def _config_data_files(base_dirs, extensions=(".cfg", )): tests_require=test_requires, python_requires='>=3.6', extras_require=extras_require, + entry_points=entry_points, )