From 00f2cd79f14cf6fd58fbe776fd8aef8d356a99c2 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Tue, 26 Jan 2021 14:51:48 -0600 Subject: [PATCH 01/16] Add ancillary data download API --- doc/source/conf.py | 1 + doc/source/config.rst | 2 + satpy/composites/__init__.py | 28 +++-- satpy/composites/config_loader.py | 5 + satpy/data_download.py | 175 ++++++++++++++++++++++++++++++ satpy/etc/composites/visir.yaml | 6 +- satpy/tests/test_composites.py | 8 +- 7 files changed, 210 insertions(+), 15 deletions(-) create mode 100644 satpy/data_download.py diff --git a/doc/source/conf.py b/doc/source/conf.py index f3878f6734..3222c403a2 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -268,4 +268,5 @@ def __getattr__(cls, name): 'xarray': ('https://xarray.pydata.org/en/stable', None), 'rasterio': ('https://rasterio.readthedocs.io/en/latest', None), 'donfig': ('https://donfig.readthedocs.io/en/latest', None), + 'pooch': ('https://www.fatiando.org/pooch/latest/', None), } diff --git a/doc/source/config.rst b/doc/source/config.rst index 00e47d7fc5..f6c7c31aec 100644 --- a/doc/source/config.rst +++ b/doc/source/config.rst @@ -115,6 +115,8 @@ configuration files, they are merged in reverse order. This means "base" configuration paths should be at the end of the list and custom/user paths should be at the beginning of the list. +.. 
_data_dir_setting: + Data Directory ^^^^^^^^^^^^^^ diff --git a/satpy/composites/__init__.py b/satpy/composites/__init__.py index 527eb2f9ca..be52e5aab7 100644 --- a/satpy/composites/__init__.py +++ b/satpy/composites/__init__.py @@ -25,7 +25,6 @@ import numpy as np import xarray as xr -import satpy from satpy.dataset import DataID, combine_metadata from satpy.dataset.dataid import minimal_default_keys_config from satpy.writers import get_enhanced_image @@ -981,7 +980,8 @@ class StaticImageCompositor(GenericCompositor): Environment variables in the filename are automatically expanded """ - def __init__(self, name, filename=None, area=None, **kwargs): + def __init__(self, name, filename=None, known_hash=None, area=None, + **kwargs): """Collect custom configuration values. Args: @@ -993,23 +993,33 @@ def __init__(self, name, filename=None, area=None, **kwargs): """ if filename is None: raise ValueError("No image configured for static image compositor") - self.filename = os.path.expandvars(filename) + self.file_uri = os.path.expandvars(filename) + self._cache_filename = os.path.basename(self.file_uri) + self._cache_key = None # initialized later + self._known_hash = known_hash self.area = None if area is not None: from satpy.resample import get_area_def self.area = get_area_def(area) super(StaticImageCompositor, self).__init__(name, **kwargs) + self.register_data_files() + + def register_data_files(self): + """Tell Satpy about files we may want to download.""" + from satpy.data_download import register_file + cache_key = register_file(self.file_uri, self._cache_filename, + component_type='composites', + component_name=self.__class__.__name__, + known_hash=self._known_hash) + self._cache_key = cache_key def __call__(self, *args, **kwargs): """Call the compositor.""" from satpy import Scene - # Check if filename exists, if not then try from SATPY_ANCPATH - if not os.path.isfile(self.filename): - tmp_filename = os.path.join(satpy.config.get('data_dir'), self.filename) - 
if os.path.isfile(tmp_filename): - self.filename = tmp_filename - scn = Scene(reader='generic_image', filenames=[self.filename]) + from satpy.data_download import retrieve + local_file = retrieve(self._cache_key) + scn = Scene(reader='generic_image', filenames=[local_file]) scn.load(['image']) img = scn['image'] # use compositor parameters as extra metadata diff --git a/satpy/composites/config_loader.py b/satpy/composites/config_loader.py index 0ba48c7560..ab2513a645 100644 --- a/satpy/composites/config_loader.py +++ b/satpy/composites/config_loader.py @@ -207,6 +207,11 @@ def get_modifier(self, key, sensor_names): continue raise KeyError("Could not find modifier '{}'".format(key)) + def load_all_sensors(self): + """Load compositors for all sensors.""" + # TODO + pass + def load_compositors(self, sensor_names): """Load all compositor configs for the provided sensors. diff --git a/satpy/data_download.py b/satpy/data_download.py new file mode 100644 index 0000000000..05f38a77c0 --- /dev/null +++ b/satpy/data_download.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2021 Satpy developers +# +# This file is part of satpy. +# +# satpy is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# satpy is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# satpy. If not, see . +"""Functions and utilities for downloading ancillary data. + +TODO: Put examples here or on a new sphinx page? 
+ +""" + +import logging +import satpy +import unittest.mock + +try: + import pooch +except ImportError: + # TODO: Implement DumpPooch for local files only + pooch = None + +logger = logging.getLogger(__name__) + +FILE_REGISTRY = {} +FILE_URLS = {} + + +def register_file(url, filename, component_type=None, component_name=None, known_hash=None): + """Register file for future retrieval. + + This function only prepares Satpy to be able to download and cache the + provided file. It will not download the file. See + :func:`satpy.data_download.retrieve` for more information. + + Args: + url (str): URL where remote file can be downloaded. + filename (str): Filename used to identify and store the downloaded + file as. + component_type (str or None): Name of the type of Satpy component that + will use this file. Typically "readers", "composites", "writers", + or "enhancements" for consistency. This will be prepended to the + filename when storing the data in the cache. + component_name (str or None): Name of the Satpy component that will + use this file. In most cases this will be the name of the Python + class instead of the name of the instance + (ex. StaticImageCompositor versus '_night_background'). This will be + prepended to the filename when storing the data in the cache. + known_hash (str): Hash used to verify the file is downloaded correctly. + See https://www.fatiando.org/pooch/v1.3.0/beginner.html#hashes + for more information. If not provided then the file is not checked. + + Returns: + Cache key that can be used to retrieve the file later. The cache key + consists of the ``component_type``, ``component_name``, and provided + ``filename``. This should be passed to + :func:`satpy.data_download_retrieve` when the file will be used. 
+ + """ + if known_hash is None: + # https://www.fatiando.org/pooch/v1.3.0/advanced.html#bypassing-the-hash-check + known_hash = unittest.mock.ANY + fname = _generate_filename(filename, component_type, component_name) + + global FILE_REGISTRY + global FILE_URLS + FILE_REGISTRY[fname] = known_hash + FILE_URLS[fname] = url + return fname + + +def _generate_filename(filename, component_type, component_name): + if filename is None: + return None + path = filename + if component_name: + path = '/'.join([component_name, path]) + if component_type: + path = '/'.join([component_type, path]) + return path + + +# def retrieve(url, filename=None, component_type=None, component_name=None, +# known_hash=None, pooch_kwargs=None): +# if pooch is None: +# raise ImportError("Extra dependency library 'pooch' is required to " +# "download data files.") +# pooch_kwargs = pooch_kwargs or {} +# +# path = satpy.config.get('data_dir') +# fname = register_file(url, filename, component_type, component_name, +# known_hash) +# return pooch.retrieve(url, known_hash, fname=fname, path=path, +# **pooch_kwargs) + + +def retrieve(cache_key, pooch_kwargs=None): + """Download and cache the file associated with the provided ``cache_key``. + + Cache location is controlled by the config ``data_dir`` key. See + :ref:`data_dir_setting` for more information. + + Args: + cache_key (str): Cache key returned by + :func:`~satpy.data_download.register_file`. + pooch_kwargs (dict or None): Extra keyword arguments to pass to + :meth:`pooch.Pooch.fetch`. + + Returns: + Local path of the cached file. 
+ + + """ + if pooch is None: + raise ImportError("Extra dependency library 'pooch' is required to " + "download data files.") + pooch_kwargs = pooch_kwargs or {} + + path = satpy.config.get('data_dir') + # reuse data directory as the default URL where files can be downloaded from + pooch_obj = pooch.create(path, path, registry=FILE_REGISTRY, + urls=FILE_URLS) + return pooch_obj.fetch(cache_key, **pooch_kwargs) + + +def retrieve_all(pooch_kwargs=None): + """Find cache-able data files for Satpy and download them. + + The typical use case for this function is to download all ancillary files + before going to an environment/system that does not have internet access. + + """ + if pooch is None: + raise ImportError("Extra dependency library 'pooch' is required to " + "download data files.") + if pooch_kwargs is None: + pooch_kwargs = {} + + _find_registerable_files() + path = satpy.config.get('data_dir') + pooch_obj = pooch.create(path, path, registry=FILE_REGISTRY, + urls=FILE_URLS) + for fname in FILE_REGISTRY: + logger.info("Downloading extra data file '%s'...", fname) + pooch_obj.fetch(fname, **pooch_kwargs) + logger.info("Done downloading all extra files.") + + +def _find_registerable_files(): + """Load all Satpy components so they can be downloaded.""" + _find_registerable_files_compositors() + # TODO: Readers, writers + + +def _find_registerable_files_compositors(): + """Load all compositor configs so that files are registered. + + Compositor objects should register files when they are initialized. 
+ + """ + from satpy.composites.config_loader import CompositorLoader + composite_loader = CompositorLoader() + all_sensor_names = ['viirs', 'seviri'] # FIXME: Find a way to actually get these + composite_loader.load_compositors(all_sensor_names) diff --git a/satpy/etc/composites/visir.yaml b/satpy/etc/composites/visir.yaml index 812e22fc89..cd1d1dd3e6 100644 --- a/satpy/etc/composites/visir.yaml +++ b/satpy/etc/composites/visir.yaml @@ -419,9 +419,11 @@ composites: _night_background: compositor: !!python/name:satpy.composites.StaticImageCompositor standard_name: night_background - filename: BlackMarble_2016_01deg_geo.tif + filename: "https://neo.sci.gsfc.nasa.gov/archive/blackmarble/2016/global/BlackMarble_2016_01deg_geo.tif" + known_hash: "sha256:146c116962677ae113d9233374715686737ff97141a77cc5da69a9451315a685" # optional _night_background_hires: compositor: !!python/name:satpy.composites.StaticImageCompositor standard_name: night_background_hires - filename: BlackMarble_2016_3km_geo.tif + filename: "https://neo.sci.gsfc.nasa.gov/archive/blackmarble/2016/global/BlackMarble_2016_3km_geo.tif" + known_hash: "sha256:e915ef2a20d84e2a59e1547d3ad564463ad4bcf22bfa02e0e0b8ed1cd722e9c0" # optional diff --git a/satpy/tests/test_composites.py b/satpy/tests/test_composites.py index 9926ea67a8..49725eb634 100644 --- a/satpy/tests/test_composites.py +++ b/satpy/tests/test_composites.py @@ -892,13 +892,13 @@ def test_init(self, get_area_def): # No area defined comp = StaticImageCompositor("name", filename="foo.tif") - self.assertEqual(comp.filename, "foo.tif") + self.assertEqual(comp.file_uri, "foo.tif") self.assertIsNone(comp.area) # Area defined get_area_def.return_value = "bar" comp = StaticImageCompositor("name", filename="foo.tif", area="euro4") - self.assertEqual(comp.filename, "foo.tif") + self.assertEqual(comp.file_uri, "foo.tif") self.assertEqual(comp.area, "bar") get_area_def.assert_called_once_with("euro4") @@ -919,7 +919,7 @@ def load(self, arg): comp = 
StaticImageCompositor("name", filename="foo.tif", area="euro4") res = comp() Scene.assert_called_once_with(reader='generic_image', - filenames=[comp.filename]) + filenames=[comp.file_uri]) self.assertTrue("start_time" in res.attrs) self.assertTrue("end_time" in res.attrs) self.assertIsNone(res.attrs['sensor']) @@ -940,7 +940,7 @@ def load(self, arg): # Filename contains environment variable os.environ["TEST_IMAGE_PATH"] = "/path/to/image" comp = StaticImageCompositor("name", filename="${TEST_IMAGE_PATH}/foo.tif", area='euro4') - self.assertEqual(comp.filename, "/path/to/image/foo.tif") + self.assertEqual(comp.file_uri, "/path/to/image/foo.tif") def _enhance2dataset(dataset, convert_p=False): From 751d767d55a058125b9ee0bd32faf4eca08ac31e Mon Sep 17 00:00:00 2001 From: David Hoese Date: Thu, 4 Feb 2021 10:09:40 -0600 Subject: [PATCH 02/16] Add tests for data downloads --- satpy/data_download.py | 47 ++++++--------- satpy/tests/test_data_download.py | 95 +++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+), 31 deletions(-) create mode 100644 satpy/tests/test_data_download.py diff --git a/satpy/data_download.py b/satpy/data_download.py index 05f38a77c0..ea4a69f2a4 100644 --- a/satpy/data_download.py +++ b/satpy/data_download.py @@ -23,7 +23,6 @@ import logging import satpy -import unittest.mock try: import pooch @@ -33,8 +32,8 @@ logger = logging.getLogger(__name__) -FILE_REGISTRY = {} -FILE_URLS = {} +_FILE_REGISTRY = {} +_FILE_URLS = {} def register_file(url, filename, component_type=None, component_name=None, known_hash=None): @@ -68,15 +67,12 @@ class instead of the name of the instance :func:`satpy.data_download_retrieve` when the file will be used. 
""" - if known_hash is None: - # https://www.fatiando.org/pooch/v1.3.0/advanced.html#bypassing-the-hash-check - known_hash = unittest.mock.ANY fname = _generate_filename(filename, component_type, component_name) - global FILE_REGISTRY - global FILE_URLS - FILE_REGISTRY[fname] = known_hash - FILE_URLS[fname] = url + global _FILE_REGISTRY + global _FILE_URLS + _FILE_REGISTRY[fname] = known_hash + _FILE_URLS[fname] = url return fname @@ -91,20 +87,6 @@ def _generate_filename(filename, component_type, component_name): return path -# def retrieve(url, filename=None, component_type=None, component_name=None, -# known_hash=None, pooch_kwargs=None): -# if pooch is None: -# raise ImportError("Extra dependency library 'pooch' is required to " -# "download data files.") -# pooch_kwargs = pooch_kwargs or {} -# -# path = satpy.config.get('data_dir') -# fname = register_file(url, filename, component_type, component_name, -# known_hash) -# return pooch.retrieve(url, known_hash, fname=fname, path=path, -# **pooch_kwargs) - - def retrieve(cache_key, pooch_kwargs=None): """Download and cache the file associated with the provided ``cache_key``. 
@@ -129,8 +111,8 @@ def retrieve(cache_key, pooch_kwargs=None): path = satpy.config.get('data_dir') # reuse data directory as the default URL where files can be downloaded from - pooch_obj = pooch.create(path, path, registry=FILE_REGISTRY, - urls=FILE_URLS) + pooch_obj = pooch.create(path, path, registry=_FILE_REGISTRY, + urls=_FILE_URLS) return pooch_obj.fetch(cache_key, **pooch_kwargs) @@ -147,20 +129,21 @@ def retrieve_all(pooch_kwargs=None): if pooch_kwargs is None: pooch_kwargs = {} - _find_registerable_files() + find_registerable_files() path = satpy.config.get('data_dir') - pooch_obj = pooch.create(path, path, registry=FILE_REGISTRY, - urls=FILE_URLS) - for fname in FILE_REGISTRY: + pooch_obj = pooch.create(path, path, registry=_FILE_REGISTRY, + urls=_FILE_URLS) + for fname in _FILE_REGISTRY: logger.info("Downloading extra data file '%s'...", fname) pooch_obj.fetch(fname, **pooch_kwargs) logger.info("Done downloading all extra files.") -def _find_registerable_files(): +def find_registerable_files(): """Load all Satpy components so they can be downloaded.""" _find_registerable_files_compositors() # TODO: Readers, writers + return sorted(_FILE_REGISTRY.keys()) def _find_registerable_files_compositors(): @@ -173,3 +156,5 @@ def _find_registerable_files_compositors(): composite_loader = CompositorLoader() all_sensor_names = ['viirs', 'seviri'] # FIXME: Find a way to actually get these composite_loader.load_compositors(all_sensor_names) + +# TODO: Add MixIn class that can be used by readers and writers diff --git a/satpy/tests/test_data_download.py b/satpy/tests/test_data_download.py new file mode 100644 index 0000000000..fdc3ffaef9 --- /dev/null +++ b/satpy/tests/test_data_download.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2021 Satpy developers +# +# This file is part of satpy. 
+# +# satpy is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# satpy is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# satpy. If not, see . +"""Test for ancillary data downloading.""" + +from unittest import mock +import pytest +import yaml + +pooch = pytest.importorskip("pooch") + +README_URL = "https://raw.githubusercontent.com/pytroll/satpy/master/README.rst" + + +def _setup_custom_composite_config(base_dir): + from satpy.composites import StaticImageCompositor + composite_config = base_dir.mkdir("composites").join("visir.yaml") + with open(composite_config, 'w') as comp_file: + yaml.dump({ + "sensor_name": "visir", + "composites": { + "test_static": { + "compositor": StaticImageCompositor, + "filename": README_URL, + "known_hash": None, + }, + }, + }, comp_file) + + +def _setup_custom_configs(base_dir): + # TODO: Readers and Writers + _setup_custom_composite_config(base_dir) + + +class TestDataDownload: + """Test basic data downloading functionality.""" + + def test_find_registerable(self, tmpdir): + """Test that find_registerable finds some things.""" + import satpy + from satpy.data_download import find_registerable_files + _setup_custom_configs(tmpdir) + file_registry = {} + with satpy.config.set(config_path=[tmpdir]), \ + mock.patch('satpy.data_download._FILE_REGISTRY', file_registry): + found_files = find_registerable_files() + assert 'composites/StaticImageCompositor/README.rst' in found_files + + def test_retrieve(self, tmpdir): + """Test retrieving a single file.""" + import satpy + from satpy.data_download 
import find_registerable_files, retrieve + _setup_custom_configs(tmpdir) + file_registry = {} + with satpy.config.set(config_path=[tmpdir], data_dir=str(tmpdir)), \ + mock.patch('satpy.data_download._FILE_REGISTRY', file_registry): + comp_file = 'composites/StaticImageCompositor/README.rst' + found_files = find_registerable_files() + assert comp_file in found_files + assert not tmpdir.join(comp_file).exists() + retrieve(comp_file) + assert tmpdir.join(comp_file).exists() + + def test_retrieve_all(self, tmpdir): + """Test registering and retrieving all files.""" + import satpy + from satpy.data_download import retrieve_all + _setup_custom_configs(tmpdir) + file_registry = {} + file_urls = {} + with satpy.config.set(config_path=[tmpdir], data_dir=str(tmpdir)), \ + mock.patch('satpy.data_download._FILE_REGISTRY', file_registry), \ + mock.patch('satpy.data_download._FILE_URLS', file_urls), \ + mock.patch('satpy.data_download.find_registerable_files'): + comp_file = 'composites/StaticImageCompositor/README.rst' + file_registry[comp_file] = None + file_urls[comp_file] = README_URL + assert not tmpdir.join(comp_file).exists() + retrieve_all() + assert tmpdir.join(comp_file).exists() From 5fc30e5e3e57149e485a7b94aadd08c60bd382ae Mon Sep 17 00:00:00 2001 From: David Hoese Date: Thu, 4 Feb 2021 10:35:45 -0600 Subject: [PATCH 03/16] Fix SaticImageCompositor tests with data download mocks --- satpy/composites/__init__.py | 5 ++--- satpy/tests/test_composites.py | 6 +++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/satpy/composites/__init__.py b/satpy/composites/__init__.py index be52e5aab7..34a36143c6 100644 --- a/satpy/composites/__init__.py +++ b/satpy/composites/__init__.py @@ -995,7 +995,6 @@ def __init__(self, name, filename=None, known_hash=None, area=None, raise ValueError("No image configured for static image compositor") self.file_uri = os.path.expandvars(filename) self._cache_filename = os.path.basename(self.file_uri) - self._cache_key = None # 
initialized later self._known_hash = known_hash self.area = None if area is not None: @@ -1003,7 +1002,7 @@ def __init__(self, name, filename=None, known_hash=None, area=None, self.area = get_area_def(area) super(StaticImageCompositor, self).__init__(name, **kwargs) - self.register_data_files() + self._cache_key = self.register_data_files()[0] def register_data_files(self): """Tell Satpy about files we may want to download.""" @@ -1012,7 +1011,7 @@ def register_data_files(self): component_type='composites', component_name=self.__class__.__name__, known_hash=self._known_hash) - self._cache_key = cache_key + return [cache_key] def __call__(self, *args, **kwargs): """Call the compositor.""" diff --git a/satpy/tests/test_composites.py b/satpy/tests/test_composites.py index 49725eb634..845cd2d6d5 100644 --- a/satpy/tests/test_composites.py +++ b/satpy/tests/test_composites.py @@ -902,8 +902,10 @@ def test_init(self, get_area_def): self.assertEqual(comp.area, "bar") get_area_def.assert_called_once_with("euro4") + @mock.patch('satpy.data_download.retrieve') + @mock.patch('satpy.data_download.register_file') @mock.patch('satpy.Scene') - def test_call(self, Scene): # noqa + def test_call(self, Scene, register, retrieve): # noqa """Test the static compositing.""" from satpy.composites import StaticImageCompositor @@ -916,6 +918,8 @@ def load(self, arg): scn = MockScene() scn['image'] = img Scene.return_value = scn + register.return_value = "foo.tif" + retrieve.return_value = "foo.tif" comp = StaticImageCompositor("name", filename="foo.tif", area="euro4") res = comp() Scene.assert_called_once_with(reader='generic_image', From efbdfbe7377f5a0ef764c26e4714e0faba64eabe Mon Sep 17 00:00:00 2001 From: David Hoese Date: Thu, 4 Feb 2021 12:36:40 -0600 Subject: [PATCH 04/16] Add pooch as a hard requirement --- continuous_integration/environment.yaml | 1 + doc/rtd_environment.yml | 1 + satpy/data_download.py | 12 +----------- setup.py | 3 ++- 4 files changed, 5 insertions(+), 12 
deletions(-) diff --git a/continuous_integration/environment.yaml b/continuous_integration/environment.yaml index 5c6a06ebff..f00ee0b8e2 100644 --- a/continuous_integration/environment.yaml +++ b/continuous_integration/environment.yaml @@ -39,6 +39,7 @@ dependencies: - fsspec - pylibtiff - python-geotiepoints + - pooch - pip - pip: - trollsift diff --git a/doc/rtd_environment.yml b/doc/rtd_environment.yml index 6d9baab780..66158ce5b4 100644 --- a/doc/rtd_environment.yml +++ b/doc/rtd_environment.yml @@ -10,6 +10,7 @@ dependencies: - graphviz - numpy - pillow + - pooch - pyresample - setuptools - setuptools_scm diff --git a/satpy/data_download.py b/satpy/data_download.py index ea4a69f2a4..b99e8306d9 100644 --- a/satpy/data_download.py +++ b/satpy/data_download.py @@ -24,11 +24,7 @@ import logging import satpy -try: - import pooch -except ImportError: - # TODO: Implement DumpPooch for local files only - pooch = None +import pooch logger = logging.getLogger(__name__) @@ -104,9 +100,6 @@ def retrieve(cache_key, pooch_kwargs=None): """ - if pooch is None: - raise ImportError("Extra dependency library 'pooch' is required to " - "download data files.") pooch_kwargs = pooch_kwargs or {} path = satpy.config.get('data_dir') @@ -123,9 +116,6 @@ def retrieve_all(pooch_kwargs=None): before going to an environment/system that does not have internet access. 
""" - if pooch is None: - raise ImportError("Extra dependency library 'pooch' is required to " - "download data files.") if pooch_kwargs is None: pooch_kwargs = {} diff --git a/setup.py b/setup.py index 8b744f5912..0279070a72 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,8 @@ requires = ['numpy >=1.13', 'pillow', 'pyresample >=1.11.0', 'trollsift', 'trollimage >1.10.1', 'pykdtree', 'pyyaml', 'xarray >=0.10.1, !=0.13.0', - 'dask[array] >=0.17.1', 'pyproj', 'zarr', 'donfig', 'appdirs'] + 'dask[array] >=0.17.1', 'pyproj', 'zarr', 'donfig', 'appdirs', + 'pooch'] test_requires = ['behave', 'h5py', 'netCDF4', 'pyhdf', 'imageio', 'libtiff', 'rasterio', 'geoviews', 'trollimage', 'fsspec'] From f8064cc5d3a33ae3ec0ef1b8cbfe8581d29c977c Mon Sep 17 00:00:00 2001 From: David Hoese Date: Thu, 4 Feb 2021 13:30:20 -0600 Subject: [PATCH 05/16] Add helper method to CompositeLoader for getting all sensor names --- satpy/_config.py | 5 +++-- satpy/composites/config_loader.py | 21 +++++++++++++++------ satpy/data_download.py | 2 +- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/satpy/_config.py b/satpy/_config.py index c8df3753bd..49a5064c92 100644 --- a/satpy/_config.py +++ b/satpy/_config.py @@ -116,13 +116,14 @@ def config_search_paths(filename, search_dirs=None, **kwargs): return paths[::-1] -def glob_config(pattern): +def glob_config(pattern, search_dirs=None): """Return glob results for all possible configuration locations. Note: This method does not check the configuration "base" directory if the pattern includes a subdirectory. This is done for performance since this is usually used to find *all* configs for a certain component. 
""" - patterns = config_search_paths(pattern, check_exists=False) + patterns = config_search_paths(pattern, search_dirs=search_dirs, + check_exists=False) for pattern_fn in patterns: for path in glob.iglob(pattern_fn): yield path diff --git a/satpy/composites/config_loader.py b/satpy/composites/config_loader.py index ab2513a645..a0ae52a722 100644 --- a/satpy/composites/config_loader.py +++ b/satpy/composites/config_loader.py @@ -24,7 +24,8 @@ from yaml import UnsafeLoader from satpy import DatasetDict, DataQuery, DataID -from satpy._config import get_entry_points_config_dirs, config_search_paths +from satpy._config import (get_entry_points_config_dirs, config_search_paths, + glob_config) from satpy.utils import recursive_dict_update from satpy.dataset.dataid import minimal_default_keys_config @@ -175,6 +176,19 @@ def __init__(self): # sensor -> { dict of DataID key information } self._sensor_dataid_keys = {} + @classmethod + def all_composite_sensors(cls): + """Get all sensor names from available composite configs.""" + paths = get_entry_points_config_dirs('satpy.composites') + composite_configs = glob_config( + os.path.join("composites", "*.yaml"), + search_dirs=paths) + yaml_names = set([os.path.splitext(os.path.basename(fn))[0] + for fn in composite_configs]) + non_sensor_yamls = ('visir',) + sensor_names = [x for x in yaml_names if x not in non_sensor_yamls] + return sensor_names + def load_sensor_composites(self, sensor_name): """Load all compositor configs for the provided sensor.""" config_filename = sensor_name + ".yaml" @@ -207,11 +221,6 @@ def get_modifier(self, key, sensor_names): continue raise KeyError("Could not find modifier '{}'".format(key)) - def load_all_sensors(self): - """Load compositors for all sensors.""" - # TODO - pass - def load_compositors(self, sensor_names): """Load all compositor configs for the provided sensors. 
diff --git a/satpy/data_download.py b/satpy/data_download.py index b99e8306d9..d3f17b28a5 100644 --- a/satpy/data_download.py +++ b/satpy/data_download.py @@ -144,7 +144,7 @@ def _find_registerable_files_compositors(): """ from satpy.composites.config_loader import CompositorLoader composite_loader = CompositorLoader() - all_sensor_names = ['viirs', 'seviri'] # FIXME: Find a way to actually get these + all_sensor_names = composite_loader.all_composite_sensors() composite_loader.load_compositors(all_sensor_names) # TODO: Add MixIn class that can be used by readers and writers From a1ff9bdbcb49caff727f8efada8a1161d664d889 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Fri, 5 Feb 2021 14:08:52 -0600 Subject: [PATCH 06/16] Refactor StaticImageCompositor to use 'url' and allow for local files --- satpy/composites/__init__.py | 55 ++++++++++++++++++++++++------- satpy/etc/composites/visir.yaml | 4 +-- satpy/tests/test_composites.py | 32 +++++++++++++----- satpy/tests/test_data_download.py | 2 +- 4 files changed, 71 insertions(+), 22 deletions(-) diff --git a/satpy/composites/__init__.py b/satpy/composites/__init__.py index 34a36143c6..99c0acb460 100644 --- a/satpy/composites/__init__.py +++ b/satpy/composites/__init__.py @@ -980,21 +980,34 @@ class StaticImageCompositor(GenericCompositor): Environment variables in the filename are automatically expanded """ - def __init__(self, name, filename=None, known_hash=None, area=None, + def __init__(self, name, filename=None, url=None, known_hash=None, area=None, **kwargs): """Collect custom configuration values. Args: - filename (str): Filename of the image to load, environment - variables are expanded + filename (str): Name to use when storing and referring to the file + in the ``data_dir`` cache. If ``url`` is provided (preferred), + then this is used as the filename in the cache and will be + appended to ``/composites//``. 
If + ``url`` is provided and ``filename`` is not then the + ``filename`` will be guessed from the ``url``. + If ``url`` is not provided, then it is assumed ``filename`` + refers to a local file with an absolute path. + Environment variables are expanded. + url (str): URL to remote file. When the composite is created the + file will be downloaded and cached in Satpy's ``data_dir``. + Environment variables are expanded. + known_hash (str or None): Hash of the remote file used to verify + a successful download. If not provided then the download will + not be verified. See :func:`satpy.data_download.register_file` + for more information. area (str): Name of area definition for the image. Optional - for images with built-in area definitions (geotiff) + for images with built-in area definitions (geotiff). """ - if filename is None: - raise ValueError("No image configured for static image compositor") - self.file_uri = os.path.expandvars(filename) - self._cache_filename = os.path.basename(self.file_uri) + filename, url = self._get_cache_filename_and_url(filename, url) + self._cache_filename = filename + self._url = url self._known_hash = known_hash self.area = None if area is not None: @@ -1004,20 +1017,40 @@ def __init__(self, name, filename=None, known_hash=None, area=None, super(StaticImageCompositor, self).__init__(name, **kwargs) self._cache_key = self.register_data_files()[0] + @staticmethod + def _get_cache_filename_and_url(filename, url): + if filename is not None: + filename = os.path.expanduser(os.path.expandvars(filename)) + if url is not None: + url = os.path.expandvars(url) + if filename is None: + filename = os.path.basename(url) + if url is None and not os.path.isabs(filename): + raise ValueError("StaticImageCompositor needs a remote 'url' " + "or absolute path to 'filename'.") + return filename, url + def register_data_files(self): """Tell Satpy about files we may want to download.""" + if os.path.isabs(self._cache_filename): + return [None] from 
satpy.data_download import register_file - cache_key = register_file(self.file_uri, self._cache_filename, + cache_key = register_file(self._url, self._cache_filename, component_type='composites', component_name=self.__class__.__name__, known_hash=self._known_hash) return [cache_key] + def _retrieve_data_file(self): + from satpy.data_download import retrieve + if os.path.isabs(self._cache_filename): + return self._cache_filename + return retrieve(self._cache_key) + def __call__(self, *args, **kwargs): """Call the compositor.""" from satpy import Scene - from satpy.data_download import retrieve - local_file = retrieve(self._cache_key) + local_file = self._retrieve_data_file() scn = Scene(reader='generic_image', filenames=[local_file]) scn.load(['image']) img = scn['image'] diff --git a/satpy/etc/composites/visir.yaml b/satpy/etc/composites/visir.yaml index cd1d1dd3e6..b00cfe8dac 100644 --- a/satpy/etc/composites/visir.yaml +++ b/satpy/etc/composites/visir.yaml @@ -419,11 +419,11 @@ composites: _night_background: compositor: !!python/name:satpy.composites.StaticImageCompositor standard_name: night_background - filename: "https://neo.sci.gsfc.nasa.gov/archive/blackmarble/2016/global/BlackMarble_2016_01deg_geo.tif" + url: "https://neo.sci.gsfc.nasa.gov/archive/blackmarble/2016/global/BlackMarble_2016_01deg_geo.tif" known_hash: "sha256:146c116962677ae113d9233374715686737ff97141a77cc5da69a9451315a685" # optional _night_background_hires: compositor: !!python/name:satpy.composites.StaticImageCompositor standard_name: night_background_hires - filename: "https://neo.sci.gsfc.nasa.gov/archive/blackmarble/2016/global/BlackMarble_2016_3km_geo.tif" + url: "https://neo.sci.gsfc.nasa.gov/archive/blackmarble/2016/global/BlackMarble_2016_3km_geo.tif" known_hash: "sha256:e915ef2a20d84e2a59e1547d3ad564463ad4bcf22bfa02e0e0b8ed1cd722e9c0" # optional diff --git a/satpy/tests/test_composites.py b/satpy/tests/test_composites.py index 845cd2d6d5..1286fdf66e 100644 --- 
a/satpy/tests/test_composites.py +++ b/satpy/tests/test_composites.py @@ -908,6 +908,7 @@ def test_init(self, get_area_def): def test_call(self, Scene, register, retrieve): # noqa """Test the static compositing.""" from satpy.composites import StaticImageCompositor + remote_tif = "http://example.com/foo.tif" class MockScene(dict): def load(self, arg): @@ -918,12 +919,27 @@ def load(self, arg): scn = MockScene() scn['image'] = img Scene.return_value = scn - register.return_value = "foo.tif" - retrieve.return_value = "foo.tif" - comp = StaticImageCompositor("name", filename="foo.tif", area="euro4") + # absolute path to local file + comp = StaticImageCompositor("name", filename="/foo.tif", area="euro4") + res = comp() + Scene.assert_called_once_with(reader='generic_image', + filenames=['/foo.tif']) + register.assert_not_called + retrieve.assert_not_called + self.assertTrue("start_time" in res.attrs) + self.assertTrue("end_time" in res.attrs) + self.assertIsNone(res.attrs['sensor']) + self.assertTrue('modifiers' not in res.attrs) + self.assertTrue('calibration' not in res.attrs) + + # remote file with local cached version + Scene.reset_mock() + register.return_value = "data_dir/foo.tif" + retrieve.return_value = "data_dir/foo.tif" + comp = StaticImageCompositor("name", url=remote_tif, area="euro4") res = comp() Scene.assert_called_once_with(reader='generic_image', - filenames=[comp.file_uri]) + filenames=['data_dir/foo.tif']) self.assertTrue("start_time" in res.attrs) self.assertTrue("end_time" in res.attrs) self.assertIsNone(res.attrs['sensor']) @@ -932,19 +948,19 @@ def load(self, arg): # Non-georeferenced image, no area given img.attrs.pop('area') - comp = StaticImageCompositor("name", filename="foo.tif") + comp = StaticImageCompositor("name", filename="/foo.tif") with self.assertRaises(AttributeError): - res = comp() + comp() # Non-georeferenced image, area given - comp = StaticImageCompositor("name", filename="foo.tif", area='euro4') + comp = 
StaticImageCompositor("name", filename="/foo.tif", area='euro4') res = comp() self.assertEqual(res.attrs['area'].area_id, 'euro4') # Filename contains environment variable os.environ["TEST_IMAGE_PATH"] = "/path/to/image" comp = StaticImageCompositor("name", filename="${TEST_IMAGE_PATH}/foo.tif", area='euro4') - self.assertEqual(comp.file_uri, "/path/to/image/foo.tif") + self.assertEqual(comp._cache_filename, "/path/to/image/foo.tif") def _enhance2dataset(dataset, convert_p=False): diff --git a/satpy/tests/test_data_download.py b/satpy/tests/test_data_download.py index fdc3ffaef9..651c05af2e 100644 --- a/satpy/tests/test_data_download.py +++ b/satpy/tests/test_data_download.py @@ -35,7 +35,7 @@ def _setup_custom_composite_config(base_dir): "composites": { "test_static": { "compositor": StaticImageCompositor, - "filename": README_URL, + "url": README_URL, "known_hash": None, }, }, From 7cc966c13979eb937165ca1cfb06dda1faa85501 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Fri, 5 Feb 2021 15:40:17 -0600 Subject: [PATCH 07/16] Add DataDownloadMixin class for readers and writers --- satpy/data_download.py | 128 +++++++++++++++++++++++++++++- satpy/tests/test_data_download.py | 85 +++++++++++++++++++- 2 files changed, 210 insertions(+), 3 deletions(-) diff --git a/satpy/data_download.py b/satpy/data_download.py index d3f17b28a5..1510f1fec1 100644 --- a/satpy/data_download.py +++ b/satpy/data_download.py @@ -21,6 +21,7 @@ """ +import os import logging import satpy @@ -132,7 +133,8 @@ def retrieve_all(pooch_kwargs=None): def find_registerable_files(): """Load all Satpy components so they can be downloaded.""" _find_registerable_files_compositors() - # TODO: Readers, writers + _find_registerable_files_readers() + _find_registerable_files_writers() return sorted(_FILE_REGISTRY.keys()) @@ -147,4 +149,126 @@ def _find_registerable_files_compositors(): all_sensor_names = composite_loader.all_composite_sensors() composite_loader.load_compositors(all_sensor_names) -# TODO: Add 
MixIn class that can be used by readers and writers + +def _find_registerable_files_readers(): + """Load all readers so that files are registered.""" + import yaml + from satpy.readers import configs_for_reader, load_reader + for reader_configs in configs_for_reader(): + try: + load_reader(reader_configs) + except (ModuleNotFoundError, yaml.YAMLError): + continue + + +def _find_registerable_files_writers(): + """Load all writers so that files are registered.""" + from satpy.writers import configs_for_writer, load_writer_configs + for writer_configs in configs_for_writer(): + load_writer_configs(writer_configs) + + +class DataDownloadMixin: + """Mixin class for Satpy components to download files. + + This class simplifies the logic needed to download and cache data files + needed for operations in a Satpy component (readers, writers, etc). It + does this in a two step process where files that might be downloaded are + "registered" and then "retrieved" when they need to be used. + + To use this class include it as one of the subclasses of your reader, + writer, or other Satpy component. Then in the ``__init__`` method, + initialize the data file info storage and call the ``register_data_files`` + function during initialization:: + + from satpy.readers.yaml_reader import FileYAMLReader + from satpy.data_download import DataDownloadMixin + + class MyReader(FileYAMLReader, DataDownloadMixin): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.data_files = [] + self.register_data_files() + + This class expects data files to be configured in either a + ``self.info['data_files']`` (standard for readers/writers) or + ``self.config['data_files']`` list. The ``data_files`` item + itself is a list of dictionaries. This information can also be + passed directly to ``register_data_files`` for more complex cases. 
+ In YAML, for a reader, this might look like this:: + + reader: + name: abi_l1b + short_name: ABI L1b + long_name: GOES-R ABI Level 1b + ... other metadata ... + data_files: + - url: "https://example.com/my_data_file.dat" + - url: "https://raw.githubusercontent.com/pytroll/satpy/master/README.rst" + known_hash: "sha256:5891286b63e7745de08c4b0ac204ad44cfdb9ab770309debaba90308305fa759" + - url: "https://raw.githubusercontent.com/pytroll/satpy/master/RELEASING.md" + filename: "satpy_releasing.md" + known_hash: null + + In this example we register two files that might be downloaded. + If ``known_hash`` is not provided or None (null in YAML) then the data + file will not be checked for validity when downloaded. See + :func:`~satpy.data_download.register_file` for more information. You can + optionally specify ``filename`` to define the in-cache name when this file + is downloaded. This can be useful in cases when the filename can not be + easily determined from the URL. + + When it comes time to needing the file, you can retrieve the local path + by calling ``~satpy.data_download.retrieve(cache_key)`` with the + "cache key" generated during registration. These keys will be in the + format: ``<component_type>/<component_name>/<filename>``. For a + reader with a class named ``MySensorReader`` this would be + ``readers/MySensorReader/satpy_release.md``. See + :meth:`~satpy.data_download.DataDownloadMixin.retrieve_data_file`. + + This Mixin is not the only way to register and download files for a + Satpy component, but is the most generic and flexible. Feel free to + use the :func:`~satpy.data_download.register_file` and + :func:`~satpy.data_download.retrieve` functions directly. + However, :meth:`~satpy.data_download.find_registerable_files` must also + be updated to support your component (if files are not register during + initialization).
+ + """ + + DATA_FILE_COMPONENTS = { + 'reader': 'readers', + 'writer': 'writers', + } + + @property + def _data_file_component_type(self): + cls_name = self.__class__.__name__.lower() + for cls_name_sub, comp_type in self.DATA_FILE_COMPONENTS.items(): + if cls_name_sub in cls_name: + return comp_type + return 'other' + + def register_data_files(self, data_files=None): + """Register a series of files that may be downloaded later. + + See :class:`~satpy.data_download.DataDownloadMixin` for more + information on the assumptions and structure of the data file + configuration dictionary. + + """ + comp_type = self._data_file_component_type + if data_files is None: + df_parent = getattr(self, 'info', self.config) + data_files = df_parent.get('data_files', []) + for data_file_entry in data_files: + cache_key = self._register_data_file(data_file_entry, comp_type) + self.data_files.append(cache_key) + + def _register_data_file(self, data_file_entry, comp_type): + url = data_file_entry['url'] + filename = data_file_entry.get('filename', os.path.basename(url)) + known_hash = data_file_entry.get('known_hash') + return register_file(url, filename, component_type=comp_type, + component_name=self.__class__.__name__, + known_hash=known_hash) diff --git a/satpy/tests/test_data_download.py b/satpy/tests/test_data_download.py index 651c05af2e..7a51e97334 100644 --- a/satpy/tests/test_data_download.py +++ b/satpy/tests/test_data_download.py @@ -18,6 +18,11 @@ """Test for ancillary data downloading.""" from unittest import mock + +from satpy.readers.yaml_reader import FileYAMLReader +from satpy.writers import Writer +from satpy.data_download import DataDownloadMixin + import pytest import yaml @@ -42,9 +47,83 @@ def _setup_custom_composite_config(base_dir): }, comp_file) +class FakeMixedReader(FileYAMLReader, DataDownloadMixin): + """Fake reader that uses the data download mixin.""" + + def __init__(self, *args, **kwargs): + """Initialize reader and data downloading.""" + 
super().__init__(*args, **kwargs) + self.data_files = [] + self.register_data_files() + + @property + def start_time(self): + """Start time of the reader.""" + return None + + @property + def end_time(self): + """End time of the reader.""" + return None + + def filter_selected_filenames(self, filenames): + """Filter provided filenames by parameters in reader configuration.""" + return filenames + + def load(self, dataset_keys): + """Load some data.""" + return {} + + +def _setup_custom_reader_config(base_dir): + reader_config = base_dir.mkdir("readers").join("fake.yaml") + with open(reader_config, 'wt') as comp_file: + # abstract base classes can't be converted so we do raw string + comp_file.write(""" +reader: + name: "fake" + reader: !!python/name:satpy.tests.test_data_download.FakeMixedReader + data_files: + - url: {} + known_hash: null + - url: {} + filename: "README2.rst" + known_hash: null +file_types: {{}} +""".format(README_URL, README_URL)) + + +class FakeMixedWriter(Writer, DataDownloadMixin): + """Fake reader that uses the data download mixin.""" + + def __init__(self, *args, **kwargs): + """Initialize writer and data downloading.""" + super().__init__(*args, **kwargs) + self.data_files = [] + self.register_data_files() + + +def _setup_custom_writer_config(base_dir): + writer_config = base_dir.mkdir("writers").join("fake.yaml") + with open(writer_config, 'wt') as comp_file: + # abstract base classes can't be converted so we do raw string + comp_file.write(""" +writer: + name: "fake" + writer: !!python/name:satpy.tests.test_data_download.FakeMixedWriter + data_files: + - url: {} + known_hash: null + - url: {} + filename: "README2.rst" + known_hash: null +""".format(README_URL, README_URL)) + + def _setup_custom_configs(base_dir): - # TODO: Readers and Writers _setup_custom_composite_config(base_dir) + _setup_custom_reader_config(base_dir) + _setup_custom_writer_config(base_dir) class TestDataDownload: @@ -60,6 +139,10 @@ def test_find_registerable(self, 
tmpdir): mock.patch('satpy.data_download._FILE_REGISTRY', file_registry): found_files = find_registerable_files() assert 'composites/StaticImageCompositor/README.rst' in found_files + assert 'readers/FakeMixedReader/README.rst' in found_files + assert 'readers/FakeMixedReader/README2.rst' in found_files + assert 'writers/FakeMixedWriter/README.rst' in found_files + assert 'writers/FakeMixedWriter/README2.rst' in found_files def test_retrieve(self, tmpdir): """Test retrieving a single file.""" From 97982064c7b3b218bf7476a6c663213ad1ad445d Mon Sep 17 00:00:00 2001 From: David Hoese Date: Fri, 5 Feb 2021 16:03:20 -0600 Subject: [PATCH 08/16] Fix various style issues in composite tests and fix static image tests --- satpy/composites/__init__.py | 2 +- satpy/tests/test_composites.py | 66 ++++++++++++++++++---------------- 2 files changed, 36 insertions(+), 32 deletions(-) diff --git a/satpy/composites/__init__.py b/satpy/composites/__init__.py index 99c0acb460..9b04db3986 100644 --- a/satpy/composites/__init__.py +++ b/satpy/composites/__init__.py @@ -1025,7 +1025,7 @@ def _get_cache_filename_and_url(filename, url): url = os.path.expandvars(url) if filename is None: filename = os.path.basename(url) - if url is None and not os.path.isabs(filename): + if url is None and (filename is None or not os.path.isabs(filename)): raise ValueError("StaticImageCompositor needs a remote 'url' " "or absolute path to 'filename'.") return filename, url diff --git a/satpy/tests/test_composites.py b/satpy/tests/test_composites.py index 1286fdf66e..3de80935bd 100644 --- a/satpy/tests/test_composites.py +++ b/satpy/tests/test_composites.py @@ -721,13 +721,13 @@ def test_concat_datasets(self): num_bands = len(res.bands) self.assertEqual(num_bands, 1) self.assertEqual(res.shape[0], num_bands) - self.assertTrue(res.bands[0] == 'L') + self.assertEqual(res.bands[0], 'L') res = self.comp._concat_datasets([self.all_valid, self.all_valid], 'LA') num_bands = len(res.bands) self.assertEqual(num_bands, 
2) self.assertEqual(res.shape[0], num_bands) - self.assertTrue(res.bands[0] == 'L') - self.assertTrue(res.bands[1] == 'A') + self.assertEqual(res.bands[0], 'L') + self.assertEqual(res.bands[1], 'A') self.assertRaises(IncompatibleAreas, self.comp._concat_datasets, [self.all_valid, self.wrong_shape], 'LA') @@ -742,10 +742,10 @@ def test_get_sensors(self): dset2 = self.first_invalid dset2.attrs['sensor'] = 'bar' res = self.comp._get_sensors([dset1, dset2]) - self.assertTrue('foo' in res) - self.assertTrue('bar' in res) + self.assertIn('foo', res) + self.assertIn('bar', res) self.assertEqual(len(res), 2) - self.assertTrue(isinstance(res, set)) + self.assertIsInstance(res, set) @mock.patch('satpy.composites.GenericCompositor._get_sensors') @mock.patch('satpy.composites.combine_metadata') @@ -790,11 +790,11 @@ def test_call(self): res = self.comp([self.all_valid, self.first_invalid], **attrs) # Verify attributes self.assertEqual(res.attrs.get('sensor'), 'foo') - self.assertTrue('foo' in res.attrs) + self.assertIn('foo', res.attrs) self.assertEqual(res.attrs.get('foo'), 'bar') - self.assertTrue('units' not in res.attrs) - self.assertTrue('calibration' not in res.attrs) - self.assertTrue('modifiers' not in res.attrs) + self.assertNotIn('units', res.attrs) + self.assertNotIn('calibration', res.attrs) + self.assertNotIn('modifiers', res.attrs) self.assertIsNone(res.attrs['wavelength']) self.assertEqual(res.attrs['mode'], 'LA') self.assertEqual(res.attrs['resolution'], 333) @@ -888,17 +888,21 @@ def test_init(self, get_area_def): # No filename given raises ValueError with self.assertRaises(ValueError): - comp = StaticImageCompositor("name") + StaticImageCompositor("name") + + # No absolute filename and no URL + with self.assertRaises(ValueError): + StaticImageCompositor("name", filename="foo.tif") # No area defined - comp = StaticImageCompositor("name", filename="foo.tif") - self.assertEqual(comp.file_uri, "foo.tif") + comp = StaticImageCompositor("name", filename="/foo.tif") 
+ self.assertEqual(comp._cache_filename, "/foo.tif") self.assertIsNone(comp.area) # Area defined get_area_def.return_value = "bar" - comp = StaticImageCompositor("name", filename="foo.tif", area="euro4") - self.assertEqual(comp.file_uri, "foo.tif") + comp = StaticImageCompositor("name", filename="/foo.tif", area="euro4") + self.assertEqual(comp._cache_filename, "/foo.tif") self.assertEqual(comp.area, "bar") get_area_def.assert_called_once_with("euro4") @@ -924,13 +928,13 @@ def load(self, arg): res = comp() Scene.assert_called_once_with(reader='generic_image', filenames=['/foo.tif']) - register.assert_not_called - retrieve.assert_not_called - self.assertTrue("start_time" in res.attrs) - self.assertTrue("end_time" in res.attrs) + register.assert_not_called() + retrieve.assert_not_called() + self.assertIn("start_time", res.attrs) + self.assertIn("end_time", res.attrs) self.assertIsNone(res.attrs['sensor']) - self.assertTrue('modifiers' not in res.attrs) - self.assertTrue('calibration' not in res.attrs) + self.assertNotIn('modifiers', res.attrs) + self.assertNotIn('calibration', res.attrs) # remote file with local cached version Scene.reset_mock() @@ -940,11 +944,11 @@ def load(self, arg): res = comp() Scene.assert_called_once_with(reader='generic_image', filenames=['data_dir/foo.tif']) - self.assertTrue("start_time" in res.attrs) - self.assertTrue("end_time" in res.attrs) + self.assertIn("start_time", res.attrs) + self.assertIn("end_time", res.attrs) self.assertIsNone(res.attrs['sensor']) - self.assertTrue('modifiers' not in res.attrs) - self.assertTrue('calibration' not in res.attrs) + self.assertNotIn('modifiers', res.attrs) + self.assertNotIn('calibration', res.attrs) # Non-georeferenced image, no area given img.attrs.pop('area') @@ -1178,7 +1182,7 @@ def test_call(self): with dask.config.set(scheduler=CustomScheduler(max_computes=0)): comp = MaskingCompositor("name", conditions=conditions_v1) res = comp([data, ct_data]) - self.assertTrue(res.mode == 'LA') + 
self.assertEqual(res.mode, 'LA') np.testing.assert_allclose(res.sel(bands='L'), reference_data) np.testing.assert_allclose(res.sel(bands='A'), reference_alpha) @@ -1186,7 +1190,7 @@ def test_call(self): with dask.config.set(scheduler=CustomScheduler(max_computes=0)): comp = MaskingCompositor("name", conditions=conditions_v2) res = comp([data, ct_data]) - self.assertTrue(res.mode == 'LA') + self.assertEqual(res.mode, 'LA') np.testing.assert_allclose(res.sel(bands='L'), reference_data) np.testing.assert_allclose(res.sel(bands='A'), reference_alpha) @@ -1195,7 +1199,7 @@ def test_call(self): with dask.config.set(scheduler=CustomScheduler(max_computes=0)): comp = MaskingCompositor("name", conditions=conditions_v2) res = comp([data, ct_data]) - self.assertTrue(res.mode == 'LA') + self.assertEqual(res.mode, 'LA') np.testing.assert_allclose(res.sel(bands='L'), reference_data) np.testing.assert_allclose(res.sel(bands='A'), reference_alpha) @@ -1205,7 +1209,7 @@ def test_call(self): with dask.config.set(scheduler=CustomScheduler(max_computes=0)): comp = MaskingCompositor("name", conditions=conditions_v3) res = comp([data, ct_data_v3]) - self.assertTrue(res.mode == 'LA') + self.assertEqual(res.mode, 'LA') np.testing.assert_allclose(res.sel(bands='L'), reference_data_v3) np.testing.assert_allclose(res.sel(bands='A'), reference_alpha_v3) @@ -1230,7 +1234,7 @@ def test_call(self): with dask.config.set(scheduler=CustomScheduler(max_computes=0)): comp = MaskingCompositor("name", conditions=conditions_v1) res = comp([data, ct_data]) - self.assertTrue(res.mode == 'RGBA') + self.assertEqual(res.mode, 'RGBA') np.testing.assert_allclose(res.sel(bands='R'), data.sel(bands='R').where(ct_data > 1)) np.testing.assert_allclose(res.sel(bands='G'), @@ -1249,7 +1253,7 @@ def test_call(self): with dask.config.set(scheduler=CustomScheduler(max_computes=0)): comp = MaskingCompositor("name", conditions=conditions_v2) res = comp([data, ct_data]) - self.assertTrue(res.mode == 'RGBA') + 
self.assertEqual(res.mode, 'RGBA') np.testing.assert_allclose(res.sel(bands='R'), data.sel(bands='R').where(ct_data > 1)) np.testing.assert_allclose(res.sel(bands='G'), From 3dd789ecde6392b9b122400b3ebe4e2f73bbc54d Mon Sep 17 00:00:00 2001 From: David Hoese Date: Fri, 5 Feb 2021 16:17:37 -0600 Subject: [PATCH 09/16] Fix data download find registerables when writer can't be loaded --- satpy/data_download.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/satpy/data_download.py b/satpy/data_download.py index 1510f1fec1..33eaebb57b 100644 --- a/satpy/data_download.py +++ b/satpy/data_download.py @@ -165,7 +165,10 @@ def _find_registerable_files_writers(): """Load all writers so that files are registered.""" from satpy.writers import configs_for_writer, load_writer_configs for writer_configs in configs_for_writer(): - load_writer_configs(writer_configs) + try: + load_writer_configs(writer_configs) + except ValueError: + continue class DataDownloadMixin: From 0ae8f1fe304fae6f6d384641da3af10541bb9cf4 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Tue, 9 Feb 2021 19:57:10 -0600 Subject: [PATCH 10/16] Add DataDownloadMixin for default base reader and writer classes --- satpy/composites/__init__.py | 19 +++++----- satpy/data_download.py | 57 ++++++++++++++--------------- satpy/readers/yaml_reader.py | 4 ++- satpy/tests/test_data_download.py | 60 +++++-------------------------- satpy/writers/__init__.py | 4 ++- 5 files changed, 51 insertions(+), 93 deletions(-) diff --git a/satpy/composites/__init__.py b/satpy/composites/__init__.py index 9b04db3986..0f24fbab4a 100644 --- a/satpy/composites/__init__.py +++ b/satpy/composites/__init__.py @@ -27,6 +27,7 @@ from satpy.dataset import DataID, combine_metadata from satpy.dataset.dataid import minimal_default_keys_config +from satpy.data_download import DataDownloadMixin from satpy.writers import get_enhanced_image @@ -970,7 +971,7 @@ def __call__(self, projectables, *args, **kwargs): *args, **kwargs) -class 
StaticImageCompositor(GenericCompositor): +class StaticImageCompositor(GenericCompositor, DataDownloadMixin): """A compositor that loads a static image from disk. If the filename passed to this compositor is not valid then @@ -1015,7 +1016,8 @@ def __init__(self, name, filename=None, url=None, known_hash=None, area=None, self.area = get_area_def(area) super(StaticImageCompositor, self).__init__(name, **kwargs) - self._cache_key = self.register_data_files()[0] + cache_keys = self.register_data_files([]) + self._cache_key = cache_keys[0] @staticmethod def _get_cache_filename_and_url(filename, url): @@ -1030,16 +1032,15 @@ def _get_cache_filename_and_url(filename, url): "or absolute path to 'filename'.") return filename, url - def register_data_files(self): + def register_data_files(self, data_files): """Tell Satpy about files we may want to download.""" if os.path.isabs(self._cache_filename): return [None] - from satpy.data_download import register_file - cache_key = register_file(self._url, self._cache_filename, - component_type='composites', - component_name=self.__class__.__name__, - known_hash=self._known_hash) - return [cache_key] + return super().register_data_files([{ + 'url': self._url, + 'known_hash': self._known_hash, + 'filename': self._cache_filename, + }]) def _retrieve_data_file(self): from satpy.data_download import retrieve diff --git a/satpy/data_download.py b/satpy/data_download.py index 33eaebb57b..b88f61af67 100644 --- a/satpy/data_download.py +++ b/satpy/data_download.py @@ -15,11 +15,7 @@ # # You should have received a copy of the GNU General Public License along with # satpy. If not, see . -"""Functions and utilities for downloading ancillary data. - -TODO: Put examples here or on a new sphinx page? 
- -""" +"""Functions and utilities for downloading ancillary data.""" import os import logging @@ -33,7 +29,7 @@ _FILE_URLS = {} -def register_file(url, filename, component_type=None, component_name=None, known_hash=None): +def register_file(url, filename, component_type=None, known_hash=None): """Register file for future retrieval. This function only prepares Satpy to be able to download and cache the @@ -48,23 +44,18 @@ def register_file(url, filename, component_type=None, component_name=None, known will use this file. Typically "readers", "composites", "writers", or "enhancements" for consistency. This will be prepended to the filename when storing the data in the cache. - component_name (str or None): Name of the Satpy component that will - use this file. In most cases this will be the name of the Python - class instead of the name of the instance - (ex. StaticImageCompositor versus '_night_background'). This will be - prepended to the filename when storing the data in the cache. known_hash (str): Hash used to verify the file is downloaded correctly. See https://www.fatiando.org/pooch/v1.3.0/beginner.html#hashes for more information. If not provided then the file is not checked. Returns: Cache key that can be used to retrieve the file later. The cache key - consists of the ``component_type``, ``component_name``, and provided - ``filename``. This should be passed to - :func:`satpy.data_download_retrieve` when the file will be used. + consists of the ``component_type`` and provided ``filename``. This + should be passed to :func:`satpy.data_download_retrieve` when the + file will be used. 
""" - fname = _generate_filename(filename, component_type, component_name) + fname = _generate_filename(filename, component_type) global _FILE_REGISTRY global _FILE_URLS @@ -73,12 +64,10 @@ class instead of the name of the instance return fname -def _generate_filename(filename, component_type, component_name): +def _generate_filename(filename, component_type): if filename is None: return None path = filename - if component_name: - path = '/'.join([component_name, path]) if component_type: path = '/'.join([component_type, path]) return path @@ -179,18 +168,24 @@ class DataDownloadMixin: does this in a two step process where files that might be downloaded are "registered" and then "retrieved" when they need to be used. - To use this class include it as one of the subclasses of your reader, - writer, or other Satpy component. Then in the ``__init__`` method, - initialize the data file info storage and call the ``register_data_files`` - function during initialization:: + To use this class include it as one of the subclasses of your Satpy + component. Then in the ``__init__`` method, call the + ``register_data_files`` function during initialization. + + .. note:: + + This class is already included in the ``FileYAMLReader`` and + ``Writer`` base classes. There is no need to define a custom + class. 
+ + The below code is shown as an example:: - from satpy.readers.yaml_reader import FileYAMLReader + from satpy.readers.yaml_reader import AbstractYAMLReader from satpy.data_download import DataDownloadMixin - class MyReader(FileYAMLReader, DataDownloadMixin): + class MyReader(AbstractYAMLReader, DataDownloadMixin): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.data_files = [] self.register_data_files() This class expects data files to be configured in either a @@ -224,10 +219,8 @@ def __init__(self, *args, **kwargs): When it comes time to needing the file, you can retrieve the local path by calling ``~satpy.data_download.retrieve(cache_key)`` with the "cache key" generated during registration. These keys will be in the - format: ``<component_type>/<component_name>/<filename>``. For a - reader with a class named ``MySensorReader`` this would be - ``readers/MySensorReader/satpy_release.md``. See - :meth:`~satpy.data_download.DataDownloadMixin.retrieve_data_file`. + format: ``<component_type>/<filename>``. For a + reader this would be ``readers/satpy_release.md``. This Mixin is not the only way to register and download files for a Satpy component, but is the most generic and flexible.
Feel free to @@ -242,6 +235,7 @@ def __init__(self, *args, **kwargs): DATA_FILE_COMPONENTS = { 'reader': 'readers', 'writer': 'writers', + 'composit': 'composites', } @property @@ -264,14 +258,15 @@ def register_data_files(self, data_files=None): if data_files is None: df_parent = getattr(self, 'info', self.config) data_files = df_parent.get('data_files', []) + cache_keys = [] for data_file_entry in data_files: cache_key = self._register_data_file(data_file_entry, comp_type) - self.data_files.append(cache_key) + cache_keys.append(cache_key) + return cache_keys def _register_data_file(self, data_file_entry, comp_type): url = data_file_entry['url'] filename = data_file_entry.get('filename', os.path.basename(url)) known_hash = data_file_entry.get('known_hash') return register_file(url, filename, component_type=comp_type, - component_name=self.__class__.__name__, known_hash=known_hash) diff --git a/satpy/readers/yaml_reader.py b/satpy/readers/yaml_reader.py index 49e29db15e..4092877d17 100644 --- a/satpy/readers/yaml_reader.py +++ b/satpy/readers/yaml_reader.py @@ -42,6 +42,7 @@ from satpy.utils import recursive_dict_update from satpy.dataset import DataQuery, DataID, get_key from satpy.dataset.dataid import get_keys_from_config, default_id_keys_config, default_co_keys_config +from satpy.data_download import DataDownloadMixin from satpy import DatasetDict from satpy.resample import add_crs_xy_coords from trollsift.parser import globify, parse @@ -329,7 +330,7 @@ def load_ds_ids_from_config(self): return ids -class FileYAMLReader(AbstractYAMLReader): +class FileYAMLReader(AbstractYAMLReader, DataDownloadMixin): """Primary reader base class that is configured by a YAML file. 
This class uses the idea of per-file "file handler" objects to read file @@ -354,6 +355,7 @@ def __init__(self, self.filter_filenames = self.info.get('filter_filenames', filter_filenames) self.filter_parameters = filter_parameters or {} self.coords_cache = WeakValueDictionary() + self.register_data_files() @property def sensor_names(self): diff --git a/satpy/tests/test_data_download.py b/satpy/tests/test_data_download.py index 7a51e97334..a2eb7f8850 100644 --- a/satpy/tests/test_data_download.py +++ b/satpy/tests/test_data_download.py @@ -19,10 +19,6 @@ from unittest import mock -from satpy.readers.yaml_reader import FileYAMLReader -from satpy.writers import Writer -from satpy.data_download import DataDownloadMixin - import pytest import yaml @@ -47,34 +43,6 @@ def _setup_custom_composite_config(base_dir): }, comp_file) -class FakeMixedReader(FileYAMLReader, DataDownloadMixin): - """Fake reader that uses the data download mixin.""" - - def __init__(self, *args, **kwargs): - """Initialize reader and data downloading.""" - super().__init__(*args, **kwargs) - self.data_files = [] - self.register_data_files() - - @property - def start_time(self): - """Start time of the reader.""" - return None - - @property - def end_time(self): - """End time of the reader.""" - return None - - def filter_selected_filenames(self, filenames): - """Filter provided filenames by parameters in reader configuration.""" - return filenames - - def load(self, dataset_keys): - """Load some data.""" - return {} - - def _setup_custom_reader_config(base_dir): reader_config = base_dir.mkdir("readers").join("fake.yaml") with open(reader_config, 'wt') as comp_file: @@ -82,7 +50,7 @@ def _setup_custom_reader_config(base_dir): comp_file.write(""" reader: name: "fake" - reader: !!python/name:satpy.tests.test_data_download.FakeMixedReader + reader: !!python/name:satpy.readers.yaml_reader.FileYAMLReader data_files: - url: {} known_hash: null @@ -93,16 +61,6 @@ def _setup_custom_reader_config(base_dir): 
""".format(README_URL, README_URL)) -class FakeMixedWriter(Writer, DataDownloadMixin): - """Fake reader that uses the data download mixin.""" - - def __init__(self, *args, **kwargs): - """Initialize writer and data downloading.""" - super().__init__(*args, **kwargs) - self.data_files = [] - self.register_data_files() - - def _setup_custom_writer_config(base_dir): writer_config = base_dir.mkdir("writers").join("fake.yaml") with open(writer_config, 'wt') as comp_file: @@ -110,7 +68,7 @@ def _setup_custom_writer_config(base_dir): comp_file.write(""" writer: name: "fake" - writer: !!python/name:satpy.tests.test_data_download.FakeMixedWriter + writer: !!python/name:satpy.writers.Writer data_files: - url: {} known_hash: null @@ -138,11 +96,11 @@ def test_find_registerable(self, tmpdir): with satpy.config.set(config_path=[tmpdir]), \ mock.patch('satpy.data_download._FILE_REGISTRY', file_registry): found_files = find_registerable_files() - assert 'composites/StaticImageCompositor/README.rst' in found_files - assert 'readers/FakeMixedReader/README.rst' in found_files - assert 'readers/FakeMixedReader/README2.rst' in found_files - assert 'writers/FakeMixedWriter/README.rst' in found_files - assert 'writers/FakeMixedWriter/README2.rst' in found_files + assert 'composites/README.rst' in found_files + assert 'readers/README.rst' in found_files + assert 'readers/README2.rst' in found_files + assert 'writers/README.rst' in found_files + assert 'writers/README2.rst' in found_files def test_retrieve(self, tmpdir): """Test retrieving a single file.""" @@ -152,7 +110,7 @@ def test_retrieve(self, tmpdir): file_registry = {} with satpy.config.set(config_path=[tmpdir], data_dir=str(tmpdir)), \ mock.patch('satpy.data_download._FILE_REGISTRY', file_registry): - comp_file = 'composites/StaticImageCompositor/README.rst' + comp_file = 'composites/README.rst' found_files = find_registerable_files() assert comp_file in found_files assert not tmpdir.join(comp_file).exists() @@ -170,7 +128,7 @@ 
def test_retrieve_all(self, tmpdir): mock.patch('satpy.data_download._FILE_REGISTRY', file_registry), \ mock.patch('satpy.data_download._FILE_URLS', file_urls), \ mock.patch('satpy.data_download.find_registerable_files'): - comp_file = 'composites/StaticImageCompositor/README.rst' + comp_file = 'composites/README.rst' file_registry[comp_file] = None file_urls[comp_file] = README_URL assert not tmpdir.join(comp_file).exists() diff --git a/satpy/writers/__init__.py b/satpy/writers/__init__.py index 8e0472c74b..83fc2874cf 100644 --- a/satpy/writers/__init__.py +++ b/satpy/writers/__init__.py @@ -39,6 +39,7 @@ from satpy import CHUNK_SIZE from satpy.plugin_base import Plugin from satpy.resample import get_area_def +from satpy.data_download import DataDownloadMixin from trollsift import parser @@ -543,7 +544,7 @@ def compute_writer_results(results): target.close() -class Writer(Plugin): +class Writer(Plugin, DataDownloadMixin): """Base Writer class for all other writers. A minimal writer subclass should implement the `save_dataset` method. 
@@ -595,6 +596,7 @@ def __init__(self, name=None, filename=None, base_dir=None, **kwargs): raise ValueError("Writer 'name' not provided") self.filename_parser = self.create_filename_parser(base_dir) + self.register_data_files() @classmethod def separate_init_kwargs(cls, kwargs): From f4137dcfcdccc718a8046f672746b0b43c85bde5 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Tue, 9 Feb 2021 21:14:54 -0600 Subject: [PATCH 11/16] Add auxiliary data download documentation --- doc/source/dev_guide/aux_data.rst | 109 +++++++++++++++++++++++++ doc/source/dev_guide/custom_reader.rst | 10 ++- doc/source/dev_guide/index.rst | 1 + satpy/composites/__init__.py | 5 +- 4 files changed, 120 insertions(+), 5 deletions(-) create mode 100644 doc/source/dev_guide/aux_data.rst diff --git a/doc/source/dev_guide/aux_data.rst b/doc/source/dev_guide/aux_data.rst new file mode 100644 index 0000000000..5c6c51f120 --- /dev/null +++ b/doc/source/dev_guide/aux_data.rst @@ -0,0 +1,109 @@ +Auxiliary Data Download +======================= + +Sometimes Satpy components need some extra data files to get their work +done properly. These include files like Look Up Tables (LUTs), coefficients, +or Earth model data (ex. elevations). This includes any file that would be too +large to be included in the Satpy python package; anything bigger than a small +text file. To help with this, Satpy includes utilities for downloading and +caching these files only when your component is used. This saves the user from +wasting time and disk space downloading files they may never use. +This functionality is made possible thanks to the +`Pooch library <https://www.fatiando.org/pooch/latest/>`_. + +Downloaded files are stored in the directory configured by +:ref:`data_dir_setting`. + +Adding download functionality +----------------------------- + +The utility functions for data downloading include a two step process: + +1. **Registering**: Tell Satpy what files might need to be downloaded and used + later. +2. 
**Retrieving**: Ask Satpy to download and store the files locally. + +Registering +^^^^^^^^^^^ + +Registering a file for downloading tells Satpy the remote URL for the file, +and an optional hash. The hash is used to verify a successful download. +Registering can also include a ``filename`` to tell Satpy what to name the +file when it is downloaded. If not provided it will be determined from the URL. +Once registered, Satpy can be told to retrieve the file (see below) by using a +"cache key". Cache keys follow the general scheme of +``<component_type>/<filename>`` (ex. ``readers/README.rst``). + +Satpy includes a low-level function and a high-level Mixin class for +registering files. The higher level class is recommended for any Satpy +component like readers, writers, and compositors. The lower-level +:func:`~satpy.data_download.register_file` function can be used for any other +use case. + +The :class:`~satpy.data_download.DataDownloadMixin` class is automatically included +in the :class:`~satpy.readers.yaml_reader.FileYAMLReader` and +:class:`~satpy.writers.Writer` base classes. For any other component (like +a compositor) you should include it as another parent class: + +.. code-block:: python + + from satpy.data_download import DataDownloadMixin + from satpy.composites import GenericCompositor + + class MyCompositor(GenericCompositor, DataDownloadMixin): + """Compositor that uses downloaded files.""" + + def __init__(self, name, url=None, known_hash=None, **kwargs): + super().__init__(name, **kwargs) + data_files = [{'url': url, 'known_hash': known_hash}] + self.register_data_files(data_files) + +However your code registers files, to be consistent it must do it during +initialization so that the :func:`~satpy.data_download.find_registerable_files` function can find them. +If your component isn't a reader, writer, or compositor then this function +will need to be updated to find and load your registered files. See +:ref:`offline_aux_downloads` below for more information. 
+ +As mentioned, the mixin class is included in the base reader and writer class. +To register files in these cases, include a ``data_files`` section in your +YAML configuration file. For readers this would go under the ``reader`` +section and for writers the ``writer`` section. This parameter is a list +of dictionaries including a ``url``, ``known_hash``, and optional +``filename``. For example:: + + reader: + name: abi_l1b + short_name: ABI L1b + long_name: GOES-R ABI Level 1b + ... other metadata ... + data_files: + - url: "https://example.com/my_data_file.dat" + - url: "https://raw.githubusercontent.com/pytroll/satpy/master/README.rst" + known_hash: "sha256:5891286b63e7745de08c4b0ac204ad44cfdb9ab770309debaba90308305fa759" + - url: "https://raw.githubusercontent.com/pytroll/satpy/master/RELEASING.md" + filename: "satpy_releasing.md" + known_hash: null + +See the :class:`~satpy.data_download.DataDownloadMixin` for more information. + +Retrieving +^^^^^^^^^^ + +Files that have been registered (see above) can be retrieved by calling the +:func:`~satpy.data_download.retrieve` function. This function expects a single +argument: the cache key. Cache keys are returned by registering functions, but +can also be pre-determined by following the scheme +``<component_type>/<filename>`` (ex. ``readers/README.rst``). +Retrieving a file will download it to local disk if needed and then return +the local pathname. Data is stored locally in the :ref:`data_dir_setting`. +It is up to the caller to then open the file. + +.. _offline_aux_downloads: + +Offline Downloads +----------------- + +To assist with operational environments, Satpy includes a +:func:`~satpy.data_download.retrieve_all` function that will try to find all +files that Satpy components may need to download in the future and download +them to the current directory specified by :ref:`data_dir_setting`. 
diff --git a/doc/source/dev_guide/custom_reader.rst b/doc/source/dev_guide/custom_reader.rst index 5eb4f68831..86c7be99aa 100644 --- a/doc/source/dev_guide/custom_reader.rst +++ b/doc/source/dev_guide/custom_reader.rst @@ -571,4 +571,12 @@ One way of implementing a file handler is shown below: # left as an exercise to the reader :) If you have any questions, please contact the -:ref:`Satpy developers `. \ No newline at end of file +:ref:`Satpy developers `. + +Auxiliary File Download +----------------------- + +If your reader needs additional data files to do calibrations, corrections, +or anything else see the :doc:`aux_data` document for more information on +how to download and cache these files without including them in the Satpy +python package. \ No newline at end of file diff --git a/doc/source/dev_guide/index.rst b/doc/source/dev_guide/index.rst index f32c320ce1..461a9adb65 100644 --- a/doc/source/dev_guide/index.rst +++ b/doc/source/dev_guide/index.rst @@ -16,6 +16,7 @@ at the pages listed below. custom_reader plugins satpy_internals + aux_data Coding guidelines ================= diff --git a/satpy/composites/__init__.py b/satpy/composites/__init__.py index 0f24fbab4a..8ff89db1bd 100644 --- a/satpy/composites/__init__.py +++ b/satpy/composites/__init__.py @@ -974,11 +974,8 @@ def __call__(self, projectables, *args, **kwargs): class StaticImageCompositor(GenericCompositor, DataDownloadMixin): """A compositor that loads a static image from disk. - If the filename passed to this compositor is not valid then - the SATPY_ANCPATH environment variable will be checked to see - if the image is located there + Environment variables in the filename are automatically expanded. 
- Environment variables in the filename are automatically expanded """ def __init__(self, name, filename=None, url=None, known_hash=None, area=None, From 1836d38776401710b4740aa24e314ab3b6d9278e Mon Sep 17 00:00:00 2001 From: David Hoese Date: Wed, 10 Feb 2021 09:58:53 -0600 Subject: [PATCH 12/16] Add 'satpy_retrieve_all' console script for easier offline downloading --- doc/source/dev_guide/aux_data.rst | 10 ++++ satpy/data_download.py | 98 ++++++++++++++++++++++++++----- satpy/tests/test_data_download.py | 65 +++++++++++++++++--- setup.py | 8 +++ 4 files changed, 159 insertions(+), 22 deletions(-) diff --git a/doc/source/dev_guide/aux_data.rst b/doc/source/dev_guide/aux_data.rst index 5c6c51f120..24efedc37a 100644 --- a/doc/source/dev_guide/aux_data.rst +++ b/doc/source/dev_guide/aux_data.rst @@ -107,3 +107,13 @@ To assist with operational environments, Satpy includes a :func:`~satpy.data_download.retrieve_all` function that will try to find all files that Satpy components may need to download in the future and download them to the current directory specified by :ref:`data_dir_setting`. +This function allows you to specify a list of ``readers``, ``writers``, or +``composite_sensors`` to limit what components are checked for files to +download. + +The ``retrieve_all`` function is also available through a command line script +called ``satpy_retrieve_all``. Run the following for usage information. + +.. code-block:: bash + + satpy_retrieve_all --help diff --git a/satpy/data_download.py b/satpy/data_download.py index b88f61af67..82e55fbac8 100644 --- a/satpy/data_download.py +++ b/satpy/data_download.py @@ -99,17 +99,34 @@ def retrieve(cache_key, pooch_kwargs=None): return pooch_obj.fetch(cache_key, **pooch_kwargs) -def retrieve_all(pooch_kwargs=None): +def retrieve_all(readers=None, writers=None, composite_sensors=None, + pooch_kwargs=None): """Find cache-able data files for Satpy and download them. 
The typical use case for this function is to download all ancillary files before going to an environment/system that does not have internet access. + Args: + readers (list or None): Limit searching to these readers. If not + specified or ``None`` then all readers are searched. If an + empty list then no readers are searched. + writers (list or None): Limit searching to these writers. If not + specified or ``None`` then all writers are searched. If an + empty list then no writers are searched. + composite_sensors (list or None): Limit searching to composite + configuration files for these sensors. If ``None`` then all sensor + configs will be searched. If an empty list then no composites + will be searched. + pooch_kwargs (dict): Additional keyword arguments to pass to pooch + ``fetch``. + """ if pooch_kwargs is None: pooch_kwargs = {} - find_registerable_files() + find_registerable_files(readers=readers, + writers=writers, + composite_sensors=composite_sensors) path = satpy.config.get('data_dir') pooch_obj = pooch.create(path, path, registry=_FILE_REGISTRY, urls=_FILE_URLS) @@ -119,15 +136,30 @@ def retrieve_all(pooch_kwargs=None): logger.info("Done downloading all extra files.") -def find_registerable_files(): - """Load all Satpy components so they can be downloaded.""" - _find_registerable_files_compositors() - _find_registerable_files_readers() - _find_registerable_files_writers() +def find_registerable_files(readers=None, writers=None, + composite_sensors=None): + """Load all Satpy components so they can be downloaded. + + Args: + readers (list or None): Limit searching to these readers. If not + specified or ``None`` then all readers are searched. If an + empty list then no readers are searched. + writers (list or None): Limit searching to these writers. If not + specified or ``None`` then all writers are searched. If an + empty list then no writers are searched. 
+ composite_sensors (list or None): Limit searching to composite + configuration files for these sensors. If ``None`` then all sensor + configs will be searched. If an empty list then no composites + will be searched. + + """ + _find_registerable_files_compositors(composite_sensors) + _find_registerable_files_readers(readers) + _find_registerable_files_writers(writers) return sorted(_FILE_REGISTRY.keys()) -def _find_registerable_files_compositors(): +def _find_registerable_files_compositors(sensors=None): """Load all compositor configs so that files are registered. Compositor objects should register files when they are initialized. @@ -135,25 +167,27 @@ def _find_registerable_files_compositors(): """ from satpy.composites.config_loader import CompositorLoader composite_loader = CompositorLoader() - all_sensor_names = composite_loader.all_composite_sensors() - composite_loader.load_compositors(all_sensor_names) + if sensors is None: + sensors = composite_loader.all_composite_sensors() + if sensors: + composite_loader.load_compositors(sensors) -def _find_registerable_files_readers(): +def _find_registerable_files_readers(readers=None): """Load all readers so that files are registered.""" import yaml from satpy.readers import configs_for_reader, load_reader - for reader_configs in configs_for_reader(): + for reader_configs in configs_for_reader(reader=readers): try: load_reader(reader_configs) except (ModuleNotFoundError, yaml.YAMLError): continue -def _find_registerable_files_writers(): +def _find_registerable_files_writers(writers=None): """Load all writers so that files are registered.""" from satpy.writers import configs_for_writer, load_writer_configs - for writer_configs in configs_for_writer(): + for writer_configs in configs_for_writer(writer=writers): try: load_writer_configs(writer_configs) except ValueError: @@ -264,9 +298,43 @@ def register_data_files(self, data_files=None): cache_keys.append(cache_key) return cache_keys - def _register_data_file(self, 
data_file_entry, comp_type): + @staticmethod + def _register_data_file(data_file_entry, comp_type): url = data_file_entry['url'] filename = data_file_entry.get('filename', os.path.basename(url)) known_hash = data_file_entry.get('known_hash') return register_file(url, filename, component_type=comp_type, known_hash=known_hash) + + +def retrieve_all_cmd(): + """Call 'retrieve_all' function from console script 'satpy_retrieve_all'.""" + import argparse + parser = argparse.ArgumentParser(description="Download auxiliary data files used by Satpy.") + parser.add_argument('--data-dir', + help="Override 'SATPY_DATA_DIR' for destination of " + "downloaded files. This does NOT change the " + "directory Satpy will look at when searching " + "for files outside of this script.") + parser.add_argument('--composite-sensors', nargs="*", + help="Limit loaded composites for the specified " + "sensors. If specified with no arguments, " + "no composite files will be downloaded.") + parser.add_argument('--readers', nargs="*", + help="Limit searching to these readers. If specified " + "with no arguments, no reader files will be " + "downloaded.") + parser.add_argument('--writers', nargs="*", + help="Limit searching to these writers. 
If specified " + "with no arguments, no writer files will be " + "downloaded.") + args = parser.parse_args() + + logging.basicConfig(level=logging.INFO) + + if args.data_dir is None: + args.data_dir = satpy.config.get('data_dir') + + with satpy.config.set(data_dir=args.data_dir): + retrieve_all(readers=args.readers, writers=args.writers, + composite_sensors=args.composite_sensors) diff --git a/satpy/tests/test_data_download.py b/satpy/tests/test_data_download.py index a2eb7f8850..fb77967e25 100644 --- a/satpy/tests/test_data_download.py +++ b/satpy/tests/test_data_download.py @@ -84,10 +84,38 @@ def _setup_custom_configs(base_dir): _setup_custom_writer_config(base_dir) +def _get_reader_find_conditions(readers, found_files): + r_cond1 = 'readers/README.rst' in found_files + r_cond2 = 'readers/README2.rst' in found_files + if readers is not None and not readers: + r_cond1 = not r_cond1 + r_cond2 = not r_cond2 + return r_cond1, r_cond2 + + +def _get_writer_find_conditions(writers, found_files): + w_cond1 = 'writers/README.rst' in found_files + w_cond2 = 'writers/README2.rst' in found_files + if writers is not None and not writers: + w_cond1 = not w_cond1 + w_cond2 = not w_cond2 + return w_cond1, w_cond2 + + +def _get_comp_find_conditions(comp_sensors, found_files): + comp_cond = 'composites/README.rst' in found_files + if comp_sensors is not None and not comp_sensors: + comp_cond = not comp_cond + return comp_cond + + class TestDataDownload: """Test basic data downloading functionality.""" - def test_find_registerable(self, tmpdir): + @pytest.mark.parametrize('comp_sensors', [[], None, ['visir']]) + @pytest.mark.parametrize('writers', [[], None, ['fake']]) + @pytest.mark.parametrize('readers', [[], None, ['fake']]) + def test_find_registerable(self, readers, writers, comp_sensors, tmpdir): """Test that find_registerable finds some things.""" import satpy from satpy.data_download import find_registerable_files @@ -95,12 +123,35 @@ def test_find_registerable(self, 
tmpdir): file_registry = {} with satpy.config.set(config_path=[tmpdir]), \ mock.patch('satpy.data_download._FILE_REGISTRY', file_registry): - found_files = find_registerable_files() - assert 'composites/README.rst' in found_files - assert 'readers/README.rst' in found_files - assert 'readers/README2.rst' in found_files - assert 'writers/README.rst' in found_files - assert 'writers/README2.rst' in found_files + found_files = find_registerable_files( + readers=readers, writers=writers, + composite_sensors=comp_sensors, + ) + + r_cond1, r_cond2 = _get_reader_find_conditions(readers, found_files) + assert r_cond1 + assert r_cond2 + w_cond1, w_cond2 = _get_writer_find_conditions(writers, found_files) + assert w_cond1 + assert w_cond2 + comp_cond = _get_comp_find_conditions(comp_sensors, found_files) + assert comp_cond + + @pytest.mark.parametrize('comp_sensors', [[], None, ['visir']]) + @pytest.mark.parametrize('writers', [[], None, ['fake']]) + @pytest.mark.parametrize('readers', [[], None, ['fake']]) + def test_limited_find_registerable(self, readers, writers, comp_sensors, tmpdir): + """Test that find_registerable doesn't find anything when limited.""" + import satpy + from satpy.data_download import find_registerable_files + _setup_custom_configs(tmpdir) + file_registry = {} + with satpy.config.set(config_path=[tmpdir]), \ + mock.patch('satpy.data_download._FILE_REGISTRY', file_registry): + found_files = find_registerable_files( + readers=[], writers=[], composite_sensors=[], + ) + assert not found_files def test_retrieve(self, tmpdir): """Test retrieving a single file.""" diff --git a/setup.py b/setup.py index 0279070a72..8f91d51196 100644 --- a/setup.py +++ b/setup.py @@ -103,6 +103,13 @@ def _config_data_files(base_dirs, extensions=(".cfg", )): return data_files +entry_points = { + 'console_scripts': [ + 'satpy_retrieve_all=satpy.data_download:retrieve_all_cmd', + ], +} + + NAME = 'satpy' with open('README.rst', 'r') as readme: README = readme.read() @@ -139,4 
+146,5 @@ def _config_data_files(base_dirs, extensions=(".cfg", )): tests_require=test_requires, python_requires='>=3.6', extras_require=extras_require, + entry_points=entry_points, ) From ba6509c5736c3b9c04dabed4e422d5677e2d63f5 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Wed, 10 Feb 2021 10:17:41 -0600 Subject: [PATCH 13/16] Add configuration value to disable auxiliary data download --- doc/source/config.rst | 15 ++++++++++++++ doc/source/dev_guide/aux_data.rst | 3 +++ satpy/_config.py | 1 + satpy/data_download.py | 16 +++++++++++++++ satpy/tests/test_data_download.py | 34 +++++++++++++++++++++++++++++++ 5 files changed, 69 insertions(+) diff --git a/doc/source/config.rst b/doc/source/config.rst index f6c7c31aec..88df2965d7 100644 --- a/doc/source/config.rst +++ b/doc/source/config.rst @@ -130,6 +130,21 @@ defaults to a different path depending on your operating system following the `appdirs `_ "user data dir". +.. _download_aux_setting: + +Download Auxiliary Data +^^^^^^^^^^^^^^^^^^^^^^^ + +* **Environment variable**: ``SATPY_DOWNLOAD_AUX`` +* **YAML/Config Key**: ``download_aux`` +* **Default**: True + +Whether to allow downloading of auxiliary files for certain Satpy operations. +See :doc:`dev_guide/aux_data` for more information. If ``True`` then Satpy +will download and cache any necessary data files to :ref:`data_dir_setting` +when needed. If ``False`` then pre-downloaded files will be used, but any +other files will not be downloaded or checked for validity. + .. _component_configuration: Component Configuration diff --git a/doc/source/dev_guide/aux_data.rst b/doc/source/dev_guide/aux_data.rst index 24efedc37a..d79b9b1509 100644 --- a/doc/source/dev_guide/aux_data.rst +++ b/doc/source/dev_guide/aux_data.rst @@ -117,3 +117,6 @@ called ``satpy_retrieve_all``. Run the following for usage information. .. code-block:: bash satpy_retrieve_all --help + +To make sure that no additional files are downloaded when running Satpy see +:ref:`download_aux_setting`. 
diff --git a/satpy/_config.py b/satpy/_config.py index 49a5064c92..c268a09daf 100644 --- a/satpy/_config.py +++ b/satpy/_config.py @@ -38,6 +38,7 @@ 'cache_dir': _satpy_dirs.user_cache_dir, 'data_dir': _satpy_dirs.user_data_dir, 'config_path': [], + 'download_aux': True, } # Satpy main configuration object diff --git a/satpy/data_download.py b/satpy/data_download.py index 82e55fbac8..5553008eff 100644 --- a/satpy/data_download.py +++ b/satpy/data_download.py @@ -73,6 +73,17 @@ def _generate_filename(filename, component_type): return path +def _retrieve_offline(data_dir, cache_key): + logger.debug('Downloading auxiliary files is turned off, will check ' + 'local files.') + local_file = os.path.join(data_dir, *cache_key.split('/')) + if not os.path.isfile(local_file): + raise RuntimeError("Satpy 'download_aux' setting is False meaning " + "no new files will be downloaded and the local " + "file '{}' does not exist.".format(local_file)) + return local_file + + def retrieve(cache_key, pooch_kwargs=None): """Download and cache the file associated with the provided ``cache_key``. 
@@ -93,6 +104,8 @@ def retrieve(cache_key, pooch_kwargs=None): pooch_kwargs = pooch_kwargs or {} path = satpy.config.get('data_dir') + if not satpy.config.get('download_aux'): + return _retrieve_offline(path, cache_key) # reuse data directory as the default URL where files can be downloaded from pooch_obj = pooch.create(path, path, registry=_FILE_REGISTRY, urls=_FILE_URLS) @@ -123,6 +136,9 @@ def retrieve_all(readers=None, writers=None, composite_sensors=None, """ if pooch_kwargs is None: pooch_kwargs = {} + if not satpy.config.get('download_aux'): + raise RuntimeError("Satpy 'download_aux' setting is False so no files " + "will be downloaded.") find_registerable_files(readers=readers, writers=writers, diff --git a/satpy/tests/test_data_download.py b/satpy/tests/test_data_download.py index fb77967e25..ee9e4ae7ea 100644 --- a/satpy/tests/test_data_download.py +++ b/satpy/tests/test_data_download.py @@ -168,6 +168,40 @@ def test_retrieve(self, tmpdir): retrieve(comp_file) assert tmpdir.join(comp_file).exists() + def test_offline_retrieve(self, tmpdir): + """Test retrieving a single file when offline.""" + import satpy + from satpy.data_download import find_registerable_files, retrieve + _setup_custom_configs(tmpdir) + file_registry = {} + with satpy.config.set(config_path=[tmpdir], data_dir=str(tmpdir), download_aux=True), \ + mock.patch('satpy.data_download._FILE_REGISTRY', file_registry): + comp_file = 'composites/README.rst' + found_files = find_registerable_files() + assert comp_file in found_files + + # the file doesn't exist, we can't download it + assert not tmpdir.join(comp_file).exists() + with satpy.config.set(download_aux=False): + pytest.raises(RuntimeError, retrieve, comp_file) + + # allow downloading and get it + retrieve(comp_file) + assert tmpdir.join(comp_file).exists() + + # turn off downloading and make sure we get local file + with satpy.config.set(download_aux=False): + local_file = retrieve(comp_file) + assert local_file + + def 
test_offline_retrieve_all(self, tmpdir): + """Test registering and retrieving all files fails when offline.""" + import satpy + from satpy.data_download import retrieve_all + _setup_custom_configs(tmpdir) + with satpy.config.set(config_path=[tmpdir], data_dir=str(tmpdir), download_aux=False): + pytest.raises(RuntimeError, retrieve_all) + def test_retrieve_all(self, tmpdir): """Test registering and retrieving all files.""" import satpy From d7909932cbf1ffd7cbf264829675f76a59e340a7 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Fri, 12 Feb 2021 09:15:46 -0600 Subject: [PATCH 14/16] Refactor and other fixes to address reviewer comments --- doc/source/dev_guide/aux_data.rst | 4 +- satpy/data_download.py | 24 ++++++------ satpy/tests/test_data_download.py | 61 +++++++++++++------------------ setup.py | 2 +- 4 files changed, 41 insertions(+), 50 deletions(-) diff --git a/doc/source/dev_guide/aux_data.rst b/doc/source/dev_guide/aux_data.rst index d79b9b1509..92219a0a3e 100644 --- a/doc/source/dev_guide/aux_data.rst +++ b/doc/source/dev_guide/aux_data.rst @@ -112,11 +112,11 @@ This function allows you to specify a list of ``readers``, ``writers``, or download. The ``retrieve_all`` function is also available through a command line script -called ``satpy_retrieve_all``. Run the following for usage information. +called ``satpy_retrieve_all_aux_data``. Run the following for usage information. .. code-block:: bash - satpy_retrieve_all --help + satpy_retrieve_all_aux_data --help To make sure that no additional files are downloaded when running Satpy see :ref:`download_aux_setting`. 
diff --git a/satpy/data_download.py b/satpy/data_download.py index 5553008eff..24ba8b4fae 100644 --- a/satpy/data_download.py +++ b/satpy/data_download.py @@ -56,9 +56,6 @@ def register_file(url, filename, component_type=None, known_hash=None): """ fname = _generate_filename(filename, component_type) - - global _FILE_REGISTRY - global _FILE_URLS _FILE_REGISTRY[fname] = known_hash _FILE_URLS[fname] = url return fname @@ -112,6 +109,17 @@ def retrieve(cache_key, pooch_kwargs=None): return pooch_obj.fetch(cache_key, **pooch_kwargs) +def _retrieve_all_with_pooch(pooch_kwargs): + if pooch_kwargs is None: + pooch_kwargs = {} + path = satpy.config.get('data_dir') + pooch_obj = pooch.create(path, path, registry=_FILE_REGISTRY, + urls=_FILE_URLS) + for fname in _FILE_REGISTRY: + logger.info("Downloading extra data file '%s'...", fname) + pooch_obj.fetch(fname, **pooch_kwargs) + + def retrieve_all(readers=None, writers=None, composite_sensors=None, pooch_kwargs=None): """Find cache-able data files for Satpy and download them. @@ -134,8 +142,6 @@ def retrieve_all(readers=None, writers=None, composite_sensors=None, ``fetch``. 
""" - if pooch_kwargs is None: - pooch_kwargs = {} if not satpy.config.get('download_aux'): raise RuntimeError("Satpy 'download_aux' setting is False so no files " "will be downloaded.") @@ -143,12 +149,7 @@ def retrieve_all(readers=None, writers=None, composite_sensors=None, find_registerable_files(readers=readers, writers=writers, composite_sensors=composite_sensors) - path = satpy.config.get('data_dir') - pooch_obj = pooch.create(path, path, registry=_FILE_REGISTRY, - urls=_FILE_URLS) - for fname in _FILE_REGISTRY: - logger.info("Downloading extra data file '%s'...", fname) - pooch_obj.fetch(fname, **pooch_kwargs) + _retrieve_all_with_pooch(pooch_kwargs) logger.info("Done downloading all extra files.") @@ -256,7 +257,6 @@ def __init__(self, *args, **kwargs): known_hash: "sha256:5891286b63e7745de08c4b0ac204ad44cfdb9ab770309debaba90308305fa759" - url: "https://raw.githubusercontent.com/pytroll/satpy/master/RELEASING.md" filename: "satpy_releasing.md" - known_hash: null In this example we register two files that might be downloaded. 
If ``known_hash`` is not provided or None (null in YAML) then the data diff --git a/satpy/tests/test_data_download.py b/satpy/tests/test_data_download.py index ee9e4ae7ea..79ace3888a 100644 --- a/satpy/tests/test_data_download.py +++ b/satpy/tests/test_data_download.py @@ -78,12 +78,6 @@ def _setup_custom_writer_config(base_dir): """.format(README_URL, README_URL)) -def _setup_custom_configs(base_dir): - _setup_custom_composite_config(base_dir) - _setup_custom_reader_config(base_dir) - _setup_custom_writer_config(base_dir) - - def _get_reader_find_conditions(readers, found_files): r_cond1 = 'readers/README.rst' in found_files r_cond2 = 'readers/README2.rst' in found_files @@ -112,17 +106,22 @@ def _get_comp_find_conditions(comp_sensors, found_files): class TestDataDownload: """Test basic data downloading functionality.""" + @pytest.fixture(autouse=True) + def _setup_custom_configs(self, tmpdir): + _setup_custom_composite_config(tmpdir) + _setup_custom_reader_config(tmpdir) + _setup_custom_writer_config(tmpdir) + self.tmpdir = tmpdir + @pytest.mark.parametrize('comp_sensors', [[], None, ['visir']]) @pytest.mark.parametrize('writers', [[], None, ['fake']]) @pytest.mark.parametrize('readers', [[], None, ['fake']]) - def test_find_registerable(self, readers, writers, comp_sensors, tmpdir): + def test_find_registerable(self, readers, writers, comp_sensors): """Test that find_registerable finds some things.""" import satpy from satpy.data_download import find_registerable_files - _setup_custom_configs(tmpdir) - file_registry = {} - with satpy.config.set(config_path=[tmpdir]), \ - mock.patch('satpy.data_download._FILE_REGISTRY', file_registry): + with satpy.config.set(config_path=[self.tmpdir]), \ + mock.patch('satpy.data_download._FILE_REGISTRY', {}): found_files = find_registerable_files( readers=readers, writers=writers, composite_sensors=comp_sensors, @@ -137,85 +136,77 @@ def test_find_registerable(self, readers, writers, comp_sensors, tmpdir): comp_cond = 
_get_comp_find_conditions(comp_sensors, found_files) assert comp_cond - @pytest.mark.parametrize('comp_sensors', [[], None, ['visir']]) - @pytest.mark.parametrize('writers', [[], None, ['fake']]) - @pytest.mark.parametrize('readers', [[], None, ['fake']]) - def test_limited_find_registerable(self, readers, writers, comp_sensors, tmpdir): + def test_limited_find_registerable(self): """Test that find_registerable doesn't find anything when limited.""" import satpy from satpy.data_download import find_registerable_files - _setup_custom_configs(tmpdir) file_registry = {} - with satpy.config.set(config_path=[tmpdir]), \ + with satpy.config.set(config_path=[self.tmpdir]), \ mock.patch('satpy.data_download._FILE_REGISTRY', file_registry): found_files = find_registerable_files( readers=[], writers=[], composite_sensors=[], ) assert not found_files - def test_retrieve(self, tmpdir): + def test_retrieve(self): """Test retrieving a single file.""" import satpy from satpy.data_download import find_registerable_files, retrieve - _setup_custom_configs(tmpdir) file_registry = {} - with satpy.config.set(config_path=[tmpdir], data_dir=str(tmpdir)), \ + with satpy.config.set(config_path=[self.tmpdir], data_dir=str(self.tmpdir)), \ mock.patch('satpy.data_download._FILE_REGISTRY', file_registry): comp_file = 'composites/README.rst' found_files = find_registerable_files() assert comp_file in found_files - assert not tmpdir.join(comp_file).exists() + assert not self.tmpdir.join(comp_file).exists() retrieve(comp_file) - assert tmpdir.join(comp_file).exists() + assert self.tmpdir.join(comp_file).exists() - def test_offline_retrieve(self, tmpdir): + def test_offline_retrieve(self): """Test retrieving a single file when offline.""" import satpy from satpy.data_download import find_registerable_files, retrieve - _setup_custom_configs(tmpdir) file_registry = {} - with satpy.config.set(config_path=[tmpdir], data_dir=str(tmpdir), download_aux=True), \ + with 
satpy.config.set(config_path=[self.tmpdir], data_dir=str(self.tmpdir), download_aux=True), \ mock.patch('satpy.data_download._FILE_REGISTRY', file_registry): comp_file = 'composites/README.rst' found_files = find_registerable_files() assert comp_file in found_files # the file doesn't exist, we can't download it - assert not tmpdir.join(comp_file).exists() + assert not self.tmpdir.join(comp_file).exists() with satpy.config.set(download_aux=False): pytest.raises(RuntimeError, retrieve, comp_file) # allow downloading and get it retrieve(comp_file) - assert tmpdir.join(comp_file).exists() + assert self.tmpdir.join(comp_file).exists() # turn off downloading and make sure we get local file with satpy.config.set(download_aux=False): local_file = retrieve(comp_file) assert local_file - def test_offline_retrieve_all(self, tmpdir): + def test_offline_retrieve_all(self): """Test registering and retrieving all files fails when offline.""" import satpy from satpy.data_download import retrieve_all - _setup_custom_configs(tmpdir) - with satpy.config.set(config_path=[tmpdir], data_dir=str(tmpdir), download_aux=False): + with satpy.config.set(config_path=[self.tmpdir], data_dir=str(self.tmpdir), download_aux=False): pytest.raises(RuntimeError, retrieve_all) - def test_retrieve_all(self, tmpdir): + def test_retrieve_all(self): """Test registering and retrieving all files.""" import satpy from satpy.data_download import retrieve_all - _setup_custom_configs(tmpdir) file_registry = {} file_urls = {} - with satpy.config.set(config_path=[tmpdir], data_dir=str(tmpdir)), \ + with satpy.config.set(config_path=[self.tmpdir], data_dir=str(self.tmpdir)), \ mock.patch('satpy.data_download._FILE_REGISTRY', file_registry), \ mock.patch('satpy.data_download._FILE_URLS', file_urls), \ mock.patch('satpy.data_download.find_registerable_files'): comp_file = 'composites/README.rst' file_registry[comp_file] = None file_urls[comp_file] = README_URL - assert not tmpdir.join(comp_file).exists() + assert 
not self.tmpdir.join(comp_file).exists() retrieve_all() - assert tmpdir.join(comp_file).exists() + assert self.tmpdir.join(comp_file).exists() diff --git a/setup.py b/setup.py index 8f91d51196..9aa6d90f06 100644 --- a/setup.py +++ b/setup.py @@ -105,7 +105,7 @@ def _config_data_files(base_dirs, extensions=(".cfg", )): entry_points = { 'console_scripts': [ - 'satpy_retrieve_all=satpy.data_download:retrieve_all_cmd', + 'satpy_retrieve_all_aux_data=satpy.data_download:retrieve_all_cmd', ], } From 6ef79504d9ffa828f97e27977d4ce92892f868e4 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Fri, 12 Feb 2021 09:19:27 -0600 Subject: [PATCH 15/16] Rename satpy.data_download to satpy.aux_download --- doc/source/dev_guide/aux_data.rst | 14 +++++------ satpy/{data_download.py => aux_download.py} | 20 ++++++++-------- satpy/composites/__init__.py | 4 ++-- satpy/readers/yaml_reader.py | 2 +- satpy/tests/test_data_download.py | 26 ++++++++++----------- satpy/writers/__init__.py | 2 +- 6 files changed, 34 insertions(+), 34 deletions(-) rename satpy/{data_download.py => aux_download.py} (95%) diff --git a/doc/source/dev_guide/aux_data.rst b/doc/source/dev_guide/aux_data.rst index 92219a0a3e..5e53aa1703 100644 --- a/doc/source/dev_guide/aux_data.rst +++ b/doc/source/dev_guide/aux_data.rst @@ -37,17 +37,17 @@ Once registered, Satpy can be told to retrieve the file (see below) by using a Satpy includes a low-level function and a high-level Mixin class for registering files. The higher level class is recommended for any Satpy component like readers, writers, and compositors. The lower-level -:func:`~satpy.data_download.register_file` function can be used for any other +:func:`~satpy.aux_download.register_file` function can be used for any other use case. 
-The :class:`~satpy.data_download.DataMixIn` class is automatically included +The :class:`~satpy.aux_download.DataDownloadMixin` class is automatically included in the :class:`~satpy.readers.yaml_reader.FileYAMLReader` and :class:`~satpy.writers.Writer` base classes. For any other component (like a compositor) you should include it as another parent class: .. code-block:: python - from satpy.data_download import DataDownloadMixin + from satpy.aux_download import DataDownloadMixin from satpy.composites import GenericCompositor class MyCompositor(GenericCompositor, DataDownloadMixin): @@ -59,7 +59,7 @@ a compositor) you should include it as another parent class: self.register_data_files(data_files) However your code registers files, to be consistent it must do it during -initialization so that the :func:`~satpy.data_download.find_registerable_files`. +initialization so that the :func:`~satpy.aux_download.find_registerable_files` function can find them. If your component isn't a reader, writer, or compositor then this function will need to be updated to find and load your registered files. See :ref:`offline_aux_downloads` below for more information. @@ -84,13 +84,13 @@ of dictionaries including a ``url``, ``known_hash``, and optional filename: "satpy_releasing.md" known_hash: null -See the :class:`~satpy.data_download.DataDownloadMixin` for more information. +See the :class:`~satpy.aux_download.DataDownloadMixin` for more information. Retrieving ^^^^^^^^^^ Files that have been registered (see above) can be retrieved by calling the -:func:`~satpy.data_download.retrieve` function. This function expects a single +:func:`~satpy.aux_download.retrieve` function. This function expects a single argument: the cache key. Cache keys are returned by registering functions, but can also be pre-determined by following the scheme ``<component_type>/<filename>`` (ex. ``readers/README.rst``). 
@@ -104,7 +104,7 @@ Offline Downloads ----------------- To assist with operational environments, Satpy includes a -:func:`~satpy.data_download.retrieve_all` function that will try to find all +:func:`~satpy.aux_download.retrieve_all` function that will try to find all files that Satpy components may need to download in the future and download them to the current directory specified by :ref:`data_dir_setting`. This function allows you to specify a list of ``readers``, ``writers``, or diff --git a/satpy/data_download.py b/satpy/aux_download.py similarity index 95% rename from satpy/data_download.py rename to satpy/aux_download.py index 24ba8b4fae..1d973254f7 100644 --- a/satpy/data_download.py +++ b/satpy/aux_download.py @@ -34,7 +34,7 @@ def register_file(url, filename, component_type=None, known_hash=None): This function only prepares Satpy to be able to download and cache the provided file. It will not download the file. See - :func:`satpy.data_download.retrieve` for more information. + :func:`satpy.aux_download.retrieve` for more information. Args: url (str): URL where remote file can be downloaded. @@ -51,7 +51,7 @@ def register_file(url, filename, component_type=None, known_hash=None): Returns: Cache key that can be used to retrieve the file later. The cache key consists of the ``component_type`` and provided ``filename``. This - should be passed to :func:`satpy.data_download_retrieve` when the + should be passed to :func:`satpy.aux_download.retrieve` when the file will be used. """ @@ -89,7 +89,7 @@ def retrieve(cache_key, pooch_kwargs=None): Args: cache_key (str): Cache key returned by - :func:`~satpy.data_download.register_file`. + :func:`~satpy.aux_download.register_file`. pooch_kwargs (dict or None): Extra keyword arguments to pass to :meth:`pooch.Pooch.fetch`. 
@@ -232,7 +232,7 @@ class DataDownloadMixin: The below code is shown as an example:: from satpy.readers.yaml_reader import AbstractYAMLReader - from satpy.data_download import DataDownloadMixin + from satpy.aux_download import DataDownloadMixin class MyReader(AbstractYAMLReader, DataDownloadMixin): def __init__(self, *args, **kwargs): @@ -261,22 +261,22 @@ def __init__(self, *args, **kwargs): In this example we register two files that might be downloaded. If ``known_hash`` is not provided or None (null in YAML) then the data file will not be checked for validity when downloaded. See - :func:`~satpy.data_download.register_file` for more information. You can + :func:`~satpy.aux_download.register_file` for more information. You can optionally specify ``filename`` to define the in-cache name when this file is downloaded. This can be useful in cases when the filename can not be easily determined from the URL. When it comes time to needing the file, you can retrieve the local path - by calling ``~satpy.data_download.retrieve(cache_key)`` with the + by calling ``satpy.aux_download.retrieve(cache_key)`` with the "cache key" generated during registration. These keys will be in the format: ``<component_type>/<filename>``. For a reader this would be ``readers/satpy_release.md``. This Mixin is not the only way to register and download files for a Satpy component, but is the most generic and flexible. Feel free to - use the :func:`~satpy.data_download.register_file` and - :func:`~satpy.data_download.retrieve` functions directly. - However, :meth:`~satpy.data_download.find_registerable_files` must also + use the :func:`~satpy.aux_download.register_file` and + :func:`~satpy.aux_download.retrieve` functions directly. + However, :func:`~satpy.aux_download.find_registerable_files` must also be updated to support your component (if files are not register during initialization). 
@@ -299,7 +299,7 @@ def _data_file_component_type(self): def register_data_files(self, data_files=None): """Register a series of files that may be downloaded later. - See :class:`~satpy.data_download.DataDownloadMixin` for more + See :class:`~satpy.aux_download.DataDownloadMixin` for more information on the assumptions and structure of the data file configuration dictionary. diff --git a/satpy/composites/__init__.py b/satpy/composites/__init__.py index 8ff89db1bd..fb1d9acbf4 100644 --- a/satpy/composites/__init__.py +++ b/satpy/composites/__init__.py @@ -27,7 +27,7 @@ from satpy.dataset import DataID, combine_metadata from satpy.dataset.dataid import minimal_default_keys_config -from satpy.data_download import DataDownloadMixin +from satpy.aux_download import DataDownloadMixin from satpy.writers import get_enhanced_image @@ -1040,7 +1040,7 @@ def register_data_files(self, data_files): }]) def _retrieve_data_file(self): - from satpy.data_download import retrieve + from satpy.aux_download import retrieve if os.path.isabs(self._cache_filename): return self._cache_filename return retrieve(self._cache_key) diff --git a/satpy/readers/yaml_reader.py b/satpy/readers/yaml_reader.py index 4092877d17..5c7f8e3479 100644 --- a/satpy/readers/yaml_reader.py +++ b/satpy/readers/yaml_reader.py @@ -42,7 +42,7 @@ from satpy.utils import recursive_dict_update from satpy.dataset import DataQuery, DataID, get_key from satpy.dataset.dataid import get_keys_from_config, default_id_keys_config, default_co_keys_config -from satpy.data_download import DataDownloadMixin +from satpy.aux_download import DataDownloadMixin from satpy import DatasetDict from satpy.resample import add_crs_xy_coords from trollsift.parser import globify, parse diff --git a/satpy/tests/test_data_download.py b/satpy/tests/test_data_download.py index 79ace3888a..9e0ceb8d46 100644 --- a/satpy/tests/test_data_download.py +++ b/satpy/tests/test_data_download.py @@ -119,9 +119,9 @@ def _setup_custom_configs(self, tmpdir): 
def test_find_registerable(self, readers, writers, comp_sensors): """Test that find_registerable finds some things.""" import satpy - from satpy.data_download import find_registerable_files + from satpy.aux_download import find_registerable_files with satpy.config.set(config_path=[self.tmpdir]), \ - mock.patch('satpy.data_download._FILE_REGISTRY', {}): + mock.patch('satpy.aux_download._FILE_REGISTRY', {}): found_files = find_registerable_files( readers=readers, writers=writers, composite_sensors=comp_sensors, @@ -139,10 +139,10 @@ def test_find_registerable(self, readers, writers, comp_sensors): def test_limited_find_registerable(self): """Test that find_registerable doesn't find anything when limited.""" import satpy - from satpy.data_download import find_registerable_files + from satpy.aux_download import find_registerable_files file_registry = {} with satpy.config.set(config_path=[self.tmpdir]), \ - mock.patch('satpy.data_download._FILE_REGISTRY', file_registry): + mock.patch('satpy.aux_download._FILE_REGISTRY', file_registry): found_files = find_registerable_files( readers=[], writers=[], composite_sensors=[], ) @@ -151,10 +151,10 @@ def test_limited_find_registerable(self): def test_retrieve(self): """Test retrieving a single file.""" import satpy - from satpy.data_download import find_registerable_files, retrieve + from satpy.aux_download import find_registerable_files, retrieve file_registry = {} with satpy.config.set(config_path=[self.tmpdir], data_dir=str(self.tmpdir)), \ - mock.patch('satpy.data_download._FILE_REGISTRY', file_registry): + mock.patch('satpy.aux_download._FILE_REGISTRY', file_registry): comp_file = 'composites/README.rst' found_files = find_registerable_files() assert comp_file in found_files @@ -165,10 +165,10 @@ def test_retrieve(self): def test_offline_retrieve(self): """Test retrieving a single file when offline.""" import satpy - from satpy.data_download import find_registerable_files, retrieve + from satpy.aux_download import 
find_registerable_files, retrieve file_registry = {} with satpy.config.set(config_path=[self.tmpdir], data_dir=str(self.tmpdir), download_aux=True), \ - mock.patch('satpy.data_download._FILE_REGISTRY', file_registry): + mock.patch('satpy.aux_download._FILE_REGISTRY', file_registry): comp_file = 'composites/README.rst' found_files = find_registerable_files() assert comp_file in found_files @@ -190,20 +190,20 @@ def test_offline_retrieve(self): def test_offline_retrieve_all(self): """Test registering and retrieving all files fails when offline.""" import satpy - from satpy.data_download import retrieve_all + from satpy.aux_download import retrieve_all with satpy.config.set(config_path=[self.tmpdir], data_dir=str(self.tmpdir), download_aux=False): pytest.raises(RuntimeError, retrieve_all) def test_retrieve_all(self): """Test registering and retrieving all files.""" import satpy - from satpy.data_download import retrieve_all + from satpy.aux_download import retrieve_all file_registry = {} file_urls = {} with satpy.config.set(config_path=[self.tmpdir], data_dir=str(self.tmpdir)), \ - mock.patch('satpy.data_download._FILE_REGISTRY', file_registry), \ - mock.patch('satpy.data_download._FILE_URLS', file_urls), \ - mock.patch('satpy.data_download.find_registerable_files'): + mock.patch('satpy.aux_download._FILE_REGISTRY', file_registry), \ + mock.patch('satpy.aux_download._FILE_URLS', file_urls), \ + mock.patch('satpy.aux_download.find_registerable_files'): comp_file = 'composites/README.rst' file_registry[comp_file] = None file_urls[comp_file] = README_URL diff --git a/satpy/writers/__init__.py b/satpy/writers/__init__.py index 83fc2874cf..2657f177bc 100644 --- a/satpy/writers/__init__.py +++ b/satpy/writers/__init__.py @@ -39,7 +39,7 @@ from satpy import CHUNK_SIZE from satpy.plugin_base import Plugin from satpy.resample import get_area_def -from satpy.data_download import DataDownloadMixin +from satpy.aux_download import DataDownloadMixin from trollsift import parser 
From f7eb5fb9b6d0504de4bce30ad926ad4c161eec50 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Fri, 12 Feb 2021 09:43:17 -0600 Subject: [PATCH 16/16] Fix missing data_download to aux_download renames --- satpy/composites/__init__.py | 2 +- satpy/tests/test_composites.py | 4 ++-- setup.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/satpy/composites/__init__.py b/satpy/composites/__init__.py index fb1d9acbf4..5233a82dcd 100644 --- a/satpy/composites/__init__.py +++ b/satpy/composites/__init__.py @@ -997,7 +997,7 @@ def __init__(self, name, filename=None, url=None, known_hash=None, area=None, Environment variables are expanded. known_hash (str or None): Hash of the remote file used to verify a successful download. If not provided then the download will - not be verified. See :func:`satpy.data_download.register_file` + not be verified. See :func:`satpy.aux_download.register_file` for more information. area (str): Name of area definition for the image. Optional for images with built-in area definitions (geotiff). 
diff --git a/satpy/tests/test_composites.py b/satpy/tests/test_composites.py index 3de80935bd..9a6dea5b02 100644 --- a/satpy/tests/test_composites.py +++ b/satpy/tests/test_composites.py @@ -906,8 +906,8 @@ def test_init(self, get_area_def): self.assertEqual(comp.area, "bar") get_area_def.assert_called_once_with("euro4") - @mock.patch('satpy.data_download.retrieve') - @mock.patch('satpy.data_download.register_file') + @mock.patch('satpy.aux_download.retrieve') + @mock.patch('satpy.aux_download.register_file') @mock.patch('satpy.Scene') def test_call(self, Scene, register, retrieve): # noqa """Test the static compositing.""" diff --git a/setup.py b/setup.py index 9aa6d90f06..2fc2f89feb 100644 --- a/setup.py +++ b/setup.py @@ -105,7 +105,7 @@ def _config_data_files(base_dirs, extensions=(".cfg", )): entry_points = { 'console_scripts': [ - 'satpy_retrieve_all_aux_data=satpy.data_download:retrieve_all_cmd', + 'satpy_retrieve_all_aux_data=satpy.aux_download:retrieve_all_cmd', ], }