From 0f4ff97fe55f2c9371dea78759c4e1db855e5092 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Sun, 13 Jan 2019 09:32:00 -0600 Subject: [PATCH 1/8] Add utility functions to group multiple files in to individual Scenes --- satpy/readers/__init__.py | 87 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 2 deletions(-) diff --git a/satpy/readers/__init__.py b/satpy/readers/__init__.py index 15423a4411..5178357c22 100644 --- a/satpy/readers/__init__.py +++ b/satpy/readers/__init__.py @@ -26,6 +26,7 @@ import numbers import os import warnings +from datetime import datetime, timedelta import six import yaml @@ -407,6 +408,89 @@ def __delitem__(self, key): return super(DatasetDict, self).__delitem__(key) +def group_files(files_to_sort, reader=None, time_threshold=10, + group_keys=('start_time',), ppp_config_dir=None, reader_kwargs=None): + """Group series of files by file pattern information. + + By default this will group files by their filename ``start_time`` + assuming it exists in the pattern. By passing the individual + dictionaries returned by this function to the Scene classes' + ``filenames``, a series `Scene` objects can be easily created. + + Args: + files_to_sort (iterable): File paths to sort in to group + reader (str): Reader whose file patterns should be used to sort files. + This + time_threshold (int): Number of seconds used to consider time elements + in a group as being equal. For example, if the 'start_time' item + is used to group files then any time within `time_threshold` + seconds of the first file's 'start_time' will be seen as occurring + at the same time. + group_keys (list or tuple): File pattern information to use to group + files. Keys are sorted in order and only the first key is used when + comparing datetime elements with `time_threshold` (see above). This + means it is recommended that datetime values should only come from + the first key in ``group_keys``. 
Otherwise, there is a good chance + that files will not be grouped properly (datetimes being barely + unequal). Defaults to ``('start_time',)``. + ppp_config_dir (str): Root usser configuration directory for SatPy. + This will be deprecated in the future, but is here for consistency + with other SatPy features. + reader_kwargs (dict): Additional keyword arguments to pass to reader + creation. + + Returns: + List of dictionaries mapping 'reader' to a list of filenames. + Each of these dictionaries can be passed as ``filenames`` to + a `Scene` object. + + """ + # FUTURE: Find the best reader for each filename using `find_files_and_readers` + if reader is None: + raise ValueError("'reader' keyword argument is required.") + # FUTURE: Handle multiple readers + reader_configs = list(configs_for_reader(reader, ppp_config_dir))[0] + reader_kwargs = reader_kwargs or {} + try: + reader_instance = load_reader(reader_configs, **reader_kwargs) + except (KeyError, IOError, yaml.YAMLError) as err: + LOG.info('Cannot use %s', str(reader_configs)) + LOG.debug(str(err)) + if reader and (isinstance(reader, str) or len(reader) == 1): + # if it is a single reader then give a more usable error + raise + return + + file_keys = [] + for filetype, filetype_info in reader_instance.sorted_filetype_items(): + for f, file_info in reader_instance.filename_items_for_filetype(files_to_sort, filetype_info): + group_key = tuple(file_info.get(k) for k in group_keys) + file_keys.append((group_key, f)) + + prev_key = None + threshold = timedelta(seconds=time_threshold) + file_groups = {} + for gk, f in sorted(file_keys): + # use first element of key as time identifier (if datetime type) + if prev_key is None: + is_new_group = True + elif isinstance(gk[0], datetime): + # datetimes within threshold difference are "the same time" + is_new_group = (gk[0] - prev_key[0]) > threshold + else: + is_new_group = gk[0] != prev_key[0] + + # if this is a new group based on the first element + if is_new_group or 
gk[1:] != prev_key[1:]: + file_groups[gk] = [f] + prev_key = gk + else: + file_groups[gk].append(f) + sorted_group_keys = sorted(file_groups) + # passable to Scene as 'filenames' + return [{reader: file_groups[group_key]} for group_key in sorted_group_keys] + + def read_reader_config(config_files, loader=yaml.Loader): """Read the reader `config_files` and return the info extracted.""" @@ -427,8 +511,7 @@ def read_reader_config(config_files, loader=yaml.Loader): def load_reader(reader_configs, **reader_kwargs): - """Import and setup the reader from *reader_info* - """ + """Import and setup the reader from *reader_info*.""" reader_info = read_reader_config(reader_configs) reader_instance = reader_info['reader']( config_files=reader_configs, From 068f7b82fe35a69c5580620189f3d7c75ef59439 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Sun, 13 Jan 2019 09:35:53 -0600 Subject: [PATCH 2/8] Add 'from_files' classmethod to MultiScene for easier creation. Uses 'group_files' utility function. --- satpy/multiscene.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/satpy/multiscene.py b/satpy/multiscene.py index 1b0bf22d5d..799241b4d2 100644 --- a/satpy/multiscene.py +++ b/satpy/multiscene.py @@ -155,6 +155,19 @@ def __init__(self, scenes=None): """ self._scenes = scenes or [] + @classmethod + def from_files(cls, files_to_sort, reader=None, **kwargs): + """Create multiple Scene objects from multiple files. + + This uses the :func:`satpy.readers.group_files` function to group + files. See this function for more details on possible keyword + arguments. 
+ + """ + from satpy.readers import group_files + file_groups = group_files(files_to_sort, reader=reader, **kwargs) + return cls(file_groups) + def __iter__(self): """Iterate over the provided Scenes once.""" return self.scenes From 18b27d8daf59f8ef0d593b1d183a341b9dd1371d Mon Sep 17 00:00:00 2001 From: David Hoese Date: Sun, 13 Jan 2019 09:39:50 -0600 Subject: [PATCH 3/8] Fix MultiScene from_files classmethod --- satpy/multiscene.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/satpy/multiscene.py b/satpy/multiscene.py index 799241b4d2..8421000769 100644 --- a/satpy/multiscene.py +++ b/satpy/multiscene.py @@ -166,7 +166,8 @@ def from_files(cls, files_to_sort, reader=None, **kwargs): """ from satpy.readers import group_files file_groups = group_files(files_to_sort, reader=reader, **kwargs) - return cls(file_groups) + scenes = [Scene(filenames=fg) for fg in file_groups] + return cls(scenes) def __iter__(self): """Iterate over the provided Scenes once.""" From 5e01978b066b5f2d68ab142070df4a72f1197eca Mon Sep 17 00:00:00 2001 From: David Hoese Date: Sun, 13 Jan 2019 17:19:11 -0600 Subject: [PATCH 4/8] Add tests for from_files and group_files --- satpy/readers/__init__.py | 19 +++---- satpy/tests/test_multiscene.py | 17 +++++++ satpy/tests/test_readers.py | 90 ++++++++++++++++++++++++++++++++-- 3 files changed, 113 insertions(+), 13 deletions(-) diff --git a/satpy/readers/__init__.py b/satpy/readers/__init__.py index 5178357c22..d74a65c65d 100644 --- a/satpy/readers/__init__.py +++ b/satpy/readers/__init__.py @@ -448,7 +448,11 @@ def group_files(files_to_sort, reader=None, time_threshold=10, # FUTURE: Find the best reader for each filename using `find_files_and_readers` if reader is None: raise ValueError("'reader' keyword argument is required.") + elif not isinstance(reader, (list, tuple)): + reader = [reader] + # FUTURE: Handle multiple readers + reader = reader[0] reader_configs = list(configs_for_reader(reader, ppp_config_dir))[0] 
reader_kwargs = reader_kwargs or {} try: @@ -456,10 +460,10 @@ def group_files(files_to_sort, reader=None, time_threshold=10, except (KeyError, IOError, yaml.YAMLError) as err: LOG.info('Cannot use %s', str(reader_configs)) LOG.debug(str(err)) - if reader and (isinstance(reader, str) or len(reader) == 1): - # if it is a single reader then give a more usable error - raise - return + # if reader and (isinstance(reader, str) or len(reader) == 1): + # # if it is a single reader then give a more usable error + # raise + raise file_keys = [] for filetype, filetype_info in reader_instance.sorted_filetype_items(): @@ -485,7 +489,7 @@ def group_files(files_to_sort, reader=None, time_threshold=10, file_groups[gk] = [f] prev_key = gk else: - file_groups[gk].append(f) + file_groups[prev_key].append(f) sorted_group_keys = sorted(file_groups) # passable to Scene as 'filenames' return [{reader: file_groups[group_key]} for group_key in sorted_group_keys] @@ -513,10 +517,7 @@ def read_reader_config(config_files, loader=yaml.Loader): def load_reader(reader_configs, **reader_kwargs): """Import and setup the reader from *reader_info*.""" reader_info = read_reader_config(reader_configs) - reader_instance = reader_info['reader']( - config_files=reader_configs, - **reader_kwargs - ) + reader_instance = reader_info['reader'](config_files=reader_configs, **reader_kwargs) return reader_instance diff --git a/satpy/tests/test_multiscene.py b/satpy/tests/test_multiscene.py index 977905f28a..00c0a64578 100644 --- a/satpy/tests/test_multiscene.py +++ b/satpy/tests/test_multiscene.py @@ -134,6 +134,23 @@ def test_properties(self): self.assertSetEqual(mscn.shared_dataset_ids, {ds1_id, ds2_id}) self.assertFalse(mscn.all_same_area) + def test_from_files(self): + """Test creating a multiscene from multiple files.""" + from satpy import MultiScene + input_files = [ + "OR_ABI-L1b-RadC-M3C01_G16_s20171171502203_e20171171504576_c20171171505018.nc", + 
"OR_ABI-L1b-RadC-M3C01_G16_s20171171507203_e20171171509576_c20171171510018.nc", + "OR_ABI-L1b-RadC-M3C01_G16_s20171171512203_e20171171514576_c20171171515017.nc", + "OR_ABI-L1b-RadC-M3C01_G16_s20171171517203_e20171171519577_c20171171520019.nc", + "OR_ABI-L1b-RadC-M3C01_G16_s20171171522203_e20171171524576_c20171171525020.nc", + "OR_ABI-L1b-RadC-M3C01_G16_s20171171527203_e20171171529576_c20171171530017.nc", + ] + with mock.patch('satpy.multiscene.Scene') as scn_mock: + mscn = MultiScene.from_files(input_files, reader='abi_l1b') + calls = [mock.call(filenames={'abi_l1b': [in_file]}) for in_file in input_files] + scn_mock.assert_has_calls(calls) + self.assertTrue(len(mscn.scenes), 6) + class TestMultiSceneSave(unittest.TestCase): """Test saving a MultiScene to various formats.""" diff --git a/satpy/tests/test_readers.py b/satpy/tests/test_readers.py index 1ddd148a24..d8bbd7b6e1 100644 --- a/satpy/tests/test_readers.py +++ b/satpy/tests/test_readers.py @@ -527,8 +527,7 @@ def test_reader_load_failed(self): # touch the file so it exists on disk with mock.patch('yaml.load') as load: load.side_effect = yaml.YAMLError("Import problems") - self.assertRaises(yaml.YAMLError, find_files_and_readers, - reader='viirs_sdr') + self.assertRaises(yaml.YAMLError, find_files_and_readers, reader='viirs_sdr') def test_old_reader_name_mapping(self): """Test that requesting old reader names raises a warning.""" @@ -582,15 +581,98 @@ def test_available_readers(self): self.assertIn('name', reader_info) +class TestGroupFiles(unittest.TestCase): + """Test the 'group_files' utility function.""" + + def setUp(self): + """Set up test filenames to use.""" + input_files = [ + "OR_ABI-L1b-RadC-M3C01_G16_s20171171502203_e20171171504576_c20171171505018.nc", + "OR_ABI-L1b-RadC-M3C01_G16_s20171171507203_e20171171509576_c20171171510018.nc", + "OR_ABI-L1b-RadC-M3C01_G16_s20171171512203_e20171171514576_c20171171515017.nc", + "OR_ABI-L1b-RadC-M3C01_G16_s20171171517203_e20171171519577_c20171171520019.nc", + 
"OR_ABI-L1b-RadC-M3C01_G16_s20171171522203_e20171171524576_c20171171525020.nc", + "OR_ABI-L1b-RadC-M3C01_G16_s20171171527203_e20171171529576_c20171171530017.nc", + "OR_ABI-L1b-RadC-M3C02_G16_s20171171502203_e20171171504576_c20171171505008.nc", + "OR_ABI-L1b-RadC-M3C02_G16_s20171171507203_e20171171509576_c20171171510012.nc", + "OR_ABI-L1b-RadC-M3C02_G16_s20171171512203_e20171171514576_c20171171515007.nc", + "OR_ABI-L1b-RadC-M3C02_G16_s20171171517203_e20171171519576_c20171171520010.nc", + "OR_ABI-L1b-RadC-M3C02_G16_s20171171522203_e20171171524576_c20171171525008.nc", + "OR_ABI-L1b-RadC-M3C02_G16_s20171171527203_e20171171529576_c20171171530008.nc", + ] + self.g16_files = input_files + self.g17_files = [x.replace('G16', 'G17') for x in input_files] + + def test_no_reader(self): + """Test that reader must be provided.""" + from satpy.readers import group_files + self.assertRaises(ValueError, group_files, []) + + def test_bad_reader(self): + """Test that reader not existing causes an error.""" + from satpy.readers import group_files + import yaml + # touch the file so it exists on disk + with mock.patch('yaml.load') as load: + load.side_effect = yaml.YAMLError("Import problems") + self.assertRaises(yaml.YAMLError, group_files, [], reader='abi_l1b') + + def test_default_behavior(self): + """Test the default behavior with the 'abi_l1b' reader.""" + from satpy.readers import group_files + groups = group_files(self.g16_files, reader='abi_l1b') + self.assertTrue(len(groups), 6) + self.assertTrue(len(groups[0]['abi_l1b']), 2) + + def test_non_datetime_group_key(self): + """Test what happens when the start_time isn't used for grouping.""" + from satpy.readers import group_files + groups = group_files(self.g16_files, reader='abi_l1b', group_keys=('platform_shortname',)) + self.assertTrue(len(groups), 1) + self.assertTrue(len(groups[0]['abi_l1b']), 6) + + def test_large_time_threshold(self): + """Test what happens when the time threshold holds multiple files.""" + from 
satpy.readers import group_files + groups = group_files(self.g16_files, reader='abi_l1b', time_threshold=60*8) + self.assertTrue(len(groups), 3) + self.assertTrue(len(groups[0]['abi_l1b']), 2) + + def test_two_instruments_files(self): + """Test the default behavior when two instruments files are provided. + + This is undesired from a user point of view since we don't want G16 + and G17 files in the same Scene. + + """ + from satpy.readers import group_files + groups = group_files(self.g16_files + self.g17_files, reader='abi_l1b') + self.assertTrue(len(groups), 6) + self.assertTrue(len(groups[0]['abi_l1b']), 4) + + def test_two_instruments_files_split(self): + """Test the default behavior when two instruments files are provided and split. + + Tell the sorting to include the platform identifier as another field + to use for grouping. + + """ + from satpy.readers import group_files + groups = group_files(self.g16_files + self.g17_files, reader='abi_l1b', + group_keys=('start_time', 'platform_shortname')) + self.assertTrue(len(groups), 12) + self.assertTrue(len(groups[0]['abi_l1b']), 2) + + def suite(): - """The test suite for test_scene. 
- """ + """The test suite for test_readers.""" loader = unittest.TestLoader() mysuite = unittest.TestSuite() mysuite.addTest(loader.loadTestsFromTestCase(TestDatasetDict)) mysuite.addTest(loader.loadTestsFromTestCase(TestReaderLoader)) mysuite.addTest(loader.loadTestsFromTestCase(TestFindFilesAndReaders)) mysuite.addTest(loader.loadTestsFromTestCase(TestYAMLFiles)) + mysuite.addTest(loader.loadTestsFromTestCase(TestGroupFiles)) return mysuite From f3c156118bcec12ab34f5abf14b7add67ce4a15e Mon Sep 17 00:00:00 2001 From: David Hoese Date: Sun, 13 Jan 2019 17:34:14 -0600 Subject: [PATCH 5/8] Add usage of `from_files` in MultiScene documentation --- doc/source/conf.py | 3 ++- doc/source/multiscene.rst | 25 +++++++++++++++++++++---- satpy/multiscene.py | 2 ++ satpy/readers/__init__.py | 2 ++ 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index f8d048c430..c9007f6ad8 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -16,6 +16,7 @@ import os import sys +from datetime import datetime # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -75,7 +76,7 @@ def __getattr__(cls, name): # General information about the project. project = u'SatPy' -copyright = u'2009-2016, The PyTroll Team' +copyright = u'2009-{}, The PyTroll Team'.format(datetime.utcnow().strftime("%Y")) # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/doc/source/multiscene.rst b/doc/source/multiscene.rst index 08139f7a55..04314697e0 100644 --- a/doc/source/multiscene.rst +++ b/doc/source/multiscene.rst @@ -41,10 +41,27 @@ The MultiScene can take "frames" of data and join them together in a single animation movie file. 
Saving animations required the `imageio` python library and for most available formats the ``ffmpeg`` command line tool suite should also be installed. The below example saves a series of GOES-EAST ABI channel -1 and channel 2 frames to MP4 movie files. Note that currently there is no -easy way to map files from multiple time steps/orbits in to individual Scene -objects. The `glob` function and for loops are used to group files into Scene -objects that, if used individually, could load the data we want. +1 and channel 2 frames to MP4 movie files. We can use the +:meth:`~satpy.multiscene.MultiScene.from_files` class +method to create a `MultiScene` from a series of files. This uses the +:func:`~satpy.readers.group_files` utility function to group files by start +time. + + >>> from satpy import Scene, MultiScene + >>> from glob import glob + >>> mscn = MultiScene.from_files(glob('/data/abi/day_1/*C0[12]*.nc'), reader='abi_l1b') + >>> mscn.load(['C01', 'C02']) + >>> mscn.save_animation('{name}_{start_time:%Y%m%d_%H%M%S}.mp4', fps=2) + +.. versionadded:: 0.12 + + The ``from_files`` and ``group_files`` functions were added in SatPy 0.12. + See below for an alternative solution. + +For older versions of SatPy we can manually create the `Scene` objects used. +The :func:`~glob.glob` function and for loops are used to group files into +Scene objects that, if used individually, could load the data we want. The +code below is equivalent to the ``from_files`` code above: >>> from satpy import Scene, MultiScene >>> from glob import glob diff --git a/satpy/multiscene.py b/satpy/multiscene.py index 8421000769..baaa97322b 100644 --- a/satpy/multiscene.py +++ b/satpy/multiscene.py @@ -163,6 +163,8 @@ def from_files(cls, files_to_sort, reader=None, **kwargs): files. See this function for more details on possible keyword arguments. + .. 
versionadded:: 0.12 + """ from satpy.readers import group_files file_groups = group_files(files_to_sort, reader=reader, **kwargs) diff --git a/satpy/readers/__init__.py b/satpy/readers/__init__.py index d74a65c65d..30bbaaaa5e 100644 --- a/satpy/readers/__init__.py +++ b/satpy/readers/__init__.py @@ -417,6 +417,8 @@ def group_files(files_to_sort, reader=None, time_threshold=10, dictionaries returned by this function to the Scene classes' ``filenames``, a series `Scene` objects can be easily created. + .. versionadded:: 0.12 + Args: files_to_sort (iterable): File paths to sort in to group reader (str): Reader whose file patterns should be used to sort files. From 0fe44de35fac133a372221caf284224fb6a85278 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Sat, 19 Jan 2019 16:56:27 -0600 Subject: [PATCH 6/8] Change MultiScene.from_files to use a generator instead of a list --- satpy/multiscene.py | 2 +- satpy/tests/test_multiscene.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/satpy/multiscene.py b/satpy/multiscene.py index baaa97322b..cc2415a607 100644 --- a/satpy/multiscene.py +++ b/satpy/multiscene.py @@ -168,7 +168,7 @@ def from_files(cls, files_to_sort, reader=None, **kwargs): """ from satpy.readers import group_files file_groups = group_files(files_to_sort, reader=reader, **kwargs) - scenes = [Scene(filenames=fg) for fg in file_groups] + scenes = (Scene(filenames=fg) for fg in file_groups) return cls(scenes) def __iter__(self): diff --git a/satpy/tests/test_multiscene.py b/satpy/tests/test_multiscene.py index 00c0a64578..ee2783f435 100644 --- a/satpy/tests/test_multiscene.py +++ b/satpy/tests/test_multiscene.py @@ -147,9 +147,9 @@ def test_from_files(self): ] with mock.patch('satpy.multiscene.Scene') as scn_mock: mscn = MultiScene.from_files(input_files, reader='abi_l1b') + self.assertTrue(len(mscn.scenes), 6) calls = [mock.call(filenames={'abi_l1b': [in_file]}) for in_file in input_files] scn_mock.assert_has_calls(calls) - 
self.assertTrue(len(mscn.scenes), 6) class TestMultiSceneSave(unittest.TestCase): From f580a891dd54a1833d86650c06ef2e48fb4afde7 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Mon, 21 Jan 2019 13:31:14 -0600 Subject: [PATCH 7/8] Add better handling of cached generators in MultiScene --- satpy/multiscene.py | 87 ++++++++++++++++++++++++++++++++------------- 1 file changed, 63 insertions(+), 24 deletions(-) diff --git a/satpy/multiscene.py b/satpy/multiscene.py index cc2415a607..b7ec4edc11 100644 --- a/satpy/multiscene.py +++ b/satpy/multiscene.py @@ -107,14 +107,33 @@ def __init__(self, scene_gen): self._scene_cache = [] self._dataset_idx = {} # this class itself is not an iterator, make one - self._self_iter = iter(self) + self._self_iter = self._create_cached_iter() - def __iter__(self): + @property + def first(self): + """First element in the generator.""" + return next(iter(self)) + + def _create_cached_iter(self): """Iterate over the provided scenes, caching them for later.""" for scn in self._scene_gen: self._scene_cache.append(scn) yield scn + def __iter__(self): + """Iterate over the provided scenes, caching them for later.""" + idx = 0 + while True: + if idx >= len(self._scene_cache): + try: + scn = next(self._self_iter) + except StopIteration: + return + else: + scn = self._scene_cache[idx] + yield scn + idx += 1 + def __getitem__(self, ds_id): """Get a specific dataset from the scenes.""" if ds_id in self._dataset_idx: @@ -154,6 +173,19 @@ def __init__(self, scenes=None): """ self._scenes = scenes or [] + scenes = iter(self._scenes) + self._scene_gen = _SceneGenerator(iter(scenes)) + # if we were originally given a generator-like object then we want to + # coordinate the loading between _SceneGenerator and _scenes + # otherwise it doesn't really matter and other operations may prefer + # a list + if not isinstance(scenes, (list, tuple)): + self._scenes = iter(self._scene_gen) + + @property + def first_scene(self): + """First Scene of this MultiScene 
object.""" + return self._scene_gen.first @classmethod def from_files(cls, files_to_sort, reader=None, **kwargs): @@ -173,7 +205,8 @@ def from_files(cls, files_to_sort, reader=None, **kwargs): def __iter__(self): """Iterate over the provided Scenes once.""" - return self.scenes + for scn in self._scenes: + yield scn @property def scenes(self): @@ -225,27 +258,35 @@ def _all_same_area(self, dataset_ids): def all_same_area(self): return self._all_same_area(self.loaded_dataset_ids) - def _gen_load(self, gen, *args, **kwargs): - """Perform a load in a generator so it is delayed.""" + @staticmethod + def _call_scene_func(gen, func_name, create_new_scene, *args, **kwargs): + """Abstract method for running a Scene method on each Scene.""" for scn in gen: - scn.load(*args, **kwargs) - yield scn + new_scn = getattr(scn, func_name)(*args, **kwargs) + if create_new_scene: + yield new_scn + else: + yield scn + + def _generate_scene_func(self, gen, func_name, create_new_scene, *args, **kwargs): + """Abstract method for running a Scene method on each Scene. + + Additionally, modifies current MultiScene or creates a new one if needed. 
+ """ + new_gen = self._call_scene_func(gen, func_name, create_new_scene, *args, **kwargs) + new_gen = new_gen if self.is_generator else list(new_gen) + if create_new_scene: + return self.__class__(new_gen) + self._scene_gen = _SceneGenerator(new_gen) + self._scenes = iter(self._scene_gen) def load(self, *args, **kwargs): """Load the required datasets from the multiple scenes.""" - scene_gen = self._gen_load(self._scenes, *args, **kwargs) - self._scenes = scene_gen if self.is_generator else list(scene_gen) - - def _gen_resample(self, gen, destination=None, **kwargs): - for scn in gen: - new_scn = scn.resample(destination, **kwargs) - yield new_scn + self._generate_scene_func(self._scenes, 'load', False, *args, **kwargs) def resample(self, destination=None, **kwargs): """Resample the multiscene.""" - new_scenes = self._gen_resample(self._scenes, destination=destination, **kwargs) - new_scenes = new_scenes if self.is_generator else list(new_scenes) - return self.__class__(new_scenes) + return self._generate_scene_func(self._scenes, 'resample', True, destination=destination, **kwargs) def blend(self, blend_function=stack): """Blend the datasets into one scene. 
@@ -335,8 +376,9 @@ def save_animation(self, filename, datasets=None, fps=10, fill_value=None, if imageio is None: raise ImportError("Missing required 'imageio' library") - scenes = iter(self._scenes) - first_scene = next(scenes) + scene_gen = self._scene_gen + first_scene = self.first_scene + scenes = iter(self._scene_gen) info_scenes = [first_scene] if 'end_time' in filename: # if we need the last scene to generate the filename @@ -344,12 +386,9 @@ def save_animation(self, filename, datasets=None, fps=10, fill_value=None, log.debug("Generating scenes to compute end_time for filename") scenes = list(scenes) info_scenes.append(scenes[-1]) - scene_gen = _SceneGenerator(chain([first_scene], scenes)) - if not self.is_generator: - available_ds = self.loaded_dataset_ids - else: - available_ds = list(first_scene.keys()) + available_ds = [first_scene.datasets.get(ds) for ds in first_scene.wishlist] + available_ds = [ds for ds in available_ds if ds is not None] dataset_ids = datasets or available_ds writers = [] From 87d7ac1dfd0df65810b9c29a15f7571d496d1997 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Fri, 25 Jan 2019 12:06:03 -0600 Subject: [PATCH 8/8] Add 'group_keys' to ABI/AHI reader yaml files for group_files --- satpy/etc/readers/abi_l1b.yaml | 2 + satpy/etc/readers/ahi_hrit.yaml | 3 +- satpy/etc/readers/ahi_hsd.yaml | 3 +- satpy/readers/__init__.py | 15 ++++++-- satpy/tests/test_readers.py | 66 ++++++++++++++++++++------------- 5 files changed, 58 insertions(+), 31 deletions(-) diff --git a/satpy/etc/readers/abi_l1b.yaml b/satpy/etc/readers/abi_l1b.yaml index 56913e79ba..7f7c32c68b 100644 --- a/satpy/etc/readers/abi_l1b.yaml +++ b/satpy/etc/readers/abi_l1b.yaml @@ -10,6 +10,8 @@ reader: sensors: [abi] default_channels: reader: !!python/name:satpy.readers.yaml_reader.FileYAMLReader + # file pattern keys to sort files by with 'satpy.readers.group_files' + group_keys: ['start_time', 'platform_shortname', 'scene_abbr'] file_types: # NOTE: observation_type == product 
acronym in PUG document diff --git a/satpy/etc/readers/ahi_hrit.yaml b/satpy/etc/readers/ahi_hrit.yaml index db4485d716..49a5aeb2d0 100644 --- a/satpy/etc/readers/ahi_hrit.yaml +++ b/satpy/etc/readers/ahi_hrit.yaml @@ -6,8 +6,9 @@ reader: description: JMA HRIT Reader name: ahi_hrit sensors: [ahi] - default_channels: [] reader: !!python/name:satpy.readers.yaml_reader.FileYAMLReader + # file pattern keys to sort files by with 'satpy.readers.group_files' + group_keys: ['start_time', 'area'] file_types: hrit_b01: diff --git a/satpy/etc/readers/ahi_hsd.yaml b/satpy/etc/readers/ahi_hsd.yaml index fc0fc90847..3821228d9c 100644 --- a/satpy/etc/readers/ahi_hsd.yaml +++ b/satpy/etc/readers/ahi_hsd.yaml @@ -6,7 +6,8 @@ reader: name: ahi_hsd reader: !!python/name:satpy.readers.yaml_reader.FileYAMLReader '' sensors: [ahi] - default_datasets: + # file pattern keys to sort files by with 'satpy.readers.group_files' + group_keys: ['start_time', 'platform_shortname', 'area'] datasets: B01: diff --git a/satpy/readers/__init__.py b/satpy/readers/__init__.py index 72e9c90efe..e4e99eb1da 100644 --- a/satpy/readers/__init__.py +++ b/satpy/readers/__init__.py @@ -409,7 +409,7 @@ def __delitem__(self, key): def group_files(files_to_sort, reader=None, time_threshold=10, - group_keys=('start_time',), ppp_config_dir=None, reader_kwargs=None): + group_keys=None, ppp_config_dir=None, reader_kwargs=None): """Group series of files by file pattern information. By default this will group files by their filename ``start_time`` @@ -434,7 +434,8 @@ def group_files(files_to_sort, reader=None, time_threshold=10, means it is recommended that datetime values should only come from the first key in ``group_keys``. Otherwise, there is a good chance that files will not be grouped properly (datetimes being barely - unequal). Defaults to ``('start_time',)``. + unequal). Defaults to a reader's ``group_keys`` configuration (set + in YAML), otherwise ``('start_time',)``. 
ppp_config_dir (str): Root usser configuration directory for SatPy. This will be deprecated in the future, but is here for consistency with other SatPy features. @@ -467,6 +468,8 @@ def group_files(files_to_sort, reader=None, time_threshold=10, # raise raise + if group_keys is None: + group_keys = reader_instance.info.get('group_keys', ('start_time',)) file_keys = [] for filetype, filetype_info in reader_instance.sorted_filetype_items(): for f, file_info in reader_instance.filename_items_for_filetype(files_to_sort, filetype_info): @@ -480,14 +483,20 @@ def group_files(files_to_sort, reader=None, time_threshold=10, # use first element of key as time identifier (if datetime type) if prev_key is None: is_new_group = True + prev_key = gk elif isinstance(gk[0], datetime): # datetimes within threshold difference are "the same time" is_new_group = (gk[0] - prev_key[0]) > threshold else: is_new_group = gk[0] != prev_key[0] + # compare keys for those that are found for both the key and + # this is a generator and is not computed until the if statement below + # when we know that `prev_key` is not None + vals_not_equal = (this_val != prev_val for this_val, prev_val in zip(gk[1:], prev_key[1:]) + if this_val is not None and prev_val is not None) # if this is a new group based on the first element - if is_new_group or gk[1:] != prev_key[1:]: + if is_new_group or any(vals_not_equal): file_groups[gk] = [f] prev_key = gk else: diff --git a/satpy/tests/test_readers.py b/satpy/tests/test_readers.py index d8bbd7b6e1..7cb2c2e3d4 100644 --- a/satpy/tests/test_readers.py +++ b/satpy/tests/test_readers.py @@ -383,12 +383,13 @@ def test_reader_name(self): from satpy.readers import find_files_and_readers fn = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5' # touch the file so it exists on disk - open(fn, 'w') + test_file = open(fn, 'w') try: ri = find_files_and_readers(reader='viirs_sdr') self.assertListEqual(list(ri.keys()), ['viirs_sdr']) 
self.assertListEqual(ri['viirs_sdr'], [fn]) finally: + test_file.close() os.remove(fn) def test_reader_other_name(self): @@ -396,12 +397,13 @@ def test_reader_other_name(self): from satpy.readers import find_files_and_readers fn = 'S_NWC_CPP_npp_32505_20180204T1114116Z_20180204T1128227Z.nc' # touch the file so it exists on disk - open(fn, 'w') + test_file = open(fn, 'w') try: ri = find_files_and_readers(reader='nwcsaf-pps_nc') self.assertListEqual(list(ri.keys()), ['nwcsaf-pps_nc']) self.assertListEqual(ri['nwcsaf-pps_nc'], [fn]) finally: + test_file.close() os.remove(fn) def test_reader_name_matched_start_end_time(self): @@ -410,7 +412,7 @@ def test_reader_name_matched_start_end_time(self): from datetime import datetime fn = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5' # touch the file so it exists on disk - open(fn, 'w') + test_file = open(fn, 'w') try: ri = find_files_and_readers(reader='viirs_sdr', start_time=datetime(2012, 2, 25, 18, 0, 0), @@ -419,6 +421,7 @@ def test_reader_name_matched_start_end_time(self): self.assertListEqual(list(ri.keys()), ['viirs_sdr']) self.assertListEqual(ri['viirs_sdr'], [fn]) finally: + test_file.close() os.remove(fn) def test_reader_name_matched_start_time(self): @@ -430,12 +433,13 @@ def test_reader_name_matched_start_time(self): from datetime import datetime fn = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5' # touch the file so it exists on disk - open(fn, 'w') + test_file = open(fn, 'w') try: ri = find_files_and_readers(reader='viirs_sdr', start_time=datetime(2012, 2, 25, 18, 1, 30)) self.assertListEqual(list(ri.keys()), ['viirs_sdr']) self.assertListEqual(ri['viirs_sdr'], [fn]) finally: + test_file.close() os.remove(fn) def test_reader_name_matched_end_time(self): @@ -448,12 +452,13 @@ def test_reader_name_matched_end_time(self): from datetime import datetime fn = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5' # touch the 
file so it exists on disk - open(fn, 'w') + test_file = open(fn, 'w') try: ri = find_files_and_readers(reader='viirs_sdr', end_time=datetime(2012, 2, 25, 18, 1, 30)) self.assertListEqual(list(ri.keys()), ['viirs_sdr']) self.assertListEqual(ri['viirs_sdr'], [fn]) finally: + test_file.close() os.remove(fn) def test_reader_name_unmatched_start_end_time(self): @@ -462,7 +467,7 @@ def test_reader_name_unmatched_start_end_time(self): from datetime import datetime fn = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5' # touch the file so it exists on disk - open(fn, 'w') + test_file = open(fn, 'w') try: self.assertRaises(ValueError, find_files_and_readers, reader='viirs_sdr', @@ -470,6 +475,7 @@ def test_reader_name_unmatched_start_end_time(self): end_time=datetime(2012, 2, 26, 19, 0, 0), ) finally: + test_file.close() os.remove(fn) def test_no_parameters(self): @@ -477,12 +483,13 @@ def test_no_parameters(self): from satpy.readers import find_files_and_readers fn = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5' # touch the file so it exists on disk - open(fn, 'w') + test_file = open(fn, 'w') try: ri = find_files_and_readers() self.assertListEqual(list(ri.keys()), ['viirs_sdr']) self.assertListEqual(ri['viirs_sdr'], [fn]) finally: + test_file.close() os.remove(fn) def test_bad_sensor(self): @@ -490,11 +497,11 @@ def test_bad_sensor(self): from satpy.readers import find_files_and_readers fn = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5' # touch the file so it exists on disk - open(fn, 'w') + test_file = open(fn, 'w') try: - self.assertRaises(ValueError, find_files_and_readers, - sensor='i_dont_exist') + self.assertRaises(ValueError, find_files_and_readers, sensor='i_dont_exist') finally: + test_file.close() os.remove(fn) def test_sensor(self): @@ -502,7 +509,7 @@ def test_sensor(self): from satpy.readers import find_files_and_readers fn = 
'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5' # touch the file so it exists on disk - open(fn, 'w') + test_file = open(fn, 'w') try: # we can't easily know how many readers satpy has that support # 'viirs' so we just pass it and hope that this works @@ -510,6 +517,7 @@ def test_sensor(self): self.assertListEqual(list(ri.keys()), ['viirs_sdr']) self.assertListEqual(ri['viirs_sdr'], [fn]) finally: + test_file.close() os.remove(fn) def test_sensor_no_files(self): @@ -517,8 +525,7 @@ def test_sensor_no_files(self): from satpy.readers import find_files_and_readers # we can't easily know how many readers satpy has that support # 'viirs' so we just pass it and hope that this works - self.assertRaises(ValueError, find_files_and_readers, - sensor='viirs') + self.assertRaises(ValueError, find_files_and_readers, sensor='viirs') def test_reader_load_failed(self): """Test that an exception is raised when a reader can't be loaded.""" @@ -621,34 +628,37 @@ def test_default_behavior(self): """Test the default behavior with the 'abi_l1b' reader.""" from satpy.readers import group_files groups = group_files(self.g16_files, reader='abi_l1b') - self.assertTrue(len(groups), 6) - self.assertTrue(len(groups[0]['abi_l1b']), 2) + self.assertEqual(6, len(groups)) + self.assertEqual(2, len(groups[0]['abi_l1b'])) def test_non_datetime_group_key(self): """Test what happens when the start_time isn't used for grouping.""" from satpy.readers import group_files groups = group_files(self.g16_files, reader='abi_l1b', group_keys=('platform_shortname',)) - self.assertTrue(len(groups), 1) - self.assertTrue(len(groups[0]['abi_l1b']), 6) + self.assertEqual(1, len(groups)) + self.assertEqual(12, len(groups[0]['abi_l1b'])) def test_large_time_threshold(self): """Test what happens when the time threshold holds multiple files.""" from satpy.readers import group_files groups = group_files(self.g16_files, reader='abi_l1b', time_threshold=60*8) - self.assertTrue(len(groups), 
3) - self.assertTrue(len(groups[0]['abi_l1b']), 2) + self.assertEqual(3, len(groups)) + self.assertEqual(4, len(groups[0]['abi_l1b'])) def test_two_instruments_files(self): - """Test the default behavior when two instruments files are provided. + """Test the behavior when two instruments files are provided. This is undesired from a user point of view since we don't want G16 - and G17 files in the same Scene. + and G17 files in the same Scene. Readers (like abi_l1b) are or can be + configured to have specific group keys for handling these situations. + Due to that this test forces the fallback group keys of + ('start_time',). """ from satpy.readers import group_files - groups = group_files(self.g16_files + self.g17_files, reader='abi_l1b') - self.assertTrue(len(groups), 6) - self.assertTrue(len(groups[0]['abi_l1b']), 4) + groups = group_files(self.g16_files + self.g17_files, reader='abi_l1b', group_keys=('start_time',)) + self.assertEqual(6, len(groups)) + self.assertEqual(4, len(groups[0]['abi_l1b'])) def test_two_instruments_files_split(self): """Test the default behavior when two instruments files are provided and split. @@ -660,8 +670,12 @@ def test_two_instruments_files_split(self): from satpy.readers import group_files groups = group_files(self.g16_files + self.g17_files, reader='abi_l1b', group_keys=('start_time', 'platform_shortname')) - self.assertTrue(len(groups), 12) - self.assertTrue(len(groups[0]['abi_l1b']), 2) + self.assertEqual(12, len(groups)) + self.assertEqual(2, len(groups[0]['abi_l1b'])) + # default for abi_l1b should also behave like this + groups = group_files(self.g16_files + self.g17_files, reader='abi_l1b') + self.assertEqual(12, len(groups)) + self.assertEqual(2, len(groups[0]['abi_l1b'])) def suite():