Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add group_files and from_files utility functions for creating Scenes from multiple files #576

Merged
merged 9 commits into from
Jan 29, 2019
3 changes: 2 additions & 1 deletion doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import os
import sys
from datetime import datetime

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
Expand Down Expand Up @@ -75,7 +76,7 @@ def __getattr__(cls, name):

# General information about the project.
project = u'SatPy'
copyright = u'2009-2016, The PyTroll Team'
copyright = u'2009-{}, The PyTroll Team'.format(datetime.utcnow().strftime("%Y"))

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
Expand Down
25 changes: 21 additions & 4 deletions doc/source/multiscene.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,27 @@ The MultiScene can take "frames" of data and join them together in a single
animation movie file. Saving animations required the `imageio` python library
and for most available formats the ``ffmpeg`` command line tool suite should
also be installed. The below example saves a series of GOES-EAST ABI channel
1 and channel 2 frames to MP4 movie files. Note that currently there is no
easy way to map files from multiple time steps/orbits in to individual Scene
objects. The `glob` function and for loops are used to group files into Scene
objects that, if used individually, could load the data we want.
1 and channel 2 frames to MP4 movie files. We can use the
:meth:`MultiScene.from_files <satpy.multiscene.MultiScene.from_files>` class
method to create a `MultiScene` from a series of files. This uses the
:func:`~satpy.readers.group_files` utility function to group files by start
time.

>>> from satpy import Scene, MultiScene
>>> from glob import glob
>>> mscn = MultiScene.from_files(glob('/data/abi/day_1/*C0[12]*.nc'), reader='abi_l1b')
>>> mscn.load(['C01', 'C02'])
>>> mscn.save_animation('{name}_{start_time:%Y%m%d_%H%M%S}.mp4', fps=2)

.. versionadded:: 0.12

The ``from_files`` and ``group_files`` functions were added in SatPy 0.12.
See below for an alternative solution.

For older versions of SatPy we can manually create the `Scene` objects used.
The :func:`~glob.glob` function and for loops are used to group files into
Scene objects that, if used individually, could load the data we want. The
code below is equivalent to the ``from_files`` code above:

>>> from satpy import Scene, MultiScene
>>> from glob import glob
Expand Down
2 changes: 2 additions & 0 deletions satpy/etc/readers/abi_l1b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ reader:
sensors: [abi]
default_channels:
reader: !!python/name:satpy.readers.yaml_reader.FileYAMLReader
# file pattern keys to sort files by with 'satpy.readers.group_files'
group_keys: ['start_time', 'platform_shortname', 'scene_abbr']

file_types:
# NOTE: observation_type == product acronym in PUG document
Expand Down
3 changes: 2 additions & 1 deletion satpy/etc/readers/ahi_hrit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ reader:
description: JMA HRIT Reader
name: ahi_hrit
sensors: [ahi]
default_channels: []
reader: !!python/name:satpy.readers.yaml_reader.FileYAMLReader
# file pattern keys to sort files by with 'satpy.readers.group_files'
group_keys: ['start_time', 'area']

file_types:
hrit_b01:
Expand Down
3 changes: 2 additions & 1 deletion satpy/etc/readers/ahi_hsd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ reader:
name: ahi_hsd
reader: !!python/name:satpy.readers.yaml_reader.FileYAMLReader ''
sensors: [ahi]
default_datasets:
# file pattern keys to sort files by with 'satpy.readers.group_files'
group_keys: ['start_time', 'platform_shortname', 'area']

datasets:
B01:
Expand Down
103 changes: 79 additions & 24 deletions satpy/multiscene.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,14 +108,33 @@ def __init__(self, scene_gen):
self._scene_cache = []
self._dataset_idx = {}
# this class itself is not an iterator, make one
self._self_iter = iter(self)
self._self_iter = self._create_cached_iter()

def __iter__(self):
@property
def first(self):
    """Return the first scene produced by this generator."""
    scene_iter = iter(self)
    return next(scene_iter)

def _create_cached_iter(self):
    """Yield each scene from the wrapped generator, appending it to the cache as it is produced."""
    for scene in self._scene_gen:
        self._scene_cache.append(scene)
        yield scene

def __iter__(self):
    """Iterate over the provided scenes, caching them for later.

    Serves already-cached scenes first; once past the cache, pulls new
    scenes from the shared caching iterator so that multiple concurrent
    iterators see the same sequence without re-running the generator.
    """
    pos = 0
    while True:
        if pos < len(self._scene_cache):
            scn = self._scene_cache[pos]
        else:
            try:
                scn = next(self._self_iter)
            except StopIteration:
                return
        yield scn
        pos += 1

def __getitem__(self, ds_id):
"""Get a specific dataset from the scenes."""
if ds_id in self._dataset_idx:
Expand Down Expand Up @@ -155,10 +174,40 @@ def __init__(self, scenes=None):

"""
self._scenes = scenes or []
scenes = iter(self._scenes)
self._scene_gen = _SceneGenerator(iter(scenes))
# if we were originally given a generator-like object then we want to
# coordinate the loading between _SceneGenerator and _scenes
# otherwise it doesn't really matter and other operations may prefer
# a list
if not isinstance(scenes, (list, tuple)):
self._scenes = iter(self._scene_gen)

@property
def first_scene(self):
    """First Scene of this MultiScene object."""
    # Delegate to the caching scene generator so the scene is not consumed.
    return self._scene_gen.first

@classmethod
def from_files(cls, files_to_sort, reader=None, **kwargs):
    """Create multiple Scene objects from multiple files.

    This uses the :func:`satpy.readers.group_files` function to group
    files. See this function for more details on possible keyword
    arguments.

    .. versionadded:: 0.12

    """
    from satpy.readers import group_files
    groups = group_files(files_to_sort, reader=reader, **kwargs)
    # Lazily build one Scene per file group.
    return cls(Scene(filenames=group) for group in groups)

def __iter__(self):
    """Iterate over the provided Scenes once.

    Note: the stray ``return self.scenes`` that preceded the loop made
    this generator stop immediately (a bare return in a generator ends
    iteration), so the loop below was unreachable; it has been removed.
    """
    for scn in self._scenes:
        yield scn

@property
def scenes(self):
Expand Down Expand Up @@ -210,27 +259,35 @@ def _all_same_area(self, dataset_ids):
def all_same_area(self):
return self._all_same_area(self.loaded_dataset_ids)

def _gen_load(self, gen, *args, **kwargs):
"""Perform a load in a generator so it is delayed."""
@staticmethod
def _call_scene_func(gen, func_name, create_new_scene, *args, **kwargs):
    """Run the Scene method named ``func_name`` on every scene in ``gen``.

    Yields the method's result when ``create_new_scene`` is true (e.g.
    resample-style operations), otherwise yields the original scene
    (e.g. in-place operations like load).
    """
    for scene in gen:
        result = getattr(scene, func_name)(*args, **kwargs)
        yield result if create_new_scene else scene

def _generate_scene_func(self, gen, func_name, create_new_scene, *args, **kwargs):
    """Run a Scene method on each contained Scene.

    When ``create_new_scene`` is true, returns a new MultiScene wrapping
    the results; otherwise rebinds this MultiScene's scene generator to
    the processed scenes and returns ``None``.
    """
    result_gen = self._call_scene_func(gen, func_name, create_new_scene, *args, **kwargs)
    if not self.is_generator:
        # materialize now when the original input was a concrete sequence
        result_gen = list(result_gen)
    if create_new_scene:
        return self.__class__(result_gen)
    self._scene_gen = _SceneGenerator(result_gen)
    self._scenes = iter(self._scene_gen)

def load(self, *args, **kwargs):
    """Load the required datasets from the multiple scenes.

    Delegates to ``_generate_scene_func`` which applies ``Scene.load``
    to each scene in place (no new MultiScene is created). The previous
    body shown here called ``self._gen_load``, a helper that no longer
    exists (replaced by ``_call_scene_func``), and included the removed
    ``_gen_resample`` body — both were stale diff residue.
    """
    self._generate_scene_func(self._scenes, 'load', False, *args, **kwargs)

def resample(self, destination=None, **kwargs):
    """Resample the multiscene.

    Returns a new MultiScene of resampled scenes. The duplicate body
    calling the removed ``_gen_resample`` helper (stale diff residue,
    which also returned before the real implementation could run) has
    been dropped.
    """
    return self._generate_scene_func(self._scenes, 'resample', True, destination=destination, **kwargs)

def blend(self, blend_function=stack):
"""Blend the datasets into one scene.
Expand Down Expand Up @@ -320,21 +377,19 @@ def save_animation(self, filename, datasets=None, fps=10, fill_value=None,
if imageio is None:
raise ImportError("Missing required 'imageio' library")

scenes = iter(self._scenes)
first_scene = next(scenes)
scene_gen = self._scene_gen
first_scene = self.first_scene
scenes = iter(self._scene_gen)
info_scenes = [first_scene]
if 'end_time' in filename:
# if we need the last scene to generate the filename
# then compute all the scenes so we can figure it out
log.debug("Generating scenes to compute end_time for filename")
scenes = list(scenes)
info_scenes.append(scenes[-1])
scene_gen = _SceneGenerator(chain([first_scene], scenes))

if not self.is_generator:
available_ds = self.loaded_dataset_ids
else:
available_ds = list(first_scene.keys())
available_ds = [first_scene.datasets.get(ds) for ds in first_scene.wishlist]
available_ds = [ds for ds in available_ds if ds is not None]
dataset_ids = datasets or available_ds

writers = []
Expand Down
107 changes: 101 additions & 6 deletions satpy/readers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import numbers
import os
import warnings
from datetime import datetime, timedelta

import six
import yaml
Expand Down Expand Up @@ -407,6 +408,104 @@ def __delitem__(self, key):
return super(DatasetDict, self).__delitem__(key)


def group_files(files_to_sort, reader=None, time_threshold=10,
                group_keys=None, ppp_config_dir=None, reader_kwargs=None):
    """Group series of files by file pattern information.

    By default this will group files by their filename ``start_time``
    assuming it exists in the pattern. By passing the individual
    dictionaries returned by this function to the Scene classes'
    ``filenames``, a series of `Scene` objects can be easily created.

    .. versionadded:: 0.12

    Args:
        files_to_sort (iterable): File paths to sort in to groups.
        reader (str): Reader whose file patterns should be used to sort
            files. Currently this keyword argument is required; a future
            version may discover the best reader for each file
            automatically.
        time_threshold (int): Number of seconds used to consider time elements
            in a group as being equal. For example, if the 'start_time' item
            is used to group files then any time within `time_threshold`
            seconds of the first file's 'start_time' will be seen as occurring
            at the same time.
        group_keys (list or tuple): File pattern information to use to group
            files. Keys are sorted in order and only the first key is used when
            comparing datetime elements with `time_threshold` (see above). This
            means it is recommended that datetime values should only come from
            the first key in ``group_keys``. Otherwise, there is a good chance
            that files will not be grouped properly (datetimes being barely
            unequal). Defaults to a reader's ``group_keys`` configuration (set
            in YAML), otherwise ``('start_time',)``.
        ppp_config_dir (str): Root user configuration directory for SatPy.
            This will be deprecated in the future, but is here for consistency
            with other SatPy features.
        reader_kwargs (dict): Additional keyword arguments to pass to reader
            creation.

    Returns:
        List of dictionaries mapping 'reader' to a list of filenames.
        Each of these dictionaries can be passed as ``filenames`` to
        a `Scene` object.

    """
    # FUTURE: Find the best reader for each filename using `find_files_and_readers`
    if reader is None:
        raise ValueError("'reader' keyword argument is required.")
    elif not isinstance(reader, (list, tuple)):
        reader = [reader]

    # FUTURE: Handle multiple readers
    reader = reader[0]
    reader_configs = list(configs_for_reader(reader, ppp_config_dir))[0]
    reader_kwargs = reader_kwargs or {}
    try:
        reader_instance = load_reader(reader_configs, **reader_kwargs)
    except (KeyError, IOError, yaml.YAMLError) as err:
        # log context for the failure, then let the caller see the error
        LOG.info('Cannot use %s', str(reader_configs))
        LOG.debug(str(err))
        raise

    if group_keys is None:
        group_keys = reader_instance.info.get('group_keys', ('start_time',))
    # Collect (group_key, filename) pairs for every file that matches one
    # of the reader's file patterns; files matching no pattern are dropped.
    file_keys = []
    for _, filetype_info in reader_instance.sorted_filetype_items():
        for f, file_info in reader_instance.filename_items_for_filetype(files_to_sort, filetype_info):
            group_key = tuple(file_info.get(k) for k in group_keys)
            file_keys.append((group_key, f))

    prev_key = None
    threshold = timedelta(seconds=time_threshold)
    file_groups = {}
    for gk, f in sorted(file_keys):
        # use first element of key as time identifier (if datetime type)
        if prev_key is None:
            is_new_group = True
            prev_key = gk
        elif isinstance(gk[0], datetime):
            # datetimes within threshold difference are "the same time"
            is_new_group = (gk[0] - prev_key[0]) > threshold
        else:
            is_new_group = gk[0] != prev_key[0]

        # compare the remaining key elements; values missing from either
        # key (None) never split a group. This generator is not evaluated
        # until the `any()` below, when `prev_key` is known to be set.
        vals_not_equal = (this_val != prev_val
                          for this_val, prev_val in zip(gk[1:], prev_key[1:])
                          if this_val is not None and prev_val is not None)
        if is_new_group or any(vals_not_equal):
            file_groups[gk] = [f]
            prev_key = gk
        else:
            file_groups[prev_key].append(f)
    # each dict in the result is passable to Scene as 'filenames'
    return [{reader: file_groups[group_key]} for group_key in sorted(file_groups)]


def read_reader_config(config_files, loader=yaml.Loader):
"""Read the reader `config_files` and return the info extracted."""

Expand All @@ -427,13 +526,9 @@ def read_reader_config(config_files, loader=yaml.Loader):


def load_reader(reader_configs, **reader_kwargs):
    """Import and set up the reader from *reader_configs*.

    The rendered diff duplicated both the docstring and the reader
    construction (which would have instantiated the reader twice and run
    its side effects twice); the duplicates have been collapsed to the
    single final form.
    """
    reader_info = read_reader_config(reader_configs)
    reader_instance = reader_info['reader'](config_files=reader_configs, **reader_kwargs)
    return reader_instance


Expand Down
17 changes: 17 additions & 0 deletions satpy/tests/test_multiscene.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,23 @@ def test_properties(self):
self.assertSetEqual(mscn.shared_dataset_ids, {ds1_id, ds2_id})
self.assertFalse(mscn.all_same_area)

def test_from_files(self):
    """Test creating a multiscene from multiple files."""
    from satpy import MultiScene
    input_files = [
        "OR_ABI-L1b-RadC-M3C01_G16_s20171171502203_e20171171504576_c20171171505018.nc",
        "OR_ABI-L1b-RadC-M3C01_G16_s20171171507203_e20171171509576_c20171171510018.nc",
        "OR_ABI-L1b-RadC-M3C01_G16_s20171171512203_e20171171514576_c20171171515017.nc",
        "OR_ABI-L1b-RadC-M3C01_G16_s20171171517203_e20171171519577_c20171171520019.nc",
        "OR_ABI-L1b-RadC-M3C01_G16_s20171171522203_e20171171524576_c20171171525020.nc",
        "OR_ABI-L1b-RadC-M3C01_G16_s20171171527203_e20171171529576_c20171171530017.nc",
    ]
    with mock.patch('satpy.multiscene.Scene') as scn_mock:
        mscn = MultiScene.from_files(input_files, reader='abi_l1b')
        # assertTrue(x, msg) treats the second argument as a failure
        # message and passes for any truthy x; assertEqual actually
        # verifies that one Scene was created per input file.
        self.assertEqual(len(mscn.scenes), 6)
        calls = [mock.call(filenames={'abi_l1b': [in_file]}) for in_file in input_files]
        scn_mock.assert_has_calls(calls)


class TestMultiSceneSave(unittest.TestCase):
"""Test saving a MultiScene to various formats."""
Expand Down
Loading