Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add group_files and from_files utility functions for creating Scenes from multiple files #576

Merged
merged 9 commits into from
Jan 29, 2019
3 changes: 2 additions & 1 deletion doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import os
import sys
from datetime import datetime

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
Expand Down Expand Up @@ -75,7 +76,7 @@ def __getattr__(cls, name):

# General information about the project.
# General information about the project.
project = u'SatPy'
# Compute the end of the copyright year range at build time so the docs
# never go stale (replaces the previously hard-coded "2009-2016").
# NOTE(review): utcnow() returns a naive UTC datetime; fine for a year
# string, but datetime.now(timezone.utc) is the modern spelling.
copyright = u'2009-{}, The PyTroll Team'.format(datetime.utcnow().strftime("%Y"))

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
Expand Down
25 changes: 21 additions & 4 deletions doc/source/multiscene.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,27 @@ The MultiScene can take "frames" of data and join them together in a single
animation movie file. Saving animations required the `imageio` python library
and for most available formats the ``ffmpeg`` command line tool suite should
also be installed. The below example saves a series of GOES-EAST ABI channel
1 and channel 2 frames to MP4 movie files. Note that currently there is no
easy way to map files from multiple time steps/orbits in to individual Scene
objects. The `glob` function and for loops are used to group files into Scene
objects that, if used individually, could load the data we want.
1 and channel 2 frames to MP4 movie files. We can use the
:meth:`MultiScene.from_files <satpy.multiscene.MultiScene.from_files>` class
method to create a `MultiScene` from a series of files. This uses the
:func:`~satpy.readers.group_files` utility function to group files by start
time.

>>> from satpy import Scene, MultiScene
>>> from glob import glob
>>> mscn = MultiScene.from_files(glob('/data/abi/day_1/*C0[12]*.nc'), reader='abi_l1b')
>>> mscn.load(['C01', 'C02'])
>>> mscn.save_animation('{name}_{start_time:%Y%m%d_%H%M%S}.mp4', fps=2)

.. versionadded:: 0.12

The ``from_files`` and ``group_files`` functions were added in SatPy 0.12.
See below for an alternative solution.

For older versions of SatPy we can manually create the `Scene` objects used.
The :func:`~glob.glob` function and for loops are used to group files into
Scene objects that, if used individually, could load the data we want. The
code below is equivalent to the ``from_files`` code above:

>>> from satpy import Scene, MultiScene
>>> from glob import glob
Expand Down
16 changes: 16 additions & 0 deletions satpy/multiscene.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,22 @@ def __init__(self, scenes=None):
"""
self._scenes = scenes or []

@classmethod
def from_files(cls, files_to_sort, reader=None, **kwargs):
    """Create multiple Scene objects from multiple files.

    Files are grouped with :func:`satpy.readers.group_files`; see that
    function for the accepted keyword arguments. One `Scene` is built
    per file group and the resulting Scenes are wrapped in a new
    `MultiScene`.

    .. versionadded:: 0.12

    """
    from satpy.readers import group_files
    groups = group_files(files_to_sort, reader=reader, **kwargs)
    return cls([Scene(filenames=group) for group in groups])

def __iter__(self):
    """Iterate over the provided Scenes once."""
    # NOTE(review): returns ``self.scenes`` directly, so this presumably
    # yields a generator/iterator from that property ("once" in the
    # docstring suggests so) — confirm, since returning a plain list from
    # ``__iter__`` would not satisfy the iterator protocol.
    return self.scenes
Expand Down
98 changes: 92 additions & 6 deletions satpy/readers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import numbers
import os
import warnings
from datetime import datetime, timedelta

import six
import yaml
Expand Down Expand Up @@ -407,6 +408,95 @@ def __delitem__(self, key):
return super(DatasetDict, self).__delitem__(key)


def group_files(files_to_sort, reader=None, time_threshold=10,
                group_keys=('start_time',), ppp_config_dir=None, reader_kwargs=None):
    """Group series of files by file pattern information.

    By default this will group files by their filename ``start_time``
    assuming it exists in the pattern. By passing the individual
    dictionaries returned by this function to the Scene classes'
    ``filenames``, a series of `Scene` objects can be easily created.

    .. versionadded:: 0.12

    Args:
        files_to_sort (iterable): File paths to sort in to groups.
        reader (str): Reader whose file patterns should be used to sort
            files. Required.
        time_threshold (int): Number of seconds used to consider time elements
            in a group as being equal. For example, if the 'start_time' item
            is used to group files then any time within `time_threshold`
            seconds of the first file's 'start_time' will be seen as occurring
            at the same time.
        group_keys (list or tuple): File pattern information to use to group
            files. Keys are sorted in order and only the first key is used when
            comparing datetime elements with `time_threshold` (see above). This
            means it is recommended that datetime values should only come from
            the first key in ``group_keys``. Otherwise, there is a good chance
            that files will not be grouped properly (datetimes being barely
            unequal). Defaults to ``('start_time',)``.
        ppp_config_dir (str): Root user configuration directory for SatPy.
            This will be deprecated in the future, but is here for consistency
            with other SatPy features.
        reader_kwargs (dict): Additional keyword arguments to pass to reader
            creation.

    Returns:
        List of dictionaries mapping 'reader' to a list of filenames.
        Each of these dictionaries can be passed as ``filenames`` to
        a `Scene` object.

    Raises:
        ValueError: If ``reader`` is not provided.

    """
    # FUTURE: Find the best reader for each filename using `find_files_and_readers`
    if reader is None:
        raise ValueError("'reader' keyword argument is required.")
    elif not isinstance(reader, (list, tuple)):
        reader = [reader]

    # FUTURE: Handle multiple readers
    reader = reader[0]
    reader_configs = list(configs_for_reader(reader, ppp_config_dir))[0]
    reader_kwargs = reader_kwargs or {}
    try:
        reader_instance = load_reader(reader_configs, **reader_kwargs)
    except (KeyError, IOError, yaml.YAMLError) as err:
        LOG.info('Cannot use %s', str(reader_configs))
        LOG.debug(str(err))
        raise

    # extract a grouping-key tuple for every file that matches one of the
    # reader's filename patterns; files that match no pattern are dropped
    file_keys = []
    for filetype, filetype_info in reader_instance.sorted_filetype_items():
        for f, file_info in reader_instance.filename_items_for_filetype(files_to_sort, filetype_info):
            group_key = tuple(file_info.get(k) for k in group_keys)
            file_keys.append((group_key, f))

    prev_key = None
    threshold = timedelta(seconds=time_threshold)
    file_groups = {}
    for gk, f in sorted(file_keys):
        # use first element of key as time identifier (if datetime type)
        if prev_key is None:
            is_new_group = True
        elif isinstance(gk[0], datetime):
            # datetimes within threshold difference are "the same time";
            # compared against the group's *first* file, not the previous one
            is_new_group = (gk[0] - prev_key[0]) > threshold
        else:
            is_new_group = gk[0] != prev_key[0]

        # if this is a new group based on the first element
        if is_new_group or gk[1:] != prev_key[1:]:
            file_groups[gk] = [f]
            prev_key = gk
        else:
            file_groups[prev_key].append(f)
    sorted_group_keys = sorted(file_groups)
    # passable to Scene as 'filenames'
    return [{reader: file_groups[group_key]} for group_key in sorted_group_keys]


def read_reader_config(config_files, loader=yaml.Loader):
"""Read the reader `config_files` and return the info extracted."""

Expand All @@ -427,13 +517,9 @@ def read_reader_config(config_files, loader=yaml.Loader):


def load_reader(reader_configs, **reader_kwargs):
    """Import and set up the reader described by *reader_configs*."""
    reader_info = read_reader_config(reader_configs)
    reader_cls = reader_info['reader']
    return reader_cls(config_files=reader_configs, **reader_kwargs)


Expand Down
17 changes: 17 additions & 0 deletions satpy/tests/test_multiscene.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,23 @@ def test_properties(self):
self.assertSetEqual(mscn.shared_dataset_ids, {ds1_id, ds2_id})
self.assertFalse(mscn.all_same_area)

def test_from_files(self):
    """Test creating a multiscene from multiple files."""
    from satpy import MultiScene
    input_files = [
        "OR_ABI-L1b-RadC-M3C01_G16_s20171171502203_e20171171504576_c20171171505018.nc",
        "OR_ABI-L1b-RadC-M3C01_G16_s20171171507203_e20171171509576_c20171171510018.nc",
        "OR_ABI-L1b-RadC-M3C01_G16_s20171171512203_e20171171514576_c20171171515017.nc",
        "OR_ABI-L1b-RadC-M3C01_G16_s20171171517203_e20171171519577_c20171171520019.nc",
        "OR_ABI-L1b-RadC-M3C01_G16_s20171171522203_e20171171524576_c20171171525020.nc",
        "OR_ABI-L1b-RadC-M3C01_G16_s20171171527203_e20171171529576_c20171171530017.nc",
    ]
    with mock.patch('satpy.multiscene.Scene') as scn_mock:
        mscn = MultiScene.from_files(input_files, reader='abi_l1b')
        calls = [mock.call(filenames={'abi_l1b': [in_file]}) for in_file in input_files]
        scn_mock.assert_has_calls(calls)
        # was assertTrue(len(...), 6): the 6 was silently treated as the
        # failure *message*, so the count was never actually checked
        self.assertEqual(len(mscn.scenes), 6)


class TestMultiSceneSave(unittest.TestCase):
"""Test saving a MultiScene to various formats."""
Expand Down
90 changes: 86 additions & 4 deletions satpy/tests/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,8 +527,7 @@ def test_reader_load_failed(self):
# touch the file so it exists on disk
with mock.patch('yaml.load') as load:
load.side_effect = yaml.YAMLError("Import problems")
self.assertRaises(yaml.YAMLError, find_files_and_readers,
reader='viirs_sdr')
self.assertRaises(yaml.YAMLError, find_files_and_readers, reader='viirs_sdr')

def test_old_reader_name_mapping(self):
"""Test that requesting old reader names raises a warning."""
Expand Down Expand Up @@ -582,15 +581,98 @@ def test_available_readers(self):
self.assertIn('name', reader_info)


class TestGroupFiles(unittest.TestCase):
    """Test the 'group_files' utility function.

    All ``assertTrue(x, n)`` calls were replaced by ``assertEqual(x, n)``:
    with two positional arguments ``assertTrue`` treats the second as the
    failure *message* and never compares, so the group counts were never
    actually checked. The expected counts below were recomputed from the
    grouping logic accordingly.
    """

    def setUp(self):
        """Set up test filenames to use."""
        # Six CONUS time steps (5 minutes apart) for two ABI channels
        input_files = [
            "OR_ABI-L1b-RadC-M3C01_G16_s20171171502203_e20171171504576_c20171171505018.nc",
            "OR_ABI-L1b-RadC-M3C01_G16_s20171171507203_e20171171509576_c20171171510018.nc",
            "OR_ABI-L1b-RadC-M3C01_G16_s20171171512203_e20171171514576_c20171171515017.nc",
            "OR_ABI-L1b-RadC-M3C01_G16_s20171171517203_e20171171519577_c20171171520019.nc",
            "OR_ABI-L1b-RadC-M3C01_G16_s20171171522203_e20171171524576_c20171171525020.nc",
            "OR_ABI-L1b-RadC-M3C01_G16_s20171171527203_e20171171529576_c20171171530017.nc",
            "OR_ABI-L1b-RadC-M3C02_G16_s20171171502203_e20171171504576_c20171171505008.nc",
            "OR_ABI-L1b-RadC-M3C02_G16_s20171171507203_e20171171509576_c20171171510012.nc",
            "OR_ABI-L1b-RadC-M3C02_G16_s20171171512203_e20171171514576_c20171171515007.nc",
            "OR_ABI-L1b-RadC-M3C02_G16_s20171171517203_e20171171519576_c20171171520010.nc",
            "OR_ABI-L1b-RadC-M3C02_G16_s20171171522203_e20171171524576_c20171171525008.nc",
            "OR_ABI-L1b-RadC-M3C02_G16_s20171171527203_e20171171529576_c20171171530008.nc",
        ]
        self.g16_files = input_files
        self.g17_files = [x.replace('G16', 'G17') for x in input_files]

    def test_no_reader(self):
        """Test that reader must be provided."""
        from satpy.readers import group_files
        self.assertRaises(ValueError, group_files, [])

    def test_bad_reader(self):
        """Test that a reader that fails to load causes an error."""
        from satpy.readers import group_files
        import yaml
        # make the reader YAML fail to parse so load_reader raises
        with mock.patch('yaml.load') as load:
            load.side_effect = yaml.YAMLError("Import problems")
            self.assertRaises(yaml.YAMLError, group_files, [], reader='abi_l1b')

    def test_default_behavior(self):
        """Test the default behavior with the 'abi_l1b' reader."""
        from satpy.readers import group_files
        groups = group_files(self.g16_files, reader='abi_l1b')
        # 6 start times -> 6 groups, each with one C01 and one C02 file
        self.assertEqual(len(groups), 6)
        self.assertEqual(len(groups[0]['abi_l1b']), 2)

    def test_non_datetime_group_key(self):
        """Test what happens when the start_time isn't used for grouping."""
        from satpy.readers import group_files
        groups = group_files(self.g16_files, reader='abi_l1b', group_keys=('platform_shortname',))
        # all files share the platform -> one group holding every file
        self.assertEqual(len(groups), 1)
        self.assertEqual(len(groups[0]['abi_l1b']), 12)

    def test_large_time_threshold(self):
        """Test what happens when the time threshold holds multiple files."""
        from satpy.readers import group_files
        groups = group_files(self.g16_files, reader='abi_l1b', time_threshold=60*8)
        # 8 minute threshold merges pairs of 5-minute time steps:
        # {1502, 1507}, {1512, 1517}, {1522, 1527} -> 3 groups of 4 files
        self.assertEqual(len(groups), 3)
        self.assertEqual(len(groups[0]['abi_l1b']), 4)

    def test_two_instruments_files(self):
        """Test the default behavior when two instruments files are provided.

        This is undesired from a user point of view since we don't want G16
        and G17 files in the same Scene.

        """
        from satpy.readers import group_files
        groups = group_files(self.g16_files + self.g17_files, reader='abi_l1b')
        # grouped by start_time only -> both platforms end up together
        self.assertEqual(len(groups), 6)
        self.assertEqual(len(groups[0]['abi_l1b']), 4)

    def test_two_instruments_files_split(self):
        """Test the default behavior when two instruments files are provided and split.

        Tell the sorting to include the platform identifier as another field
        to use for grouping.

        """
        from satpy.readers import group_files
        groups = group_files(self.g16_files + self.g17_files, reader='abi_l1b',
                             group_keys=('start_time', 'platform_shortname'))
        # 6 start times x 2 platforms -> 12 groups of 2 channel files each
        self.assertEqual(len(groups), 12)
        self.assertEqual(len(groups[0]['abi_l1b']), 2)


def suite():
    """Return the test suite for test_readers."""
    loader = unittest.TestLoader()
    mysuite = unittest.TestSuite()
    for case in (TestDatasetDict, TestReaderLoader, TestFindFilesAndReaders,
                 TestYAMLFiles, TestGroupFiles):
        mysuite.addTest(loader.loadTestsFromTestCase(case))
    return mysuite

Expand Down