diff --git a/satpy/etc/readers/abi_l1b.yaml b/satpy/etc/readers/abi_l1b.yaml index 56913e79ba..7f7c32c68b 100644 --- a/satpy/etc/readers/abi_l1b.yaml +++ b/satpy/etc/readers/abi_l1b.yaml @@ -10,6 +10,8 @@ reader: sensors: [abi] default_channels: reader: !!python/name:satpy.readers.yaml_reader.FileYAMLReader + # file pattern keys to sort files by with 'satpy.readers.group_files' + group_keys: ['start_time', 'platform_shortname', 'scene_abbr'] file_types: # NOTE: observation_type == product acronym in PUG document diff --git a/satpy/etc/readers/ahi_hrit.yaml b/satpy/etc/readers/ahi_hrit.yaml index db4485d716..49a5aeb2d0 100644 --- a/satpy/etc/readers/ahi_hrit.yaml +++ b/satpy/etc/readers/ahi_hrit.yaml @@ -6,8 +6,9 @@ reader: description: JMA HRIT Reader name: ahi_hrit sensors: [ahi] - default_channels: [] reader: !!python/name:satpy.readers.yaml_reader.FileYAMLReader + # file pattern keys to sort files by with 'satpy.readers.group_files' + group_keys: ['start_time', 'area'] file_types: hrit_b01: diff --git a/satpy/etc/readers/ahi_hsd.yaml b/satpy/etc/readers/ahi_hsd.yaml index fc0fc90847..3821228d9c 100644 --- a/satpy/etc/readers/ahi_hsd.yaml +++ b/satpy/etc/readers/ahi_hsd.yaml @@ -6,7 +6,8 @@ reader: name: ahi_hsd reader: !!python/name:satpy.readers.yaml_reader.FileYAMLReader '' sensors: [ahi] - default_datasets: + # file pattern keys to sort files by with 'satpy.readers.group_files' + group_keys: ['start_time', 'platform_shortname', 'area'] datasets: B01: diff --git a/satpy/readers/__init__.py b/satpy/readers/__init__.py index 72e9c90efe..e4e99eb1da 100644 --- a/satpy/readers/__init__.py +++ b/satpy/readers/__init__.py @@ -409,7 +409,7 @@ def __delitem__(self, key): def group_files(files_to_sort, reader=None, time_threshold=10, - group_keys=('start_time',), ppp_config_dir=None, reader_kwargs=None): + group_keys=None, ppp_config_dir=None, reader_kwargs=None): """Group series of files by file pattern information. 
By default this will group files by their filename ``start_time`` @@ -434,7 +434,8 @@ def group_files(files_to_sort, reader=None, time_threshold=10, means it is recommended that datetime values should only come from the first key in ``group_keys``. Otherwise, there is a good chance that files will not be grouped properly (datetimes being barely - unequal). Defaults to ``('start_time',)``. + unequal). Defaults to a reader's ``group_keys`` configuration (set + in YAML), otherwise ``('start_time',)``. ppp_config_dir (str): Root usser configuration directory for SatPy. This will be deprecated in the future, but is here for consistency with other SatPy features. @@ -467,6 +468,8 @@ def group_files(files_to_sort, reader=None, time_threshold=10, # raise raise + if group_keys is None: + group_keys = reader_instance.info.get('group_keys', ('start_time',)) file_keys = [] for filetype, filetype_info in reader_instance.sorted_filetype_items(): for f, file_info in reader_instance.filename_items_for_filetype(files_to_sort, filetype_info): @@ -480,14 +483,20 @@ def group_files(files_to_sort, reader=None, time_threshold=10, # use first element of key as time identifier (if datetime type) if prev_key is None: is_new_group = True + prev_key = gk elif isinstance(gk[0], datetime): # datetimes within threshold difference are "the same time" is_new_group = (gk[0] - prev_key[0]) > threshold else: is_new_group = gk[0] != prev_key[0] + # compare only the values that are set (not None) in both this key and + # the previous key; this is a generator and is not computed until the + # if statement below, when we know that `prev_key` is not None + vals_not_equal = (this_val != prev_val for this_val, prev_val in zip(gk[1:], prev_key[1:]) + if this_val is not None and prev_val is not None) # if this is a new group based on the first element - if is_new_group or gk[1:] != prev_key[1:]: + if is_new_group or any(vals_not_equal): file_groups[gk] = [f] prev_key = gk else: diff --git a/satpy/tests/test_readers.py 
b/satpy/tests/test_readers.py index d8bbd7b6e1..7cb2c2e3d4 100644 --- a/satpy/tests/test_readers.py +++ b/satpy/tests/test_readers.py @@ -383,12 +383,13 @@ def test_reader_name(self): from satpy.readers import find_files_and_readers fn = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5' # touch the file so it exists on disk - open(fn, 'w') + test_file = open(fn, 'w') try: ri = find_files_and_readers(reader='viirs_sdr') self.assertListEqual(list(ri.keys()), ['viirs_sdr']) self.assertListEqual(ri['viirs_sdr'], [fn]) finally: + test_file.close() os.remove(fn) def test_reader_other_name(self): @@ -396,12 +397,13 @@ def test_reader_other_name(self): from satpy.readers import find_files_and_readers fn = 'S_NWC_CPP_npp_32505_20180204T1114116Z_20180204T1128227Z.nc' # touch the file so it exists on disk - open(fn, 'w') + test_file = open(fn, 'w') try: ri = find_files_and_readers(reader='nwcsaf-pps_nc') self.assertListEqual(list(ri.keys()), ['nwcsaf-pps_nc']) self.assertListEqual(ri['nwcsaf-pps_nc'], [fn]) finally: + test_file.close() os.remove(fn) def test_reader_name_matched_start_end_time(self): @@ -410,7 +412,7 @@ def test_reader_name_matched_start_end_time(self): from datetime import datetime fn = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5' # touch the file so it exists on disk - open(fn, 'w') + test_file = open(fn, 'w') try: ri = find_files_and_readers(reader='viirs_sdr', start_time=datetime(2012, 2, 25, 18, 0, 0), @@ -419,6 +421,7 @@ def test_reader_name_matched_start_end_time(self): self.assertListEqual(list(ri.keys()), ['viirs_sdr']) self.assertListEqual(ri['viirs_sdr'], [fn]) finally: + test_file.close() os.remove(fn) def test_reader_name_matched_start_time(self): @@ -430,12 +433,13 @@ def test_reader_name_matched_start_time(self): from datetime import datetime fn = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5' # touch the file so it exists on disk - open(fn, 'w') 
+ test_file = open(fn, 'w') try: ri = find_files_and_readers(reader='viirs_sdr', start_time=datetime(2012, 2, 25, 18, 1, 30)) self.assertListEqual(list(ri.keys()), ['viirs_sdr']) self.assertListEqual(ri['viirs_sdr'], [fn]) finally: + test_file.close() os.remove(fn) def test_reader_name_matched_end_time(self): @@ -448,12 +452,13 @@ def test_reader_name_matched_end_time(self): from datetime import datetime fn = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5' # touch the file so it exists on disk - open(fn, 'w') + test_file = open(fn, 'w') try: ri = find_files_and_readers(reader='viirs_sdr', end_time=datetime(2012, 2, 25, 18, 1, 30)) self.assertListEqual(list(ri.keys()), ['viirs_sdr']) self.assertListEqual(ri['viirs_sdr'], [fn]) finally: + test_file.close() os.remove(fn) def test_reader_name_unmatched_start_end_time(self): @@ -462,7 +467,7 @@ def test_reader_name_unmatched_start_end_time(self): from datetime import datetime fn = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5' # touch the file so it exists on disk - open(fn, 'w') + test_file = open(fn, 'w') try: self.assertRaises(ValueError, find_files_and_readers, reader='viirs_sdr', @@ -470,6 +475,7 @@ def test_reader_name_unmatched_start_end_time(self): end_time=datetime(2012, 2, 26, 19, 0, 0), ) finally: + test_file.close() os.remove(fn) def test_no_parameters(self): @@ -477,12 +483,13 @@ def test_no_parameters(self): from satpy.readers import find_files_and_readers fn = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5' # touch the file so it exists on disk - open(fn, 'w') + test_file = open(fn, 'w') try: ri = find_files_and_readers() self.assertListEqual(list(ri.keys()), ['viirs_sdr']) self.assertListEqual(ri['viirs_sdr'], [fn]) finally: + test_file.close() os.remove(fn) def test_bad_sensor(self): @@ -490,11 +497,11 @@ def test_bad_sensor(self): from satpy.readers import find_files_and_readers fn = 
'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5' # touch the file so it exists on disk - open(fn, 'w') + test_file = open(fn, 'w') try: - self.assertRaises(ValueError, find_files_and_readers, - sensor='i_dont_exist') + self.assertRaises(ValueError, find_files_and_readers, sensor='i_dont_exist') finally: + test_file.close() os.remove(fn) def test_sensor(self): @@ -502,7 +509,7 @@ def test_sensor(self): from satpy.readers import find_files_and_readers fn = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5' # touch the file so it exists on disk - open(fn, 'w') + test_file = open(fn, 'w') try: # we can't easily know how many readers satpy has that support # 'viirs' so we just pass it and hope that this works @@ -510,6 +517,7 @@ def test_sensor(self): self.assertListEqual(list(ri.keys()), ['viirs_sdr']) self.assertListEqual(ri['viirs_sdr'], [fn]) finally: + test_file.close() os.remove(fn) def test_sensor_no_files(self): @@ -517,8 +525,7 @@ def test_sensor_no_files(self): from satpy.readers import find_files_and_readers # we can't easily know how many readers satpy has that support # 'viirs' so we just pass it and hope that this works - self.assertRaises(ValueError, find_files_and_readers, - sensor='viirs') + self.assertRaises(ValueError, find_files_and_readers, sensor='viirs') def test_reader_load_failed(self): """Test that an exception is raised when a reader can't be loaded.""" @@ -621,34 +628,37 @@ def test_default_behavior(self): """Test the default behavior with the 'abi_l1b' reader.""" from satpy.readers import group_files groups = group_files(self.g16_files, reader='abi_l1b') - self.assertTrue(len(groups), 6) - self.assertTrue(len(groups[0]['abi_l1b']), 2) + self.assertEqual(6, len(groups)) + self.assertEqual(2, len(groups[0]['abi_l1b'])) def test_non_datetime_group_key(self): """Test what happens when the start_time isn't used for grouping.""" from satpy.readers import group_files groups = 
group_files(self.g16_files, reader='abi_l1b', group_keys=('platform_shortname',)) - self.assertTrue(len(groups), 1) - self.assertTrue(len(groups[0]['abi_l1b']), 6) + self.assertEqual(1, len(groups)) + self.assertEqual(12, len(groups[0]['abi_l1b'])) def test_large_time_threshold(self): """Test what happens when the time threshold holds multiple files.""" from satpy.readers import group_files groups = group_files(self.g16_files, reader='abi_l1b', time_threshold=60*8) - self.assertTrue(len(groups), 3) - self.assertTrue(len(groups[0]['abi_l1b']), 2) + self.assertEqual(3, len(groups)) + self.assertEqual(4, len(groups[0]['abi_l1b'])) def test_two_instruments_files(self): - """Test the default behavior when two instruments files are provided. + """Test the behavior when two instruments files are provided. This is undesired from a user point of view since we don't want G16 - and G17 files in the same Scene. + and G17 files in the same Scene. Readers (like abi_l1b) are or can be + configured to have specific group keys for handling these situations. + Due to that this test forces the fallback group keys of + ('start_time',). """ from satpy.readers import group_files - groups = group_files(self.g16_files + self.g17_files, reader='abi_l1b') - self.assertTrue(len(groups), 6) - self.assertTrue(len(groups[0]['abi_l1b']), 4) + groups = group_files(self.g16_files + self.g17_files, reader='abi_l1b', group_keys=('start_time',)) + self.assertEqual(6, len(groups)) + self.assertEqual(4, len(groups[0]['abi_l1b'])) def test_two_instruments_files_split(self): """Test the default behavior when two instruments files are provided and split. 
@@ -660,8 +670,12 @@ def test_two_instruments_files_split(self): from satpy.readers import group_files groups = group_files(self.g16_files + self.g17_files, reader='abi_l1b', group_keys=('start_time', 'platform_shortname')) - self.assertTrue(len(groups), 12) - self.assertTrue(len(groups[0]['abi_l1b']), 2) + self.assertEqual(12, len(groups)) + self.assertEqual(2, len(groups[0]['abi_l1b'])) + # default for abi_l1b should also behave like this + groups = group_files(self.g16_files + self.g17_files, reader='abi_l1b') + self.assertEqual(12, len(groups)) + self.assertEqual(2, len(groups[0]['abi_l1b'])) def suite():