Skip to content

Commit

Permalink
Add 'group_keys' to ABI/AHI reader yaml files for group_files
Browse files Browse the repository at this point in the history
  • Loading branch information
djhoese committed Jan 25, 2019
1 parent b60273e commit 87d7ac1
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 31 deletions.
2 changes: 2 additions & 0 deletions satpy/etc/readers/abi_l1b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ reader:
sensors: [abi]
default_channels:
reader: !!python/name:satpy.readers.yaml_reader.FileYAMLReader
# file pattern keys to sort files by with 'satpy.readers.group_files'
group_keys: ['start_time', 'platform_shortname', 'scene_abbr']

file_types:
# NOTE: observation_type == product acronym in PUG document
Expand Down
3 changes: 2 additions & 1 deletion satpy/etc/readers/ahi_hrit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ reader:
description: JMA HRIT Reader
name: ahi_hrit
sensors: [ahi]
default_channels: []
reader: !!python/name:satpy.readers.yaml_reader.FileYAMLReader
# file pattern keys to sort files by with 'satpy.readers.group_files'
group_keys: ['start_time', 'area']

file_types:
hrit_b01:
Expand Down
3 changes: 2 additions & 1 deletion satpy/etc/readers/ahi_hsd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ reader:
name: ahi_hsd
reader: !!python/name:satpy.readers.yaml_reader.FileYAMLReader ''
sensors: [ahi]
default_datasets:
# file pattern keys to sort files by with 'satpy.readers.group_files'
group_keys: ['start_time', 'platform_shortname', 'area']

datasets:
B01:
Expand Down
15 changes: 12 additions & 3 deletions satpy/readers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ def __delitem__(self, key):


def group_files(files_to_sort, reader=None, time_threshold=10,
group_keys=('start_time',), ppp_config_dir=None, reader_kwargs=None):
group_keys=None, ppp_config_dir=None, reader_kwargs=None):
"""Group series of files by file pattern information.
By default this will group files by their filename ``start_time``
Expand All @@ -434,7 +434,8 @@ def group_files(files_to_sort, reader=None, time_threshold=10,
means it is recommended that datetime values should only come from
the first key in ``group_keys``. Otherwise, there is a good chance
that files will not be grouped properly (datetimes being barely
unequal). Defaults to ``('start_time',)``.
unequal). Defaults to a reader's ``group_keys`` configuration (set
in YAML), otherwise ``('start_time',)``.
ppp_config_dir (str): Root user configuration directory for SatPy.
This will be deprecated in the future, but is here for consistency
with other SatPy features.
Expand Down Expand Up @@ -467,6 +468,8 @@ def group_files(files_to_sort, reader=None, time_threshold=10,
# raise
raise

if group_keys is None:
group_keys = reader_instance.info.get('group_keys', ('start_time',))
file_keys = []
for filetype, filetype_info in reader_instance.sorted_filetype_items():
for f, file_info in reader_instance.filename_items_for_filetype(files_to_sort, filetype_info):
Expand All @@ -480,14 +483,20 @@ def group_files(files_to_sort, reader=None, time_threshold=10,
# use first element of key as time identifier (if datetime type)
if prev_key is None:
is_new_group = True
prev_key = gk
elif isinstance(gk[0], datetime):
# datetimes within threshold difference are "the same time"
is_new_group = (gk[0] - prev_key[0]) > threshold
else:
is_new_group = gk[0] != prev_key[0]

# compare only the values that are present (not None) in both this key
# and the previous key; this is a generator and is not computed until the
# if statement below when we know that `prev_key` is not None
vals_not_equal = (this_val != prev_val for this_val, prev_val in zip(gk[1:], prev_key[1:])
if this_val is not None and prev_val is not None)
# if this is a new group based on the first element
if is_new_group or gk[1:] != prev_key[1:]:
if is_new_group or any(vals_not_equal):
file_groups[gk] = [f]
prev_key = gk
else:
Expand Down
66 changes: 40 additions & 26 deletions satpy/tests/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,25 +383,27 @@ def test_reader_name(self):
from satpy.readers import find_files_and_readers
fn = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5'
# touch the file so it exists on disk
open(fn, 'w')
test_file = open(fn, 'w')
try:
ri = find_files_and_readers(reader='viirs_sdr')
self.assertListEqual(list(ri.keys()), ['viirs_sdr'])
self.assertListEqual(ri['viirs_sdr'], [fn])
finally:
test_file.close()
os.remove(fn)

def test_reader_other_name(self):
"""Test with default base_dir and reader specified."""
from satpy.readers import find_files_and_readers
fn = 'S_NWC_CPP_npp_32505_20180204T1114116Z_20180204T1128227Z.nc'
# touch the file so it exists on disk
open(fn, 'w')
test_file = open(fn, 'w')
try:
ri = find_files_and_readers(reader='nwcsaf-pps_nc')
self.assertListEqual(list(ri.keys()), ['nwcsaf-pps_nc'])
self.assertListEqual(ri['nwcsaf-pps_nc'], [fn])
finally:
test_file.close()
os.remove(fn)

def test_reader_name_matched_start_end_time(self):
Expand All @@ -410,7 +412,7 @@ def test_reader_name_matched_start_end_time(self):
from datetime import datetime
fn = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5'
# touch the file so it exists on disk
open(fn, 'w')
test_file = open(fn, 'w')
try:
ri = find_files_and_readers(reader='viirs_sdr',
start_time=datetime(2012, 2, 25, 18, 0, 0),
Expand All @@ -419,6 +421,7 @@ def test_reader_name_matched_start_end_time(self):
self.assertListEqual(list(ri.keys()), ['viirs_sdr'])
self.assertListEqual(ri['viirs_sdr'], [fn])
finally:
test_file.close()
os.remove(fn)

def test_reader_name_matched_start_time(self):
Expand All @@ -430,12 +433,13 @@ def test_reader_name_matched_start_time(self):
from datetime import datetime
fn = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5'
# touch the file so it exists on disk
open(fn, 'w')
test_file = open(fn, 'w')
try:
ri = find_files_and_readers(reader='viirs_sdr', start_time=datetime(2012, 2, 25, 18, 1, 30))
self.assertListEqual(list(ri.keys()), ['viirs_sdr'])
self.assertListEqual(ri['viirs_sdr'], [fn])
finally:
test_file.close()
os.remove(fn)

def test_reader_name_matched_end_time(self):
Expand All @@ -448,12 +452,13 @@ def test_reader_name_matched_end_time(self):
from datetime import datetime
fn = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5'
# touch the file so it exists on disk
open(fn, 'w')
test_file = open(fn, 'w')
try:
ri = find_files_and_readers(reader='viirs_sdr', end_time=datetime(2012, 2, 25, 18, 1, 30))
self.assertListEqual(list(ri.keys()), ['viirs_sdr'])
self.assertListEqual(ri['viirs_sdr'], [fn])
finally:
test_file.close()
os.remove(fn)

def test_reader_name_unmatched_start_end_time(self):
Expand All @@ -462,63 +467,65 @@ def test_reader_name_unmatched_start_end_time(self):
from datetime import datetime
fn = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5'
# touch the file so it exists on disk
open(fn, 'w')
test_file = open(fn, 'w')
try:
self.assertRaises(ValueError, find_files_and_readers,
reader='viirs_sdr',
start_time=datetime(2012, 2, 26, 18, 0, 0),
end_time=datetime(2012, 2, 26, 19, 0, 0),
)
finally:
test_file.close()
os.remove(fn)

def test_no_parameters(self):
"""Test with no limiting parameters."""
from satpy.readers import find_files_and_readers
fn = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5'
# touch the file so it exists on disk
open(fn, 'w')
test_file = open(fn, 'w')
try:
ri = find_files_and_readers()
self.assertListEqual(list(ri.keys()), ['viirs_sdr'])
self.assertListEqual(ri['viirs_sdr'], [fn])
finally:
test_file.close()
os.remove(fn)

def test_bad_sensor(self):
"""Test bad sensor doesn't find any files"""
from satpy.readers import find_files_and_readers
fn = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5'
# touch the file so it exists on disk
open(fn, 'w')
test_file = open(fn, 'w')
try:
self.assertRaises(ValueError, find_files_and_readers,
sensor='i_dont_exist')
self.assertRaises(ValueError, find_files_and_readers, sensor='i_dont_exist')
finally:
test_file.close()
os.remove(fn)

def test_sensor(self):
"""Test that readers for the current sensor are loaded"""
from satpy.readers import find_files_and_readers
fn = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5'
# touch the file so it exists on disk
open(fn, 'w')
test_file = open(fn, 'w')
try:
# we can't easily know how many readers satpy has that support
# 'viirs' so we just pass it and hope that this works
ri = find_files_and_readers(sensor='viirs')
self.assertListEqual(list(ri.keys()), ['viirs_sdr'])
self.assertListEqual(ri['viirs_sdr'], [fn])
finally:
test_file.close()
os.remove(fn)

def test_sensor_no_files(self):
"""Test that readers for the current sensor are loaded"""
from satpy.readers import find_files_and_readers
# we can't easily know how many readers satpy has that support
# 'viirs' so we just pass it and hope that this works
self.assertRaises(ValueError, find_files_and_readers,
sensor='viirs')
self.assertRaises(ValueError, find_files_and_readers, sensor='viirs')

def test_reader_load_failed(self):
"""Test that an exception is raised when a reader can't be loaded."""
Expand Down Expand Up @@ -621,34 +628,37 @@ def test_default_behavior(self):
"""Test the default behavior with the 'abi_l1b' reader."""
from satpy.readers import group_files
groups = group_files(self.g16_files, reader='abi_l1b')
self.assertTrue(len(groups), 6)
self.assertTrue(len(groups[0]['abi_l1b']), 2)
self.assertEqual(6, len(groups))
self.assertEqual(2, len(groups[0]['abi_l1b']))

def test_non_datetime_group_key(self):
"""Test what happens when the start_time isn't used for grouping."""
from satpy.readers import group_files
groups = group_files(self.g16_files, reader='abi_l1b', group_keys=('platform_shortname',))
self.assertTrue(len(groups), 1)
self.assertTrue(len(groups[0]['abi_l1b']), 6)
self.assertEqual(1, len(groups))
self.assertEqual(12, len(groups[0]['abi_l1b']))

def test_large_time_threshold(self):
"""Test what happens when the time threshold holds multiple files."""
from satpy.readers import group_files
groups = group_files(self.g16_files, reader='abi_l1b', time_threshold=60*8)
self.assertTrue(len(groups), 3)
self.assertTrue(len(groups[0]['abi_l1b']), 2)
self.assertEqual(3, len(groups))
self.assertEqual(4, len(groups[0]['abi_l1b']))

def test_two_instruments_files(self):
"""Test the default behavior when two instruments files are provided.
"""Test the behavior when two instruments files are provided.
This is undesired from a user point of view since we don't want G16
and G17 files in the same Scene.
and G17 files in the same Scene. Readers (like abi_l1b) are or can be
configured to have specific group keys for handling these situations.
Due to that this test forces the fallback group keys of
('start_time',).
"""
from satpy.readers import group_files
groups = group_files(self.g16_files + self.g17_files, reader='abi_l1b')
self.assertTrue(len(groups), 6)
self.assertTrue(len(groups[0]['abi_l1b']), 4)
groups = group_files(self.g16_files + self.g17_files, reader='abi_l1b', group_keys=('start_time',))
self.assertEqual(6, len(groups))
self.assertEqual(4, len(groups[0]['abi_l1b']))

def test_two_instruments_files_split(self):
"""Test the default behavior when two instruments files are provided and split.
Expand All @@ -660,8 +670,12 @@ def test_two_instruments_files_split(self):
from satpy.readers import group_files
groups = group_files(self.g16_files + self.g17_files, reader='abi_l1b',
group_keys=('start_time', 'platform_shortname'))
self.assertTrue(len(groups), 12)
self.assertTrue(len(groups[0]['abi_l1b']), 2)
self.assertEqual(12, len(groups))
self.assertEqual(2, len(groups[0]['abi_l1b']))
# default for abi_l1b should also behave like this
groups = group_files(self.g16_files + self.g17_files, reader='abi_l1b')
self.assertEqual(12, len(groups))
self.assertEqual(2, len(groups[0]['abi_l1b']))


def suite():
Expand Down

0 comments on commit 87d7ac1

Please sign in to comment.