
Commit 41867c4
Add GOES-16 ABI Hurricane Florence demo data case
Includes a lot of changes to make fancier glob patterns possible on GCP
djhoese committed Apr 9, 2019
1 parent 834329a commit 41867c4
Showing 4 changed files with 150 additions and 15 deletions.
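
The "fancier glob patterns" the commit message refers to are character-class patterns such as `1[3456]` (hours 13-16), which the GCS glob implementation apparently could not expand at the time (see the commented-out pattern in the diff below). The workaround is to split such a pattern into a tuple of simpler per-hour patterns whose results are concatenated and treated as a single logical pattern. A minimal sketch of that expansion, with a hypothetical channel value:

    # Sketch: expand an unsupported hour character class into per-hour globs.
    channel = 2  # hypothetical channel number for illustration
    base = ('gs://gcp-public-data-goes-16/ABI-L1b-RadM/2018/254/{hour}/'
            '*C{ch:02d}*s2018254{hour}*.nc')
    one_logical_pattern = tuple(base.format(hour=hour, ch=channel)
                                for hour in ('13', '14', '15', '16'))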
66 changes: 62 additions & 4 deletions satpy/demo/__init__.py
@@ -56,7 +56,7 @@
 LOG = logging.getLogger(__name__)


-def makedirs(directory, exist_ok=False):
+def _makedirs(directory, exist_ok=False):
     """Python 2.7 friendly os.makedirs.

     After python 2.7 is dropped, just use `os.makedirs` with `exist_ok=True`.
@@ -69,7 +69,7 @@ def makedirs(directory, exist_ok=False):


 def get_us_midlatitude_cyclone_abi(base_dir='.', method=None, force=False):
-    """Get GOES-16 ABI (CONUS sector) data from March 14th 00:00Z.
+    """Get GOES-16 ABI (CONUS sector) data from 2019-03-14 00:00Z.

     Args:
         base_dir (str): Base directory for downloaded files.
@@ -80,6 +80,8 @@ def get_us_midlatitude_cyclone_abi(base_dir='.', method=None, force=False):
             the local system. Warning: May delete non-demo files stored in
             download directory.

+    Total size: ~110MB
+
     """
     if method is None:
         method = 'gcsfs'
@@ -90,7 +92,63 @@ def get_us_midlatitude_cyclone_abi(base_dir='.', method=None, force=False):
     from ._google_cloud_platform import get_bucket_files
     patterns = ['gs://gcp-public-data-goes-16/ABI-L1b-RadC/2019/073/00/*0002*.nc']
     subdir = os.path.join(base_dir, 'abi_l1b', '20190314_us_midlatitude_cyclone')
-    makedirs(subdir, exist_ok=True)
+    _makedirs(subdir, exist_ok=True)
     filenames = get_bucket_files(patterns, subdir, force=force)
-    assert len(filenames) == 16, "Not all ABI files could be downloaded"
+    assert len(filenames) == 16, "Not all files could be downloaded"
     return filenames


+def get_hurricane_florence_abi(base_dir='.', method=None, force=False,
+                               channels=range(1, 17), num_frames=10):
+    """Get GOES-16 ABI (Meso sector) data from 2018-09-11 13:00Z to 17:00Z.
+
+    Args:
+        base_dir (str): Base directory for downloaded files.
+        method (str): Force download method for the data if not already cached.
+            Allowed options are: 'gcsfs'. Default of ``None`` will
+            choose the best method based on environment settings.
+        force (bool): Force re-download of data regardless of its existence on
+            the local system. Warning: May delete non-demo files stored in
+            download directory.
+        channels (list): Channels to include in download. Defaults to all
+            16 channels.
+        num_frames (int or slice): Number of frames to download. Maximum
+            240 frames. Default 10 frames.
+
+    Size per frame (all channels): ~15MB
+    Total size (default 10 frames, all channels): ~124MB
+    Total size (240 frames, all channels): ~3.5GB
+
+    """
+    if method is None:
+        method = 'gcsfs'
+    if method not in ['gcsfs']:
+        raise NotImplementedError("Demo data download method '{}' not "
+                                  "implemented yet.".format(method))
+    if isinstance(num_frames, (int, float)):
+        frame_slice = slice(0, num_frames)
+    else:
+        frame_slice = num_frames
+
+    from ._google_cloud_platform import get_bucket_files
+
+    patterns = []
+    for channel in channels:
+        # patterns += ['gs://gcp-public-data-goes-16/ABI-L1b-RadM/2018/254/1[3456]/'
+        #              '*C{:02d}*s20182541[3456]*.nc'.format(channel)]
+        patterns += [(
+            'gs://gcp-public-data-goes-16/ABI-L1b-RadM/2018/254/13/*C{:02d}*s201825413*.nc'.format(channel),
+            'gs://gcp-public-data-goes-16/ABI-L1b-RadM/2018/254/14/*C{:02d}*s201825414*.nc'.format(channel),
+            'gs://gcp-public-data-goes-16/ABI-L1b-RadM/2018/254/15/*C{:02d}*s201825415*.nc'.format(channel),
+            'gs://gcp-public-data-goes-16/ABI-L1b-RadM/2018/254/16/*C{:02d}*s201825416*.nc'.format(channel),
+        )]
+    subdir = os.path.join(base_dir, 'abi_l1b', '20180911_hurricane_florence_abi_l1b')
+    _makedirs(subdir, exist_ok=True)
+    filenames = get_bucket_files(patterns, subdir, force=force, pattern_slice=frame_slice)
+
+    actual_slice = frame_slice.indices(240)  # 240 max frames
+    num_frames = int((actual_slice[1] - actual_slice[0]) / actual_slice[2])
+    assert len(filenames) == len(channels) * num_frames, "Not all files could be downloaded"
+    return filenames
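
For context, a typical call to the new demo function might look like this sketch (sizes and availability depend on the public GCP bucket):

    from satpy.demo import get_hurricane_florence_abi

    # default: first 10 frames for all 16 channels (~124MB)
    filenames = get_hurricane_florence_abi()

    # channels 2-4 only, every other frame from the first 20
    filenames = get_hurricane_florence_abi(channels=[2, 3, 4],
                                           num_frames=slice(0, 20, 2))

The closing assert leans on `slice.indices()`, which clamps a slice to a sequence length and returns concrete (start, stop, step) values: `slice(0, 10).indices(240)` gives `(0, 10, 1)` and `slice(None).indices(240)` gives `(0, 240, 1)`, so the expected file count is `len(channels) * num_frames` without a second round of globbing.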
23 changes: 19 additions & 4 deletions satpy/demo/_google_cloud_platform.py
@@ -41,18 +41,24 @@ def is_google_cloud_instance():
         return False


-def get_bucket_files(glob_pattern, base_dir, force=False):
+def get_bucket_files(glob_pattern, base_dir, force=False, pattern_slice=slice(None)):
     """Helper function to download files from Google Cloud Storage.

     Args:
         glob_pattern (str or list): Glob pattern string or series of patterns
             used to search for on Google Cloud Storage. The pattern should
-            include the "gs://" protocol prefix.
+            include the "gs://" protocol prefix. If a list of lists, then the
+            results of each sublist pattern are concatenated and the result is
+            treated as one pattern result. This is important for things like
+            ``pattern_slice`` and complicated glob patterns not supported by
+            GCP.
         base_dir (str): Root directory to place downloaded files on the local
             system.
         force (bool): Force re-download of data regardless of its existence on
             the local system. Warning: May delete non-demo files stored in
             download directory.
+        pattern_slice (slice): Slice object to limit the number of files
+            returned by each glob pattern.

     """
     if gcsfs is None:
@@ -67,16 +73,25 @@ def get_bucket_files(glob_pattern, base_dir, force=False):
     fs = gcsfs.GCSFileSystem(token='anon')
     filenames = []
     for gp in glob_pattern:
-        for fn in fs.glob(gp):
+        # handle multiple glob patterns being treated as one pattern
+        # for complicated patterns that GCP can't handle
+        if isinstance(gp, str):
+            glob_results = list(fs.glob(gp))
+        else:
+            # flat list of results
+            glob_results = [fn for pat in gp for fn in fs.glob(pat)]
+
+        for fn in glob_results[pattern_slice]:
             ondisk_fn = os.path.basename(fn)
             ondisk_pathname = os.path.join(base_dir, ondisk_fn)
             filenames.append(ondisk_pathname)
-            LOG.info("Downloading: {}".format(ondisk_pathname))
             if force and os.path.isfile(ondisk_pathname):
                 os.remove(ondisk_pathname)
             elif os.path.isfile(ondisk_pathname):
                 LOG.info("Found existing: {}".format(ondisk_pathname))
                 continue
+            LOG.info("Downloading: {}".format(ondisk_pathname))
             fs.get('gs://' + fn, ondisk_pathname)

     if not filenames:
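
To make the new `get_bucket_files` semantics concrete, here is a hedged sketch of a caller mixing a plain pattern with a grouped (tuple) pattern; the patterns themselves are illustrative:

    from satpy.demo._google_cloud_platform import get_bucket_files

    patterns = [
        # a plain pattern: globbed and sliced on its own
        'gs://gcp-public-data-goes-16/ABI-L1b-RadC/2019/073/00/*0002*.nc',
        # grouped sub-patterns: globbed separately, concatenated, then sliced
        ('gs://gcp-public-data-goes-16/ABI-L1b-RadM/2018/254/13/*C02*.nc',
         'gs://gcp-public-data-goes-16/ABI-L1b-RadM/2018/254/14/*C02*.nc'),
    ]
    # pattern_slice applies per logical pattern: at most 5 files per entry above
    filenames = get_bucket_files(patterns, '.', pattern_slice=slice(0, 5))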
74 changes: 68 additions & 6 deletions satpy/tests/test_demo.py
@@ -30,18 +30,47 @@
 import mock


+class _GlobHelper(object):
+    """Create side effect function for mocking gcsfs glob method."""
+
+    def __init__(self, num_results):
+        """Initialize side_effect function for mocking gcsfs glob method.
+
+        Args:
+            num_results (int or list): Number of results for each glob call
+                to return. If a list then number of results per call. The
+                last number is used for any additional calls.
+
+        """
+        self.current_call = 0
+        if not isinstance(num_results, (list, tuple)):
+            num_results = [num_results]
+        self.num_results = num_results
+
+    def __call__(self, pattern):
+        """The side effect function to be called as glob."""
+        try:
+            num_results = self.num_results[self.current_call]
+        except IndexError:
+            num_results = self.num_results[-1]
+        self.current_call += 1
+        return [pattern + '.{:03d}'.format(idx) for idx in range(num_results)]
+
+
 class TestDemo(unittest.TestCase):
     """Test demo data download functions."""

-    @mock.patch('satpy.demo.google_cloud_platform.gcsfs')
+    @mock.patch('satpy.demo._google_cloud_platform.gcsfs')
     def test_get_us_midlatitude_cyclone_abi(self, gcsfs_mod):
         """Test data download function."""
         from satpy.demo import get_us_midlatitude_cyclone_abi
         gcsfs_mod.GCSFileSystem = mock.MagicMock()
         gcsfs_inst = mock.MagicMock()
         gcsfs_mod.GCSFileSystem.return_value = gcsfs_inst
         gcsfs_inst.glob.return_value = ['a.nc', 'b.nc']
+        # expected 16 files, got 2
         self.assertRaises(AssertionError, get_us_midlatitude_cyclone_abi)
+        # unknown access method
         self.assertRaises(NotImplementedError, get_us_midlatitude_cyclone_abi, method='unknown')

         gcsfs_inst.glob.return_value = ['a.nc'] * 16
@@ -50,18 +79,47 @@ def test_get_us_midlatitude_cyclone_abi(self, gcsfs_mod):
         for fn in filenames:
             self.assertEqual(expected, fn)

+    @mock.patch('satpy.demo._google_cloud_platform.gcsfs')
+    def test_get_hurricane_florence_abi(self, gcsfs_mod):
+        """Test data download function."""
+        from satpy.demo import get_hurricane_florence_abi
+        gcsfs_mod.GCSFileSystem = mock.MagicMock()
+        gcsfs_inst = mock.MagicMock()
+        gcsfs_mod.GCSFileSystem.return_value = gcsfs_inst
+        # only return 5 results total
+        gcsfs_inst.glob.side_effect = _GlobHelper([5, 0])
+        # expected 16 files * 10 frames, got 16 * 5
+        self.assertRaises(AssertionError, get_hurricane_florence_abi)
+        self.assertRaises(NotImplementedError, get_hurricane_florence_abi, method='unknown')
+
+        gcsfs_inst.glob.side_effect = _GlobHelper([int(240 / 16), 0, 0, 0] * 16)
+        filenames = get_hurricane_florence_abi()
+        self.assertEqual(10 * 16, len(filenames))
+
+        gcsfs_inst.glob.side_effect = _GlobHelper([int(240 / 16), 0, 0, 0] * 16)
+        filenames = get_hurricane_florence_abi(channels=[2, 3, 4])
+        self.assertEqual(10 * 3, len(filenames))
+
+        gcsfs_inst.glob.side_effect = _GlobHelper([int(240 / 16), 0, 0, 0] * 16)
+        filenames = get_hurricane_florence_abi(channels=[2, 3, 4], num_frames=5)
+        self.assertEqual(5 * 3, len(filenames))
+
+        gcsfs_inst.glob.side_effect = _GlobHelper([int(240 / 16), 0, 0, 0] * 16)
+        filenames = get_hurricane_florence_abi(num_frames=5)
+        self.assertEqual(5 * 16, len(filenames))


 class TestGCPUtils(unittest.TestCase):
     """Test Google Cloud Platform utilities."""

-    @mock.patch('satpy.demo.google_cloud_platform.urlopen')
+    @mock.patch('satpy.demo._google_cloud_platform.urlopen')
     def test_is_gcp_instance(self, uo):
         """Test is_google_cloud_instance."""
         from satpy.demo._google_cloud_platform import is_google_cloud_instance, URLError
         uo.side_effect = URLError("Test Environment")
         self.assertFalse(is_google_cloud_instance())

-    @mock.patch('satpy.demo.google_cloud_platform.gcsfs')
+    @mock.patch('satpy.demo._google_cloud_platform.gcsfs')
     def test_get_bucket_files(self, gcsfs_mod):
         """Test get_bucket_files basic cases."""
         from satpy.demo._google_cloud_platform import get_bucket_files
@@ -73,11 +131,15 @@ def test_get_bucket_files(self, gcsfs_mod):
         expected = [os.path.join('.', 'a.nc'), os.path.join('.', 'b.nc')]
         self.assertEqual(expected, filenames)

+        gcsfs_inst.glob.side_effect = _GlobHelper(10)
+        filenames = get_bucket_files(['*.nc', '*.txt'], '.', pattern_slice=slice(2, 5))
+        self.assertEqual(len(filenames), 3 * 2)
+        gcsfs_inst.glob.side_effect = None  # reset mock side effect
+
         gcsfs_inst.glob.return_value = ['a.nc', 'b.nc']
         self.assertRaises(OSError, get_bucket_files, '*.nc', 'does_not_exist')

-        # touch the file
-        open('a.nc', 'w').close()
+        open('a.nc', 'w').close()  # touch the file
         gcsfs_inst.get.reset_mock()
         gcsfs_inst.glob.return_value = ['a.nc']
         filenames = get_bucket_files('*.nc', '.')
@@ -96,7 +158,7 @@ def test_get_bucket_files(self, gcsfs_mod):
         gcsfs_inst.glob.return_value = []
         self.assertRaises(OSError, get_bucket_files, '*.nc', '.')

-    @mock.patch('satpy.demo.google_cloud_platform.gcsfs', None)
+    @mock.patch('satpy.demo._google_cloud_platform.gcsfs', None)
     def test_no_gcsfs(self):
         """Test that 'gcsfs' is required."""
         from satpy.demo._google_cloud_platform import get_bucket_files
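
`_GlobHelper` is a plain callable, so the same trick works for any mock whose return value needs to vary per call. A standalone illustration, assuming the class is importable from the test module:

    import mock

    from satpy.tests.test_demo import _GlobHelper

    glob_mock = mock.MagicMock()
    glob_mock.side_effect = _GlobHelper([2, 0])  # 2 results, then none
    glob_mock('gs://bucket/a/*.nc')  # ['gs://bucket/a/*.nc.000', 'gs://bucket/a/*.nc.001']
    glob_mock('gs://bucket/b/*.nc')  # []
    glob_mock('gs://bucket/c/*.nc')  # [] (the last count is reused)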
2 changes: 1 addition & 1 deletion satpy/tests/test_writers.py
@@ -43,7 +43,7 @@ def mkdir_p(path):

     # Use for python 2.7 compatibility
     # When python 2.7 support is dropped just use
-    # `os.makedirs(path, exist_ok=True)`
+    # `os._makedirs(path, exist_ok=True)`
     try:
         os.makedirs(path)
     except OSError as exc:  # Python >2.5
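
For reference, the Python 3 call that `mkdir_p` emulates is the standard library's `os.makedirs` with `exist_ok=True`; the underscore-prefixed `_makedirs` introduced by this commit lives in `satpy.demo`, and `os` itself has no `_makedirs`:

    import os

    path = 'some/nested/dir'  # hypothetical path
    os.makedirs(path, exist_ok=True)  # succeeds even when the directory exists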
