Add support for s3 buckets in OLCI and ABI l1 readers #1439

Merged (23 commits, Dec 2, 2020)
25 changes: 25 additions & 0 deletions satpy/_compat.py
@@ -0,0 +1,25 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020 Satpy developers
#
# This file is part of satpy.
#
# satpy is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# satpy. If not, see <http://www.gnu.org/licenses/>.
"""Backports and compatibility fixes for satpy."""

from functools import lru_cache


def cached_property(func):
"""Port back functools.cached_property."""
return property(lru_cache(maxsize=None)(func))
Member:
Ah, I was thinking of putting the import of cached_property in here too. That way I could do from satpy._compat import cached_property with no try/except in my reader module. Thoughts?

Member Author:
You got it.
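
For readers following the discussion: a minimal sketch of the resulting import pattern, as a reader module might consume the backport. The handler class below is hypothetical and only illustrates the lazy caching.

try:
    from functools import cached_property  # Python 3.8+
except ImportError:
    # For Python < 3.8, fall back to the backport added in this PR.
    from satpy._compat import cached_property


class ExampleFileHandler:
    """Hypothetical handler; only the cached attribute matters here."""

    def __init__(self, filename):
        self.filename = filename

    @cached_property
    def nc(self):
        # Runs once per instance on first access; later accesses reuse the cached result.
        print("opening", self.filename)
        return {"source": self.filename}


handler = ExampleFileHandler("some_granule.nc")
handler.nc  # first access prints "opening some_granule.nc"
handler.nc  # second access is served from the cache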

73 changes: 72 additions & 1 deletion satpy/readers/__init__.py
@@ -21,6 +21,7 @@
import os
import warnings
from datetime import datetime, timedelta
from functools import total_ordering

import yaml

@@ -519,7 +520,7 @@ def load_readers(filenames=None, reader=None, reader_kwargs=None,


def _get_reader_kwargs(reader, reader_kwargs):
"""Helper for load_readers to form reader_kwargs.
"""Help load_readers to form reader_kwargs.

Helper for load_readers to get reader_kwargs and
reader_kwargs_without_filter in the desirable form.
@@ -538,3 +539,73 @@ def _get_reader_kwargs(reader, reader_kwargs):
reader_kwargs_without_filter[k].pop('filter_parameters', None)

return (reader_kwargs, reader_kwargs_without_filter)


@total_ordering
class FSFile(os.PathLike):
"""Implementation of a PathLike file object, that can be opened.
Collaborator:
I'm confused by this line. Is it a file object or a path object? If it's a file object it's already opened? I think os.PathLike represents a path, not an open file.

Member Author:
It is a path indeed, but can be opened to return a file object.
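
A tiny illustration of that distinction; the path below is hypothetical.

import os
from satpy.readers import FSFile

fs_file = FSFile("/path/to/granule.nc")  # hypothetical file
print(os.fspath(fs_file))                # it is a path: '/path/to/granule.nc'
# ...and, if the file exists, fs_file.open() returns a read-only file object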


This is made to be used in conjunction with fsspec or s3fs. For example::

from satpy import Scene

import fsspec
filename = 'noaa-goes16/ABI-L1b-RadC/2019/001/17/*_G16_s20190011702186*'

the_files = fsspec.open_files("simplecache::s3://" + filename, s3={'anon': True})

from satpy.readers import FSFile
fs_files = [FSFile(open_file) for open_file in the_files]

scn = Scene(filenames=fs_files, reader='abi_l1b')
scn.load(['true_color_raw'])

"""

def __init__(self, file, fs=None):
"""Initialise the FSFile instance.

*file* can be a string or an fsspec.OpenFile instance. In the latter case, the following argument *fs* has no effect.
*fs* can be None or an fsspec filesystem instance.
"""
try:
self._file = file.path
self._fs = file.fs
except AttributeError:
self._file = file
self._fs = fs

def __str__(self):
"""Return the string version of the filename."""
return self._file

def __fspath__(self):
"""Comply with PathLike."""
return self._file

def __repr__(self):
"""Representation of the object."""
return '<FSFile "' + str(self._file) + '">'

def open(self):
"""Open the file.

This is read-only.
"""
try:
return self._fs.open(self._file)
except AttributeError:
return open(self._file)

def __lt__(self, other):
"""Implement ordering."""
return os.fspath(self) < os.fspath(other)


def open_file_or_filename(unknown_file_thing):
"""Try to open the *unknown_file_thing*, otherwise return the filename."""
try:
f_obj = unknown_file_thing.open()
except AttributeError:
f_obj = unknown_file_thing
return f_obj
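
A hedged sketch of how FSFile and open_file_or_filename combine: anything with an open() method yields a file-like object, while a plain string falls through unchanged. The temporary file only makes the snippet self-contained.

import tempfile
from satpy.readers import FSFile, open_file_or_filename

# A plain string has no .open() method, so it comes back unchanged.
assert open_file_or_filename("some/path/granule.nc") == "some/path/granule.nc"

# An FSFile can be opened, so the caller receives a file-like object instead.
with tempfile.NamedTemporaryFile(suffix=".nc") as tmp:
    f_obj = open_file_or_filename(FSFile(tmp.name))
    f_obj.close()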
95 changes: 61 additions & 34 deletions satpy/readers/abi_base.py
@@ -18,14 +18,22 @@
"""Advance Baseline Imager reader base class for the Level 1b and l2+ reader."""

import logging
from contextlib import suppress
from datetime import datetime

import numpy as np
import xarray as xr

from pyresample import geometry
from satpy.readers.file_handlers import BaseFileHandler

from satpy import CHUNK_SIZE
from satpy.readers import open_file_or_filename
from satpy.readers.file_handlers import BaseFileHandler

try:
from functools import cached_property
except ImportError:
# for python < 3.8
from satpy._compat import cached_property

logger = logging.getLogger(__name__)

@@ -41,33 +49,40 @@ class NC_ABI_BASE(BaseFileHandler):
def __init__(self, filename, filename_info, filetype_info):
"""Open the NetCDF file with xarray and prepare the Dataset for reading."""
super(NC_ABI_BASE, self).__init__(filename, filename_info, filetype_info)
# xarray's default netcdf4 engine
try:
self.nc = xr.open_dataset(self.filename,
decode_cf=True,
mask_and_scale=False,
chunks={'x': CHUNK_SIZE, 'y': CHUNK_SIZE}, )
except ValueError:
self.nc = xr.open_dataset(self.filename,
decode_cf=True,
mask_and_scale=False,
chunks={'lon': CHUNK_SIZE, 'lat': CHUNK_SIZE}, )

if 't' in self.nc.dims or 't' in self.nc.coords:
self.nc = self.nc.rename({'t': 'time'})
platform_shortname = filename_info['platform_shortname']
self.platform_name = PLATFORM_NAMES.get(platform_shortname)

if 'goes_imager_projection' in self.nc:
self.nlines = self.nc['y'].size
self.ncols = self.nc['x'].size
elif 'goes_lat_lon_projection' in self.nc:
self.nlines = self.nc['lat'].size
self.ncols = self.nc['lon'].size
self.nc = self.nc.rename({'lon': 'x', 'lat': 'y'})
self.nlines = self.nc['y'].size
self.ncols = self.nc['x'].size

self.coords = {}

@cached_property
def nc(self):
"""Get the xarray dataset for this file."""
f_obj = open_file_or_filename(self.filename)
try:
nc = xr.open_dataset(f_obj,
decode_cf=True,
mask_and_scale=False,
chunks={'x': CHUNK_SIZE, 'y': CHUNK_SIZE}, )
except ValueError:
nc = xr.open_dataset(f_obj,
decode_cf=True,
mask_and_scale=False,
chunks={'lon': CHUNK_SIZE, 'lat': CHUNK_SIZE}, )
nc = self._rename_dims(nc)
return nc

@staticmethod
def _rename_dims(nc):
if 't' in nc.dims or 't' in nc.coords:
nc = nc.rename({'t': 'time'})
if 'goes_lat_lon_projection' in nc:
nc = nc.rename({'lon': 'x', 'lat': 'y'})
return nc

@property
def sensor(self):
"""Get sensor name for current file handler."""
@@ -80,32 +95,47 @@ def __getitem__(self, item):
variables which causes inaccurate unscaled data values. This method
forces the scale factor to a 64-bit float first.
"""
def is_int(val):
return np.issubdtype(val.dtype, np.integer) if hasattr(val, 'dtype') else isinstance(val, int)

data = self.nc[item]
attrs = data.attrs

data = self._adjust_data(data, item)

data.attrs = attrs

data = self._adjust_coords(data, item)

return data

def _adjust_data(self, data, item):
"""Adjust data with typing, scaling and filling."""
factor = data.attrs.get('scale_factor', 1)
offset = data.attrs.get('add_offset', 0)
fill = data.attrs.get('_FillValue')
unsigned = data.attrs.get('_Unsigned', None)

def is_int(val):
return np.issubdtype(val.dtype, np.integer) if hasattr(val, 'dtype') else isinstance(val, int)

# Ref. GOESR PUG-L1B-vol3, section 5.0.2 Unsigned Integer Processing
if unsigned is not None and unsigned.lower() == 'true':
# cast the data from int to uint
data = data.astype('u%s' % data.dtype.itemsize)

if fill is not None:
fill = fill.astype('u%s' % fill.dtype.itemsize)

if fill is not None:
# Some backends (h5netcdf) may return attributes as shape (1,)
# arrays rather than shape () scalars, which according to the netcdf
# documentation at <URL:https://www.unidata.ucar.edu
# /software/netcdf/docs/netcdf_data_set_components.html#attributes>
# is correct.
if np.ndim(fill) > 0:
fill = fill.item()
if is_int(data) and is_int(factor) and is_int(offset):
new_fill = fill
else:
new_fill = np.nan
data = data.where(data != fill, new_fill)

if factor != 1 and item in ('x', 'y'):
# be more precise with x/y coordinates
# see get_area_def for more information
@@ -117,10 +147,10 @@ def is_int(val):
if not is_int(factor):
factor = float(factor)
data = data * factor + offset
return data
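
A worked illustration, not part of this PR, of the _Unsigned cast performed earlier in _adjust_data: a count stored as a signed -1 with _Unsigned='true' really means the maximum unsigned value, and the astype to the same-width unsigned dtype recovers it.

import numpy as np

stored = np.array([-1, 100], dtype=np.int16)              # values as read from the file
unsigned = stored.astype('u%s' % stored.dtype.itemsize)   # same cast as in _adjust_data
print(unsigned)                                            # [65535   100]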

data.attrs = attrs

# handle coordinates (and recursive fun)
def _adjust_coords(self, data, item):
"""Handle coordinates (and recursive fun)."""
new_coords = {}
# 'time' dimension causes issues in other processing
# 'x_image' and 'y_image' are confusing to some users and unnecessary
@@ -135,7 +165,6 @@ def is_int(val):
self.coords[coord_name] = self[coord_name]
new_coords[coord_name] = self.coords[coord_name]
data.coords.update(new_coords)

return data

def get_dataset(self, key, info):
@@ -263,7 +292,5 @@ def spatial_resolution_to_number(self):

def __del__(self):
"""Close the NetCDF file that may still be open."""
try:
with suppress(IOError, OSError, AttributeError):
self.nc.close()
except (IOError, OSError, AttributeError):
pass
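
The contextlib.suppress form above is equivalent to the try/except/pass it replaces; a minimal self-contained sketch:

from contextlib import suppress

nc = None  # stand-in for a dataset that may never have been opened
with suppress(IOError, OSError, AttributeError):
    nc.close()  # the AttributeError raised on None is silently ignored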
8 changes: 2 additions & 6 deletions satpy/readers/file_handlers.py
@@ -20,9 +20,8 @@
from abc import ABCMeta

import numpy as np
from pathlib import PurePath

from pyresample.geometry import SwathDefinition

from satpy.dataset import combine_metadata


@@ -31,10 +30,7 @@ class BaseFileHandler(metaclass=ABCMeta):

def __init__(self, filename, filename_info, filetype_info):
"""Initialize file handler."""
if isinstance(filename, PurePath):
self.filename = str(filename)
else:
self.filename = filename
self.filename = filename
Member:
This change will break pathlib objects for all other readers, right? (assuming the low-level I/O library doesn't support them)

Member:
That said, I'm ok with this.

Member Author:
probably yes... Is this bad?

Member:
I think @gerritholl was the first user to point out that Satpy didn't work with pathlib objects so he should maybe make the final decision. It probably isn't great that this breaks it for all other readers, but I'm not sure how many readers need strings for their lower-level I/O libraries either.

self.navigation_reader = None
self.filename_info = filename_info
self.filetype_info = filetype_info
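
A hedged note on the discussion above: a reader whose low-level I/O library still requires string paths can coerce explicitly with os.fspath, which returns str unchanged and converts pathlib.Path (and FSFile) objects to str. The handler below is hypothetical.

import os

from satpy.readers.file_handlers import BaseFileHandler


class StringOnlyHandler(BaseFileHandler):
    """Hypothetical handler for a library that only accepts str paths."""

    def __init__(self, filename, filename_info, filetype_info):
        super().__init__(filename, filename_info, filetype_info)
        # str stays str; pathlib.Path and FSFile are converted via __fspath__.
        self._str_filename = os.fspath(self.filename)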
46 changes: 28 additions & 18 deletions satpy/readers/olci_nc.py
@@ -40,16 +40,23 @@


import logging
from datetime import datetime
from contextlib import suppress
from functools import reduce

import dask.array as da
import numpy as np
import xarray as xr

from satpy import CHUNK_SIZE
from satpy.readers import open_file_or_filename
from satpy.readers.file_handlers import BaseFileHandler
from satpy.utils import angle2xyz, xyz2angle
from satpy import CHUNK_SIZE
from functools import reduce

try:
from functools import cached_property
except ImportError:
# for python < 3.8
from satpy._compat import cached_property

logger = logging.getLogger(__name__)

@@ -100,30 +107,35 @@ def __init__(self, filename, filename_info, filetype_info,
"""Init the olci reader base."""
super(NCOLCIBase, self).__init__(filename, filename_info,
filetype_info)
self.nc = xr.open_dataset(self.filename,
self._engine = engine
self._start_time = filename_info['start_time']
self._end_time = filename_info['end_time']
# TODO: get metadata from the manifest file (xfdumanifest.xml)
self.platform_name = PLATFORM_NAMES[filename_info['mission_id']]
self.sensor = 'olci'
self.open_file = None

@cached_property
def nc(self):
"""Get the nc xr dataset."""
f_obj = open_file_or_filename(self.filename)
dataset = xr.open_dataset(f_obj,
decode_cf=True,
mask_and_scale=True,
engine=engine,
engine=self._engine,
chunks={'columns': CHUNK_SIZE,
'rows': CHUNK_SIZE})

self.nc = self.nc.rename({'columns': 'x', 'rows': 'y'})

# TODO: get metadata from the manifest file (xfdumanifest.xml)
self.platform_name = PLATFORM_NAMES[filename_info['mission_id']]
self.sensor = 'olci'
return dataset.rename({'columns': 'x', 'rows': 'y'})

@property
def start_time(self):
"""Start time property."""
return datetime.strptime(self.nc.attrs['start_time'],
'%Y-%m-%dT%H:%M:%S.%fZ')
return self._start_time

@property
def end_time(self):
"""End time property."""
return datetime.strptime(self.nc.attrs['stop_time'],
'%Y-%m-%dT%H:%M:%S.%fZ')
return self._end_time

def get_dataset(self, key, info):
"""Load a dataset."""
@@ -134,10 +146,8 @@ def get_dataset(self, key, info):

def __del__(self):
"""Close the NetCDF file that may still be open."""
try:
with suppress(IOError, OSError, AttributeError):
self.nc.close()
except (IOError, OSError, AttributeError):
pass


class NCOLCICal(NCOLCIBase):
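
To round off the PR title, a hedged OLCI counterpart of the ABI example in the FSFile docstring. The bucket, granule pattern and channel are illustrative only; the reader name 'olci_l1b' is Satpy's existing OLCI Level-1 reader.

import fsspec

from satpy import Scene
from satpy.readers import FSFile

# Hypothetical layout: the netCDF members of an extracted .SEN3 granule on S3.
the_files = fsspec.open_files(
    "s3://my-bucket/S3A_OL_1_EFR____20201202T100500*.SEN3/*.nc", anon=True)
fs_files = [FSFile(open_file) for open_file in the_files]

scn = Scene(filenames=fs_files, reader='olci_l1b')
scn.load(['Oa01'])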