Add support for s3 buckets in OLCI and ABI l1 readers #1439

Merged (23 commits, Dec 2, 2020)
25 changes: 25 additions & 0 deletions satpy/_compat.py
@@ -0,0 +1,25 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020 Satpy developers
#
# This file is part of satpy.
#
# satpy is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# satpy. If not, see <http://www.gnu.org/licenses/>.
"""Backports and compatibility fixes for satpy."""

from functools import lru_cache


def cached_property(func):
"""Port back functools.cached_property."""
return property(lru_cache(maxsize=None)(func))
Member:
Ah, I was thinking of putting the import of cached_property in here too. That way I could do from satpy._compat import cached_property with no try/except in my reader module. Thoughts?

Member Author:
You got it.
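
For readers following the discussion: a minimal sketch of the resulting import pattern, as a reader module might consume the backport. The handler class below is hypothetical and only illustrates the lazy caching.

try:
    from functools import cached_property  # Python 3.8+
except ImportError:
    # For Python < 3.8, fall back to the backport added in this PR.
    from satpy._compat import cached_property


class ExampleFileHandler:
    """Hypothetical handler; only the cached attribute matters here."""

    def __init__(self, filename):
        self.filename = filename

    @cached_property
    def nc(self):
        # Runs once per instance on first access; later accesses reuse the cached result.
        print("opening", self.filename)
        return {"source": self.filename}


handler = ExampleFileHandler("some_granule.nc")
handler.nc  # first access prints "opening some_granule.nc"
handler.nc  # second access is served from the cache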

73 changes: 72 additions & 1 deletion satpy/readers/__init__.py
@@ -21,6 +21,7 @@
import os
import warnings
from datetime import datetime, timedelta
from functools import total_ordering

import yaml

@@ -519,7 +520,7 @@ def load_readers(filenames=None, reader=None, reader_kwargs=None,


def _get_reader_kwargs(reader, reader_kwargs):
"""Helper for load_readers to form reader_kwargs.
"""Help load_readers to form reader_kwargs.

Helper for load_readers to get reader_kwargs and
reader_kwargs_without_filter in the desirable form.
@@ -538,3 +539,73 @@ def _get_reader_kwargs(reader, reader_kwargs):
reader_kwargs_without_filter[k].pop('filter_parameters', None)

return (reader_kwargs, reader_kwargs_without_filter)


@total_ordering
class FSFile(os.PathLike):
"""Implementation of a PathLike file object, that can be opened.
Collaborator:
I'm confused by this line. Is it a file object or a path object? If it's a file object it's already opened? I think os.PathLike represents a path, not an open file.

Member Author:
It is a path indeed, but can be opened to return a file object.
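
A tiny illustration of that distinction; the path below is hypothetical.

import os
from satpy.readers import FSFile

fs_file = FSFile("/path/to/granule.nc")  # hypothetical file
print(os.fspath(fs_file))                # it is a path: '/path/to/granule.nc'
# ...and, if the file exists, fs_file.open() returns a read-only file object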


This is made to be used in conjunction with fsspec or s3fs. For example::

from satpy import Scene

import fsspec
filename = 'noaa-goes16/ABI-L1b-RadC/2019/001/17/*_G16_s20190011702186*'

the_files = fsspec.open_files("simplecache::s3://" + filename, s3={'anon': True})

from satpy.readers import FSFile
fs_files = [FSFile(open_file) for open_file in the_files]

scn = Scene(filenames=fs_files, reader='abi_l1b')
scn.load(['true_color_raw'])

"""

def __init__(self, file, fs=None):
"""Initialise the FSFile instance.

*file* can be a string or an fsspec.OpenFile instance. In the latter case, the following argument *fs* has no effect.
*fs* can be None or an fsspec filesystem instance.
"""
try:
self._file = file.path
self._fs = file.fs
except AttributeError:
self._file = file
self._fs = fs

def __str__(self):
"""Return the string version of the filename."""
return self._file

def __fspath__(self):
"""Comply with PathLike."""
return self._file

def __repr__(self):
"""Representation of the object."""
return '<FSFile "' + str(self._file) + '">'

def open(self):
"""Open the file.

This is read-only.
"""
try:
return self._fs.open(self._file)
except AttributeError:
return open(self._file)

def __lt__(self, other):
"""Implement ordering."""
return os.fspath(self) < os.fspath(other)


def open_file_or_filename(unknown_file_thing):
"""Try to open the *unknown_file_thing*, otherwise return the filename."""
try:
f_obj = unknown_file_thing.open()
except AttributeError:
f_obj = unknown_file_thing
return f_obj
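
A hedged sketch of how FSFile and open_file_or_filename combine: anything with an open() method yields a file-like object, while a plain string falls through unchanged. The temporary file only makes the snippet self-contained.

import tempfile
from satpy.readers import FSFile, open_file_or_filename

# A plain string has no .open() method, so it comes back unchanged.
assert open_file_or_filename("some/path/granule.nc") == "some/path/granule.nc"

# An FSFile can be opened, so the caller receives a file-like object instead.
with tempfile.NamedTemporaryFile(suffix=".nc") as tmp:
    f_obj = open_file_or_filename(FSFile(tmp.name))
    f_obj.close()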
95 changes: 61 additions & 34 deletions satpy/readers/abi_base.py
@@ -18,14 +18,22 @@
"""Advance Baseline Imager reader base class for the Level 1b and l2+ reader."""

import logging
from contextlib import suppress
from datetime import datetime

import numpy as np
import xarray as xr

from pyresample import geometry
from satpy.readers.file_handlers import BaseFileHandler

from satpy import CHUNK_SIZE
from satpy.readers import open_file_or_filename
from satpy.readers.file_handlers import BaseFileHandler

try:
from functools import cached_property
except ImportError:
# for python < 3.8
from satpy._compat import cached_property

logger = logging.getLogger(__name__)

@@ -41,33 +49,40 @@ class NC_ABI_BASE(BaseFileHandler):
def __init__(self, filename, filename_info, filetype_info):
"""Open the NetCDF file with xarray and prepare the Dataset for reading."""
super(NC_ABI_BASE, self).__init__(filename, filename_info, filetype_info)
# xarray's default netcdf4 engine
try:
self.nc = xr.open_dataset(self.filename,
decode_cf=True,
mask_and_scale=False,
chunks={'x': CHUNK_SIZE, 'y': CHUNK_SIZE}, )
except ValueError:
self.nc = xr.open_dataset(self.filename,
decode_cf=True,
mask_and_scale=False,
chunks={'lon': CHUNK_SIZE, 'lat': CHUNK_SIZE}, )

if 't' in self.nc.dims or 't' in self.nc.coords:
self.nc = self.nc.rename({'t': 'time'})
platform_shortname = filename_info['platform_shortname']
self.platform_name = PLATFORM_NAMES.get(platform_shortname)

if 'goes_imager_projection' in self.nc:
self.nlines = self.nc['y'].size
self.ncols = self.nc['x'].size
elif 'goes_lat_lon_projection' in self.nc:
self.nlines = self.nc['lat'].size
self.ncols = self.nc['lon'].size
self.nc = self.nc.rename({'lon': 'x', 'lat': 'y'})
self.nlines = self.nc['y'].size
self.ncols = self.nc['x'].size

self.coords = {}

@cached_property
def nc(self):
"""Get the xarray dataset for this file."""
f_obj = open_file_or_filename(self.filename)
try:
nc = xr.open_dataset(f_obj,
decode_cf=True,
mask_and_scale=False,
chunks={'x': CHUNK_SIZE, 'y': CHUNK_SIZE}, )
except ValueError:
nc = xr.open_dataset(f_obj,
decode_cf=True,
mask_and_scale=False,
chunks={'lon': CHUNK_SIZE, 'lat': CHUNK_SIZE}, )
nc = self._rename_dims(nc)
return nc

@staticmethod
def _rename_dims(nc):
if 't' in nc.dims or 't' in nc.coords:
nc = nc.rename({'t': 'time'})
if 'goes_lat_lon_projection' in nc:
nc = nc.rename({'lon': 'x', 'lat': 'y'})
return nc

@property
def sensor(self):
"""Get sensor name for current file handler."""
@@ -80,32 +95,47 @@ def __getitem__(self, item):
variables which causes inaccurate unscaled data values. This method
forces the scale factor to a 64-bit float first.
"""
def is_int(val):
return np.issubdtype(val.dtype, np.integer) if hasattr(val, 'dtype') else isinstance(val, int)

data = self.nc[item]
attrs = data.attrs

data = self._adjust_data(data, item)

data.attrs = attrs

data = self._adjust_coords(data, item)

return data

def _adjust_data(self, data, item):
"""Adjust data with typing, scaling and filling."""
factor = data.attrs.get('scale_factor', 1)
offset = data.attrs.get('add_offset', 0)
fill = data.attrs.get('_FillValue')
unsigned = data.attrs.get('_Unsigned', None)

def is_int(val):
return np.issubdtype(val.dtype, np.integer) if hasattr(val, 'dtype') else isinstance(val, int)

# Ref. GOESR PUG-L1B-vol3, section 5.0.2 Unsigned Integer Processing
if unsigned is not None and unsigned.lower() == 'true':
# cast the data from int to uint
data = data.astype('u%s' % data.dtype.itemsize)

if fill is not None:
fill = fill.astype('u%s' % fill.dtype.itemsize)

if fill is not None:
# Some backends (h5netcdf) may return attributes as shape (1,)
# arrays rather than shape () scalars, which according to the netcdf
# documentation at <URL:https://www.unidata.ucar.edu
# /software/netcdf/docs/netcdf_data_set_components.html#attributes>
# is correct.
if np.ndim(fill) > 0:
fill = fill.item()
if is_int(data) and is_int(factor) and is_int(offset):
new_fill = fill
else:
new_fill = np.nan
data = data.where(data != fill, new_fill)

if factor != 1 and item in ('x', 'y'):
# be more precise with x/y coordinates
# see get_area_def for more information
@@ -117,10 +147,10 @@ def is_int(val):
if not is_int(factor):
factor = float(factor)
data = data * factor + offset
return data
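
A worked illustration, not part of this PR, of the _Unsigned cast performed earlier in _adjust_data: a count stored as a signed -1 with _Unsigned='true' really means the maximum unsigned value, and the astype to the same-width unsigned dtype recovers it.

import numpy as np

stored = np.array([-1, 100], dtype=np.int16)              # values as read from the file
unsigned = stored.astype('u%s' % stored.dtype.itemsize)   # same cast as in _adjust_data
print(unsigned)                                            # [65535   100]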

data.attrs = attrs

# handle coordinates (and recursive fun)
def _adjust_coords(self, data, item):
"""Handle coordinates (and recursive fun)."""
new_coords = {}
# 'time' dimension causes issues in other processing
# 'x_image' and 'y_image' are confusing to some users and unnecessary
@@ -135,7 +165,6 @@ def is_int(val):
self.coords[coord_name] = self[coord_name]
new_coords[coord_name] = self.coords[coord_name]
data.coords.update(new_coords)

return data

def get_dataset(self, key, info):
@@ -263,7 +292,5 @@ def spatial_resolution_to_number(self):

def __del__(self):
"""Close the NetCDF file that may still be open."""
try:
with suppress(IOError, OSError, AttributeError):
self.nc.close()
except (IOError, OSError, AttributeError):
pass
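
The contextlib.suppress form above is equivalent to the try/except/pass it replaces; a minimal self-contained sketch:

from contextlib import suppress

nc = None  # stand-in for a dataset that may never have been opened
with suppress(IOError, OSError, AttributeError):
    nc.close()  # the AttributeError raised on None is silently ignored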
8 changes: 2 additions & 6 deletions satpy/readers/file_handlers.py
@@ -20,9 +20,8 @@
from abc import ABCMeta

import numpy as np
from pathlib import PurePath

from pyresample.geometry import SwathDefinition

from satpy.dataset import combine_metadata


@@ -31,10 +30,7 @@ class BaseFileHandler(metaclass=ABCMeta):

def __init__(self, filename, filename_info, filetype_info):
"""Initialize file handler."""
if isinstance(filename, PurePath):
self.filename = str(filename)
else:
self.filename = filename
self.filename = filename
Member:
This change will break pathlib objects for all other readers, right? (assuming the low-level I/O library doesn't support them)

Member:
That said, I'm ok with this.

Member Author:
probably yes... Is this bad?

Member:
I think @gerritholl was the first user to point out that Satpy didn't work with pathlib objects so he should maybe make the final decision. It probably isn't great that this breaks it for all other readers, but I'm not sure how many readers need strings for their lower-level I/O libraries either.

self.navigation_reader = None
self.filename_info = filename_info
self.filetype_info = filetype_info
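
A hedged note on the discussion above: a reader whose low-level I/O library still requires string paths can coerce explicitly with os.fspath, which returns str unchanged and converts pathlib.Path (and FSFile) objects to str. The handler below is hypothetical.

import os

from satpy.readers.file_handlers import BaseFileHandler


class StringOnlyHandler(BaseFileHandler):
    """Hypothetical handler for a library that only accepts str paths."""

    def __init__(self, filename, filename_info, filetype_info):
        super().__init__(filename, filename_info, filetype_info)
        # str stays str; pathlib.Path and FSFile are converted via __fspath__.
        self._str_filename = os.fspath(self.filename)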
46 changes: 28 additions & 18 deletions satpy/readers/olci_nc.py
@@ -40,16 +40,23 @@


import logging
from datetime import datetime
from contextlib import suppress
from functools import reduce

import dask.array as da
import numpy as np
import xarray as xr

from satpy import CHUNK_SIZE
from satpy.readers import open_file_or_filename
from satpy.readers.file_handlers import BaseFileHandler
from satpy.utils import angle2xyz, xyz2angle
from satpy import CHUNK_SIZE
from functools import reduce

try:
from functools import cached_property
except ImportError:
# for python < 3.8
from satpy._compat import cached_property

logger = logging.getLogger(__name__)

@@ -100,30 +107,35 @@ def __init__(self, filename, filename_info, filetype_info,
"""Init the olci reader base."""
super(NCOLCIBase, self).__init__(filename, filename_info,
filetype_info)
self.nc = xr.open_dataset(self.filename,
self._engine = engine
self._start_time = filename_info['start_time']
self._end_time = filename_info['end_time']
# TODO: get metadata from the manifest file (xfdumanifest.xml)
self.platform_name = PLATFORM_NAMES[filename_info['mission_id']]
self.sensor = 'olci'
self.open_file = None

@cached_property
def nc(self):
"""Get the nc xr dataset."""
f_obj = open_file_or_filename(self.filename)
dataset = xr.open_dataset(f_obj,
decode_cf=True,
mask_and_scale=True,
engine=engine,
engine=self._engine,
chunks={'columns': CHUNK_SIZE,
'rows': CHUNK_SIZE})

self.nc = self.nc.rename({'columns': 'x', 'rows': 'y'})

# TODO: get metadata from the manifest file (xfdumanifest.xml)
self.platform_name = PLATFORM_NAMES[filename_info['mission_id']]
self.sensor = 'olci'
return dataset.rename({'columns': 'x', 'rows': 'y'})

@property
def start_time(self):
"""Start time property."""
return datetime.strptime(self.nc.attrs['start_time'],
'%Y-%m-%dT%H:%M:%S.%fZ')
return self._start_time

@property
def end_time(self):
"""End time property."""
return datetime.strptime(self.nc.attrs['stop_time'],
'%Y-%m-%dT%H:%M:%S.%fZ')
return self._end_time

def get_dataset(self, key, info):
"""Load a dataset."""
@@ -134,10 +146,8 @@ def get_dataset(self, key, info):

def __del__(self):
"""Close the NetCDF file that may still be open."""
try:
with suppress(IOError, OSError, AttributeError):
self.nc.close()
except (IOError, OSError, AttributeError):
pass


class NCOLCICal(NCOLCIBase):
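
To round off the PR title, a hedged OLCI counterpart of the ABI example in the FSFile docstring. The bucket, granule pattern and channel are illustrative only; the reader name 'olci_l1b' is Satpy's existing OLCI Level-1 reader.

import fsspec

from satpy import Scene
from satpy.readers import FSFile

# Hypothetical layout: the netCDF members of an extracted .SEN3 granule on S3.
the_files = fsspec.open_files(
    "s3://my-bucket/S3A_OL_1_EFR____20201202T100500*.SEN3/*.nc", anon=True)
fs_files = [FSFile(open_file) for open_file in the_files]

scn = Scene(filenames=fs_files, reader='olci_l1b')
scn.load(['Oa01'])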