Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fsspec support to li_l2_nc reader #2753

Merged
merged 6 commits into from
Mar 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion satpy/etc/readers/generic_image.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ reader:
long_name: Generic Images e.g. GeoTIFF
description: generic image reader
status: Nominal
supports_fsspec: false
supports_fsspec: true
reader: !!python/name:satpy.readers.yaml_reader.FileYAMLReader
sensors: [images]
default_channels: [image]
Expand Down
4 changes: 2 additions & 2 deletions satpy/readers/li_base_nc.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,12 +191,12 @@
import xarray as xr
from pyproj import Proj

from satpy.readers.netcdf_utils import NetCDF4FileHandler
from satpy.readers.netcdf_utils import NetCDF4FsspecFileHandler

logger = logging.getLogger(__name__)


class LINCFileHandler(NetCDF4FileHandler):
class LINCFileHandler(NetCDF4FsspecFileHandler):
"""Base class used as parent for the concrete LI reader classes."""

def __init__(self, filename, filename_info, filetype_info, cache_handle=True):
Expand Down
26 changes: 19 additions & 7 deletions satpy/readers/netcdf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,13 +258,7 @@ def collect_cache_vars(self, cache_var_size):
cache_vars = self._collect_cache_var_names(cache_var_size)
for var_name in cache_vars:
v = self.file_content[var_name]
try:
arr = xr.DataArray(
v[:], dims=v.dimensions, attrs=v.__dict__, name=v.name)
except ValueError:
# Handle scalars for h5netcdf backend
arr = xr.DataArray(
v.__array__(), dims=v.dimensions, attrs=v.__dict__, name=v.name)
arr = get_data_as_xarray(v)
self.cached_file_content[var_name] = arr

def _collect_cache_var_names(self, cache_var_size):
Expand Down Expand Up @@ -380,6 +374,24 @@ def _compose_replacement_names(variable_name_replacements, var, variable_names):
variable_names.append(var.format(**{key: val}))


def get_data_as_xarray(variable):
    """Wrap the contents of a netCDF variable in an ``xr.DataArray``.

    Attributes are taken from ``variable.attrs`` when that attribute exists
    and from ``variable.__dict__`` otherwise. Values are read with
    ``variable[:]``; if slicing raises ``ValueError`` or ``IndexError`` they
    are read with ``variable.__array__()`` instead. Dimensions and name are
    copied from ``variable.dimensions`` and ``variable.name``.
    """
    try:
        metadata = variable.attrs
    except AttributeError:
        # netCDF4 backend requires usage of __dict__ to get the attributes
        metadata = variable.__dict__

    try:
        values = variable[:]
    except (ValueError, IndexError):
        # Handle scalars for h5netcdf backend
        values = variable.__array__()

    return xr.DataArray(
        values,
        dims=variable.dimensions,
        attrs=metadata,
        name=variable.name,
    )


class NetCDF4FsspecFileHandler(NetCDF4FileHandler):
"""NetCDF4 file handler using fsspec to read files remotely."""

Expand Down
99 changes: 99 additions & 0 deletions satpy/tests/reader_tests/test_netcdf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,3 +293,102 @@
fh = NetCDF4FsspecFileHandler(fname, {}, {})
h5_file.assert_called_once()
assert fh._use_h5netcdf


# Attributes written to the "test_data" variable by the file-writing helpers
# and compared against the attributes of the resulting DataArray in the tests.
NC_ATTRS = dict(
    standard_name="test_data",
    scale_factor=0.01,
    add_offset=0,
)

def test_get_data_as_xarray_netcdf4(tmp_path):
    """Test getting xr.DataArray from netcdf4 variable.

    Writes a three-element array to a temporary netCDF4 file and checks that
    both the values and the attributes survive the conversion.
    """
    import numpy as np

    from satpy.readers.netcdf_utils import get_data_as_xarray

    data = np.array([1, 2, 3])
    fname = tmp_path / "test.nc"
    # The helper hands back the dataset still open in write mode; use it as a
    # context manager so the file handle is released even if an assertion fails.
    with _write_test_netcdf4(fname, data) as dset:
        res = get_data_as_xarray(dset["test_data"])
        np.testing.assert_equal(res.data, data)
        assert res.attrs == NC_ATTRS


def test_get_data_as_xarray_scalar_netcdf4(tmp_path):
    """Test getting scalar xr.DataArray from netcdf4 variable.

    Writes a single integer to a dimensionless variable in a temporary
    netCDF4 file and checks the converted values and attributes.
    """
    import numpy as np

    from satpy.readers.netcdf_utils import get_data_as_xarray

    data = 1
    fname = tmp_path / "test.nc"
    # The helper hands back the dataset still open in write mode; use it as a
    # context manager so the file handle is released even if an assertion fails.
    with _write_test_netcdf4(fname, data) as dset:
        res = get_data_as_xarray(dset["test_data"])
        np.testing.assert_equal(res.data, np.array(data))
        assert res.attrs == NC_ATTRS


def _write_test_netcdf4(fname, data):
    """Write *data* to a ``test_data`` variable with netCDF4 and return the open dataset.

    When ``data`` exposes ``size`` the variable gets a single ``"y"``
    dimension of that length; otherwise it is created without dimensions.
    The attributes in ``NC_ATTRS`` are attached to the variable. The dataset
    is returned without being closed.
    """
    import netCDF4 as nc

    dataset = nc.Dataset(fname, "w")
    dim_names = ()
    try:
        dataset.createDimension("y", data.size)
    except AttributeError:
        # E.g. a plain Python int has no ``size``: keep the variable dimensionless
        pass
    else:
        dim_names = ("y",)
    variable = dataset.createVariable("test_data", "uint8", dim_names)
    variable[:] = data
    variable.setncatts(NC_ATTRS)
    # Turn off automatic scale factor and offset handling
    dataset.set_auto_maskandscale(False)

    return dataset


def test_get_data_as_xarray_h5netcdf(tmp_path):
    """Test getting xr.DataArray from h5netcdf variable.

    Writes a three-element array to a temporary file with h5netcdf and checks
    that both the values and the attributes survive the conversion.
    """
    import numpy as np

    from satpy.readers.netcdf_utils import get_data_as_xarray

    data = np.array([1, 2, 3])
    fname = tmp_path / "test.nc"
    # The helper hands back the file still open in write mode; use it as a
    # context manager so the file handle is released even if an assertion fails.
    with _write_test_h5netcdf(fname, data) as fid:
        res = get_data_as_xarray(fid["test_data"])
        np.testing.assert_equal(res.data, data)
        assert res.attrs == NC_ATTRS


def _write_test_h5netcdf(fname, data):
    """Write *data* to a ``test_data`` variable with h5netcdf and return the open file.

    When ``data`` exposes ``size`` the variable gets a single ``"y"``
    dimension of that length; otherwise it is created without dimensions.
    Every entry of ``NC_ATTRS`` is copied onto the variable. The file is
    returned without being closed.
    """
    import h5netcdf

    h5_file = h5netcdf.File(fname, "w")
    dim_names = ()
    try:
        h5_file.dimensions = {"y": data.size}
    except AttributeError:
        # E.g. a plain Python int has no ``size``: keep the variable dimensionless
        pass
    else:
        dim_names = ("y",)
    variable = h5_file.create_variable("test_data", dim_names, "uint8", data=data)
    for attr_name, attr_value in NC_ATTRS.items():
        variable.attrs[attr_name] = attr_value

    return h5_file


def test_get_data_as_xarray_scalar_h5netcdf(tmp_path):
    """Test getting scalar xr.DataArray from h5netcdf variable.

    Writes a single integer to a dimensionless variable in a temporary file
    with h5netcdf and checks the converted values and attributes.
    """
    import numpy as np

    from satpy.readers.netcdf_utils import get_data_as_xarray

    data = 1
    fname = tmp_path / "test.nc"
    # The helper hands back the file still open in write mode; use it as a
    # context manager so the file handle is released even if an assertion fails.
    with _write_test_h5netcdf(fname, data) as fid:
        res = get_data_as_xarray(fid["test_data"])
        np.testing.assert_equal(res.data, np.array(data))
        assert res.attrs == NC_ATTRS

Check warning on line 394 in satpy/tests/reader_tests/test_netcdf_utils.py

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

❌ New issue: Code Duplication

The module contains 4 functions with similar structure: test_get_data_as_xarray_h5netcdf, test_get_data_as_xarray_netcdf4, test_get_data_as_xarray_scalar_h5netcdf, test_get_data_as_xarray_scalar_netcdf4. Avoid duplicated, aka copy-pasted, code inside the module. More duplication lowers the code health.