diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index 43af8d14f9d..ccde3064e4e 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -591,8 +591,8 @@ The library ``h5netcdf`` allows writing some dtypes that aren't allowed in netCDF4 (see `h5netcdf documentation `_). This feature is available through :py:meth:`DataArray.to_netcdf` and -:py:meth:`Dataset.to_netcdf` when used with ``engine="h5netcdf"`` -and currently raises a warning unless ``invalid_netcdf=True`` is set. +:py:meth:`Dataset.to_netcdf` when used with ``engine="h5netcdf"``, only if +``invalid_netcdf=True`` is explicitly set. .. warning:: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1d6a986ceb2..76436377a6e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,9 +26,12 @@ Breaking changes dataset in-place. (:issue:`10167`) By `Maximilian Roos `_. -- The default ``engine`` when reading/writing netCDF files in-memory is now - netCDF4, consistent with Xarray's default ``engine`` when read/writing netCDF - files to disk (:pull:`10624`). +- The default ``engine`` when reading/writing netCDF files is now h5netcdf + or scipy, which are typically faster than the prior default of netCDF4-python. + You can control this default behavior explicitly via the new + ``netcdf_engine_order`` parameter in :py:func:`~xarray.set_options`, e.g., + ``xr.set_options(netcdf_engine_order=['netcdf4', 'scipy', 'h5netcdf'])`` to + restore the prior defaults (:issue:`10657`). By `Stephan Hoyer `_. Deprecations diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 37b5d819ae5..3ef75dcd2b2 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -415,9 +415,10 @@ def open_dataset( , installed backend \ or subclass of xarray.backends.BackendEntrypoint, optional Engine to use when reading files. If not provided, the default engine - is chosen based on available dependencies, with a preference for - "netcdf4". 
A custom backend class (a subclass of ``BackendEntrypoint``) - can also be used. + is chosen based on available dependencies, by default preferring + "h5netcdf" over "scipy" over "netcdf4" (customizable via + ``netcdf_engine_order`` in ``xarray.set_options()``). A custom backend + class (a subclass of ``BackendEntrypoint``) can also be used. chunks : int, dict, 'auto' or None, default: None If provided, used to load the data into dask arrays. @@ -658,8 +659,10 @@ def open_dataarray( , installed backend \ or subclass of xarray.backends.BackendEntrypoint, optional Engine to use when reading files. If not provided, the default engine - is chosen based on available dependencies, with a preference for - "netcdf4". + is chosen based on available dependencies, by default preferring + "h5netcdf" over "scipy" over "netcdf4" (customizable via + ``netcdf_engine_order`` in ``xarray.set_options()``). A custom backend + class (a subclass of ``BackendEntrypoint``) can also be used. chunks : int, dict, 'auto' or None, default: None If provided, used to load the data into dask arrays. @@ -882,9 +885,10 @@ def open_datatree( engine : {"netcdf4", "h5netcdf", "zarr", None}, \ installed backend or xarray.backends.BackendEntrypoint, optional Engine to use when reading files. If not provided, the default engine - is chosen based on available dependencies, with a preference for - "netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``) - can also be used. + is chosen based on available dependencies, by default preferring + "h5netcdf" over "netcdf4" (customizable via ``netcdf_engine_order`` in + ``xarray.set_options()``). A custom backend class (a subclass of + ``BackendEntrypoint``) can also be used. chunks : int, dict, 'auto' or None, default: None If provided, used to load the data into dask arrays. 
@@ -1040,7 +1044,7 @@ def open_datatree( kwargs.update(backend_kwargs) if engine is None: - engine = plugins.guess_engine(filename_or_obj) + engine = plugins.guess_engine(filename_or_obj, must_support_groups=True) if from_array_kwargs is None: from_array_kwargs = {} @@ -1126,8 +1130,10 @@ def open_groups( engine : {"netcdf4", "h5netcdf", "zarr", None}, \ installed backend or xarray.backends.BackendEntrypoint, optional Engine to use when reading files. If not provided, the default engine - is chosen based on available dependencies, with a preference for - "netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``) + is chosen based on available dependencies, by default preferring + "h5netcdf" over "netcdf4" (customizable via ``netcdf_engine_order`` in + ``xarray.set_options()``). A custom backend class (a subclass of + ``BackendEntrypoint``) can also be used. chunks : int, dict, 'auto' or None, default: None If provided, used to load the data into dask arrays. @@ -1283,7 +1289,7 @@ def open_groups( kwargs.update(backend_kwargs) if engine is None: - engine = plugins.guess_engine(filename_or_obj) + engine = plugins.guess_engine(filename_or_obj, must_support_groups=True) if from_array_kwargs is None: from_array_kwargs = {} @@ -1443,8 +1449,10 @@ def open_mfdataset( , installed backend \ or subclass of xarray.backends.BackendEntrypoint, optional Engine to use when reading files. If not provided, the default engine - is chosen based on available dependencies, with a preference for - "netcdf4". + is chosen based on available dependencies, by default preferring + "h5netcdf" over "scipy" over "netcdf4" (customizable via + ``netcdf_engine_order`` in ``xarray.set_options()``). A custom backend + class (a subclass of ``BackendEntrypoint``) can also be used.
data_vars : {"minimal", "different", "all"} or list of str, default: "all" These data variables will be concatenated together: * "minimal": Only data variables in which the dimension already diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 7f6921ae2a1..92694c16a52 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -53,14 +53,18 @@ @overload -def _normalize_path(path: str | os.PathLike) -> str: ... +def _normalize_path(path: os.PathLike) -> str: ... + + +@overload +def _normalize_path(path: str) -> str: ... @overload def _normalize_path(path: T) -> T: ... -def _normalize_path(path: str | os.PathLike | T) -> str | T: +def _normalize_path(path: os.PathLike | str | T) -> str | T: """ Normalize pathlikes to string. @@ -85,7 +89,7 @@ def _normalize_path(path: str | os.PathLike | T) -> str | T: if isinstance(path, str) and not is_remote_uri(path): path = os.path.abspath(os.path.expanduser(path)) - return path # type:ignore [return-value] + return path # type: ignore[return-value] @overload @@ -749,11 +753,15 @@ class BackendEntrypoint: url : str, default: "" A string with the URL to the backend's documentation. The setting of this attribute is not mandatory. + supports_groups : bool, default: False + Whether the backend supports opening groups (via open_datatree and + open_groups_as_dict) or not. """ open_dataset_parameters: ClassVar[tuple | None] = None description: ClassVar[str] = "" url: ClassVar[str] = "" + supports_groups: ClassVar[bool] = False def __repr__(self) -> str: txt = f"<{type(self).__name__}>" @@ -808,6 +816,8 @@ def open_datatree( ) -> DataTree: """ Backend open_datatree method used by Xarray in :py:func:`~xarray.open_datatree`. + + If implemented, set the class variable supports_groups to True. 
""" raise NotImplementedError() @@ -830,6 +840,8 @@ def open_groups_as_dict( This function exists to provide a universal way to open all groups in a file, before applying any additional consistency checks or requirements necessary to create a `DataTree` object (typically done using :py:meth:`~xarray.DataTree.from_dict`). + + If implemented, set the class variable supports_groups to True. """ raise NotImplementedError() diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 422eadc6c34..28565f92de9 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -459,6 +459,7 @@ class H5netcdfBackendEntrypoint(BackendEntrypoint): "Open netCDF (.nc, .nc4 and .cdf) and most HDF5 files using h5netcdf in Xarray" ) url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.H5netcdfBackendEntrypoint.html" + supports_groups = True def guess_can_open(self, filename_or_obj: T_PathFileOrDataStore) -> bool: filename_or_obj = _normalize_filename_or_obj(filename_or_obj) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 234768ef891..8d4ca6441c9 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -698,6 +698,7 @@ class NetCDF4BackendEntrypoint(BackendEntrypoint): "Open netCDF (.nc, .nc4 and .cdf) and most HDF5 files using netCDF4 in Xarray" ) url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.NetCDF4BackendEntrypoint.html" + supports_groups = True def guess_can_open(self, filename_or_obj: T_PathFileOrDataStore) -> bool: if isinstance(filename_or_obj, str) and is_remote_uri(filename_or_obj): diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index 354fca692dd..97455d8acc0 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -9,6 +9,7 @@ from typing import TYPE_CHECKING, Any from xarray.backends.common import BACKEND_ENTRYPOINTS, BackendEntrypoint +from xarray.core.options import OPTIONS from xarray.core.utils import 
module_available if TYPE_CHECKING: @@ -18,8 +19,6 @@ from xarray.backends.common import AbstractDataStore from xarray.core.types import ReadBuffer -NETCDF_BACKENDS_ORDER = ["netcdf4", "h5netcdf", "scipy"] - def remove_duplicates(entrypoints: EntryPoints) -> list[EntryPoint]: # sort and group entrypoints by name @@ -91,8 +90,8 @@ def set_missing_parameters( def sort_backends( backend_entrypoints: dict[str, type[BackendEntrypoint]], ) -> dict[str, type[BackendEntrypoint]]: - ordered_backends_entrypoints = {} - for be_name in NETCDF_BACKENDS_ORDER: + ordered_backends_entrypoints: dict[str, type[BackendEntrypoint]] = {} + for be_name in OPTIONS["netcdf_engine_order"]: if backend_entrypoints.get(be_name) is not None: ordered_backends_entrypoints[be_name] = backend_entrypoints.pop(be_name) ordered_backends_entrypoints.update( @@ -144,10 +143,13 @@ def guess_engine( | bytes | memoryview | AbstractDataStore, + must_support_groups: bool = False, ) -> str | type[BackendEntrypoint]: engines = list_engines() for engine, backend in engines.items(): + if must_support_groups and not backend.supports_groups: + continue try: if backend.guess_can_open(store_spec): return engine @@ -162,6 +164,8 @@ def guess_engine( for engine, (_, backend_cls) in BACKEND_ENTRYPOINTS.items(): try: backend = backend_cls() + if must_support_groups and not backend.supports_groups: + continue if backend.guess_can_open(store_spec): compatible_engines.append(engine) except Exception: @@ -180,6 +184,15 @@ def guess_engine( "https://docs.xarray.dev/en/stable/getting-started-guide/installing.html\n" "https://docs.xarray.dev/en/stable/user-guide/io.html" ) + elif must_support_groups: + error_msg = ( + "xarray is unable to open this file because it has no currently " + "installed IO backends that support reading groups (e.g., h5netcdf " + "or netCDF4-python). 
Xarray's read/write support requires " "installing optional IO dependencies, see:\n" "https://docs.xarray.dev/en/stable/getting-started-guide/installing.html\n" "https://docs.xarray.dev/en/stable/user-guide/io.html" ) else: error_msg = ( "xarray is unable to open this file because it has no currently " diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 0dae2ddb895..5ac5008098b 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -323,7 +323,7 @@ def _normalize_filename_or_obj( if isinstance(filename_or_obj, bytes | memoryview): return io.BytesIO(filename_or_obj) else: - return _normalize_path(filename_or_obj) # type: ignore[return-value] + return _normalize_path(filename_or_obj) class ScipyBackendEntrypoint(BackendEntrypoint): diff --git a/xarray/backends/writers.py b/xarray/backends/writers.py index a1901024b38..a1b02338dca 100644 --- a/xarray/backends/writers.py +++ b/xarray/backends/writers.py @@ -8,12 +8,11 @@ from itertools import starmap from numbers import Number from os import PathLike -from typing import TYPE_CHECKING, Any, Literal, cast, get_args, overload +from typing import TYPE_CHECKING, Any, Literal, get_args, overload import numpy as np from xarray import backends, conventions -from xarray.backends import plugins from xarray.backends.api import ( _normalize_path, delayed_close_after_writes, @@ -23,6 +22,7 @@ from xarray.backends.store import AbstractDataStore from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree +from xarray.core.options import OPTIONS from xarray.core.types import NetcdfWriteModes, ZarrWriteModes from xarray.core.utils import emit_user_level_warning @@ -163,35 +163,53 @@ def check_attr(name, value, valid_types): def get_default_netcdf_write_engine( + path_or_file: str | IOBase | None, format: T_NetcdfTypes | None, - to_fileobject: bool, ) -> Literal["netcdf4", "h5netcdf", "scipy"]: """Return the default netCDF library to use for writing a netCDF file.""" + 
module_names = { "netcdf4": "netCDF4", "scipy": "scipy", "h5netcdf": "h5netcdf", } - - candidates = list(plugins.NETCDF_BACKENDS_ORDER) + candidates = list(OPTIONS["netcdf_engine_order"]) if format is not None: - if format.upper().startswith("NETCDF3"): + format = format.upper() # type: ignore[assignment] + if format not in { + "NETCDF4", + "NETCDF4_CLASSIC", + "NETCDF3_64BIT", + "NETCDF3_CLASSIC", + }: + raise ValueError(f"unexpected {format=}") + # TODO: allow format='NETCDF4_CLASSIC' to default to using h5netcdf, + # when the oldest supported version of h5netcdf supports it: + # https://github.com/h5netcdf/h5netcdf/pull/283 + if format != "NETCDF4": candidates.remove("h5netcdf") - elif format.upper().startswith("NETCDF4"): + if format not in {"NETCDF3_64BIT", "NETCDF3_CLASSIC"}: candidates.remove("scipy") - else: - raise ValueError(f"unexpected {format=}") - if to_fileobject: + nczarr_mode = isinstance(path_or_file, str) and path_or_file.endswith( + "#mode=nczarr" + ) + if nczarr_mode: + candidates[:] = ["netcdf4"] + + if isinstance(path_or_file, IOBase): candidates.remove("netcdf4") for engine in candidates: module_name = module_names[engine] if importlib.util.find_spec(module_name) is not None: - return cast(Literal["netcdf4", "h5netcdf", "scipy"], engine) + return engine - format_str = f" with {format=}" if format is not None else "" + if nczarr_mode: + format_str = " in NCZarr format" + else: + format_str = f" with {format=}" if format is not None else "" libraries = ", ".join(module_names[c] for c in candidates) raise ValueError( f"cannot write NetCDF files{format_str} because none of the suitable " @@ -378,11 +396,10 @@ def to_netcdf( if encoding is None: encoding = {} - path_or_file = _normalize_path(path_or_file) + normalized_path = _normalize_path(path_or_file) if engine is None: - to_fileobject = isinstance(path_or_file, IOBase) - engine = get_default_netcdf_write_engine(format, to_fileobject) + engine = get_default_netcdf_write_engine(normalized_path, 
format) # validate Dataset keys, DataArray names, and attr keys/values _validate_dataset_names(dataset) @@ -392,7 +409,7 @@ def to_netcdf( autoclose = _get_netcdf_autoclose(dataset, engine) - if path_or_file is None: + if normalized_path is None: if not compute: raise NotImplementedError( "to_netcdf() with compute=False is not yet implemented when " @@ -400,7 +417,7 @@ def to_netcdf( ) target = BytesIOProxy() else: - target = path_or_file # type: ignore[assignment] + target = normalized_path # type: ignore[assignment] store = get_writable_netcdf_store( target, @@ -529,9 +546,9 @@ def save_mfdataset( if necessary. engine : {"netcdf4", "scipy", "h5netcdf"}, optional Engine to use when writing netCDF files. If not provided, the - default engine is chosen based on available dependencies, with a - preference for "netcdf4" if writing to a file on disk. - See `Dataset.to_netcdf` for additional information. + default engine is chosen based on available dependencies, by default + preferring "h5netcdf" over "scipy" over "netcdf4" (customizable via + ``netcdf_engine_order`` in ``xarray.set_options()``). compute : bool If true compute immediately, otherwise return a ``dask.delayed.Delayed`` object that can be computed later. 
@@ -815,13 +832,12 @@ def _datatree_to_netcdf( "DataTree.to_netcdf only supports the netcdf4 and h5netcdf engines" ) - filepath = _normalize_path(filepath) + normalized_path = _normalize_path(filepath) if engine is None: - to_fileobject = isinstance(filepath, io.IOBase) engine = get_default_netcdf_write_engine( + path_or_file=normalized_path, format="NETCDF4", # required for supporting groups - to_fileobject=to_fileobject, ) # type: ignore[assignment] if group is not None: @@ -840,7 +856,7 @@ def _datatree_to_netcdf( f"unexpected encoding group name(s) provided: {set(encoding) - set(dt.groups)}" ) - if filepath is None: + if normalized_path is None: if not compute: raise NotImplementedError( "to_netcdf() with compute=False is not yet implemented when " @@ -848,7 +864,7 @@ def _datatree_to_netcdf( ) target = BytesIOProxy() else: - target = filepath # type: ignore[assignment] + target = normalized_path # type: ignore[assignment] if unlimited_dims is None: unlimited_dims = {} diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index d6f5ba149ca..97b5c68491e 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4170,8 +4170,9 @@ def to_netcdf( format='NETCDF4'). The group(s) will be created if necessary. engine : {"netcdf4", "scipy", "h5netcdf"}, optional Engine to use when writing netCDF files. If not provided, the - default engine is chosen based on available dependencies, with a - preference for 'netcdf4' if writing to a file on disk. + default engine is chosen based on available dependencies, by default + preferring "h5netcdf" over "scipy" over "netcdf4" (customizable via + ``netcdf_engine_order`` in ``xarray.set_options()``). 
encoding : dict, optional Nested dictionary with variable names as keys and dictionaries of variable specific encodings as values, e.g., diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 5f00fdc2f03..b4313e2da74 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2057,8 +2057,9 @@ def to_netcdf( format='NETCDF4'). The group(s) will be created if necessary. engine : {"netcdf4", "scipy", "h5netcdf"}, optional Engine to use when writing netCDF files. If not provided, the - default engine is chosen based on available dependencies, with a - preference for 'netcdf4' if writing to a file on disk. + default engine is chosen based on available dependencies, by default + preferring "h5netcdf" over "scipy" over "netcdf4" (customizable via + ``netcdf_engine_order`` in ``xarray.set_options()``). encoding : dict, optional Nested dictionary with variable names as keys and dictionaries of variable specific encodings as values, e.g., diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index e250a74ca0b..d3bfd8c8432 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -1849,8 +1849,9 @@ def to_netcdf( * NETCDF4: Data is stored in an HDF5 file, using netCDF4 API features. engine : {"netcdf4", "h5netcdf"}, optional Engine to use when writing netCDF files. If not provided, the - default engine is chosen based on available dependencies, with a - preference for "netcdf4" if writing to a file on disk. + default engine is chosen based on available dependencies, by default + preferring "h5netcdf" over "netcdf4" (customizable via + ``netcdf_engine_order`` in ``xarray.set_options()``). group : str, optional Path to the netCDF4 group in the given file to open as the root group of the ``DataTree``. Currently, specifying a group is not supported. 
diff --git a/xarray/core/options.py b/xarray/core/options.py index 791d3bd928e..c8d00eea5d2 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -1,6 +1,7 @@ from __future__ import annotations import warnings +from collections.abc import Sequence from typing import TYPE_CHECKING, Any, Literal, TypedDict from xarray.core.utils import FrozenDict @@ -28,6 +29,7 @@ "enable_cftimeindex", "file_cache_maxsize", "keep_attrs", + "netcdf_engine_order", "warn_for_unclosed_files", "use_bottleneck", "use_new_combine_kwarg_defaults", @@ -57,6 +59,7 @@ class T_Options(TypedDict): enable_cftimeindex: bool file_cache_maxsize: int keep_attrs: Literal["default"] | bool + netcdf_engine_order: Sequence[Literal["h5netcdf", "scipy", "netcdf4"]] warn_for_unclosed_files: bool use_bottleneck: bool use_flox: bool @@ -86,6 +89,7 @@ class T_Options(TypedDict): "enable_cftimeindex": True, "file_cache_maxsize": 128, "keep_attrs": "default", + "netcdf_engine_order": ("h5netcdf", "scipy", "netcdf4"), "warn_for_unclosed_files": False, "use_bottleneck": True, "use_flox": True, @@ -96,6 +100,7 @@ class T_Options(TypedDict): _JOIN_OPTIONS = frozenset(["inner", "outer", "left", "right", "exact"]) _DISPLAY_OPTIONS = frozenset(["text", "html"]) +_NETCDF_ENGINES = frozenset(["h5netcdf", "scipy", "netcdf4"]) def _positive_integer(value: Any) -> bool: @@ -119,6 +124,7 @@ def _positive_integer(value: Any) -> bool: "enable_cftimeindex": lambda value: isinstance(value, bool), "file_cache_maxsize": _positive_integer, "keep_attrs": lambda choice: choice in [True, False, "default"], + "netcdf_engine_order": lambda engines: set(engines) <= _NETCDF_ENGINES, "use_bottleneck": lambda value: isinstance(value, bool), "use_new_combine_kwarg_defaults": lambda value: isinstance(value, bool), "use_numbagg": lambda value: isinstance(value, bool), @@ -254,6 +260,11 @@ class set_options: * ``False`` : to always discard attrs * ``default`` : to use original logic that attrs should only be kept in unambiguous 
circumstances + netcdf_engine_order : sequence, default ['h5netcdf', 'scipy', 'netcdf4'] + Preference order of backend engines to use when reading or writing + netCDF files with ``open_dataset()`` and ``to_netcdf()`` if ``engine`` + is not explicitly specified. May be any permutation or subset of + ``['h5netcdf', 'scipy', 'netcdf4']``. use_bottleneck : bool, default: True Whether to use ``bottleneck`` to accelerate 1D reductions and 1D rolling reduction operations. @@ -311,6 +322,8 @@ def __init__(self, **kwargs): expected = f"Expected one of {_JOIN_OPTIONS!r}" elif k == "display_style": expected = f"Expected one of {_DISPLAY_OPTIONS!r}" + elif k == "netcdf_engine_order": + expected = f"Expected a subset of {sorted(_NETCDF_ENGINES)}" else: expected = "" raise ValueError( diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 3a7635f6cce..1842d6e3fc3 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2111,7 +2111,7 @@ def test_encoding_enum__no_fill_value(self, recwarn): fill_value=None, ) v[:] = 1 - with open_dataset(tmp_file) as original: + with open_dataset(tmp_file, engine="netcdf4") as original: save_kwargs = {} # We don't expect any errors. 
# This is effectively a void context manager @@ -2163,7 +2163,7 @@ def test_encoding_enum__multiple_variable_with_enum(self): "time", fill_value=255, ) - with open_dataset(tmp_file) as original: + with open_dataset(tmp_file, engine="netcdf4") as original: save_kwargs = {} if self.engine == "h5netcdf" and not has_h5netcdf_1_4_0_or_above: save_kwargs["invalid_netcdf"] = True @@ -2212,7 +2212,7 @@ def test_encoding_enum__error_multiple_variable_with_changing_enum(self): "time", fill_value=255, ) - with open_dataset(tmp_file) as original: + with open_dataset(tmp_file, engine="netcdf4") as original: assert ( original.clouds.encoding["dtype"].metadata == original.tifa.encoding["dtype"].metadata @@ -6773,6 +6773,7 @@ def _assert_no_dates_out_of_range_warning(record): @requires_scipy_or_netCDF4 +@pytest.mark.filterwarnings("ignore:deallocating CachingFileManager") @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) def test_use_cftime_standard_calendar_default_in_range(calendar) -> None: x = [0, 1] @@ -7276,6 +7277,7 @@ def _create_nczarr(self, filename): # https://github.com/Unidata/netcdf-c/issues/2259 ds = ds.drop_vars("dim3") + # engine="netcdf4" is not required for backwards compatibility ds.to_netcdf(f"file://{filename}#mode=nczarr") return ds diff --git a/xarray/tests/test_backends_api.py b/xarray/tests/test_backends_api.py index 2fc1c8faa6c..5efb0c50d46 100644 --- a/xarray/tests/test_backends_api.py +++ b/xarray/tests/test_backends_api.py @@ -1,5 +1,6 @@ from __future__ import annotations +import io import re import sys from numbers import Number @@ -23,20 +24,46 @@ @requires_scipy @requires_h5netcdf def test_get_default_netcdf_write_engine() -> None: - engine = get_default_netcdf_write_engine(format=None, to_fileobject=False) - assert engine == "netcdf4" + engine = get_default_netcdf_write_engine("", format=None) + assert engine == "h5netcdf" + + engine = get_default_netcdf_write_engine("", format="NETCDF4") + assert engine == "h5netcdf" - engine = 
get_default_netcdf_write_engine(format="NETCDF4", to_fileobject=False) + engine = get_default_netcdf_write_engine("", format="NETCDF4_CLASSIC") assert engine == "netcdf4" - engine = get_default_netcdf_write_engine(format="NETCDF4", to_fileobject=True) + engine = get_default_netcdf_write_engine(io.BytesIO(), format="NETCDF4") assert engine == "h5netcdf" - engine = get_default_netcdf_write_engine( - format="NETCDF3_CLASSIC", to_fileobject=True - ) + engine = get_default_netcdf_write_engine("", format="NETCDF3_CLASSIC") + assert engine == "scipy" + + engine = get_default_netcdf_write_engine(io.BytesIO(), format="NETCDF3_CLASSIC") assert engine == "scipy" + engine = get_default_netcdf_write_engine("path.zarr#mode=nczarr", format=None) + assert engine == "netcdf4" + + with xr.set_options(netcdf_engine_order=["netcdf4", "scipy", "h5netcdf"]): + engine = get_default_netcdf_write_engine("", format=None) + assert engine == "netcdf4" + + engine = get_default_netcdf_write_engine("", format="NETCDF4") + assert engine == "netcdf4" + + engine = get_default_netcdf_write_engine("", format="NETCDF4_CLASSIC") + assert engine == "netcdf4" + + engine = get_default_netcdf_write_engine(io.BytesIO(), format="NETCDF4") + assert engine == "h5netcdf" + + engine = get_default_netcdf_write_engine("", format="NETCDF3_CLASSIC") + assert engine == "netcdf4" + + engine = get_default_netcdf_write_engine(io.BytesIO(), format="NETCDF3_CLASSIC") + assert engine == "scipy" + @requires_h5netcdf def test_default_engine_h5netcdf(monkeypatch): @@ -46,17 +73,31 @@ def test_default_engine_h5netcdf(monkeypatch): monkeypatch.delitem(sys.modules, "scipy", raising=False) monkeypatch.setattr(sys, "meta_path", []) - engine = get_default_netcdf_write_engine(format=None, to_fileobject=False) + engine = get_default_netcdf_write_engine("", format=None) assert engine == "h5netcdf" with pytest.raises( ValueError, match=re.escape( "cannot write NetCDF files with format='NETCDF3_CLASSIC' because " - "none of the suitable 
backend libraries (netCDF4, scipy) are installed" + "none of the suitable backend libraries (scipy, netCDF4) are installed" + ), + ): + get_default_netcdf_write_engine("", format="NETCDF3_CLASSIC") + + +def test_default_engine_nczarr_no_netcdf4_python(monkeypatch): + monkeypatch.delitem(sys.modules, "netCDF4", raising=False) + monkeypatch.setattr(sys, "meta_path", []) + + with pytest.raises( + ValueError, + match=re.escape( + "cannot write NetCDF files in NCZarr format because " + "none of the suitable backend libraries (netCDF4) are installed" ), ): - get_default_netcdf_write_engine(format="NETCDF3_CLASSIC", to_fileobject=False) + get_default_netcdf_write_engine("#mode=nczarr", format=None) def test_custom_engine() -> None: diff --git a/xarray/tests/test_backends_datatree.py b/xarray/tests/test_backends_datatree.py index 85c7c06ee0e..5b2b93f9ade 100644 --- a/xarray/tests/test_backends_datatree.py +++ b/xarray/tests/test_backends_datatree.py @@ -203,8 +203,8 @@ def _unaligned_datatree_zarr(zarr_format: Literal[2, 3]) -> Path: yield _unaligned_datatree_zarr -class DatatreeIOBase: - engine: T_DataTreeNetcdfEngine | None = None +class NetCDFIOBase: + engine: T_DataTreeNetcdfEngine | None def test_to_netcdf(self, tmpdir, simple_datatree): filepath = tmpdir / "test.nc" @@ -327,10 +327,6 @@ def test_default_write_engine(self, tmpdir, simple_datatree, monkeypatch): original_dt = simple_datatree original_dt.to_netcdf(filepath) # should not raise - -class NetCDFIOBase(DatatreeIOBase): - engine: T_DataTreeNetcdfEngine | None - @requires_dask def test_open_datatree_chunks(self, tmpdir) -> None: filepath = tmpdir / "test.nc" @@ -354,13 +350,13 @@ def test_open_datatree_chunks(self, tmpdir) -> None: assert_chunks_equal(tree, original_tree, enforce_dask=True) - # def test_roundtrip_via_memoryview(self, simple_datatree) -> None: - # original_dt = simple_datatree - # memview = original_dt.to_netcdf(engine=self.engine) - # roundtrip_dt = load_datatree(memview, engine=self.engine) - 
# assert_equal(original_dt, roundtrip_dt) + def test_roundtrip_via_memoryview(self, simple_datatree) -> None: + original_dt = simple_datatree + memview = original_dt.to_netcdf(engine=self.engine) + roundtrip_dt = load_datatree(memview, engine=self.engine) + assert_equal(original_dt, roundtrip_dt) - def test_to_bytes_compute_false(self, simple_datatree) -> None: + def test_to_memoryview_compute_false(self, simple_datatree) -> None: original_dt = simple_datatree with pytest.raises( NotImplementedError, @@ -385,6 +381,16 @@ def test_open_datatree_specific_group(self, tmpdir, simple_datatree) -> None: class TestGenericNetCDFIO(NetCDFIOBase): engine: T_DataTreeNetcdfEngine | None = None + @requires_netCDF4 + def test_open_netcdf3(self, tmpdir) -> None: + filepath = tmpdir / "test.nc" + ds = xr.Dataset({"foo": 1}) + ds.to_netcdf(filepath, format="NETCDF3_CLASSIC") + + expected_dt = DataTree(ds) + roundtrip_dt = load_datatree(filepath) # must use netCDF4 engine + assert_equal(expected_dt, roundtrip_dt) + @requires_h5netcdf @requires_netCDF4 def test_memoryview_write_h5netcdf_read_netcdf4(self, simple_datatree) -> None: diff --git a/xarray/tests/test_options.py b/xarray/tests/test_options.py index 8ad1cbe11be..315ae6a9c5a 100644 --- a/xarray/tests/test_options.py +++ b/xarray/tests/test_options.py @@ -1,5 +1,7 @@ from __future__ import annotations +import re + import pytest import xarray @@ -69,6 +71,19 @@ def test_nested_options() -> None: assert OPTIONS["display_width"] == original +def test_netcdf_engine_order() -> None: + original = OPTIONS["netcdf_engine_order"] + with pytest.raises( + ValueError, + match=re.escape( + "option 'netcdf_engine_order' given an invalid value: ['invalid']. 
" + "Expected a subset of ['h5netcdf', 'netcdf4', 'scipy']" + ), + ): + xarray.set_options(netcdf_engine_order=["invalid"]) + assert OPTIONS["netcdf_engine_order"] == original + + def test_display_style() -> None: original = "html" assert OPTIONS["display_style"] == original diff --git a/xarray/tests/test_plugins.py b/xarray/tests/test_plugins.py index c23a5487bd6..a961be74b82 100644 --- a/xarray/tests/test_plugins.py +++ b/xarray/tests/test_plugins.py @@ -8,6 +8,7 @@ import pytest from xarray.backends import common, plugins +from xarray.core.options import OPTIONS from xarray.tests import ( has_h5netcdf, has_netCDF4, @@ -171,7 +172,7 @@ def test_build_engines_sorted() -> None: backend_entrypoints = list(plugins.build_engines(dummy_pkg_entrypoints)) indices = [] - for be in plugins.NETCDF_BACKENDS_ORDER: + for be in OPTIONS["netcdf_engine_order"]: try: index = backend_entrypoints.index(be) backend_entrypoints.pop(index)