Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions doc/user-guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -591,8 +591,8 @@ The library ``h5netcdf`` allows writing some dtypes that aren't
allowed in netCDF4 (see
`h5netcdf documentation <https://github.com/h5netcdf/h5netcdf#invalid-netcdf-files>`_).
This feature is available through :py:meth:`DataArray.to_netcdf` and
:py:meth:`Dataset.to_netcdf` when used with ``engine="h5netcdf"``
and currently raises a warning unless ``invalid_netcdf=True`` is set.
:py:meth:`Dataset.to_netcdf` when used with ``engine="h5netcdf"``, only if
``invalid_netcdf=True`` is explicitly set.

.. warning::

Expand Down
9 changes: 6 additions & 3 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,12 @@ Breaking changes
dataset in-place. (:issue:`10167`)
By `Maximilian Roos <https://github.com/max-sixty>`_.

- The default ``engine`` when reading/writing netCDF files in-memory is now
netCDF4, consistent with Xarray's default ``engine`` when reading/writing netCDF
files to disk (:pull:`10624`).
- The default ``engine`` when reading/writing netCDF files is now h5netcdf
or scipy, which are typically faster than the prior default of netCDF4-python.
You can control this default behavior explicitly via the new
``netcdf_engine_order`` parameter in :py:func:`~xarray.set_options`, e.g.,
``xr.set_options(netcdf_engine_order=['netcdf4', 'scipy', 'h5netcdf'])`` to
restore the prior defaults (:issue:`10657`).
By `Stephan Hoyer <https://github.com/shoyer>`_.

Deprecations
Expand Down
36 changes: 22 additions & 14 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,9 +415,10 @@ def open_dataset(
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
is chosen based on available dependencies, by default preferring
"h5netcdf" over "scipy" over "netcdf4" (customizable via
``netcdf_engine_order`` in ``xarray.set_options()``). A custom backend
class (a subclass of ``BackendEntrypoint``) can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.

Expand Down Expand Up @@ -658,8 +659,10 @@ def open_dataarray(
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4".
is chosen based on available dependencies, by default preferring
"h5netcdf" over "scipy" over "netcdf4" (customizable via
``netcdf_engine_order`` in ``xarray.set_options()``). A custom backend
class (a subclass of ``BackendEntrypoint``) can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.

Expand Down Expand Up @@ -882,9 +885,10 @@ def open_datatree(
engine : {"netcdf4", "h5netcdf", "zarr", None}, \
installed backend or xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
is chosen based on available dependencies, by default preferring
"h5netcdf" over "netcdf4" (customizable via ``netcdf_engine_order`` in
``xarray.set_options()``). A custom backend class (a subclass of
``BackendEntrypoint``) can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.

Expand Down Expand Up @@ -1040,7 +1044,7 @@ def open_datatree(
kwargs.update(backend_kwargs)

if engine is None:
engine = plugins.guess_engine(filename_or_obj)
engine = plugins.guess_engine(filename_or_obj, must_support_groups=True)

if from_array_kwargs is None:
from_array_kwargs = {}
Expand Down Expand Up @@ -1126,8 +1130,10 @@ def open_groups(
engine : {"netcdf4", "h5netcdf", "zarr", None}, \
installed backend or xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
is chosen based on available dependencies, by default preferring
"h5netcdf" over "netcdf4" (customizable via ``netcdf_engine_order`` in
``xarray.set_options()``). A custom backend class (a subclass of
``BackendEntrypoint``) can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
Expand Down Expand Up @@ -1283,7 +1289,7 @@ def open_groups(
kwargs.update(backend_kwargs)

if engine is None:
engine = plugins.guess_engine(filename_or_obj)
engine = plugins.guess_engine(filename_or_obj, must_support_groups=True)

if from_array_kwargs is None:
from_array_kwargs = {}
Expand Down Expand Up @@ -1443,8 +1449,10 @@ def open_mfdataset(
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4".
is chosen based on available dependencies, by default preferring
"h5netcdf" over "scipy" over "netcdf4" (customizable via
``netcdf_engine_order`` in ``xarray.set_options()``). A custom backend
class (a subclass of ``BackendEntrypoint``) can also be used.
data_vars : {"minimal", "different", "all"} or list of str, default: "all"
These data variables will be concatenated together:
* "minimal": Only data variables in which the dimension already
Expand Down
18 changes: 15 additions & 3 deletions xarray/backends/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,18 @@


@overload
def _normalize_path(path: str | os.PathLike) -> str: ...
def _normalize_path(path: os.PathLike) -> str: ...


@overload
def _normalize_path(path: str) -> str: ...


@overload
def _normalize_path(path: T) -> T: ...


def _normalize_path(path: str | os.PathLike | T) -> str | T:
def _normalize_path(path: os.PathLike | str | T) -> str | T:
"""
Normalize pathlikes to string.

Expand All @@ -85,7 +89,7 @@ def _normalize_path(path: str | os.PathLike | T) -> str | T:
if isinstance(path, str) and not is_remote_uri(path):
path = os.path.abspath(os.path.expanduser(path))

return path # type:ignore [return-value]
return path # type: ignore[return-value]


@overload
Expand Down Expand Up @@ -749,11 +753,15 @@ class BackendEntrypoint:
url : str, default: ""
A string with the URL to the backend's documentation.
The setting of this attribute is not mandatory.
supports_groups : bool, default: False
Whether the backend supports opening groups (via open_datatree and
open_groups_as_dict) or not.
"""

open_dataset_parameters: ClassVar[tuple | None] = None
description: ClassVar[str] = ""
url: ClassVar[str] = ""
supports_groups: ClassVar[bool] = False

def __repr__(self) -> str:
txt = f"<{type(self).__name__}>"
Expand Down Expand Up @@ -808,6 +816,8 @@ def open_datatree(
) -> DataTree:
"""
Backend open_datatree method used by Xarray in :py:func:`~xarray.open_datatree`.

If implemented, set the class variable supports_groups to True.
"""

raise NotImplementedError()
Expand All @@ -830,6 +840,8 @@ def open_groups_as_dict(
This function exists to provide a universal way to open all groups in a file,
before applying any additional consistency checks or requirements necessary
to create a `DataTree` object (typically done using :py:meth:`~xarray.DataTree.from_dict`).

If implemented, set the class variable supports_groups to True.
"""

raise NotImplementedError()
Expand Down
1 change: 1 addition & 0 deletions xarray/backends/h5netcdf_.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,7 @@ class H5netcdfBackendEntrypoint(BackendEntrypoint):
"Open netCDF (.nc, .nc4 and .cdf) and most HDF5 files using h5netcdf in Xarray"
)
url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.H5netcdfBackendEntrypoint.html"
supports_groups = True

def guess_can_open(self, filename_or_obj: T_PathFileOrDataStore) -> bool:
filename_or_obj = _normalize_filename_or_obj(filename_or_obj)
Expand Down
1 change: 1 addition & 0 deletions xarray/backends/netCDF4_.py
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,7 @@ class NetCDF4BackendEntrypoint(BackendEntrypoint):
"Open netCDF (.nc, .nc4 and .cdf) and most HDF5 files using netCDF4 in Xarray"
)
url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.NetCDF4BackendEntrypoint.html"
supports_groups = True

def guess_can_open(self, filename_or_obj: T_PathFileOrDataStore) -> bool:
if isinstance(filename_or_obj, str) and is_remote_uri(filename_or_obj):
Expand Down
21 changes: 17 additions & 4 deletions xarray/backends/plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from typing import TYPE_CHECKING, Any

from xarray.backends.common import BACKEND_ENTRYPOINTS, BackendEntrypoint
from xarray.core.options import OPTIONS
from xarray.core.utils import module_available

if TYPE_CHECKING:
Expand All @@ -18,8 +19,6 @@
from xarray.backends.common import AbstractDataStore
from xarray.core.types import ReadBuffer

NETCDF_BACKENDS_ORDER = ["netcdf4", "h5netcdf", "scipy"]


def remove_duplicates(entrypoints: EntryPoints) -> list[EntryPoint]:
# sort and group entrypoints by name
Expand Down Expand Up @@ -91,8 +90,8 @@ def set_missing_parameters(
def sort_backends(
backend_entrypoints: dict[str, type[BackendEntrypoint]],
) -> dict[str, type[BackendEntrypoint]]:
ordered_backends_entrypoints = {}
for be_name in NETCDF_BACKENDS_ORDER:
ordered_backends_entrypoints: dict[str, type[BackendEntrypoint]] = {}
for be_name in OPTIONS["netcdf_engine_order"]:
if backend_entrypoints.get(be_name) is not None:
ordered_backends_entrypoints[be_name] = backend_entrypoints.pop(be_name)
ordered_backends_entrypoints.update(
Expand Down Expand Up @@ -144,10 +143,13 @@ def guess_engine(
| bytes
| memoryview
| AbstractDataStore,
must_support_groups: bool = False,
) -> str | type[BackendEntrypoint]:
engines = list_engines()

for engine, backend in engines.items():
if must_support_groups and not backend.supports_groups:
continue
try:
if backend.guess_can_open(store_spec):
return engine
Expand All @@ -162,6 +164,8 @@ def guess_engine(
for engine, (_, backend_cls) in BACKEND_ENTRYPOINTS.items():
try:
backend = backend_cls()
if must_support_groups and not backend.supports_groups:
continue
if backend.guess_can_open(store_spec):
compatible_engines.append(engine)
except Exception:
Expand All @@ -180,6 +184,15 @@ def guess_engine(
"https://docs.xarray.dev/en/stable/getting-started-guide/installing.html\n"
"https://docs.xarray.dev/en/stable/user-guide/io.html"
)
elif must_support_groups:
error_msg = (
"xarray is unable to open this file because it has no currently "
"installed IO backends that support reading groups (e.g., h5netcdf "
"or netCDF4-python). Xarray's read/write support requires "
"installing optional IO dependencies, see:\n"
"https://docs.xarray.dev/en/stable/getting-started-guide/installing.html\n"
"https://docs.xarray.dev/en/stable/user-guide/io"
)
else:
error_msg = (
"xarray is unable to open this file because it has no currently "
Expand Down
2 changes: 1 addition & 1 deletion xarray/backends/scipy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ def _normalize_filename_or_obj(
if isinstance(filename_or_obj, bytes | memoryview):
return io.BytesIO(filename_or_obj)
else:
return _normalize_path(filename_or_obj) # type: ignore[return-value]
return _normalize_path(filename_or_obj)


class ScipyBackendEntrypoint(BackendEntrypoint):
Expand Down
Loading
Loading