Allow configuring open_dataset via backend instances #8520

Status: Open. Wants to merge 36 commits into the base branch `main`; the diff below shows changes from 23 of the 36 commits.

Commits
c32beaf
initial try with netcdf4 backend
headtr1ck Dec 4, 2023
2a55aa8
Merge branch 'main' into backend-instances
headtr1ck Dec 6, 2023
fbed422
move encoding options back to open_dataset + some typing
headtr1ck Dec 6, 2023
131e8e0
add an init to h5netcdf backend
headtr1ck Dec 6, 2023
7bd75af
add init to zarr backend
headtr1ck Dec 6, 2023
d02e653
remove uneccessary try-except in type-checking only imports
headtr1ck Dec 6, 2023
cda2fe8
align netcdf4 and h5netcdf backends
headtr1ck Dec 7, 2023
0242fed
add init to pydap backend
headtr1ck Dec 7, 2023
fa892fb
add init to scipy backend and disallow lock=True
headtr1ck Dec 7, 2023
7188a86
rename ZarrWriteModes to ZarrOpenModes
headtr1ck Dec 7, 2023
cb9ee06
type file manager
headtr1ck Dec 8, 2023
2f929f6
make FileManager generic
headtr1ck Dec 8, 2023
66b123d
add netcdf formats to types
headtr1ck Dec 8, 2023
a919de6
more typing and netcdf4 docstrings
headtr1ck Dec 8, 2023
1df97f9
add docstring to h5netcdf backend
headtr1ck Dec 8, 2023
e7519ca
improve typing of scipy backend
headtr1ck Dec 8, 2023
4c26d8f
add docstring to scipy backend
headtr1ck Dec 8, 2023
257983d
fix invalid syntax
headtr1ck Dec 8, 2023
bebef48
add some type hints to zarr backend
headtr1ck Dec 9, 2023
4c1b13f
type open_zarr
headtr1ck Dec 10, 2023
4aca90d
type store backend
headtr1ck Dec 10, 2023
1273430
add open_dataset_parameters
headtr1ck Dec 10, 2023
28e72d7
finish typing pydap backend
headtr1ck Dec 10, 2023
792eb36
docstring fixes
headtr1ck Dec 10, 2023
a4db518
add fallback for TypeAlias import
headtr1ck Dec 10, 2023
498a1ba
fix failing zarr test
headtr1ck Dec 10, 2023
b91b190
improve typing of backend tests
headtr1ck Dec 11, 2023
3b795c8
add tests for open_dataset with engine instances
headtr1ck Dec 11, 2023
3a406a9
Merge branch 'main' into backend-instances
headtr1ck Dec 11, 2023
f6638f2
fix docstrings
headtr1ck Dec 12, 2023
e85cd3d
use dataclasses
headtr1ck Dec 12, 2023
9b611be
Merge branch 'main' into backend-instances
headtr1ck Jan 14, 2024
6b3e1f9
improve typing of locks
headtr1ck Jan 14, 2024
ee973d4
remove unneccessary ImportError catches in type checking
headtr1ck Jan 14, 2024
041c357
Merge branch 'main' into backend-instances
headtr1ck Jan 14, 2024
cec1a88
raise Error if unsupported kwarg is passed
headtr1ck Jan 14, 2024
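Taken together, these commits allow a configured backend instance to be passed directly as `engine`, instead of threading backend-specific options through `**kwargs`. Below is a minimal sketch of the intended usage, assuming the PR gives `NetCDF4BackendEntrypoint` a constructor that accepts open-time options such as `group`; the exact constructor signature is inferred from the commit messages, not confirmed:

```python
import xarray as xr
from xarray.backends import NetCDF4BackendEntrypoint

# Hypothetical usage sketch: configure the backend once, then pass the
# instance as ``engine`` instead of engine="netcdf4" plus loose kwargs.
# The constructor argument ``group`` is assumed, not confirmed by this PR.
backend = NetCDF4BackendEntrypoint(group="measurements")
ds = xr.open_dataset("observations.nc", engine=backend)
```

Passing the class itself, or the plain string name, keeps working; the instance form simply moves backend-specific configuration into the constructor.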
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
xarray/backends/api.py: 38 additions, 34 deletions
@@ -39,7 +39,7 @@
from xarray.core.dataset import Dataset, _get_chunk, _maybe_chunk
from xarray.core.indexes import Index
from xarray.core.parallelcompat import guess_chunkmanager
from xarray.core.types import ZarrWriteModes
from xarray.core.types import ZarrOpenModes
from xarray.core.utils import is_remote_uri

if TYPE_CHECKING:
@@ -55,20 +55,20 @@
CompatOptions,
JoinOptions,
NestedSequence,
NetcdfFormats,
T_Chunks,
)

T_NetcdfEngine = Literal["netcdf4", "scipy", "h5netcdf"]
T_Engine = Union[
T_NetcdfEngine,
Literal["pydap", "pynio", "zarr"],
BackendEntrypoint,
type[BackendEntrypoint],
str, # no nice typing support for custom backends
None,
]
T_NetcdfTypes = Literal[
"NETCDF4", "NETCDF4_CLASSIC", "NETCDF3_64BIT", "NETCDF3_CLASSIC"
]


DATAARRAY_NAME = "__xarray_dataarray_name__"
DATAARRAY_VARIABLE = "__xarray_dataarray_variable__"
@@ -421,11 +421,10 @@ def open_dataset(
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \
"zarr", None}, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
or instance or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
"netcdf4".
chunks : int, dict, 'auto' or None, optional
If chunks is provided, it is used to load the new dataset into dask
arrays. ``chunks=-1`` loads the dataset with dask using a single
@@ -595,8 +594,8 @@ def open_dataset(
def open_dataarray(
filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
*,
engine: T_Engine | None = None,
chunks: T_Chunks | None = None,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | None = None,
@@ -628,7 +627,7 @@ def open_dataarray(
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \
"zarr", None}, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
or instance or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4".
@@ -707,16 +706,20 @@ def open_dataarray(
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce the underlying data array to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEnetryPoint` system.
Defaults to 'dask' if installed, else whatever is registered via the
`ChunkManagerEnetryPoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array`
method used to create chunked arrays, via whichever chunk manager is
specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking,
additional kwargs will be passed to :py:func:`dask.array.from_array`.
Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
equivalent to `**kwargs`. Alternatively pass a configured Backend object
as engine.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
@@ -729,7 +732,8 @@
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy", "pynio".

See engine open function for kwargs accepted by each specific engine.
See engine open function for kwargs accepted by each specific engine or
create an instance of the Backend and configure it in the constructor.

Notes
-----
@@ -790,7 +794,7 @@ def open_dataarray(

def open_mfdataset(
paths: str | NestedSequence[str | os.PathLike],
chunks: T_Chunks | None = None,
chunks: T_Chunks = None,
concat_dim: str
| DataArray
| Index
@@ -800,7 +804,7 @@
| None = None,
compat: CompatOptions = "no_conflicts",
preprocess: Callable[[Dataset], Dataset] | None = None,
engine: T_Engine | None = None,
engine: T_Engine = None,
data_vars: Literal["all", "minimal", "different"] | list[str] = "all",
coords="different",
combine: Literal["by_coords", "nested"] = "by_coords",
@@ -868,7 +872,7 @@ def open_mfdataset(
``ds.encoding["source"]``.
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \
"zarr", None}, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
or instance or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4".
@@ -1092,7 +1096,7 @@ def to_netcdf(
dataset: Dataset,
path_or_file: str | os.PathLike | None = None,
mode: Literal["w", "a"] = "w",
format: T_NetcdfTypes | None = None,
format: NetcdfFormats | None = None,
group: str | None = None,
engine: T_NetcdfEngine | None = None,
encoding: Mapping[Hashable, Mapping[str, Any]] | None = None,
@@ -1111,7 +1115,7 @@
dataset: Dataset,
path_or_file: None = None,
mode: Literal["w", "a"] = "w",
format: T_NetcdfTypes | None = None,
format: NetcdfFormats | None = None,
group: str | None = None,
engine: T_NetcdfEngine | None = None,
encoding: Mapping[Hashable, Mapping[str, Any]] | None = None,
@@ -1129,7 +1133,7 @@
dataset: Dataset,
path_or_file: str | os.PathLike,
mode: Literal["w", "a"] = "w",
format: T_NetcdfTypes | None = None,
format: NetcdfFormats | None = None,
group: str | None = None,
engine: T_NetcdfEngine | None = None,
encoding: Mapping[Hashable, Mapping[str, Any]] | None = None,
@@ -1148,7 +1152,7 @@
dataset: Dataset,
path_or_file: str | os.PathLike,
mode: Literal["w", "a"] = "w",
format: T_NetcdfTypes | None = None,
format: NetcdfFormats | None = None,
group: str | None = None,
engine: T_NetcdfEngine | None = None,
encoding: Mapping[Hashable, Mapping[str, Any]] | None = None,
@@ -1167,7 +1171,7 @@
dataset: Dataset,
path_or_file: str | os.PathLike,
mode: Literal["w", "a"] = "w",
format: T_NetcdfTypes | None = None,
format: NetcdfFormats | None = None,
group: str | None = None,
engine: T_NetcdfEngine | None = None,
encoding: Mapping[Hashable, Mapping[str, Any]] | None = None,
@@ -1186,7 +1190,7 @@
dataset: Dataset,
path_or_file: str | os.PathLike,
mode: Literal["w", "a"] = "w",
format: T_NetcdfTypes | None = None,
format: NetcdfFormats | None = None,
group: str | None = None,
engine: T_NetcdfEngine | None = None,
encoding: Mapping[Hashable, Mapping[str, Any]] | None = None,
@@ -1204,7 +1208,7 @@
dataset: Dataset,
path_or_file: str | os.PathLike | None,
mode: Literal["w", "a"] = "w",
format: T_NetcdfTypes | None = None,
format: NetcdfFormats | None = None,
group: str | None = None,
engine: T_NetcdfEngine | None = None,
encoding: Mapping[Hashable, Mapping[str, Any]] | None = None,
@@ -1220,7 +1224,7 @@
dataset: Dataset,
path_or_file: str | os.PathLike | None = None,
mode: Literal["w", "a"] = "w",
format: T_NetcdfTypes | None = None,
format: NetcdfFormats | None = None,
group: str | None = None,
engine: T_NetcdfEngine | None = None,
encoding: Mapping[Hashable, Mapping[str, Any]] | None = None,
@@ -1633,14 +1637,14 @@ def to_zarr(
dataset: Dataset,
store: MutableMapping | str | os.PathLike[str] | None = None,
chunk_store: MutableMapping | str | os.PathLike | None = None,
mode: ZarrWriteModes | None = None,
mode: ZarrOpenModes | None = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
*,
compute: Literal[True] = True,
consolidated: bool | None = None,
append_dim: Hashable | None = None,
append_dim: str | None = None,
region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None,
safe_chunks: bool = True,
storage_options: dict[str, str] | None = None,
@@ -1657,14 +1661,14 @@
dataset: Dataset,
store: MutableMapping | str | os.PathLike[str] | None = None,
chunk_store: MutableMapping | str | os.PathLike | None = None,
mode: ZarrWriteModes | None = None,
mode: ZarrOpenModes | None = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
*,
compute: Literal[False],
consolidated: bool | None = None,
append_dim: Hashable | None = None,
append_dim: str | None = None,
region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None,
safe_chunks: bool = True,
storage_options: dict[str, str] | None = None,
@@ -1679,14 +1683,14 @@
dataset: Dataset,
store: MutableMapping | str | os.PathLike[str] | None = None,
chunk_store: MutableMapping | str | os.PathLike | None = None,
mode: ZarrWriteModes | None = None,
mode: ZarrOpenModes | None = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
*,
compute: bool = True,
consolidated: bool | None = None,
append_dim: Hashable | None = None,
append_dim: str | None = None,
region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None,
safe_chunks: bool = True,
storage_options: dict[str, str] | None = None,
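The `to_netcdf` and `to_zarr` signatures above rely on `typing.overload` with `Literal` types so a static checker can tie the `compute` flag to the return type: an eager write returns the finished result, while a deferred write returns a handle to compute later. A self-contained sketch of that pattern, using a stand-in `Delayed` class rather than xarray's real types:

```python
from __future__ import annotations

from typing import Literal, overload


class Delayed:
    """Stand-in for a deferred-computation handle (e.g. dask's Delayed)."""


@overload
def to_store(data: bytes, *, compute: Literal[True] = ...) -> None: ...
@overload
def to_store(data: bytes, *, compute: Literal[False]) -> Delayed: ...


def to_store(data: bytes, *, compute: bool = True) -> Delayed | None:
    if compute:
        # Eager path: write immediately; nothing for the caller to hold on to.
        return None
    # Deferred path: return a token the caller can compute later.
    return Delayed()
```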
xarray/backends/common.py: 27 additions, 16 deletions
@@ -6,30 +6,41 @@
import traceback
from collections.abc import Iterable
from glob import glob
from typing import TYPE_CHECKING, Any, ClassVar
from typing import TYPE_CHECKING, ClassVar, TypeVar, overload

import numpy as np

from xarray.conventions import cf_encoder
from xarray.core import indexing
from xarray.core.parallelcompat import get_chunked_array_type
from xarray.core.pycompat import is_chunked_array
from xarray.core.types import T_BackendDatasetLike
from xarray.core.utils import FrozenDict, NdimSizeLenMixin, is_remote_uri

if TYPE_CHECKING:
from io import BufferedIOBase

from xarray.core.dataset import Dataset
from xarray.core.types import NestedSequence
from xarray.core.types import NestedSequence, T_XarrayCanOpen

# Create a logger object, but don't add any handlers. Leave that to user code.
logger = logging.getLogger(__name__)


NONE_VAR_NAME = "__values__"

T = TypeVar("T")


@overload
def _normalize_path(path: os.PathLike) -> str: # type: ignore[overload-overlap]
...


@overload
def _normalize_path(path: T) -> T:
...


def _normalize_path(path):
def _normalize_path(path: os.PathLike | T) -> str | T:
"""
Normalize pathlikes to string.

@@ -52,9 +63,9 @@ def _normalize_path(path):
path = os.fspath(path)

if isinstance(path, str) and not is_remote_uri(path):
path = os.path.abspath(os.path.expanduser(path))
return os.path.abspath(os.path.expanduser(path))

return path
return path # type: ignore[return-value]


def _find_absolute_paths(
@@ -127,9 +138,9 @@ def _decode_variable_name(name):
return name


def find_root_and_group(ds):
def find_root_and_group(ds: T_BackendDatasetLike) -> tuple[T_BackendDatasetLike, str]:
"""Find the root and group name of a netCDF4/h5netcdf dataset."""
hierarchy = ()
hierarchy: tuple[str, ...] = ()
while ds.parent is not None:
hierarchy = (ds.name.split("/")[-1],) + hierarchy
ds = ds.parent
@@ -462,20 +473,21 @@ class BackendEntrypoint:
Attributes
----------

open_dataset_parameters : tuple, default: None
A list of ``open_dataset`` method parameters.
The setting of this attribute is not mandatory.
description : str, default: ""
A short string describing the engine.
The setting of this attribute is not mandatory.
url : str, default: ""
A string with the URL to the backend's documentation.
The setting of this attribute is not mandatory.
open_dataset_parameters : tuple, default: None
A list of ``open_dataset`` method parameters.
The setting of this attribute is only mandatory if the
open_dataset method contains *args or **kwargs.
"""

open_dataset_parameters: ClassVar[tuple | None] = None
description: ClassVar[str] = ""
url: ClassVar[str] = ""
open_dataset_parameters: ClassVar[tuple[str, ...] | None] = None

def __repr__(self) -> str:
txt = f"<{type(self).__name__}>"
@@ -487,10 +499,9 @@ def __repr__(self) -> str:

def open_dataset(
self,
filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
filename_or_obj: T_XarrayCanOpen,
*,
drop_variables: str | Iterable[str] | None = None,
**kwargs: Any,
) -> Dataset:
"""
Backend open_dataset method used by Xarray in :py:func:`~xarray.open_dataset`.
@@ -500,7 +511,7 @@ def open_dataset(

def guess_can_open(
self,
filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
filename_or_obj: T_XarrayCanOpen,
) -> bool:
"""
Backend open_dataset method used by Xarray in :py:func:`~xarray.open_dataset`.
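The `BackendEntrypoint` attributes documented above come together when writing a third-party engine. Below is a minimal sketch of a hypothetical backend; the class name, file format, and `delimiter` option are invented for illustration. Note that `open_dataset_parameters` is set explicitly because this `open_dataset` accepts `**kwargs`:

```python
import os

import xarray as xr
from xarray.backends import BackendEntrypoint


class CSVBackendEntrypoint(BackendEntrypoint):
    # Optional metadata used in error messages and documentation.
    description = "Open simple CSV files as a Dataset (illustrative only)"
    url = "https://example.com/csv-backend-docs"
    # Mandatory here, because open_dataset below takes **kwargs.
    open_dataset_parameters = ("filename_or_obj", "drop_variables", "delimiter")

    def open_dataset(self, filename_or_obj, *, drop_variables=None, **kwargs):
        delimiter = kwargs.get("delimiter", ",")
        drop = (
            {drop_variables}
            if isinstance(drop_variables, str)
            else set(drop_variables or ())
        )
        with open(filename_or_obj) as f:
            header = f.readline().strip().split(delimiter)
            rows = [line.strip().split(delimiter) for line in f if line.strip()]
        data_vars = {
            name: ("row", [float(row[i]) for row in rows])
            for i, name in enumerate(header)
            if name not in drop
        }
        return xr.Dataset(data_vars)

    def guess_can_open(self, filename_or_obj) -> bool:
        return str(filename_or_obj).endswith(".csv")


# Usage sketch: the class form already works; with this PR, an instance
# configured via a constructor could be passed as ``engine`` as well.
# ds = xr.open_dataset("table.csv", engine=CSVBackendEntrypoint)
```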