From 986cd28388f22b05d58a06b22b9f0bd96f9b0665 Mon Sep 17 00:00:00 2001 From: aladinor Date: Thu, 16 Apr 2026 11:53:48 -0500 Subject: [PATCH 01/14] Add glob pattern utility functions for group filtering Add _is_glob_pattern, _filter_group_paths, and _resolve_group_and_filter to common.py for detecting and applying glob patterns to group paths. --- xarray/backends/common.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index f2580ea2a43..9f90a02b5bb 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -249,6 +249,37 @@ def _iter_nc_groups(root, parent="/"): yield from _iter_nc_groups(group, parent=gpath) +def _is_glob_pattern(pattern: str) -> bool: + return any(c in pattern for c in "*?[") + + +def _filter_group_paths(group_paths: Iterable[str], pattern: str) -> list[str]: + from xarray.core.treenode import NodePath + + matched: set[str] = {"/"} + for path in group_paths: + np_ = NodePath(path) + if np_.match(pattern): + matched.add(path) + for parent in np_.parents: + p = str(parent) + if p: + matched.add(p) + + return [p for p in group_paths if p in matched] + + +def _resolve_group_and_filter( + group: str | None, + all_group_paths: list[str], +) -> tuple[str | None, list[str]]: + if group is None: + return None, all_group_paths + if _is_glob_pattern(group): + return None, _filter_group_paths(all_group_paths, group) + return group, all_group_paths + + def find_root_and_group(ds): """Find the root and group name of a netCDF4/h5netcdf dataset.""" hierarchy = () From 6d29b6dd56ce592cd2cffb1e5553b927bb184d80 Mon Sep 17 00:00:00 2001 From: aladinor Date: Thu, 16 Apr 2026 11:54:03 -0500 Subject: [PATCH 02/14] Add glob pattern filtering to h5netcdf backend Use _resolve_group_and_filter in open_groups_as_dict to support glob patterns in the group parameter for selective group loading. 
--- xarray/backends/h5netcdf_.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 9b828c8e236..006f41abeae 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -655,7 +655,11 @@ def open_groups_as_dict( open_kwargs: dict[str, Any] | None = None, **kwargs, ) -> dict[str, Dataset]: - from xarray.backends.common import _iter_nc_groups + from xarray.backends.common import ( + _is_glob_pattern, + _iter_nc_groups, + _resolve_group_and_filter, + ) from xarray.core.treenode import NodePath from xarray.core.utils import close_on_error @@ -664,10 +668,12 @@ def open_groups_as_dict( emit_phony_dims_warning, phony_dims = _check_phony_dims(phony_dims) filename_or_obj = _normalize_filename_or_obj(filename_or_obj) + + effective_group = None if (group and _is_glob_pattern(group)) else group store = H5NetCDFStore.open( filename_or_obj, format=format, - group=group, + group=effective_group, lock=lock, invalid_netcdf=invalid_netcdf, phony_dims=phony_dims, @@ -678,15 +684,17 @@ def open_groups_as_dict( open_kwargs=open_kwargs, ) - # Check for a group and make it a parent if it exists - if group: - parent = NodePath("/") / NodePath(group) + if effective_group: + parent = NodePath("/") / NodePath(effective_group) else: parent = NodePath("/") manager = store._manager + all_group_paths = list(_iter_nc_groups(store.ds, parent=parent)) + _, filtered_paths = _resolve_group_and_filter(group, all_group_paths) + groups_dict = {} - for path_group in _iter_nc_groups(store.ds, parent=parent): + for path_group in filtered_paths: group_store = H5NetCDFStore(manager, group=path_group, **kwargs) store_entrypoint = StoreBackendEntrypoint() with close_on_error(group_store): @@ -701,7 +709,7 @@ def open_groups_as_dict( decode_timedelta=decode_timedelta, ) - if group: + if effective_group: group_name = str(NodePath(path_group).relative_to(parent)) else: group_name = 
str(NodePath(path_group)) From 6fe80b381bbdc601aef192d65cfd5c4be78a500f Mon Sep 17 00:00:00 2001 From: aladinor Date: Thu, 16 Apr 2026 11:54:45 -0500 Subject: [PATCH 03/14] Add glob pattern filtering to netCDF4 backend Use _resolve_group_and_filter in open_groups_as_dict to support glob patterns in the group parameter for selective group loading. --- xarray/backends/netCDF4_.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 39dedd139c0..277f8c91a92 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -859,13 +859,19 @@ def open_groups_as_dict( autoclose=False, **kwargs, ) -> dict[str, Dataset]: - from xarray.backends.common import _iter_nc_groups + from xarray.backends.common import ( + _is_glob_pattern, + _iter_nc_groups, + _resolve_group_and_filter, + ) from xarray.core.treenode import NodePath filename_or_obj = _normalize_path(filename_or_obj) + + effective_group = None if (group and _is_glob_pattern(group)) else group store = NetCDF4DataStore.open( filename_or_obj, - group=group, + group=effective_group, format=format, clobber=clobber, diskless=diskless, @@ -875,15 +881,17 @@ def open_groups_as_dict( autoclose=autoclose, ) - # Check for a group and make it a parent if it exists - if group: - parent = NodePath("/") / NodePath(group) + if effective_group: + parent = NodePath("/") / NodePath(effective_group) else: parent = NodePath("/") manager = store._manager + all_group_paths = list(_iter_nc_groups(store.ds, parent=parent)) + _, filtered_paths = _resolve_group_and_filter(group, all_group_paths) + groups_dict = {} - for path_group in _iter_nc_groups(store.ds, parent=parent): + for path_group in filtered_paths: group_store = NetCDF4DataStore(manager, group=path_group, **kwargs) store_entrypoint = StoreBackendEntrypoint() with close_on_error(group_store): @@ -897,7 +905,7 @@ def open_groups_as_dict( use_cftime=use_cftime, 
decode_timedelta=decode_timedelta, ) - if group: + if effective_group: group_name = str(NodePath(path_group).relative_to(parent)) else: group_name = str(NodePath(path_group)) From 7a3e3bcf1edce845ee7abc9762007791cd50e3b2 Mon Sep 17 00:00:00 2001 From: aladinor Date: Thu, 16 Apr 2026 11:54:56 -0500 Subject: [PATCH 04/14] Add glob pattern filtering to zarr backend Use _resolve_group_and_filter in open_groups_as_dict to support glob patterns in the group parameter for selective group loading. --- xarray/backends/zarr.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index d9279dc2de9..bfbcd227f7e 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -1820,11 +1820,13 @@ def open_groups_as_dict( zarr_version=None, zarr_format=None, ) -> dict[str, Dataset]: + from xarray.backends.common import _is_glob_pattern, _resolve_group_and_filter + filename_or_obj = _normalize_path(filename_or_obj) - # Check for a group and make it a parent if it exists - if group: - parent = str(NodePath("/") / NodePath(group)) + effective_group = None if (group and _is_glob_pattern(group)) else group + if effective_group: + parent = str(NodePath("/") / NodePath(effective_group)) else: parent = str(NodePath("/")) @@ -1841,8 +1843,11 @@ def open_groups_as_dict( zarr_format=zarr_format, ) + _, filtered_paths = _resolve_group_and_filter(group, list(stores.keys())) + groups_dict = {} - for path_group, store in stores.items(): + for path_group in filtered_paths: + store = stores[path_group] store_entrypoint = StoreBackendEntrypoint() with close_on_error(store): @@ -1856,7 +1861,7 @@ def open_groups_as_dict( use_cftime=use_cftime, decode_timedelta=decode_timedelta, ) - if group: + if effective_group: group_name = str(NodePath(path_group).relative_to(parent)) else: group_name = str(NodePath(path_group)) From 16f9e12963e4530f86c7cd1a51d3ea5eb9f6c8b4 Mon Sep 17 00:00:00 2001 From: aladinor Date: Thu, 16 
Apr 2026 11:55:04 -0500 Subject: [PATCH 05/14] Document glob pattern support in open_datatree and open_groups Update docstrings for the group kwarg in open_datatree and open_groups to describe glob metacharacter behavior. --- xarray/backends/api.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index fd992f3e5d8..a1e8773a6f7 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1021,8 +1021,12 @@ def open_datatree( Additional keyword arguments passed on to the engine open function. For example: - - 'group': path to the group in the given file to open as the root group as - a str. + - 'group': path to the group in the given file to open as the root + group as a str. If the string contains glob metacharacters + (``*``, ``?``, ``[``), it is interpreted as a pattern and only + groups whose paths match are loaded (along with their ancestors). + For example, ``group="*/sweep_0"`` loads every ``sweep_0`` one + level deep while skipping sibling groups. - 'lock': resource lock to use when reading data from disk. Only relevant when using dask or another form of parallelism. By default, appropriate locks are chosen to safely read and write files with the @@ -1265,8 +1269,12 @@ def open_groups( Additional keyword arguments passed on to the engine open function. For example: - - 'group': path to the group in the given file to open as the root group as - a str. + - 'group': path to the group in the given file to open as the root + group as a str. If the string contains glob metacharacters + (``*``, ``?``, ``[``), it is interpreted as a pattern and only + groups whose paths match are loaded (along with their ancestors). + For example, ``group="*/sweep_0"`` loads every ``sweep_0`` one + level deep while skipping sibling groups. - 'lock': resource lock to use when reading data from disk. Only relevant when using dask or another form of parallelism. 
By default, appropriate locks are chosen to safely read and write files with the From c17f1345869bf55bdf3d2cf2aa3958844128f557 Mon Sep 17 00:00:00 2001 From: aladinor Date: Thu, 16 Apr 2026 11:55:11 -0500 Subject: [PATCH 06/14] Add whats-new entry for glob pattern group filtering --- doc/whats-new.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 93f335e625b..dfe97987f4f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -67,6 +67,10 @@ New Features or a fixed ``(width, height)`` tuple instead of computing figure size from ``size`` and ``aspect`` (:issue:`11103`). By `Kristian Kollsga `_. +- Added glob pattern support to the ``group`` parameter of :py:func:`open_datatree` + and :py:func:`open_groups`, allowing patterns like ``"*/sweep_0"`` to selectively + open matching groups (:issue:`11196`). + By `Alfonso Ladino <https://github.com/aladinor>`_. Breaking Changes ~~~~~~~~~~~~~~~~ From 5fb46e14fbcf7aa3e995cf5bcec19f55745d3bcc Mon Sep 17 00:00:00 2001 From: aladinor Date: Thu, 16 Apr 2026 11:55:17 -0500 Subject: [PATCH 07/14] Add tests for glob pattern group filtering Add integration tests for netCDF4, h5netcdf, and zarr backends, plus unit tests for _is_glob_pattern, _filter_group_paths, and _resolve_group_and_filter covering *, ?, and [] metacharacters. 
--- xarray/tests/test_backends_datatree.py | 192 +++++++++++++++++++++++++ 1 file changed, 192 insertions(+) diff --git a/xarray/tests/test_backends_datatree.py b/xarray/tests/test_backends_datatree.py index 32f224e89a6..261ed88214d 100644 --- a/xarray/tests/test_backends_datatree.py +++ b/xarray/tests/test_backends_datatree.py @@ -377,6 +377,79 @@ def test_open_datatree_specific_group(self, tmpdir, simple_datatree) -> None: assert subgroup_tree.root.parent is None assert_equal(subgroup_tree, expected_subtree) + def test_open_datatree_group_glob(self, tmpdir) -> None: + original_dt = DataTree.from_dict( + { + "/": xr.Dataset({"root_var": 1}), + "/A": xr.Dataset({"a_var": 2}), + "/A/sweep_0": xr.Dataset({"data": ("x", [1, 2])}), + "/A/sweep_1": xr.Dataset({"data": ("x", [3, 4])}), + "/B": xr.Dataset({"b_var": 3}), + "/B/sweep_0": xr.Dataset({"data": ("x", [5, 6])}), + } + ) + filepath = tmpdir / "glob_test.nc" + original_dt.to_netcdf(filepath, engine=self.engine) + + with open_datatree(filepath, group="*/sweep_0", engine=self.engine) as tree: + paths = {node.path for node in tree.subtree} + assert "/A/sweep_0" in paths + assert "/B/sweep_0" in paths + assert "/A/sweep_1" not in paths + + def test_open_datatree_group_glob_no_match(self, tmpdir) -> None: + original_dt = DataTree.from_dict( + { + "/": xr.Dataset({"root_var": 1}), + "/A": xr.Dataset({"a_var": 2}), + } + ) + filepath = tmpdir / "glob_nomatch.nc" + original_dt.to_netcdf(filepath, engine=self.engine) + + with open_datatree(filepath, group="*/nonexistent", engine=self.engine) as tree: + paths = {node.path for node in tree.subtree} + assert paths == {"/"} + + def test_open_datatree_group_glob_preserves_data(self, tmpdir) -> None: + original_dt = DataTree.from_dict( + { + "/": xr.Dataset({"root_var": 1}), + "/A": xr.Dataset({"a_var": 2}), + "/A/sweep_0": xr.Dataset({"data": ("x", [1, 2])}), + } + ) + filepath = tmpdir / "glob_data.nc" + original_dt.to_netcdf(filepath, engine=self.engine) + + with 
open_datatree(filepath, group="*/sweep_0", engine=self.engine) as tree: + assert tree["/A"].dataset["a_var"].item() == 2 + np.testing.assert_array_equal( + tree["/A/sweep_0"].dataset["data"].values, [1, 2] + ) + + def test_open_groups_group_glob(self, tmpdir) -> None: + original_dt = DataTree.from_dict( + { + "/": xr.Dataset({"root_var": 1}), + "/A": xr.Dataset({"a_var": 2}), + "/A/sweep_0": xr.Dataset({"data": ("x", [1, 2])}), + "/A/sweep_1": xr.Dataset({"data": ("x", [3, 4])}), + } + ) + filepath = tmpdir / "glob_groups.nc" + original_dt.to_netcdf(filepath, engine=self.engine) + + groups = open_groups(filepath, group="*/sweep_0", engine=self.engine) + try: + assert "/" in groups + assert "/A" in groups + assert "/A/sweep_0" in groups + assert "/A/sweep_1" not in groups + finally: + for ds in groups.values(): + ds.close() + @requires_h5netcdf_or_netCDF4 class TestGenericNetCDFIO(NetCDFIOBase): @@ -1025,6 +1098,62 @@ def test_open_datatree_specific_group( assert subgroup_tree.root.parent is None assert_equal(subgroup_tree, expected_subtree) + def test_open_datatree_group_glob(self, tmpdir, zarr_format) -> None: + original_dt = DataTree.from_dict( + { + "/": xr.Dataset({"root_var": 1}), + "/A": xr.Dataset({"a_var": 2}), + "/A/sweep_0": xr.Dataset({"data": ("x", [1, 2])}), + "/A/sweep_1": xr.Dataset({"data": ("x", [3, 4])}), + "/B": xr.Dataset({"b_var": 3}), + "/B/sweep_0": xr.Dataset({"data": ("x", [5, 6])}), + } + ) + filepath = str(tmpdir / "glob_test.zarr") + original_dt.to_zarr(filepath, zarr_format=zarr_format) + + with open_datatree(filepath, group="*/sweep_0", engine=self.engine) as tree: + paths = {node.path for node in tree.subtree} + assert "/A/sweep_0" in paths + assert "/B/sweep_0" in paths + assert "/A/sweep_1" not in paths + + def test_open_datatree_group_glob_no_match(self, tmpdir, zarr_format) -> None: + original_dt = DataTree.from_dict( + { + "/": xr.Dataset({"root_var": 1}), + "/A": xr.Dataset({"a_var": 2}), + } + ) + filepath = str(tmpdir / 
"glob_nomatch.zarr") + original_dt.to_zarr(filepath, zarr_format=zarr_format) + + with open_datatree(filepath, group="*/nonexistent", engine=self.engine) as tree: + paths = {node.path for node in tree.subtree} + assert paths == {"/"} + + def test_open_groups_group_glob(self, tmpdir, zarr_format) -> None: + original_dt = DataTree.from_dict( + { + "/": xr.Dataset({"root_var": 1}), + "/A": xr.Dataset({"a_var": 2}), + "/A/sweep_0": xr.Dataset({"data": ("x", [1, 2])}), + "/A/sweep_1": xr.Dataset({"data": ("x", [3, 4])}), + } + ) + filepath = str(tmpdir / "glob_groups.zarr") + original_dt.to_zarr(filepath, zarr_format=zarr_format) + + groups = open_groups(filepath, group="*/sweep_0", engine=self.engine) + try: + assert "/" in groups + assert "/A" in groups + assert "/A/sweep_0" in groups + assert "/A/sweep_1" not in groups + finally: + for ds in groups.values(): + ds.close() + @requires_dask def test_open_groups_chunks(self, tmpdir, zarr_format) -> None: """Test `open_groups` with chunks on a zarr store.""" @@ -1142,3 +1271,66 @@ def test_zarr_engine_recognised(self, tmpdir, zarr_format) -> None: with open_datatree(filepath) as roundtrip_dt: assert_identical(original_dt, roundtrip_dt) + + +class TestGlobPatternUtilities: + def test_is_glob_pattern(self) -> None: + from xarray.backends.common import _is_glob_pattern + + assert _is_glob_pattern("*/sweep_0") + assert _is_glob_pattern("VCP-34/sweep_[01]") + assert _is_glob_pattern("sweep_?") + assert not _is_glob_pattern("VCP-34") + assert not _is_glob_pattern("/group/subgroup") + + def test_filter_group_paths(self) -> None: + from xarray.backends.common import _filter_group_paths + + paths = ["/", "/A", "/A/sweep_0", "/A/sweep_1", "/B", "/B/sweep_0"] + result = _filter_group_paths(paths, "*/sweep_0") + assert result == ["/", "/A", "/A/sweep_0", "/B", "/B/sweep_0"] + + def test_filter_group_paths_no_match(self) -> None: + from xarray.backends.common import _filter_group_paths + + paths = ["/", "/A", "/B"] + result = 
_filter_group_paths(paths, "*/nonexistent") + assert result == ["/"] + + def test_filter_group_paths_question_mark(self) -> None: + from xarray.backends.common import _filter_group_paths + + paths = ["/", "/A", "/B", "/AB"] + result = _filter_group_paths(paths, "?") + assert result == ["/", "/A", "/B"] + + def test_filter_group_paths_bracket(self) -> None: + from xarray.backends.common import _filter_group_paths + + paths = ["/", "/A", "/A/sweep_0", "/A/sweep_1", "/A/sweep_2"] + result = _filter_group_paths(paths, "*/sweep_[01]") + assert result == ["/", "/A", "/A/sweep_0", "/A/sweep_1"] + + def test_resolve_group_and_filter_none(self) -> None: + from xarray.backends.common import _resolve_group_and_filter + + paths = ["/", "/A"] + effective, filtered = _resolve_group_and_filter(None, paths) + assert effective is None + assert filtered == paths + + def test_resolve_group_and_filter_literal(self) -> None: + from xarray.backends.common import _resolve_group_and_filter + + paths = ["/", "/A"] + effective, filtered = _resolve_group_and_filter("A", paths) + assert effective == "A" + assert filtered == paths + + def test_resolve_group_and_filter_glob(self) -> None: + from xarray.backends.common import _resolve_group_and_filter + + paths = ["/", "/A", "/A/sweep_0", "/A/sweep_1", "/B", "/B/sweep_0"] + effective, filtered = _resolve_group_and_filter("*/sweep_0", paths) + assert effective is None + assert filtered == ["/", "/A", "/A/sweep_0", "/B", "/B/sweep_0"] From c0300890bcd7623f2f36b041ee2c565ebb22dca7 Mon Sep 17 00:00:00 2001 From: aladinor Date: Thu, 16 Apr 2026 12:15:28 -0500 Subject: [PATCH 08/14] Simplify _filter_group_paths with set.update --- xarray/backends/common.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 9f90a02b5bb..f246f2f25ad 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -261,10 +261,7 @@ def _filter_group_paths(group_paths: Iterable[str], 
pattern: str) -> list[str]: np_ = NodePath(path) if np_.match(pattern): matched.add(path) - for parent in np_.parents: - p = str(parent) - if p: - matched.add(p) + matched.update(str(p) for p in np_.parents if str(p)) return [p for p in group_paths if p in matched] From cd5485d98d07027f098b736379f208497cb7dfc9 Mon Sep 17 00:00:00 2001 From: aladinor Date: Wed, 22 Apr 2026 08:40:39 -0500 Subject: [PATCH 09/14] Test glob-metachar escaping via character classes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add coverage for group names containing literal ``*`` / ``?`` / ``[``. These are reachable with ``[*]`` / ``[?]`` / ``[[]`` character-class escaping (inherited from ``fnmatch`` / ``PurePath.match`` semantics). New tests: - ``test_open_datatree_glob_char_class_escape_literal_metachar`` on ``NetCDFIOBase`` and ``TestZarrDatatreeIO`` — end-to-end verification that groups with literal metacharacters in their names can be targeted across all supported backends. - ``test_filter_group_paths_literal_metachar_via_char_class`` on ``TestGlobPatternUtilities`` — unit-level check of the filter. --- xarray/tests/test_backends_datatree.py | 97 ++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/xarray/tests/test_backends_datatree.py b/xarray/tests/test_backends_datatree.py index 261ed88214d..5fa86b60a6e 100644 --- a/xarray/tests/test_backends_datatree.py +++ b/xarray/tests/test_backends_datatree.py @@ -450,6 +450,45 @@ def test_open_groups_group_glob(self, tmpdir) -> None: for ds in groups.values(): ds.close() + def test_open_datatree_glob_char_class_escape_literal_metachar( + self, tmpdir + ) -> None: + # Groups whose names contain glob metacharacters (*, ?, [) are + # reachable by character-class escaping (e.g. "[*]" matches a + # literal "*"), mirroring fnmatch / PurePath.match semantics. 
+ original_dt = DataTree.from_dict( + { + "/": xr.Dataset({"root_var": 1}), + "/group_*_01": xr.Dataset({"data": ("x", [1, 2])}), + "/group_*_02": xr.Dataset({"data": ("x", [3, 4])}), + "/group_?_01": xr.Dataset({"data": ("x", [5, 6])}), + "/plain_01": xr.Dataset({"data": ("x", [7, 8])}), + } + ) + filepath = tmpdir / "glob_escape.nc" + original_dt.to_netcdf(filepath, engine=self.engine) + + # Escape `*` as `[*]` — match only the literal-star group ending in _01. + with open_datatree(filepath, group="group_[*]_01", engine=self.engine) as tree: + paths = {node.path for node in tree.subtree} + assert "/group_*_01" in paths + assert "/group_*_02" not in paths + assert "/group_?_01" not in paths + + # Escape `*` as `[*]` + `*` — match both literal-star groups. + with open_datatree(filepath, group="group_[*]_*", engine=self.engine) as tree: + paths = {node.path for node in tree.subtree} + assert "/group_*_01" in paths + assert "/group_*_02" in paths + assert "/group_?_01" not in paths + assert "/plain_01" not in paths + + # Escape `?` as `[?]` — match only the literal-? group. + with open_datatree(filepath, group="group_[?]_01", engine=self.engine) as tree: + paths = {node.path for node in tree.subtree} + assert "/group_?_01" in paths + assert "/group_*_01" not in paths + @requires_h5netcdf_or_netCDF4 class TestGenericNetCDFIO(NetCDFIOBase): @@ -1154,6 +1193,40 @@ def test_open_groups_group_glob(self, tmpdir, zarr_format) -> None: for ds in groups.values(): ds.close() + def test_open_datatree_glob_char_class_escape_literal_metachar( + self, tmpdir, zarr_format + ) -> None: + # Zarr variant of the NetCDF escape test: groups whose names + # contain literal glob metacharacters are reachable via + # character-class escaping. 
+ original_dt = DataTree.from_dict( + { + "/": xr.Dataset({"root_var": 1}), + "/group_*_01": xr.Dataset({"data": ("x", [1, 2])}), + "/group_*_02": xr.Dataset({"data": ("x", [3, 4])}), + "/group_?_01": xr.Dataset({"data": ("x", [5, 6])}), + } + ) + filepath = str(tmpdir / "glob_escape.zarr") + original_dt.to_zarr(filepath, zarr_format=zarr_format) + + with open_datatree(filepath, group="group_[*]_01", engine=self.engine) as tree: + paths = {node.path for node in tree.subtree} + assert "/group_*_01" in paths + assert "/group_*_02" not in paths + assert "/group_?_01" not in paths + + with open_datatree(filepath, group="group_[*]_*", engine=self.engine) as tree: + paths = {node.path for node in tree.subtree} + assert "/group_*_01" in paths + assert "/group_*_02" in paths + assert "/group_?_01" not in paths + + with open_datatree(filepath, group="group_[?]_01", engine=self.engine) as tree: + paths = {node.path for node in tree.subtree} + assert "/group_?_01" in paths + assert "/group_*_01" not in paths + @requires_dask def test_open_groups_chunks(self, tmpdir, zarr_format) -> None: """Test `open_groups` with chunks on a zarr store.""" @@ -1311,6 +1384,30 @@ def test_filter_group_paths_bracket(self) -> None: result = _filter_group_paths(paths, "*/sweep_[01]") assert result == ["/", "/A", "/A/sweep_0", "/A/sweep_1"] + def test_filter_group_paths_literal_metachar_via_char_class(self) -> None: + from xarray.backends.common import _filter_group_paths + + # Groups whose names literally contain glob metacharacters are + # reachable via character-class escaping (inherited from + # fnmatch / PurePath.match semantics). + paths = ["/", "/group_*_01", "/group_*_02", "/group_?_01", "/plain_01"] + + # "[*]" matches a literal "*" + assert _filter_group_paths(paths, "group_[*]_01") == [ + "/", + "/group_*_01", + ] + assert _filter_group_paths(paths, "group_[*]_*") == [ + "/", + "/group_*_01", + "/group_*_02", + ] + # "[?]" matches a literal "?" 
+ assert _filter_group_paths(paths, "group_[?]_01") == [ + "/", + "/group_?_01", + ] + def test_resolve_group_and_filter_none(self) -> None: from xarray.backends.common import _resolve_group_and_filter From 03406964e288f82eb0003d49866804958ebaa711 Mon Sep 17 00:00:00 2001 From: aladinor Date: Wed, 22 Apr 2026 08:52:54 -0500 Subject: [PATCH 10/14] Document glob-metachar escaping in open_datatree/open_groups docstrings Explain that matching follows ``fnmatch`` / :py:meth:`pathlib.PurePath.match` semantics and that literal ``*`` / ``?`` / ``[`` in group names can be targeted via character-class escapes (``[*]``, ``[?]``, ``[[]``), with a short example. Applied to both :py:func:`open_datatree` and :py:func:`open_groups` for consistency. --- xarray/backends/api.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index a1e8773a6f7..ad3d9953755 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1026,7 +1026,13 @@ def open_datatree( (``*``, ``?``, ``[``), it is interpreted as a pattern and only groups whose paths match are loaded (along with their ancestors). For example, ``group="*/sweep_0"`` loads every ``sweep_0`` one - level deep while skipping sibling groups. + level deep while skipping sibling groups. Matching follows + ``fnmatch`` / :py:meth:`pathlib.PurePath.match` semantics, so + group names that contain literal glob metacharacters can be + targeted with character-class escapes: ``[*]`` matches a + literal ``*``, ``[?]`` a literal ``?``, and ``[[]`` a literal + ``[``. For example, ``group="group_[*]_01"`` matches a group + literally named ``group_*_01``. - 'lock': resource lock to use when reading data from disk. Only relevant when using dask or another form of parallelism. 
By default, appropriate locks are chosen to safely read and write files with the @@ -1274,7 +1280,13 @@ def open_groups( (``*``, ``?``, ``[``), it is interpreted as a pattern and only groups whose paths match are loaded (along with their ancestors). For example, ``group="*/sweep_0"`` loads every ``sweep_0`` one - level deep while skipping sibling groups. + level deep while skipping sibling groups. Matching follows + ``fnmatch`` / :py:meth:`pathlib.PurePath.match` semantics, so + group names that contain literal glob metacharacters can be + targeted with character-class escapes: ``[*]`` matches a + literal ``*``, ``[?]`` a literal ``?``, and ``[[]`` a literal + ``[``. For example, ``group="group_[*]_01"`` matches a group + literally named ``group_*_01``. - 'lock': resource lock to use when reading data from disk. Only relevant when using dask or another form of parallelism. By default, appropriate locks are chosen to safely read and write files with the From e2cf518b39a171347674d8e7efbe5599e313452c Mon Sep 17 00:00:00 2001 From: aladinor Date: Wed, 22 Apr 2026 08:53:03 -0500 Subject: [PATCH 11/14] Align zarr escape test with NetCDF parity (add plain_01 case) Add ``/plain_01`` to the zarr ``test_open_datatree_glob_char_class_escape_literal_metachar`` fixture so it matches the NetCDF version and confirms plain (no-metachar) group names are excluded when the pattern targets literal-metachar names. 
--- xarray/tests/test_backends_datatree.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/tests/test_backends_datatree.py b/xarray/tests/test_backends_datatree.py index 5fa86b60a6e..3b14e29d885 100644 --- a/xarray/tests/test_backends_datatree.py +++ b/xarray/tests/test_backends_datatree.py @@ -1205,6 +1205,7 @@ def test_open_datatree_glob_char_class_escape_literal_metachar( "/group_*_01": xr.Dataset({"data": ("x", [1, 2])}), "/group_*_02": xr.Dataset({"data": ("x", [3, 4])}), "/group_?_01": xr.Dataset({"data": ("x", [5, 6])}), + "/plain_01": xr.Dataset({"data": ("x", [7, 8])}), } ) filepath = str(tmpdir / "glob_escape.zarr") @@ -1221,6 +1222,7 @@ def test_open_datatree_glob_char_class_escape_literal_metachar( assert "/group_*_01" in paths assert "/group_*_02" in paths assert "/group_?_01" not in paths + assert "/plain_01" not in paths with open_datatree(filepath, group="group_[?]_01", engine=self.engine) as tree: paths = {node.path for node in tree.subtree} From 612842d05747703d950179d8ab41f1df418f0901 Mon Sep 17 00:00:00 2001 From: aladinor Date: Wed, 22 Apr 2026 09:24:20 -0500 Subject: [PATCH 12/14] Skip zarr escape test on Windows (filesystem rejects * and ? in names) Windows forbids ``*`` and ``?`` in filesystem directory/file names, and zarr stores each group as an on-disk directory. That makes writing the fixture impossible before the test can exercise the filter. NetCDF4/H5 store groups inside the HDF5 container so they are unaffected. Skip the zarr variant on Windows with a clear reason; the NetCDF variants still cover the escape behavior on all platforms. 
--- xarray/tests/test_backends_datatree.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/xarray/tests/test_backends_datatree.py b/xarray/tests/test_backends_datatree.py index 3b14e29d885..96b134c77a7 100644 --- a/xarray/tests/test_backends_datatree.py +++ b/xarray/tests/test_backends_datatree.py @@ -1193,6 +1193,11 @@ def test_open_groups_group_glob(self, tmpdir, zarr_format) -> None: for ds in groups.values(): ds.close() + @pytest.mark.skipif( + ON_WINDOWS, + reason="Windows filesystem rejects '*' and '?' in directory names, " + "which zarr uses for group storage.", + ) def test_open_datatree_glob_char_class_escape_literal_metachar( self, tmpdir, zarr_format ) -> None: From 7c7e9c0d0782dba0a4476fffa5d8c0dfa1f3d9ce Mon Sep 17 00:00:00 2001 From: aladinor Date: Wed, 22 Apr 2026 12:19:54 -0500 Subject: [PATCH 13/14] Use MemoryStore for zarr escape test instead of skipping on Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous commit skipped the zarr variant on Windows because the filesystem rejects ``*`` and ``?`` in directory names. Using ``zarr.storage.MemoryStore`` side-steps the filesystem entirely, so the test now runs on every platform and still exercises the escape logic. This is also a more realistic target for the feature on Windows — users who hit group names with glob metacharacters are likely reading from cloud/icechunk stores (dict-keyed like ``MemoryStore``), not an on-disk zarr directory tree. 
--- xarray/tests/test_backends_datatree.py | 38 ++++++++++++++++---------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/xarray/tests/test_backends_datatree.py b/xarray/tests/test_backends_datatree.py index 96b134c77a7..611453dfe5f 100644 --- a/xarray/tests/test_backends_datatree.py +++ b/xarray/tests/test_backends_datatree.py @@ -1193,17 +1193,12 @@ def test_open_groups_group_glob(self, tmpdir, zarr_format) -> None: for ds in groups.values(): ds.close() - @pytest.mark.skipif( - ON_WINDOWS, - reason="Windows filesystem rejects '*' and '?' in directory names, " - "which zarr uses for group storage.", - ) def test_open_datatree_glob_char_class_escape_literal_metachar( - self, tmpdir, zarr_format + self, zarr_format ) -> None: - # Zarr variant of the NetCDF escape test: groups whose names - # contain literal glob metacharacters are reachable via - # character-class escaping. + # In-memory store: Windows disallows "*" and "?" in directory names. + from zarr.storage import MemoryStore + original_dt = DataTree.from_dict( { "/": xr.Dataset({"root_var": 1}), @@ -1213,23 +1208,38 @@ def test_open_datatree_glob_char_class_escape_literal_metachar( "/plain_01": xr.Dataset({"data": ("x", [7, 8])}), } ) - filepath = str(tmpdir / "glob_escape.zarr") - original_dt.to_zarr(filepath, zarr_format=zarr_format) + store = MemoryStore() + original_dt.to_zarr(store, zarr_format=zarr_format) - with open_datatree(filepath, group="group_[*]_01", engine=self.engine) as tree: + with open_datatree( + store, + group="group_[*]_01", + engine=self.engine, + zarr_format=zarr_format, + ) as tree: paths = {node.path for node in tree.subtree} assert "/group_*_01" in paths assert "/group_*_02" not in paths assert "/group_?_01" not in paths - with open_datatree(filepath, group="group_[*]_*", engine=self.engine) as tree: + with open_datatree( + store, + group="group_[*]_*", + engine=self.engine, + zarr_format=zarr_format, + ) as tree: paths = {node.path for node in tree.subtree} assert 
"/group_*_01" in paths assert "/group_*_02" in paths assert "/group_?_01" not in paths assert "/plain_01" not in paths - with open_datatree(filepath, group="group_[?]_01", engine=self.engine) as tree: + with open_datatree( + store, + group="group_[?]_01", + engine=self.engine, + zarr_format=zarr_format, + ) as tree: paths = {node.path for node in tree.subtree} assert "/group_?_01" in paths assert "/group_*_01" not in paths From a1dd28f041c06816270ab595c4a3850ff309352c Mon Sep 17 00:00:00 2001 From: aladinor Date: Wed, 22 Apr 2026 12:37:19 -0500 Subject: [PATCH 14/14] Add type: ignore for MemoryStore in open_datatree calls ``open_datatree``'s static signature doesn't list zarr store objects (``MemoryStore`` etc.) among its accepted first-argument types, but the zarr backend handles them correctly at runtime. Apply a narrow ``# type: ignore[arg-type]`` on the three test calls rather than widening the public signature. --- xarray/tests/test_backends_datatree.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_backends_datatree.py b/xarray/tests/test_backends_datatree.py index 611453dfe5f..eee09c6c919 100644 --- a/xarray/tests/test_backends_datatree.py +++ b/xarray/tests/test_backends_datatree.py @@ -1212,7 +1212,7 @@ def test_open_datatree_glob_char_class_escape_literal_metachar( original_dt.to_zarr(store, zarr_format=zarr_format) with open_datatree( - store, + store, # type: ignore[arg-type] group="group_[*]_01", engine=self.engine, zarr_format=zarr_format, @@ -1223,7 +1223,7 @@ def test_open_datatree_glob_char_class_escape_literal_metachar( assert "/group_?_01" not in paths with open_datatree( - store, + store, # type: ignore[arg-type] group="group_[*]_*", engine=self.engine, zarr_format=zarr_format, @@ -1235,7 +1235,7 @@ def test_open_datatree_glob_char_class_escape_literal_metachar( assert "/plain_01" not in paths with open_datatree( - store, + store, # type: ignore[arg-type] group="group_[?]_01", engine=self.engine, 
zarr_format=zarr_format,