From 62e2f31109bf62e6c33eceb98fcde11a9a68336e Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Tue, 11 Nov 2025 13:26:02 -0500 Subject: [PATCH 1/5] Ensure base Variable class is used when assigning --- xarray/core/coordinates.py | 2 +- xarray/structure/merge.py | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index ab5bf7408f3..9aa64a57ff2 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -1265,7 +1265,7 @@ def create_coords_with_default_indexes( variables.update(idx_vars) all_variables.update(idx_vars) else: - variables[name] = variable + variables[name] = variable.to_base_variable() new_coords = Coordinates._construct_direct(coords=variables, indexes=indexes) diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index e5f3c0959bd..d398a37c8ef 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -22,7 +22,12 @@ emit_user_level_warning, equivalent, ) -from xarray.core.variable import Variable, as_variable, calculate_dimensions +from xarray.core.variable import ( + IndexVariable, + Variable, + as_variable, + calculate_dimensions, +) from xarray.structure.alignment import deep_align from xarray.util.deprecation_helpers import ( _COMPAT_DEFAULT, @@ -1206,7 +1211,11 @@ def dataset_update_method(dataset: Dataset, other: CoercibleMapping) -> _MergeRe if c not in value.dims and c in dataset.coords ] if coord_names: - other[key] = value.drop_vars(coord_names) + value = value.drop_vars(coord_names) + if isinstance(value.variable, IndexVariable): + variable = value.variable.to_base_variable() + value = value._replace(variable=variable) + other[key] = value return merge_core( [dataset, other], From 93281b737bf855f6e23718d847609409cd75cb65 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Tue, 11 Nov 2025 13:28:01 -0500 Subject: [PATCH 2/5] Add tests --- xarray/tests/test_dataarray.py | 5 +++++ xarray/tests/test_dataset.py | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 5eec7b8a2fd..bc4f33fd8e1 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1702,6 +1702,11 @@ def should_add_coord_to_array(self, name, var, dims): assert_identical(actual, expected, check_default_indexes=False) assert "x_bnds" not in actual.dims + def test_assign_coords_uses_base_variable_class(self) -> None: + a = DataArray([0, 1, 2], dims=["x"], coords={"x": [0, 1, 2]}) + a = a.assign_coords(foo=a.x) + assert not isinstance(a["foo"].variable, IndexVariable) + def test_coords_alignment(self) -> None: lhs = DataArray([1, 2, 3], [("x", [0, 1, 2])]) rhs = DataArray([2, 3, 4], [("x", [1, 2, 3])]) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index e677430dfbf..d823953297b 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4779,6 +4779,11 @@ def test_setitem_using_list_errors(self, var_list, data, error_regex) -> None: with pytest.raises(ValueError, match=error_regex): actual[var_list] = data + def test_setitem_uses_base_variable_class_even_for_index_variables(self) -> None: + ds = Dataset(coords={"x": [1, 2, 3]}) + ds["y"] = ds["x"] + assert not isinstance(ds["y"].variable, IndexVariable) + def test_assign(self) -> None: ds = Dataset() actual = ds.assign(x=[0, 1, 2], y=2) From 3370345457b799e554effbe8c34b88d757add610 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Tue, 11 Nov 2025 13:34:10 -0500 Subject: [PATCH 3/5] Update what's new --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6dcbdd1062b..e81535a101b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -44,6 +44,9 @@ Bug Fixes - Fix error handling issue in ``decode_cf_variables`` when decoding fails - the exception is now re-raised correctly, with a note added about the variable name that caused the error (:issue:`10873`, :pull:`10886`). By `Jonas L. Bertelsen `_ +- When assigning an indexed coordinate to a data variable or coordinate, coerce it from + ``IndexVariable`` to ``Variable`` (:issue:`9859`, :issue:`10829`, :pull:`10909`) + By `Julia Signell `_ Performance ~~~~~~~~~~~ From 7062b3b9e48726ff3707b8cf71097d0f7b9b47fd Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Wed, 12 Nov 2025 16:54:27 -0500 Subject: [PATCH 4/5] Improve internal_invariants assertions --- xarray/testing/assertions.py | 45 +++++++++++++++++++--------------- xarray/tests/test_dataarray.py | 2 +- xarray/tests/test_dataset.py | 8 +++--- 3 files changed, 31 insertions(+), 24 deletions(-) diff --git a/xarray/testing/assertions.py b/xarray/testing/assertions.py index 39e6da6a83c..25690d3f425 100644 --- a/xarray/testing/assertions.py +++ b/xarray/testing/assertions.py @@ -3,6 +3,7 @@ import functools import warnings from collections.abc import Hashable +from typing import Any import numpy as np import pandas as pd @@ -401,11 +402,25 @@ def _assert_indexes_invariants_checks( ) -def _assert_variable_invariants(var: Variable, name: Hashable = None): +def _assert_variable_invariants( + var: Variable | Any, + name: Hashable = None, + check_default_indexes: bool = True, + is_index: bool = False, +): if name is None: name_or_empty: tuple = () else: name_or_empty = (name,) + + assert isinstance(var, Variable), {name: type(var)} + if name: + if var.dims == name_or_empty: + if check_default_indexes: + assert isinstance(var, IndexVariable), {name: type(var)} + elif not is_index: + assert not isinstance(var, IndexVariable), {name: type(var)} + assert isinstance(var._dims, tuple), name_or_empty + (var._dims,) assert len(var._dims) == len(var._data.shape), name_or_empty + ( var._dims, @@ -418,25 +433,20 @@ def _assert_variable_invariants(var: Variable, name: Hashable = None): def _assert_dataarray_invariants(da: DataArray, check_default_indexes: bool): - assert isinstance(da._variable, Variable), da._variable - _assert_variable_invariants(da._variable) + _assert_variable_invariants(da._variable, name=da.name, check_default_indexes=False) assert isinstance(da._coords, dict), da._coords - assert all(isinstance(v, Variable) for v in da._coords.values()), da._coords if check_default_indexes: assert all(set(v.dims) <= set(da.dims) for v in da._coords.values()), ( da.dims, {k: v.dims for k, v in da._coords.items()}, ) - assert all( - isinstance(v, IndexVariable) - for (k, v) in da._coords.items() - if v.dims == (k,) - ), {k: type(v) for k, v in da._coords.items()} for k, v in da._coords.items(): - _assert_variable_invariants(v, k) + _assert_variable_invariants( + v, k, check_default_indexes=check_default_indexes, is_index=k in da._indexes + ) if da._indexes is not None: _assert_indexes_invariants_checks( @@ -446,9 +456,11 @@ def _assert_dataarray_invariants(da: DataArray, check_default_indexes: bool): def _assert_dataset_invariants(ds: Dataset, check_default_indexes: bool): assert isinstance(ds._variables, dict), type(ds._variables) - assert all(isinstance(v, Variable) for v in ds._variables.values()), ds._variables + for k, v in ds._variables.items(): - _assert_variable_invariants(v, k) + _assert_variable_invariants( + v, k, check_default_indexes=check_default_indexes, is_index=k in ds._indexes + ) assert isinstance(ds._coord_names, set), ds._coord_names assert ds._coord_names <= ds._variables.keys(), ( @@ -466,13 +478,6 @@ def _assert_dataset_invariants(ds: Dataset, check_default_indexes: bool): ds._dims[k] == v.sizes[k] for v in ds._variables.values() for k in v.sizes ), (ds._dims, {k: v.sizes for k, v in ds._variables.items()}) - if check_default_indexes: - assert all( - isinstance(v, IndexVariable) - for (k, v) in ds._variables.items() - if v.dims == (k,) - ), {k: type(v) for k, v in ds._variables.items() if v.dims == (k,)} - if ds._indexes is not None: _assert_indexes_invariants_checks( ds._indexes, ds._variables, ds._dims, check_default=check_default_indexes @@ -492,7 +497,7 @@ def _assert_internal_invariants( private APIs. """ if isinstance(xarray_obj, Variable): - _assert_variable_invariants(xarray_obj) + _assert_variable_invariants(xarray_obj, check_default_indexes=False) elif isinstance(xarray_obj, DataArray): _assert_dataarray_invariants( xarray_obj, check_default_indexes=check_default_indexes diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index bc4f33fd8e1..65668ae873c 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1705,7 +1705,7 @@ def should_add_coord_to_array(self, name, var, dims): def test_assign_coords_uses_base_variable_class(self) -> None: a = DataArray([0, 1, 2], dims=["x"], coords={"x": [0, 1, 2]}) a = a.assign_coords(foo=a.x) - assert not isinstance(a["foo"].variable, IndexVariable) + _assert_internal_invariants(a, check_default_indexes=True) def test_coords_alignment(self) -> None: lhs = DataArray([1, 2, 3], [("x", [0, 1, 2])]) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index d823953297b..a9cfc6959d2 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4311,9 +4311,11 @@ def test_to_stacked_array_preserves_dtype(self) -> None: # coordinate created from variables names should be of string dtype data = np.array(["a", "a", "a", "b"], dtype=" None: @@ -4782,7 +4784,7 @@ def test_setitem_using_list_errors(self, var_list, data, error_regex) -> None: def test_setitem_uses_base_variable_class_even_for_index_variables(self) -> None: ds = Dataset(coords={"x": [1, 2, 3]}) ds["y"] = ds["x"] - assert not isinstance(ds["y"].variable, IndexVariable) + _assert_internal_invariants(ds, check_default_indexes=True) def test_assign(self) -> None: ds = Dataset() From 6132204364aaf8d814114fe20d61a6c3c05be76c Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Thu, 13 Nov 2025 14:50:53 -0500 Subject: [PATCH 5/5] Add typing Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> --- xarray/testing/assertions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/testing/assertions.py b/xarray/testing/assertions.py index 25690d3f425..bfa159f5371 100644 --- a/xarray/testing/assertions.py +++ b/xarray/testing/assertions.py @@ -407,7 +407,7 @@ def _assert_variable_invariants( name: Hashable = None, check_default_indexes: bool = True, is_index: bool = False, -): +) -> None: if name is None: name_or_empty: tuple = () else: