Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ Bug Fixes
- Fix error handling issue in ``decode_cf_variables`` when decoding fails - the exception is now re-raised
correctly, with a note added about the variable name that caused the error (:issue:`10873`, :pull:`10886`).
By `Jonas L. Bertelsen <https://github.com/jonaslb>`_
- When assigning an indexed coordinate to a data variable or coordinate, coerce it from
``IndexVariable`` to ``Variable`` (:issue:`9859`, :issue:`10829`, :pull:`10909`)
By `Julia Signell <https://github.com/jsignell>`_

Performance
~~~~~~~~~~~
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -1265,7 +1265,7 @@ def create_coords_with_default_indexes(
variables.update(idx_vars)
all_variables.update(idx_vars)
else:
variables[name] = variable
variables[name] = variable.to_base_variable()

new_coords = Coordinates._construct_direct(coords=variables, indexes=indexes)

Expand Down
13 changes: 11 additions & 2 deletions xarray/structure/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,12 @@
emit_user_level_warning,
equivalent,
)
from xarray.core.variable import Variable, as_variable, calculate_dimensions
from xarray.core.variable import (
IndexVariable,
Variable,
as_variable,
calculate_dimensions,
)
from xarray.structure.alignment import deep_align
from xarray.util.deprecation_helpers import (
_COMPAT_DEFAULT,
Expand Down Expand Up @@ -1206,7 +1211,11 @@ def dataset_update_method(dataset: Dataset, other: CoercibleMapping) -> _MergeRe
if c not in value.dims and c in dataset.coords
]
if coord_names:
other[key] = value.drop_vars(coord_names)
value = value.drop_vars(coord_names)
if isinstance(value.variable, IndexVariable):
variable = value.variable.to_base_variable()
value = value._replace(variable=variable)
other[key] = value

return merge_core(
[dataset, other],
Expand Down
45 changes: 25 additions & 20 deletions xarray/testing/assertions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import functools
import warnings
from collections.abc import Hashable
from typing import Any

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -401,11 +402,25 @@ def _assert_indexes_invariants_checks(
)


def _assert_variable_invariants(var: Variable, name: Hashable = None):
def _assert_variable_invariants(
var: Variable | Any,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The check now happens within this function.

name: Hashable = None,
check_default_indexes: bool = True,
is_index: bool = False,
) -> None:
if name is None:
name_or_empty: tuple = ()
else:
name_or_empty = (name,)

assert isinstance(var, Variable), {name: type(var)}
if name:
if var.dims == name_or_empty:
if check_default_indexes:
assert isinstance(var, IndexVariable), {name: type(var)}
elif not is_index:
assert not isinstance(var, IndexVariable), {name: type(var)}

assert isinstance(var._dims, tuple), name_or_empty + (var._dims,)
assert len(var._dims) == len(var._data.shape), name_or_empty + (
var._dims,
Expand All @@ -418,25 +433,20 @@ def _assert_variable_invariants(var: Variable, name: Hashable = None):


def _assert_dataarray_invariants(da: DataArray, check_default_indexes: bool):
assert isinstance(da._variable, Variable), da._variable
_assert_variable_invariants(da._variable)
_assert_variable_invariants(da._variable, name=da.name, check_default_indexes=False)

assert isinstance(da._coords, dict), da._coords
assert all(isinstance(v, Variable) for v in da._coords.values()), da._coords

if check_default_indexes:
assert all(set(v.dims) <= set(da.dims) for v in da._coords.values()), (
da.dims,
{k: v.dims for k, v in da._coords.items()},
)
assert all(
isinstance(v, IndexVariable)
for (k, v) in da._coords.items()
if v.dims == (k,)
), {k: type(v) for k, v in da._coords.items()}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is now handled by _assert_variable_invariants


for k, v in da._coords.items():
_assert_variable_invariants(v, k)
_assert_variable_invariants(
v, k, check_default_indexes=check_default_indexes, is_index=k in da._indexes
)

if da._indexes is not None:
_assert_indexes_invariants_checks(
Expand All @@ -446,9 +456,11 @@ def _assert_dataarray_invariants(da: DataArray, check_default_indexes: bool):

def _assert_dataset_invariants(ds: Dataset, check_default_indexes: bool):
assert isinstance(ds._variables, dict), type(ds._variables)
assert all(isinstance(v, Variable) for v in ds._variables.values()), ds._variables

for k, v in ds._variables.items():
_assert_variable_invariants(v, k)
_assert_variable_invariants(
v, k, check_default_indexes=check_default_indexes, is_index=k in ds._indexes
)

assert isinstance(ds._coord_names, set), ds._coord_names
assert ds._coord_names <= ds._variables.keys(), (
Expand All @@ -466,13 +478,6 @@ def _assert_dataset_invariants(ds: Dataset, check_default_indexes: bool):
ds._dims[k] == v.sizes[k] for v in ds._variables.values() for k in v.sizes
), (ds._dims, {k: v.sizes for k, v in ds._variables.items()})

if check_default_indexes:
assert all(
isinstance(v, IndexVariable)
for (k, v) in ds._variables.items()
if v.dims == (k,)
), {k: type(v) for k, v in ds._variables.items() if v.dims == (k,)}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is now handled by _assert_variable_invariants

if ds._indexes is not None:
_assert_indexes_invariants_checks(
ds._indexes, ds._variables, ds._dims, check_default=check_default_indexes
Expand All @@ -492,7 +497,7 @@ def _assert_internal_invariants(
private APIs.
"""
if isinstance(xarray_obj, Variable):
_assert_variable_invariants(xarray_obj)
_assert_variable_invariants(xarray_obj, check_default_indexes=False)
elif isinstance(xarray_obj, DataArray):
_assert_dataarray_invariants(
xarray_obj, check_default_indexes=check_default_indexes
Expand Down
5 changes: 5 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1702,6 +1702,11 @@ def should_add_coord_to_array(self, name, var, dims):
assert_identical(actual, expected, check_default_indexes=False)
assert "x_bnds" not in actual.dims

def test_assign_coords_uses_base_variable_class(self) -> None:
a = DataArray([0, 1, 2], dims=["x"], coords={"x": [0, 1, 2]})
a = a.assign_coords(foo=a.x)
_assert_internal_invariants(a, check_default_indexes=True)

def test_coords_alignment(self) -> None:
lhs = DataArray([1, 2, 3], [("x", [0, 1, 2])])
rhs = DataArray([2, 3, 4], [("x", [1, 2, 3])])
Expand Down
11 changes: 9 additions & 2 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4311,9 +4311,11 @@ def test_to_stacked_array_preserves_dtype(self) -> None:
# coordinate created from variables names should be of string dtype
data = np.array(["a", "a", "a", "b"], dtype="<U1")
expected_stacked_variable = DataArray(name="variable", data=data, dims="z")

# coerce from `IndexVariable` to `Variable` before comparing
assert_identical(
stacked.coords["variable"].drop_vars(["z", "variable", "y"]),
expected_stacked_variable,
stacked["variable"].variable.to_base_variable(),
expected_stacked_variable.variable,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was the only test change that was needed. Basically stacked["variable"].variable is an IndexVariable because it is represents a coord that is part of a MultiIndex. But you can't know that if you only have access to the DataArray with no coords. So I just switched it to be comparing variable and to coerce to the Variable.

)

def test_to_stacked_array_transposed(self) -> None:
Expand Down Expand Up @@ -4779,6 +4781,11 @@ def test_setitem_using_list_errors(self, var_list, data, error_regex) -> None:
with pytest.raises(ValueError, match=error_regex):
actual[var_list] = data

def test_setitem_uses_base_variable_class_even_for_index_variables(self) -> None:
ds = Dataset(coords={"x": [1, 2, 3]})
ds["y"] = ds["x"]
_assert_internal_invariants(ds, check_default_indexes=True)

def test_assign(self) -> None:
ds = Dataset()
actual = ds.assign(x=[0, 1, 2], y=2)
Expand Down
Loading