From f7e4b209e463353c8292b3e33259b661efee04a4 Mon Sep 17 00:00:00 2001 From: Ed Schofield Date: Fri, 26 Sep 2025 15:56:06 +0200 Subject: [PATCH 01/10] Generalize data input to support Narwhals-compatible Series or DataFrames --- pymc/data.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pymc/data.py b/pymc/data.py index cfade37910..c9d8dae3eb 100644 --- a/pymc/data.py +++ b/pymc/data.py @@ -20,12 +20,14 @@ from copy import copy from typing import Union, cast +import narwhals as nw import numpy as np import pandas as pd import pytensor import pytensor.tensor as pt import xarray as xr +from narwhals.typing import IntoFrameT, IntoSeriesT from pytensor.compile.builders import OpFromGraph from pytensor.compile.sharedvalue import SharedVariable from pytensor.graph.basic import Variable @@ -185,7 +187,7 @@ def determine_coords( if hasattr(value, "columns"): if dims is not None: dim_name = dims[1] - if dim_name is None and value.columns.name is not None: + if dim_name is None and nw.dependencies.is_pandas_dataframe(value) and value.columns.name is not None: dim_name = value.columns.name if dim_name is not None: coords[dim_name] = value.columns @@ -197,12 +199,12 @@ def determine_coords( # str is applied because dim entries may be None coords[str(dim_name)] = cast(xr.DataArray, value[dim]).to_numpy() - if isinstance(value, np.ndarray) and dims is not None: - if len(dims) != value.ndim: + elif (isinstance(value, np.ndarray) or nw.dependencies.is_polars_series(value)) and dims is not None: + if len(dims) != len(value.shape): # Polars objects have no .ndim ... raise ShapeError( "Invalid data shape. The rank of the dataset must match the length of `dims`.", actual=value.shape, - expected=value.ndim, + expected=len(value.shape), ) for size, dim in zip(value.shape, dims): coord = model.coords.get(dim, None) @@ -219,7 +221,7 @@ def determine_coords( def Data( name: str, - value, + value: IntoFrameT | IntoSeriesT | xr.DataArray | np.ndarray, *, dims: Sequence[str] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, @@ -248,7 +250,7 @@ def Data( ---------- name : str The name for this variable. - value : array_like or pandas.Series, pandas.Dataframe + value : array_like or Narwhals-compatible Series or DataFrame A value to associate with this variable. dims : str, tuple of str or tuple of None, optional Dimension names of the random variables (as opposed to the shapes of these From 0bcb631e6bf88fe0753ac0ad006dc65b10f5e1ab Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Sun, 16 Nov 2025 18:31:37 -0600 Subject: [PATCH 02/10] Narwhals compatibility layer in `pm.Data` --- .../environment-alternative-backends.yml | 1 + conda-envs/environment-dev.yml | 1 + conda-envs/environment-docs.yml | 1 + conda-envs/environment-test.yml | 1 + conda-envs/windows-environment-dev.yml | 1 + conda-envs/windows-environment-test.yml | 1 + pymc/data.py | 195 +++++++++++++----- pymc/pytensorf.py | 32 ++- requirements-dev.txt | 1 + requirements.txt | 1 + tests/test_data.py | 36 ++++ tests/test_pytensorf.py | 22 +- 12 files changed, 236 insertions(+), 57 deletions(-) diff --git a/conda-envs/environment-alternative-backends.yml b/conda-envs/environment-alternative-backends.yml index d7cb0fe4fc..6ccd5983da 100644 --- a/conda-envs/environment-alternative-backends.yml +++ b/conda-envs/environment-alternative-backends.yml @@ -18,6 +18,7 @@ dependencies: - jaxlib>=0.4.28 - libblas=*=*mkl - mkl-service +- narwhals>=2.11.0 - numpy>=1.25.0 - numpyro>=0.8.0 - pandas>=0.24.0 diff --git a/conda-envs/environment-dev.yml b/conda-envs/environment-dev.yml index 231dfa05cf..e2b0c8d1db 100644 --- a/conda-envs/environment-dev.yml +++ b/conda-envs/environment-dev.yml @@ -9,6 +9,7 @@ dependencies: - blas - cachetools>=4.2.1 - cloudpickle +- narwhals>=2.11.0 - numpy>=1.25.0 - pandas>=0.24.0 - pip diff --git a/conda-envs/environment-docs.yml b/conda-envs/environment-docs.yml index f85f8fc55b..324505de1d 100644 --- a/conda-envs/environment-docs.yml +++ b/conda-envs/environment-docs.yml @@ -8,6 +8,7 @@ dependencies: - arviz>=0.13.0 - cachetools>=4.2.1 - cloudpickle +- narwhals>=2.11.0 - numpy>=1.25.0 - pandas>=0.24.0 - pip diff --git a/conda-envs/environment-test.yml b/conda-envs/environment-test.yml index b6fd3f36e0..0c138bb15b 100644 --- a/conda-envs/environment-test.yml +++ b/conda-envs/environment-test.yml @@ -10,6 +10,7 @@ dependencies: - cachetools>=4.2.1 - cloudpickle - jax +- narwhals>=2.11.0 - numpy>=1.25.0 - pandas>=0.24.0 - pip diff --git a/conda-envs/windows-environment-dev.yml b/conda-envs/windows-environment-dev.yml index 0c2ae00ce2..7e37d78054 100644 --- a/conda-envs/windows-environment-dev.yml +++ b/conda-envs/windows-environment-dev.yml @@ -9,6 +9,7 @@ dependencies: - blas - cachetools>=4.2.1 - cloudpickle +- narwhals>=2.11.0 - numpy>=1.25.0 - pandas>=0.24.0 - pip diff --git a/conda-envs/windows-environment-test.yml b/conda-envs/windows-environment-test.yml index ee711e3a23..a84ed0995f 100644 --- a/conda-envs/windows-environment-test.yml +++ b/conda-envs/windows-environment-test.yml @@ -11,6 +11,7 @@ dependencies: - cloudpickle - libpython - mkl-service>=2.3.0 +- narwhals>=2.11.0 - numpy>=1.25.0 - pandas>=0.24.0 - pip diff --git a/pymc/data.py b/pymc/data.py index c9d8dae3eb..bf497903f1 100644 --- a/pymc/data.py +++ b/pymc/data.py @@ -11,25 +11,25 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import importlib import io import typing import urllib.request from collections.abc import Sequence from copy import copy +from functools import singledispatch from typing import Union, cast import narwhals as nw import numpy as np -import pandas as pd import pytensor import pytensor.tensor as pt import xarray as xr from narwhals.typing import IntoFrameT, IntoSeriesT +from pytensor.compile import SharedVariable from pytensor.compile.builders import OpFromGraph -from pytensor.compile.sharedvalue import SharedVariable from pytensor.graph.basic import Variable from pytensor.raise_op import Assert from pytensor.tensor.random.basic import IntegersRV @@ -163,60 +163,159 @@ def Minibatch(variable: TensorVariable, *variables: TensorVariable, batch_size: return mb_tensors if len(variables) else mb_tensors[0] +def _handle_none_dims(dims: Sequence[str] | None, ndim: int) -> Sequence[str] | Sequence[None]: + if dims is None: + return [None] * ndim + else: + return dims + + +@singledispatch def determine_coords( - model, - value: pd.DataFrame | pd.Series | xr.DataArray, + value, + model: "Model", + dims: Sequence[str | None] | None = None, + coords: dict[str, Sequence | np.ndarray] | None = None, +): + """Determine coordinate values from data or the model (via ``dims``).""" + raise NotImplementedError( + f"Cannot determine coordinates for data of type {type(value)}, please provide `coords` explicitly or " + f"convert the data to a supported type" + ) + + +@determine_coords.register(np.ndarray) +def determine_array_coords( + value: np.ndarray, + model: "Model", dims: Sequence[str] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, +): + if coords is None: + coords = {} + + if len(dims) != value.ndim: + raise ShapeError( + "Invalid data shape. The rank of the dataset must match the length of `dims`.", + actual=value.shape, + expected=len(value.shape), + ) + + for size, dim in zip(value.shape, dims): + coord = model.coords.get(dim, None) + if coord is None and dim is not None: + coords[dim] = range(size) + + return coords, _handle_none_dims(dims, value.ndim) + + +@determine_coords.register(xr.DataArray) +def determine_xarray_coords( + value: xr.DataArray, + model: "Model", + dims: Sequence[str | None] | None = None, + coords: dict[str, Sequence | np.ndarray] | None = None, ) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str] | Sequence[None]]: - """Determine coordinate values from data or the model (via ``dims``).""" if coords is None: coords = {} - dim_name = None - # If value is a df or a series, we interpret the index as coords: - if hasattr(value, "index"): - if dims is not None: - dim_name = dims[0] - if dim_name is None and value.index.name is not None: - dim_name = value.index.name - if dim_name is not None: - coords[dim_name] = value.index - - # If value is a df, we also interpret the columns as coords: - if hasattr(value, "columns"): - if dims is not None: - dim_name = dims[1] - if dim_name is None and nw.dependencies.is_pandas_dataframe(value) and value.columns.name is not None: - dim_name = value.columns.name - if dim_name is not None: - coords[dim_name] = value.columns - - if isinstance(value, xr.DataArray): - if dims is not None: - for dim in dims: - dim_name = dim - # str is applied because dim entries may be None - coords[str(dim_name)] = cast(xr.DataArray, value[dim]).to_numpy() - - elif (isinstance(value, np.ndarray) or nw.dependencies.is_polars_series(value)) and dims is not None: - if len(dims) != len(value.shape): # Polars objects have no .ndim ... - raise ShapeError( - "Invalid data shape. The rank of the dataset must match the length of `dims`.", - actual=value.shape, - expected=len(value.shape), - ) - for size, dim in zip(value.shape, dims): - coord = model.coords.get(dim, None) - if coord is None and dim is not None: - coords[dim] = range(size) + if dims is None: + return coords, _handle_none_dims(dims, value.ndim) + + for dim in dims: + dim_name = dim + # str is applied because dim entries may be None + coords[str(dim_name)] = cast(xr.DataArray, value[dim]).to_numpy() + + return coords, _handle_none_dims(dims, value.ndim) + + +def _dataframe_agnostic_coords( + value: IntoFrameT, + model: "Model", + ndim_in: int = 2, + dims: Sequence[str | None] | None = None, + coords: dict[str, Sequence | np.ndarray] | None = None, +) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str] | Sequence[None]]: + if coords is None: + coords = {} + + value = nw.from_native(value, allow_series=False) + if isinstance(value, nw.LazyFrame): + value = value.collect() + + index = nw.maybe_get_index(value) + if index is not None: + value = value.with_columns(**{index.name: index.to_numpy()}) if dims is None: - # TODO: Also determine dim names from the index - new_dims: Sequence[str] | Sequence[None] = [None] * np.ndim(value) + return coords, _handle_none_dims(dims, ndim_in) + + if len(dims) != ndim_in: + raise ShapeError( + "Invalid data shape. The rank of the dataset must match the length of `dims`.", + actual=value.shape, + expected=len(dims), + ) + + index_dim = dims[0] + if index_dim is not None and index_dim in value.columns: + coords[index_dim] = value.select(nw.col(index_dim)).to_numpy() + elif index_dim in model.coords: + coords[index_dim] = model.coords[index_dim] else: - new_dims = dims - return coords, new_dims + raise ValueError( + f"Dimension '{index_dim}' not found in DataFrame columns or model coordinates. Cannot infer " + "index coordinates." + ) + + if len(dims) > 1: + column_dim = dims[1] + if column_dim is not None: + coords[column_dim] = value.select(nw.exclude(index_dim)).columns + + return coords, _handle_none_dims(dims, ndim_in) + + +def _series_agnostic_coords( + value: IntoSeriesT, + model: "Model", + dims: Sequence[str | None] | None = None, + coords: dict[str, Sequence | np.ndarray] | None = None, +) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str] | Sequence[None]]: + value = nw.from_native(value, series_only=True).to_frame() + return _dataframe_agnostic_coords(value, ndim_in=1, model=model, dims=dims, coords=coords) + + +def _register_dataframe_backend(library_name: str): + try: + library = importlib.import_module(library_name) + + @determine_coords.register(library.Series) + def determine_series_coords( + value: library.DataFrame | library.Series, + model: "Model", + dims: Sequence[str] | None = None, + coords: dict[str, Sequence | np.ndarray] | None = None, + ) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str] | Sequence[None]]: + return _series_agnostic_coords(value, model=model, dims=dims, coords=coords) + + @determine_coords.register(library.DataFrame) + def determine_dataframe_coords( + value: library.DataFrame | library.Series, + model: "Model", + dims: Sequence[str] | None = None, + coords: dict[str, Sequence | np.ndarray] | None = None, + ) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str] | Sequence[None]]: + return _dataframe_agnostic_coords(value, model=model, dims=dims, coords=coords) + + except ImportError: + pass + + +_register_dataframe_backend("pandas") +_register_dataframe_backend("polars") +_register_dataframe_backend("dask.dataframe") def Data( @@ -337,7 +436,7 @@ def Data( new_dims: Sequence[str] | Sequence[None] | None if infer_dims_and_coords: - coords, new_dims = determine_coords(model, value, dims) + coords, new_dims = determine_coords(value, model, dims) else: new_dims = dims diff --git a/pymc/pytensorf.py b/pymc/pytensorf.py index d7e097f6dc..7b1be7f619 100644 --- a/pymc/pytensorf.py +++ b/pymc/pytensorf.py @@ -11,13 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import importlib import warnings from collections.abc import Iterable, Sequence from typing import cast +import narwhals as nw import numpy as np -import pandas as pd import pytensor import pytensor.tensor as pt import scipy.sparse as sps @@ -128,11 +129,32 @@ def convert_data(data) -> np.ndarray | Variable: return smarttypeX(ret) -@_as_tensor_variable.register(pd.Series) -@_as_tensor_variable.register(pd.DataFrame) -def dataframe_to_tensor_variable(df: pd.DataFrame, *args, **kwargs) -> TensorVariable: - return pt.as_tensor_variable(df.to_numpy(), *args, **kwargs) +# Optional registrations for DataFrame packages +def _register_dataframe_backend(library_name: str): + try: + library = importlib.import_module(library_name) + + @_as_tensor_variable.register(library.Series) + def series_to_tensor_variable(s: library.Series, *args, **kwargs) -> TensorVariable: + s = nw.from_native(s, allow_series=False) + if isinstance(s, nw.LazyFrame): + s = s.collect() + return pt.as_tensor_variable(s.to_numpy(), *args, **kwargs) + + @_as_tensor_variable.register(library.DataFrame) + def dataframe_to_tensor_variable(df: library.DataFrame, *args, **kwargs) -> TensorVariable: + df = nw.from_native(df, allow_series=False) + if isinstance(df, nw.LazyFrame): + df = df.collect() + return pt.as_tensor_variable(df.to_numpy(), *args, **kwargs) + + except ImportError: + pass + +_register_dataframe_backend("pandas") +_register_dataframe_backend("polars") +_register_dataframe_backend("dask.dataframe") _cheap_eval_mode = Mode(linker="py", optimizer="minimum_compile") diff --git a/requirements-dev.txt b/requirements-dev.txt index 22bcdaf9ea..ad721036e1 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -9,6 +9,7 @@ jupyter-sphinx mcbackend>=0.4.0 mypy==1.15.0 myst-nb<=1.0.0 +narwhals>=2.11.0 numdifftools>=0.9.40 numpy>=1.25.0 numpydoc diff --git a/requirements.txt b/requirements.txt index 8401b78a15..7aeb3d945f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ arviz>=0.13.0 cachetools>=4.2.1 cloudpickle +narwhals>=2.11.0 numpy>=1.25.0 pandas>=0.24.0 pytensor>=2.35.0,<2.36 diff --git a/tests/test_data.py b/tests/test_data.py index afca1831a7..aa087a489c 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -401,6 +401,42 @@ def test_implicit_coords_dataframe(self, seeded_test): assert "columns" in pmodel.coords assert pmodel.named_vars_to_dims == {"observations": ("rows", "columns")} + def test_implict_coords_polars_series(self): + pl = pytest.importorskip("polars") + + ser_sales = pl.Series( + "sales", + np.random.randint(low=0, high=30, size=22), + ) + + with pm.Model(coords={"date": range(22)}) as pmodel: + pm.Data("sales", ser_sales, dims=["date"], infer_dims_and_coords=True) + + with pytest.raises( + ValueError, + match="Dimension 'date2' not found in DataFrame columns or model coordinates", + ): + pm.Data("sales_invalid", ser_sales, dims=["date2"], infer_dims_and_coords=True) + + assert "date" in pmodel.coords + assert len(pmodel.coords["date"]) == 22 + + def test_implicit_coords_polars_dataframe(self): + pl = pytest.importorskip("polars") + + size = (5, 7) + df_data = pl.DataFrame( + np.random.normal(size=size), + schema={f"Column {c + 1}": pl.Float64 for c in range(size[1])}, + ).with_row_count("rows") + + with pm.Model() as pmodel: + pm.Data("observations", df_data, dims=("rows", "columns"), infer_dims_and_coords=True) + + assert "rows" in pmodel.coords + assert "columns" in pmodel.coords + assert pmodel.named_vars_to_dims == {"observations": ("rows", "columns")} + def test_implicit_coords_xarray(self): xr = pytest.importorskip("xarray") data = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=("y", "x")) diff --git a/tests/test_pytensorf.py b/tests/test_pytensorf.py index d172c61a4d..916a91b3b0 100644 --- a/tests/test_pytensorf.py +++ b/tests/test_pytensorf.py @@ -58,8 +58,20 @@ np.ones(shape=(10, 1)), ], ) -def test_pd_dataframe_as_tensor_variable(np_array: np.ndarray) -> None: - df = pd.DataFrame(np_array) +@pytest.mark.parametrize("library", ["pandas", "polars", "dask.dataframe"]) +def test_dataframe_as_tensor_variable(np_array: np.ndarray, library) -> None: + lib = pytest.importorskip(library) + col_names = [f"col_{i}" for i in range(np_array.shape[1])] + match library: + case "polars": + df = lib.DataFrame(np_array, schema=dict.fromkeys(col_names, float)) + case "dask.dataframe": + df = lib.DataFrame.from_dict({col: np_array[:, i] for i, col in enumerate(col_names)}) + case "pandas": + df = lib.DataFrame(np_array, columns=col_names) + case _: + raise ValueError(f"Unsupported library: {library}") + np.testing.assert_array_equal(pt.as_tensor_variable(df).eval(), np_array) @@ -67,8 +79,10 @@ def test_pd_dataframe_as_tensor_variable(np_array: np.ndarray) -> None: argnames="np_array", argvalues=[np.array([1.0, 2.0, -1.0]), np.ones(shape=4), np.zeros(shape=10), [1, 2, 3, 4]], ) -def test_pd_series_as_tensor_variable(np_array: np.ndarray) -> None: - df = pd.Series(np_array) +@pytest.mark.parametrize("library", ["pandas", "polars", "dask.dataframe"]) +def test_series_as_tensor_variable(np_array: np.ndarray, library) -> None: + lib = pytest.importorskip(library) + df = lib.Series(np_array) np.testing.assert_array_equal(pt.as_tensor_variable(df).eval(), np_array) From edc1fe79e92f907a742326fe71312b8791cbc9c8 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Sun, 16 Nov 2025 18:50:44 -0600 Subject: [PATCH 03/10] fix typehint --- pymc/data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pymc/data.py b/pymc/data.py index bf497903f1..a802c04676 100644 --- a/pymc/data.py +++ b/pymc/data.py @@ -293,7 +293,7 @@ def _register_dataframe_backend(library_name: str): @determine_coords.register(library.Series) def determine_series_coords( - value: library.DataFrame | library.Series, + value: IntoSeriesT, model: "Model", dims: Sequence[str] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, @@ -302,7 +302,7 @@ def determine_series_coords( @determine_coords.register(library.DataFrame) def determine_dataframe_coords( - value: library.DataFrame | library.Series, + value: IntoFrameT, model: "Model", dims: Sequence[str] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, From 86a7b0514210866c82ec41727ec374d9bbdd3299 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Sun, 16 Nov 2025 19:28:12 -0600 Subject: [PATCH 04/10] mypy T___T --- pymc/data.py | 46 ++++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/pymc/data.py b/pymc/data.py index a802c04676..608cba7c60 100644 --- a/pymc/data.py +++ b/pymc/data.py @@ -27,7 +27,7 @@ import pytensor.tensor as pt import xarray as xr -from narwhals.typing import IntoFrameT, IntoSeriesT +from narwhals.typing import IntoFrameT, IntoLazyFrameT, IntoSeriesT from pytensor.compile import SharedVariable from pytensor.compile.builders import OpFromGraph from pytensor.graph.basic import Variable @@ -163,7 +163,9 @@ def Minibatch(variable: TensorVariable, *variables: TensorVariable, batch_size: return mb_tensors if len(variables) else mb_tensors[0] -def _handle_none_dims(dims: Sequence[str] | None, ndim: int) -> Sequence[str] | Sequence[None]: +def _handle_none_dims( + dims: Sequence[str | None] | None, ndim: int +) -> Sequence[str | None] | Sequence[None]: if dims is None: return [None] * ndim else: @@ -176,7 +178,7 @@ def determine_coords( model: "Model", dims: Sequence[str | None] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, -): +) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: """Determine coordinate values from data or the model (via ``dims``).""" raise NotImplementedError( f"Cannot determine coordinates for data of type {type(value)}, please provide `coords` explicitly or " @@ -190,10 +192,13 @@ def determine_array_coords( model: "Model", dims: Sequence[str] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, -): +) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: if coords is None: coords = {} + if dims is None: + return coords, _handle_none_dims(dims, value.ndim) + if len(dims) != value.ndim: raise ShapeError( "Invalid data shape. The rank of the dataset must match the length of `dims`.", @@ -215,7 +220,7 @@ def determine_xarray_coords( model: "Model", dims: Sequence[str | None] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, -) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str] | Sequence[None]]: +) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: if coords is None: coords = {} @@ -231,16 +236,16 @@ def determine_xarray_coords( def _dataframe_agnostic_coords( - value: IntoFrameT, + value: IntoFrameT | IntoLazyFrameT | nw.DataFrame | nw.LazyFrame, model: "Model", ndim_in: int = 2, dims: Sequence[str | None] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, -) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str] | Sequence[None]]: +) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: if coords is None: coords = {} - value = nw.from_native(value, allow_series=False) + value = cast(nw.DataFrame | nw.LazyFrame, nw.from_native(value, allow_series=False)) # type: ignore[type-var] if isinstance(value, nw.LazyFrame): value = value.collect() @@ -260,9 +265,9 @@ def _dataframe_agnostic_coords( index_dim = dims[0] if index_dim is not None and index_dim in value.columns: - coords[index_dim] = value.select(nw.col(index_dim)).to_numpy() + coords[index_dim] = tuple(value.select(nw.col(index_dim)).to_numpy()) elif index_dim in model.coords: - coords[index_dim] = model.coords[index_dim] + coords[index_dim] = model.coords[index_dim] # type: ignore[assignment] else: raise ValueError( f"Dimension '{index_dim}' not found in DataFrame columns or model coordinates. Cannot infer " @@ -282,9 +287,15 @@ def _series_agnostic_coords( model: "Model", dims: Sequence[str | None] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, -) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str] | Sequence[None]]: - value = nw.from_native(value, series_only=True).to_frame() - return _dataframe_agnostic_coords(value, ndim_in=1, model=model, dims=dims, coords=coords) +) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: + value = cast(nw.Series, nw.from_native(value, series_only=True)) # type: ignore[assignment] + return _dataframe_agnostic_coords( + cast(nw.DataFrame | nw.LazyFrame, value.to_frame()), # type: ignore[attr-defined] + ndim_in=1, + model=model, + dims=dims, + coords=coords, + ) # type: ignore[arg-type] def _register_dataframe_backend(library_name: str): @@ -297,7 +308,7 @@ def determine_series_coords( model: "Model", dims: Sequence[str] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, - ) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str] | Sequence[None]]: + ) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: return _series_agnostic_coords(value, model=model, dims=dims, coords=coords) @determine_coords.register(library.DataFrame) @@ -306,7 +317,7 @@ def determine_dataframe_coords( model: "Model", dims: Sequence[str] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, - ) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str] | Sequence[None]]: + ) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: return _dataframe_agnostic_coords(value, model=model, dims=dims, coords=coords) except ImportError: @@ -366,6 +377,9 @@ def Data( infer_dims_and_coords : bool, default=False If True, the ``Data`` container will try to infer what the coordinates and dimension names should be if there is an index in ``value``. + model : pymc.Model, optional + Model to which to add the data variable. If not specified, the data variable + will be added to the model on the context stack. **kwargs : dict, optional Extra arguments passed to :func:`pytensor.shared`. @@ -434,7 +448,7 @@ def Data( expected=x.ndim, ) - new_dims: Sequence[str] | Sequence[None] | None + new_dims: Sequence[str | None] | Sequence[None] | None if infer_dims_and_coords: coords, new_dims = determine_coords(value, model, dims) else: From d32abc8a48acd1ac22ecc74b58e5d0faa0eb259e Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Sun, 16 Nov 2025 20:39:23 -0600 Subject: [PATCH 05/10] Helpful robot --- pymc/pytensorf.py | 3 ++- tests/test_data.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pymc/pytensorf.py b/pymc/pytensorf.py index 7b1be7f619..713a5bb72f 100644 --- a/pymc/pytensorf.py +++ b/pymc/pytensorf.py @@ -136,7 +136,7 @@ def _register_dataframe_backend(library_name: str): @_as_tensor_variable.register(library.Series) def series_to_tensor_variable(s: library.Series, *args, **kwargs) -> TensorVariable: - s = nw.from_native(s, allow_series=False) + s = nw.from_native(s, allow_series=True) if isinstance(s, nw.LazyFrame): s = s.collect() return pt.as_tensor_variable(s.to_numpy(), *args, **kwargs) @@ -149,6 +149,7 @@ def dataframe_to_tensor_variable(df: library.DataFrame, *args, **kwargs) -> Tens return pt.as_tensor_variable(df.to_numpy(), *args, **kwargs) except ImportError: + # Data backends are optional. Take no action if not installed. pass diff --git a/tests/test_data.py b/tests/test_data.py index aa087a489c..5e71158b6b 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -401,7 +401,7 @@ def test_implicit_coords_dataframe(self, seeded_test): assert "columns" in pmodel.coords assert pmodel.named_vars_to_dims == {"observations": ("rows", "columns")} - def test_implict_coords_polars_series(self): + def test_implicit_coords_polars_series(self): pl = pytest.importorskip("polars") ser_sales = pl.Series( From 8a10ba3db88ee3a09ef8d54a3ad71a4e5d848a63 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Sun, 16 Nov 2025 20:45:49 -0600 Subject: [PATCH 06/10] Remove dask series test (no narwhals support) --- tests/test_pytensorf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pytensorf.py b/tests/test_pytensorf.py index 916a91b3b0..9010dd14d0 100644 --- a/tests/test_pytensorf.py +++ b/tests/test_pytensorf.py @@ -79,7 +79,7 @@ def test_dataframe_as_tensor_variable(np_array: np.ndarray, library) -> None: argnames="np_array", argvalues=[np.array([1.0, 2.0, -1.0]), np.ones(shape=4), np.zeros(shape=10), [1, 2, 3, 4]], ) -@pytest.mark.parametrize("library", ["pandas", "polars", "dask.dataframe"]) +@pytest.mark.parametrize("library", ["pandas", "polars"]) def test_series_as_tensor_variable(np_array: np.ndarray, library) -> None: lib = pytest.importorskip(library) df = lib.Series(np_array) From f65ad779705576e5f19be00a42ee90b541ad4425 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Sun, 16 Nov 2025 20:52:44 -0600 Subject: [PATCH 07/10] Respond to feedback --- pymc/data.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/pymc/data.py b/pymc/data.py index 608cba7c60..68e6a31cf6 100644 --- a/pymc/data.py +++ b/pymc/data.py @@ -264,15 +264,16 @@ def _dataframe_agnostic_coords( ) index_dim = dims[0] - if index_dim is not None and index_dim in value.columns: - coords[index_dim] = tuple(value.select(nw.col(index_dim)).to_numpy()) - elif index_dim in model.coords: - coords[index_dim] = model.coords[index_dim] # type: ignore[assignment] - else: - raise ValueError( - f"Dimension '{index_dim}' not found in DataFrame columns or model coordinates. Cannot infer " - "index coordinates." - ) + if index_dim is not None: + if index_dim in value.columns: + coords[index_dim] = tuple(value.select(nw.col(index_dim)).to_numpy().flatten()) + elif index_dim in model.coords: + coords[index_dim] = model.coords[index_dim] # type: ignore[assignment] + else: + raise ValueError( + f"Dimension '{index_dim}' not found in DataFrame columns or model coordinates. Cannot infer " + "index coordinates." + ) if len(dims) > 1: column_dim = dims[1] @@ -321,6 +322,7 @@ def determine_dataframe_coords( return _dataframe_agnostic_coords(value, model=model, dims=dims, coords=coords) except ImportError: + # Dataframe backends are optional pass @@ -364,7 +366,7 @@ def Data( A value to associate with this variable. dims : str, tuple of str or tuple of None, optional Dimension names of the random variables (as opposed to the shapes of these - random variables). Use this when ``value`` is a pandas Series or DataFrame. The + random variables). Use this when ``value`` is a Series or DataFrame. The ``dims`` will then be the name of the Series / DataFrame's columns. See ArviZ documentation for more information about dimensions and coordinates: :ref:`arviz:quickstart`. From a5da840804edffb2ec000342c9eab81ad817aa86 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Sun, 16 Nov 2025 21:02:36 -0600 Subject: [PATCH 08/10] mypy --- pymc/data.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pymc/data.py b/pymc/data.py index 68e6a31cf6..c945eacd5f 100644 --- a/pymc/data.py +++ b/pymc/data.py @@ -278,7 +278,8 @@ def _dataframe_agnostic_coords( if len(dims) > 1: column_dim = dims[1] if column_dim is not None: - coords[column_dim] = value.select(nw.exclude(index_dim)).columns + select_expr = nw.exclude(index_dim) if index_dim is not None else nw.all() + coords[column_dim] = value.select(select_expr).columns return coords, _handle_none_dims(dims, ndim_in) From 3553f009f04c744eeb5028135fcf5e52d29d40e4 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Mon, 24 Nov 2025 21:12:27 -0600 Subject: [PATCH 09/10] Small bugfixes and test --- pymc/data.py | 49 +++++++++++++++++--------------- tests/test_data.py | 69 +++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 89 insertions(+), 29 deletions(-) diff --git a/pymc/data.py b/pymc/data.py index c945eacd5f..6a7c3184a4 100644 --- a/pymc/data.py +++ b/pymc/data.py @@ -27,7 +27,7 @@ import pytensor.tensor as pt import xarray as xr -from narwhals.typing import IntoFrameT, IntoLazyFrameT, IntoSeriesT +from narwhals.typing import IntoFrameT, IntoSeriesT from pytensor.compile import SharedVariable from pytensor.compile.builders import OpFromGraph from pytensor.graph.basic import Variable @@ -174,11 +174,11 @@ def _handle_none_dims( @singledispatch def determine_coords( - value, + value: typing.Any, model: "Model", dims: Sequence[str | None] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, -) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: +) -> tuple[typing.Any, dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: """Determine coordinate values from data or the model (via ``dims``).""" raise NotImplementedError( f"Cannot determine coordinates for data of type {type(value)}, please provide `coords` explicitly or " @@ -192,12 +192,12 @@ def determine_array_coords( model: "Model", dims: Sequence[str] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, -) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: +) -> tuple[np.ndarray, dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: if coords is None: coords = {} if dims is None: - return coords, _handle_none_dims(dims, value.ndim) + return value, coords, _handle_none_dims(dims, value.ndim) if len(dims) != value.ndim: raise ShapeError( @@ -211,7 +211,7 @@ def determine_array_coords( if coord is None and dim is not None: coords[dim] = range(size) - return coords, _handle_none_dims(dims, value.ndim) + return value, coords, _handle_none_dims(dims, value.ndim) @determine_coords.register(xr.DataArray) @@ -220,28 +220,28 @@ def determine_xarray_coords( model: "Model", dims: Sequence[str | None] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, -) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: +) -> tuple[xr.DataArray, dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: if coords is None: coords = {} if dims is None: - return coords, _handle_none_dims(dims, value.ndim) + return value, coords, _handle_none_dims(dims, value.ndim) for dim in dims: dim_name = dim # str is applied because dim entries may be None coords[str(dim_name)] = cast(xr.DataArray, value[dim]).to_numpy() - return coords, _handle_none_dims(dims, value.ndim) + return value, coords, _handle_none_dims(dims, value.ndim) def _dataframe_agnostic_coords( - value: IntoFrameT | IntoLazyFrameT | nw.DataFrame | nw.LazyFrame, + value: IntoFrameT, model: "Model", ndim_in: int = 2, dims: Sequence[str | None] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, -) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: +) -> tuple[IntoFrameT, dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: if coords is None: coords = {} @@ -249,12 +249,12 @@ def _dataframe_agnostic_coords( if isinstance(value, nw.LazyFrame): value = value.collect() - index = nw.maybe_get_index(value) - if index is not None: - value = value.with_columns(**{index.name: index.to_numpy()}) - if dims is None: - return coords, _handle_none_dims(dims, ndim_in) + if ndim_in == 1: + value = value[value.columns[0]] + return value.to_native(), coords, _handle_none_dims(dims, ndim_in) + + index = nw.maybe_get_index(value) if len(dims) != ndim_in: raise ShapeError( @@ -265,13 +265,13 @@ def _dataframe_agnostic_coords( index_dim = dims[0] if index_dim is not None: - if index_dim in value.columns: - coords[index_dim] = tuple(value.select(nw.col(index_dim)).to_numpy().flatten()) + if index is not None: + coords[index_dim] = tuple(index) elif index_dim in model.coords: coords[index_dim] = model.coords[index_dim] # type: ignore[assignment] else: raise ValueError( - f"Dimension '{index_dim}' not found in DataFrame columns or model coordinates. Cannot infer " + f"Dimension '{index_dim}' not found in DataFrame index or model coordinates. Cannot infer " "index coordinates." ) @@ -281,7 +281,10 @@ def _dataframe_agnostic_coords( select_expr = nw.exclude(index_dim) if index_dim is not None else nw.all() coords[column_dim] = value.select(select_expr).columns - return coords, _handle_none_dims(dims, ndim_in) + if ndim_in == 1: + value = value[value.columns[0]] + + return value.to_native(), coords, _handle_none_dims(dims, ndim_in) def _series_agnostic_coords( @@ -319,7 +322,9 @@ def determine_dataframe_coords( model: "Model", dims: Sequence[str] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, - ) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: + ) -> tuple[ + IntoFrameT, dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None] + ]: return _dataframe_agnostic_coords(value, model=model, dims=dims, coords=coords) except ImportError: @@ -453,7 +458,7 @@ def Data( new_dims: Sequence[str | None] | Sequence[None] | None if infer_dims_and_coords: - coords, new_dims = determine_coords(value, model, dims) + value, coords, new_dims = determine_coords(value, model, dims) else: new_dims = dims diff --git a/tests/test_data.py b/tests/test_data.py index 5e71158b6b..3cd579fc57 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -414,7 +414,7 @@ def test_implicit_coords_polars_series(self): with pytest.raises( ValueError, - match="Dimension 'date2' not found in DataFrame columns or model coordinates", + match="Dimension 'date2' not found in DataFrame index or model coordinates", ): pm.Data("sales_invalid", ser_sales, dims=["date2"], infer_dims_and_coords=True) @@ -428,14 +428,69 @@ def test_implicit_coords_polars_dataframe(self): df_data = pl.DataFrame( np.random.normal(size=size), schema={f"Column {c + 1}": pl.Float64 for c in range(size[1])}, - ).with_row_count("rows") + ) - with pm.Model() as pmodel: - pm.Data("observations", df_data, dims=("rows", "columns"), infer_dims_and_coords=True) + # We currently count on the presence of an index in the DataFrame to infer dims. Polars has no index, so + # this case errors because we can't find the 'rows' dim. - assert "rows" in pmodel.coords - assert "columns" in pmodel.coords - assert pmodel.named_vars_to_dims == {"observations": ("rows", "columns")} + with pytest.raises( + ValueError, match="Dimension 'rows' not found in DataFrame index or model coordinates" + ): + with pm.Model() as pmodel: + pm.Data( + "observations", df_data, dims=("rows", "columns"), infer_dims_and_coords=True + ) + + def test_implicit_coords_agnostic(self): + pl = pytest.importorskip("polars") + pd = pytest.importorskip("pandas") + + size = (5, 7) + data_np = np.random.normal(size=size) + columns = [f"C{c + 1}" for c in range(size[1])] + rows = [f"R{r + 1}" for r in range(size[0])] + df_pd = pd.DataFrame(data_np, columns=columns, index=rows) + df_pd.index.name = "rows" + df_pl = pl.DataFrame( + data_np, + schema=dict.fromkeys(columns, pl.Float64), + ) + + def make_model(coords, df, dims, infer_dims_and_coords) -> pm.Model: + with pm.Model(coords=coords) as pmodel: + pm.Data("X", df, dims=dims, infer_dims_and_coords=infer_dims_and_coords) + return pmodel + + expected_coords = {"rows": tuple(rows), "columns": tuple(columns)} + dims = ("rows", "columns") + + m = make_model(coords=None, df=df_pd, dims=dims, infer_dims_and_coords=True) + assert m.coords == expected_coords + np.testing.assert_allclose(m["X"].eval(), df_pd.values) + + # TODO: Is infer_dims_and_coords supposed to infer dims? The current behavior is that it doesn't, it only + # infers the dimension labels. + for df in [df_pd, df_pl]: + m = make_model(coords=None, df=df, dims=None, infer_dims_and_coords=True) + assert m.coords == {} + + m = make_model(coords=None, df=df, dims=dims, infer_dims_and_coords=False) + assert m.coords == {"rows": None, "columns": None} + + m = make_model(coords=None, df=df, dims=None, infer_dims_and_coords=False) + assert m.coords == {} + + # Pandas is special because we will infer the index dim from the DataFrame index, if one exists. + m = make_model(coords=None, df=df_pd, dims=dims, infer_dims_and_coords=True) + assert m.coords == expected_coords + + # Polars (and other dataframe backends with no index concept) won't infer dims from index. This case currently + # errors, because we can't find the 'rows' dim in either the DataFrame columns or the model coords. + with pytest.raises( + ValueError, + match="Dimension 'rows' not found in DataFrame index or model coordinates", + ): + make_model(coords=None, df=df_pl, dims=dims, infer_dims_and_coords=True) def test_implicit_coords_xarray(self): xr = pytest.importorskip("xarray") From ff7af5527c93987eb5994cc7637806dfcc0cbb8f Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Mon, 24 Nov 2025 21:27:28 -0600 Subject: [PATCH 10/10] code cleanup + mypy --- pymc/data.py | 47 +++++++++++++++++++---------------------------- 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/pymc/data.py b/pymc/data.py index 6a7c3184a4..8b1a99bced 100644 --- a/pymc/data.py +++ b/pymc/data.py @@ -178,7 +178,7 @@ def determine_coords( model: "Model", dims: Sequence[str | None] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, -) -> tuple[typing.Any, dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: +) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: """Determine coordinate values from data or the model (via ``dims``).""" raise NotImplementedError( f"Cannot determine coordinates for data of type {type(value)}, please provide `coords` explicitly or " @@ -192,12 +192,12 @@ def determine_array_coords( model: "Model", dims: Sequence[str] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, -) -> tuple[np.ndarray, dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: +) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: if coords is None: coords = {} if dims is None: - return value, coords, _handle_none_dims(dims, value.ndim) + return coords, _handle_none_dims(dims, value.ndim) if len(dims) != value.ndim: raise ShapeError( @@ -211,7 +211,7 @@ def determine_array_coords( if coord is None and dim is not None: coords[dim] = range(size) - return value, coords, _handle_none_dims(dims, value.ndim) + return coords, _handle_none_dims(dims, value.ndim) @determine_coords.register(xr.DataArray) @@ -220,46 +220,44 @@ def determine_xarray_coords( model: "Model", dims: Sequence[str | None] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, -) -> tuple[xr.DataArray, dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: +) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: if coords is None: coords = {} if dims is None: - return value, coords, _handle_none_dims(dims, value.ndim) + return coords, _handle_none_dims(dims, value.ndim) for dim in dims: dim_name = dim # str is applied because dim entries may be None coords[str(dim_name)] = cast(xr.DataArray, value[dim]).to_numpy() - return value, coords, _handle_none_dims(dims, value.ndim) + return coords, _handle_none_dims(dims, value.ndim) def _dataframe_agnostic_coords( - value: IntoFrameT, + value: IntoFrameT | IntoSeriesT, model: "Model", ndim_in: int = 2, dims: Sequence[str | None] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, -) -> tuple[IntoFrameT, dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: +) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: if coords is None: coords = {} - value = cast(nw.DataFrame | nw.LazyFrame, nw.from_native(value, allow_series=False)) # type: ignore[type-var] + value = nw.from_native(value, allow_series=ndim_in == 1) # type: ignore[call-overload] if isinstance(value, nw.LazyFrame): value = value.collect() if dims is None: - if ndim_in == 1: - value = value[value.columns[0]] - return value.to_native(), coords, _handle_none_dims(dims, ndim_in) + return coords, _handle_none_dims(dims, ndim_in) - index = nw.maybe_get_index(value) + index = nw.maybe_get_index(value) # type: ignore[arg-type] if len(dims) != ndim_in: raise ShapeError( "Invalid data shape. The rank of the dataset must match the length of `dims`.", - actual=value.shape, + actual=value.shape, # type: ignore[union-attr] expected=len(dims), ) @@ -278,13 +276,9 @@ def _dataframe_agnostic_coords( if len(dims) > 1: column_dim = dims[1] if column_dim is not None: - select_expr = nw.exclude(index_dim) if index_dim is not None else nw.all() - coords[column_dim] = value.select(select_expr).columns - - if ndim_in == 1: - value = value[value.columns[0]] + coords[column_dim] = value.columns # type: ignore[union-attr] - return value.to_native(), coords, _handle_none_dims(dims, ndim_in) + return coords, _handle_none_dims(dims, ndim_in) def _series_agnostic_coords( @@ -293,14 +287,13 @@ def _series_agnostic_coords( dims: Sequence[str | None] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, ) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: - value = cast(nw.Series, nw.from_native(value, series_only=True)) # type: ignore[assignment] return _dataframe_agnostic_coords( - cast(nw.DataFrame | nw.LazyFrame, value.to_frame()), # type: ignore[attr-defined] + value, ndim_in=1, model=model, dims=dims, coords=coords, - ) # type: ignore[arg-type] + ) def _register_dataframe_backend(library_name: str): @@ -322,9 +315,7 @@ def determine_dataframe_coords( model: "Model", dims: Sequence[str] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, - ) -> tuple[ - IntoFrameT, dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None] - ]: + ) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str | None] | Sequence[None]]: return _dataframe_agnostic_coords(value, model=model, dims=dims, coords=coords) except ImportError: @@ -458,7 +449,7 @@ def Data( new_dims: Sequence[str | None] | Sequence[None] | None if infer_dims_and_coords: - value, coords, new_dims = determine_coords(value, model, dims) + coords, new_dims = determine_coords(value, model, dims) else: new_dims = dims