From 539e468fa4911548af49511af40c3d2a366a35b5 Mon Sep 17 00:00:00 2001 From: Ben Sanderson Date: Sat, 23 May 2026 20:11:42 +0200 Subject: [PATCH 1/2] Fix two pandas 3.0 incompatibilities (StringDtype groupby, Series positional indexing) pandas 3.0 introduced two changes that scmdata 0.18 trips on for any multi-scenario ScmRun: 1. Default StringDtype inference. String columns now come back as pd.StringDtype rather than object. RunGroupBy.__init__ called numpy.issubdtype(col.dtype, numpy.number) to detect numeric meta columns; on StringDtype this raises 'TypeError: Cannot interpret <StringDtype(na_value=nan)> as a data type'. Route the check through pd.api.types.is_numeric_dtype instead, which returns False for StringDtype and True for numeric dtypes. 2. Removal of Series positional integer indexing. _xarray._many_to_one ended with checker.groupby(col2).count().max()[0]. max() on a DataFrame returns a label-indexed Series and pandas 3.0 removed positional integer indexing on those, so [0] raises 'KeyError: 0'. Use .iloc[0]: same semantics, explicit positional. Both calls are exercised by every multi-scenario ScmRun. The second in particular blocks ScmRun.to_nc entirely on pandas 3.0, so any downstream that streams scenarios to disk (e.g. openscm-runner's NetCDFChunkWriter) currently cannot run. The fixes are backward-compatible: pd.api.types.is_numeric_dtype and Series.iloc[0] have been pandas's canonical APIs since well before pandas 2.0. 
--- src/scmdata/_xarray.py | 5 ++++- src/scmdata/groupby.py | 8 ++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/scmdata/_xarray.py b/src/scmdata/_xarray.py index c38e4830..a2510a37 100644 --- a/src/scmdata/_xarray.py +++ b/src/scmdata/_xarray.py @@ -198,7 +198,10 @@ def _many_to_one(df, col1, col2): # thanks https://stackoverflow.com/a/59091549 checker = df[[col1, col2]].drop_duplicates() - max_count = checker.groupby(col2).count().max()[0] + # ``.iloc[0]`` rather than ``[0]``: pandas 3.0 removed positional + # integer indexing on label-indexed Series, so ``[0]`` would raise + # ``KeyError: 0`` on the Series returned by the chained ``.max()``. + max_count = checker.groupby(col2).count().max().iloc[0] if max_count < 1: # pragma: no cover # emergency valve raise AssertionError diff --git a/src/scmdata/groupby.py b/src/scmdata/groupby.py index b261c5b4..83358205 100644 --- a/src/scmdata/groupby.py +++ b/src/scmdata/groupby.py @@ -57,8 +57,12 @@ def __init__( m = run.meta.reset_index(drop=True) self.na_fill_value = float(na_fill_value) - # Work around the bad handling of NaN values in groupbys - if any([np.issubdtype(m[c].dtype, np.number) for c in m]): + # Work around the bad handling of NaN values in groupbys. + # pd.api.types.is_numeric_dtype accepts every dtype scmdata + # ever emits; np.issubdtype(..., np.number) raises on + # pandas 3.0's default StringDtype with + # ``TypeError: Cannot interpret ''``. + if any([pd.api.types.is_numeric_dtype(m[c]) for c in m]): if (m == na_fill_value).any(axis=None): raise ValueError( "na_fill_value conflicts with data value. 
Choose a na_fill_value " From ebeb601a736f606ab2f9a719f49029177eac0f86 Mon Sep 17 00:00:00 2001 From: Ben Sanderson Date: Sat, 23 May 2026 20:12:18 +0200 Subject: [PATCH 2/2] Add changelog fragment for PR #321 --- changelog/321.fix.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/321.fix.md diff --git a/changelog/321.fix.md b/changelog/321.fix.md new file mode 100644 index 00000000..c4388cf9 --- /dev/null +++ b/changelog/321.fix.md @@ -0,0 +1 @@ +Restored compatibility with pandas 3.0 by replacing two calls that pandas 3 no longer accepts: `numpy.issubdtype(col.dtype, numpy.number)` in `RunGroupBy.__init__` (raised on `StringDtype` meta columns) and `Series[0]` positional indexing in `_xarray._many_to_one` (raised `KeyError: 0`). The previously-failing `ScmRun.groupby` and `ScmRun.to_nc` paths now run on both pandas 2 and pandas 3.