From 539e468fa4911548af49511af40c3d2a366a35b5 Mon Sep 17 00:00:00 2001
From: Ben Sanderson <ben.m.sanderson@gmail.com>
Date: Sat, 23 May 2026 20:11:42 +0200
Subject: [PATCH 1/2] Fix two pandas 3.0 incompatibilities (StringDtype
 groupby, Series positional indexing)

pandas 3.0 introduced two changes that scmdata 0.18 trips on for any
multi-scenario ScmRun:

1. Default StringDtype inference. String columns now come back as
   pd.StringDtype rather than object. RunGroupBy.__init__ called
   numpy.issubdtype(col.dtype, numpy.number) to detect numeric meta
   columns; on StringDtype this raises
   'TypeError: Cannot interpret <StringDtype(...)> as a data type'.
   Route the check through pd.api.types.is_numeric_dtype instead,
   which returns False for StringDtype and True for numeric dtypes.
   (Unlike np.issubdtype(..., np.number), is_numeric_dtype also
   returns True for bool columns.)

2. Removal of Series positional integer indexing.
   _xarray._many_to_one ended with checker.groupby(col2).count().max()[0].
   max() on a DataFrame returns a label-indexed Series and pandas 3.0
   removed positional integer indexing on those, so [0] raises
   'KeyError: 0'. Use .iloc[0] instead: it has the same semantics but
   is explicitly positional.

Both calls are exercised by every multi-scenario ScmRun. The second
in particular blocks ScmRun.to_nc entirely on pandas 3.0, so any
downstream that streams scenarios to disk (e.g. openscm-runner's
NetCDFChunkWriter) currently cannot run.

The fixes are backward-compatible: pd.api.types.is_numeric_dtype and
Series.iloc[0] have been pandas's canonical APIs since well before
pandas 2.0.
---
 src/scmdata/_xarray.py | 5 ++++-
 src/scmdata/groupby.py | 8 ++++++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/scmdata/_xarray.py b/src/scmdata/_xarray.py
index c38e4830..a2510a37 100644
--- a/src/scmdata/_xarray.py
+++ b/src/scmdata/_xarray.py
@@ -198,7 +198,10 @@ def _many_to_one(df, col1, col2):
     # thanks https://stackoverflow.com/a/59091549
     checker = df[[col1, col2]].drop_duplicates()
 
-    max_count = checker.groupby(col2).count().max()[0]
+    # ``.iloc[0]`` rather than ``[0]``: pandas 3.0 removed positional
+    # integer indexing on label-indexed Series, so ``[0]`` would raise
+    # ``KeyError: 0`` on the Series returned by the chained ``.max()``.
+    max_count = checker.groupby(col2).count().max().iloc[0]
     if max_count < 1:  # pragma: no cover # emergency valve
         raise AssertionError
 
diff --git a/src/scmdata/groupby.py b/src/scmdata/groupby.py
index b261c5b4..83358205 100644
--- a/src/scmdata/groupby.py
+++ b/src/scmdata/groupby.py
@@ -57,8 +57,12 @@ def __init__(
         m = run.meta.reset_index(drop=True)
         self.na_fill_value = float(na_fill_value)
 
-        # Work around the bad handling of NaN values in groupbys
-        if any([np.issubdtype(m[c].dtype, np.number) for c in m]):
+        # Work around the bad handling of NaN values in groupbys.
+        # pd.api.types.is_numeric_dtype handles pandas extension dtypes,
+        # whereas np.issubdtype(..., np.number) raises on pandas 3.0's
+        # default StringDtype with ``TypeError: Cannot interpret
+        # '<StringDtype(...)>' as a data type``.
+        if any([pd.api.types.is_numeric_dtype(m[c]) for c in m]):
             if (m == na_fill_value).any(axis=None):
                 raise ValueError(
                     "na_fill_value conflicts with data value. Choose a na_fill_value "

From ebeb601a736f606ab2f9a719f49029177eac0f86 Mon Sep 17 00:00:00 2001
From: Ben Sanderson <ben.m.sanderson@gmail.com>
Date: Sat, 23 May 2026 20:12:18 +0200
Subject: [PATCH 2/2] Add changelog fragment for PR #321

---
 changelog/321.fix.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog/321.fix.md

diff --git a/changelog/321.fix.md b/changelog/321.fix.md
new file mode 100644
index 00000000..c4388cf9
--- /dev/null
+++ b/changelog/321.fix.md
@@ -0,0 +1 @@
+Restored compatibility with pandas 3.0 by replacing two calls that pandas 3.0 no longer accepts: `numpy.issubdtype(col.dtype, numpy.number)` in `RunGroupBy.__init__` (raised `TypeError` on `StringDtype` meta columns) and `Series[0]` positional indexing in `_xarray._many_to_one` (raised `KeyError: 0`). The previously failing `ScmRun.groupby` and `ScmRun.to_nc` paths now run on both pandas 2 and pandas 3.