From 2dad23143096aa7182edf1ecb79f38cdabd8c75a Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Thu, 21 Aug 2025 20:31:49 +0200 Subject: [PATCH 1/7] Add failing test --- ..._index_manipulation_ensure_index_is_multiindex.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/integration/index_manipulation/test_integration_index_manipulation_ensure_index_is_multiindex.py b/tests/integration/index_manipulation/test_integration_index_manipulation_ensure_index_is_multiindex.py index 4d81928..3fe4d47 100644 --- a/tests/integration/index_manipulation/test_integration_index_manipulation_ensure_index_is_multiindex.py +++ b/tests/integration/index_manipulation/test_integration_index_manipulation_ensure_index_is_multiindex.py @@ -9,7 +9,10 @@ import pytest from pandas_openscm.index_manipulation import ensure_index_is_multiindex -from pandas_openscm.testing import create_test_df +from pandas_openscm.testing import ( + convert_to_desired_type, + create_test_df, +) @pytest.mark.parametrize("copy, copy_exp", ((None, True), (True, True), (False, False))) @@ -71,12 +74,17 @@ def test_ensure_index_is_multiindex_no_op(copy, copy_exp): @pytest.mark.parametrize("copy, copy_exp", ((None, True), (True, True), (False, False))) -def test_accessor(setup_pandas_accessors, copy, copy_exp): +@pytest.mark.parametrize( + "pobj_type", + ("DataFrame", "Series"), +) +def test_accessor(setup_pandas_accessors, copy, copy_exp, pobj_type): start = pd.DataFrame( [[1, 2], [3, 4]], columns=[10, 20], index=pd.Index(["a", "b"], name="variable"), ) + start = convert_to_desired_type(start, pobj_type) call_kwargs = {} if copy is not None: From 556aec75414b07ce554b35d423ccfb7100b2e852 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Thu, 21 Aug 2025 20:33:18 +0200 Subject: [PATCH 2/7] Pass tests --- src/pandas_openscm/accessors/series.py | 81 +++++++++++++------------- 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/src/pandas_openscm/accessors/series.py 
b/src/pandas_openscm/accessors/series.py index 91c5708..787f80c 100644 --- a/src/pandas_openscm/accessors/series.py +++ b/src/pandas_openscm/accessors/series.py @@ -10,6 +10,7 @@ import pandas as pd from pandas_openscm.index_manipulation import ( + ensure_index_is_multiindex, set_index_levels_func, ) from pandas_openscm.unit_conversion import convert_unit, convert_unit_like @@ -181,46 +182,46 @@ def convert_unit_like( # If users want correct type hints, they should use the functional form. return res # type: ignore - # def ensure_index_is_multiindex(self, copy: bool = True) -> pd.DataFrame: - # """ - # Ensure that the index is a [pd.MultiIndex][pandas.MultiIndex] - # - # Parameters - # ---------- - # copy - # Whether to copy `df` before manipulating the index name - # - # Returns - # ------- - # : - # `df` with a [pd.MultiIndex][pandas.MultiIndex] - # - # If the index was already a [pd.MultiIndex][pandas.MultiIndex], - # this is a no-op (although the value of copy is respected). - # """ - # return ensure_index_is_multiindex(self._df, copy=copy) - # - # def eiim(self, copy: bool = True) -> pd.DataFrame: - # """ - # Ensure that the index is a [pd.MultiIndex][pandas.MultiIndex] - # - # Alias for [ensure_index_is_multiindex][pandas_openscm.index_manipulation.] - # - # Parameters - # ---------- - # copy - # Whether to copy `df` before manipulating the index name - # - # Returns - # ------- - # : - # `df` with a [pd.MultiIndex][pandas.MultiIndex] - # - # If the index was already a [pd.MultiIndex][pandas.MultiIndex], - # this is a no-op (although the value of copy is respected). 
- # """ - # return self.ensure_index_is_multiindex(copy=copy) - # + def ensure_index_is_multiindex(self, copy: bool = True) -> S: + """ + Ensure that the index is a [pd.MultiIndex][pandas.MultiIndex] + + Parameters + ---------- + copy + Whether to copy `series` before manipulating the index name + + Returns + ------- + : + `series` with a [pd.MultiIndex][pandas.MultiIndex] + + If the index was already a [pd.MultiIndex][pandas.MultiIndex], + this is a no-op (although the value of copy is respected). + """ + return ensure_index_is_multiindex(self._series, copy=copy) + + def eiim(self, copy: bool = True) -> pd.DataFrame: + """ + Ensure that the index is a [pd.MultiIndex][pandas.MultiIndex] + + Alias for [ensure_index_is_multiindex][pandas_openscm.index_manipulation.] + + Parameters + ---------- + copy + Whether to copy `series` before manipulating the index name + + Returns + ------- + : + `series` with a [pd.MultiIndex][pandas.MultiIndex] + + If the index was already a [pd.MultiIndex][pandas.MultiIndex], + this is a no-op (although the value of copy is respected). + """ + return self.ensure_index_is_multiindex(copy=copy) + # def fix_index_name_after_groupby_quantile( # self, new_name: str = "quantile", copy: bool = False # ) -> pd.DataFrame: From f71e6e81d2fca661db3f87a6aa83f45015a6689e Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Thu, 21 Aug 2025 20:44:54 +0200 Subject: [PATCH 3/7] mypy --- src/pandas_openscm/accessors/series.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pandas_openscm/accessors/series.py b/src/pandas_openscm/accessors/series.py index 787f80c..5a3a3bd 100644 --- a/src/pandas_openscm/accessors/series.py +++ b/src/pandas_openscm/accessors/series.py @@ -199,9 +199,11 @@ def ensure_index_is_multiindex(self, copy: bool = True) -> S: If the index was already a [pd.MultiIndex][pandas.MultiIndex], this is a no-op (although the value of copy is respected). 
""" - return ensure_index_is_multiindex(self._series, copy=copy) + res = ensure_index_is_multiindex(self._series, copy=copy) - def eiim(self, copy: bool = True) -> pd.DataFrame: + return res # type: ignore # something wront with generic type hinting + + def eiim(self, copy: bool = True) -> S: """ Ensure that the index is a [pd.MultiIndex][pandas.MultiIndex] From 1118414b15508fdd24c79a87f56b3149908fdee4 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Thu, 21 Aug 2025 20:55:21 +0200 Subject: [PATCH 4/7] Add failing tests for index --- ...manipulation_ensure_index_is_multiindex.py | 96 ++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) diff --git a/tests/integration/index_manipulation/test_integration_index_manipulation_ensure_index_is_multiindex.py b/tests/integration/index_manipulation/test_integration_index_manipulation_ensure_index_is_multiindex.py index 3fe4d47..f120118 100644 --- a/tests/integration/index_manipulation/test_integration_index_manipulation_ensure_index_is_multiindex.py +++ b/tests/integration/index_manipulation/test_integration_index_manipulation_ensure_index_is_multiindex.py @@ -8,7 +8,10 @@ import pandas as pd import pytest -from pandas_openscm.index_manipulation import ensure_index_is_multiindex +from pandas_openscm.index_manipulation import ( + ensure_index_is_multiindex, + ensure_is_multiindex, +) from pandas_openscm.testing import ( convert_to_desired_type, create_test_df, @@ -114,3 +117,94 @@ def test_accessor(setup_pandas_accessors, copy, copy_exp, pobj_type): else: # Same object returned assert id(start) == id(res_short) + + +def test_ensure_is_multiindex_index(): + start = pd.Index([1, 2, 3], name="id") + + res = ensure_is_multiindex(start) + + assert isinstance(res, pd.MultiIndex) + + exp = pd.MultiIndex.from_tuples( + [ + (1,), + (2,), + (3,), + ], + names=["id"], + ) + + pd.testing.assert_index_equal(res, exp) + + +def test_ensure_is_multiindex_index_no_name(): + start = pd.Index([1, 2, 3]) + + res = 
ensure_is_multiindex(start) + + assert isinstance(res, pd.MultiIndex) + + exp = pd.MultiIndex.from_tuples( + [ + (1,), + (2,), + (3,), + ], + names=[None], + ) + + pd.testing.assert_index_equal(res, exp) + + +def test_ensure_is_multiindex(): + start = pd.MultiIndex.from_tuples( + [ + ("a", "b"), + ("c", "d"), + ], + names=["mod", "scen"], + ) + + res = ensure_is_multiindex(start) + + # Same object returned + assert id(start) == id(res) + assert isinstance(res, pd.MultiIndex) + pd.testing.assert_index_equal(res, start) + + +def test_ensure_is_multiindex_accessor_index(setup_pandas_accessors): + start = pd.Index([1, 2, 3], name="id") + + res = start.openscm.ensure_is_multiindex() + + assert isinstance(res, pd.MultiIndex) + + exp = pd.MultiIndex.from_tuples( + [ + (1,), + (2,), + (3,), + ], + names=["id"], + ) + + pd.testing.assert_index_equal(res, exp) + + +def test_ensure_is_multiindex_accessor_multiindex(setup_pandas_accessors): + start = pd.MultiIndex.from_tuples( + [ + ("a", "b"), + ("c", "d"), + ], + names=["mod", "scen"], + ) + + res = start.openscm.ensure_is_multiindex() + + # Same object returned + assert id(start) == id(res) + assert isinstance(res, pd.MultiIndex) + pd.testing.assert_index_equal(res, start) From ad9e6192152a9496a9dfe792191d4b0462bdc280 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Thu, 21 Aug 2025 21:02:01 +0200 Subject: [PATCH 5/7] Add index accessor --- src/pandas_openscm/accessors/__init__.py | 3 +- src/pandas_openscm/accessors/index.py | 74 +++++++++++++++++++ tests/conftest.py | 4 + ...manipulation_ensure_index_is_multiindex.py | 4 + 4 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 src/pandas_openscm/accessors/index.py diff --git a/src/pandas_openscm/accessors/__init__.py b/src/pandas_openscm/accessors/__init__.py index 4dbb880..4588923 100644 --- a/src/pandas_openscm/accessors/__init__.py +++ b/src/pandas_openscm/accessors/__init__.py @@ -38,6 +38,7 @@ import pandas as pd from 
pandas_openscm.accessors.dataframe import PandasDataFrameOpenSCMAccessor +from pandas_openscm.accessors.index import PandasIndexOpenSCMAccessor from pandas_openscm.accessors.series import PandasSeriesOpenSCMAccessor @@ -73,4 +74,4 @@ def register_pandas_accessors(namespace: str = "openscm") -> None: PandasDataFrameOpenSCMAccessor ) pd.api.extensions.register_series_accessor(namespace)(PandasSeriesOpenSCMAccessor) - # pd.api.extensions.register_index_accessor(namespace)(PandasIndexOpenSCMAccessor) + pd.api.extensions.register_index_accessor(namespace)(PandasIndexOpenSCMAccessor) diff --git a/src/pandas_openscm/accessors/index.py b/src/pandas_openscm/accessors/index.py new file mode 100644 index 0000000..a658d32 --- /dev/null +++ b/src/pandas_openscm/accessors/index.py @@ -0,0 +1,74 @@ +""" +Accessor for [pd.Index][pandas.Index] (and sub-classes) +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Generic, TypeVar + +import pandas as pd + +from pandas_openscm.index_manipulation import ensure_is_multiindex + +if TYPE_CHECKING: + # Hmm this is somehow not correct. + # Figuring it out is a job for another day + Idx = TypeVar("Idx", bound=pd.Index[Any]) + + +else: + Idx = TypeVar("Idx") + + +class PandasIndexOpenSCMAccessor(Generic[Idx]): + """ + [pd.Index][pandas.Index] accessor + + For details, see + [pandas' docs](https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors). + """ + + def __init__(self, index: Idx): + """ + Initialise + + Parameters + ---------- + index + [pd.Index][pandas.Index] to use via the accessor + """ + # It is possible to validate here. + # However, it's probably better to do validation closer to the data use. 
+ self._index = index + + def ensure_is_multiindex(self) -> pd.MultiIndex: + """ + Ensure that the index is a [pd.MultiIndex][pandas.MultiIndex] + + Returns + ------- + : + `index` as a [pd.MultiIndex][pandas.MultiIndex] + + If the index was already a [pd.MultiIndex][pandas.MultiIndex], + this is a no-op. + """ + res = ensure_is_multiindex(self._index) + + return res + + def eim(self) -> pd.MultiIndex: + """ + Ensure that the index is a [pd.MultiIndex][pandas.MultiIndex] + + Alias for [ensure_is_multiindex][pandas_openscm.index_manipulation.] + + Returns + ------- + : + `index` as a [pd.MultiIndex][pandas.MultiIndex] + + If the index was already a [pd.MultiIndex][pandas.MultiIndex], + this is a no-op. + """ + return self.ensure_is_multiindex() diff --git a/tests/conftest.py b/tests/conftest.py index a593786..839a350 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -45,3 +45,7 @@ def setup_pandas_accessors() -> None: pd.Series._accessors.discard("openscm") if hasattr(pd.Series, "openscm"): del pd.Series.openscm + + pd.Index._accessors.discard("openscm") + if hasattr(pd.Index, "openscm"): + del pd.Index.openscm diff --git a/tests/integration/index_manipulation/test_integration_index_manipulation_ensure_index_is_multiindex.py b/tests/integration/index_manipulation/test_integration_index_manipulation_ensure_index_is_multiindex.py index f120118..d9c33b7 100644 --- a/tests/integration/index_manipulation/test_integration_index_manipulation_ensure_index_is_multiindex.py +++ b/tests/integration/index_manipulation/test_integration_index_manipulation_ensure_index_is_multiindex.py @@ -178,6 +178,8 @@ def test_ensure_is_multiindex_accessor_index(setup_pandas_accessors): start = pd.Index([1, 2, 3], name="id") res = start.openscm.ensure_is_multiindex() + res_short = start.openscm.eim() + pd.testing.assert_index_equal(res, res_short) assert isinstance(res, pd.MultiIndex) @@ -203,6 +205,8 @@ def 
test_ensure_is_multiindex_accessor_multiindex(setup_pandas_accessors): ) res = start.openscm.ensure_is_multiindex() + res_short = start.openscm.eim() + pd.testing.assert_index_equal(res, res_short) # Same object returned assert id(start) == id(res) From 22e536864a0f2d8874c1e326c998be580c4d4f39 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Thu, 21 Aug 2025 21:06:39 +0200 Subject: [PATCH 6/7] CHANGELOG --- changelog/36.feature.md | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 changelog/36.feature.md diff --git a/changelog/36.feature.md b/changelog/36.feature.md new file mode 100644 index 0000000..0ba4d62 --- /dev/null +++ b/changelog/36.feature.md @@ -0,0 +1,2 @@ +- [pd.Index][pandas.Index] accessor [pandas_openscm.accessors.index.PandasIndexOpenSCMAccessor][] +- ensure multiindex related functionality: [pandas_openscm.accessors.series.PandasSeriesOpenSCMAccessor.ensure_index_is_multiindex][] and [pandas_openscm.accessors.index.PandasIndexOpenSCMAccessor.ensure_is_multiindex][] From 7b86075c305954147fd06477ff116bc71dbad9f0 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Thu, 21 Aug 2025 21:06:46 +0200 Subject: [PATCH 7/7] docs --- docs/pandas-accessors.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/docs/pandas-accessors.md b/docs/pandas-accessors.md index 6855e63..b75d143 100644 --- a/docs/pandas-accessors.md +++ b/docs/pandas-accessors.md @@ -21,13 +21,13 @@ you will need to run something like: ```python from pandas_openscm.accessors import register_pandas_accessors -# The 'pd.DataFrame.openscm' and 'pd.Series.openscm' namespaces +# The `pd.DataFrame.openscm` and `pd.Series.openscm` namespaces # will not be available at this point. # Register the accessors register_pandas_accessors() -# The 'pd.DataFrame.openscm' and 'pd.Series.openscm' namespaces +# The `pd.DataFrame.openscm`, `pd.Series.openscm` and `pd.Index.openscm` namespaces # will now be available. # I.e. 
you could now do something like df = pd.DataFrame( @@ -53,9 +53,14 @@ df.openscm.to_long_data() register_pandas_accessors(namespace="my_custom_namespace") # Doing it this way will make the custom namespace available under -# 'pd.DataFrame.my_custom_namespace' and 'pd.Series.my_custom_namespace'. +# `pd.DataFrame.my_custom_namespace`, +# `pd.Series.my_custom_namespace` +# and `pd.Index.my_custom_namespace`. +df.my_custom_namespace.to_long_data() ``` The full accessor APIs are documented at -[pandas_openscm.accessors.dataframe.PandasDataFrameOpenSCMAccessor][] -and [pandas_openscm.accessors.series.PandasSeriesOpenSCMAccessor][]. +[pandas_openscm.accessors.dataframe.PandasDataFrameOpenSCMAccessor][], +[pandas_openscm.accessors.series.PandasSeriesOpenSCMAccessor][] +and +[pandas_openscm.accessors.index.PandasIndexOpenSCMAccessor][].