From 1bdb5899a1c0f5cab290a3d01eee3691169606ff Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sun, 3 Aug 2025 17:19:11 +0200 Subject: [PATCH 01/11] Shuffle and add test of unit conversion accessors --- .../openscmdb-backend-speed-tests.py | 2 +- .../how-to-guides/how-to-make-a-plume-plot.py | 4 +- docs/pandas-accessors.md | 6 +- docs/tutorials/unit-conversion.py | 4 +- src/pandas_openscm/__init__.py | 4 +- src/pandas_openscm/accessors/__init__.py | 76 ++++++++++ .../{accessors.py => accessors/dataframe.py} | 46 +----- tests/conftest.py | 4 +- .../test_integration_database_portability.py | 2 +- ...manipulation_ensure_index_is_multiindex.py | 2 +- ...tegration_index_manipulation_set_levels.py | 2 +- ...on_index_manipulation_to_category_index.py | 2 +- ...ration_index_manipulation_update_levels.py | 6 +- ...x_manipulation_update_levels_from_other.py | 6 +- .../integration/test_grouping_integration.py | 4 +- .../integration/test_indexing_integration.py | 2 +- .../integration/test_plotting_integration.py | 34 ++--- tests/integration/test_reshaping.py | 6 +- tests/integration/test_unit_conversion.py | 138 ++++++++++++++---- 19 files changed, 234 insertions(+), 116 deletions(-) create mode 100644 src/pandas_openscm/accessors/__init__.py rename src/pandas_openscm/{accessors.py => accessors/dataframe.py} (95%) diff --git a/docs/further-background/openscmdb-backend-speed/openscmdb-backend-speed-tests.py b/docs/further-background/openscmdb-backend-speed/openscmdb-backend-speed-tests.py index 06d3210..5c3d540 100644 --- a/docs/further-background/openscmdb-backend-speed/openscmdb-backend-speed-tests.py +++ b/docs/further-background/openscmdb-backend-speed/openscmdb-backend-speed-tests.py @@ -36,7 +36,7 @@ if GIT_REPO.is_dirty(): COMMIT = f"{COMMIT}-dirty" -pandas_openscm.register_pandas_accessor() +pandas_openscm.register_pandas_accessors() @define diff --git a/docs/how-to-guides/how-to-make-a-plume-plot.py b/docs/how-to-guides/how-to-make-a-plume-plot.py index 
dab33be..a2b6d24 100644 --- a/docs/how-to-guides/how-to-make-a-plume-plot.py +++ b/docs/how-to-guides/how-to-make-a-plume-plot.py @@ -29,7 +29,7 @@ import numpy as np import openscm_units -from pandas_openscm import register_pandas_accessor +from pandas_openscm import register_pandas_accessors from pandas_openscm.plotting import PlumePlotter from pandas_openscm.testing import create_test_df @@ -40,7 +40,7 @@ # Register the openscm accessor for pandas objects # (we don't do this on import # as we have had bad experiences with implicit behaviour like that) -register_pandas_accessor() +register_pandas_accessors() # %% [markdown] # ## Basics diff --git a/docs/pandas-accessors.md b/docs/pandas-accessors.md index 1af49fc..be9c0d0 100644 --- a/docs/pandas-accessors.md +++ b/docs/pandas-accessors.md @@ -8,7 +8,7 @@ The accessors must be registered before they can be used (we do this to avoid imports of any of our modules having side effects, which is a pattern we have had bad experiences with in the past). This is done with -[register_pandas_accessor][pandas_openscm.accessors.register_pandas_accessor], +[register_pandas_accessors][pandas_openscm.accessors.register_pandas_accessors], By default, the accessors are provided under the "openscm" namespace and this is how the accessors are documented below. @@ -20,12 +20,12 @@ For the avoidance of doubt, in order to register/activate the accessors, you will need to run something like: ```python -from pandas_openscm.accessors import register_pandas_accessor +from pandas_openscm.accessors import register_pandas_accessors # The 'pd.DataFrame.openscm' namespace will not be available at this point. 
# Register the accessors -register_pandas_accessor() +register_pandas_accessors() # The 'pd.DataFrame.openscm' namespace # (or whatever other custom namespace you chose to register) diff --git a/docs/tutorials/unit-conversion.py b/docs/tutorials/unit-conversion.py index 264e568..6cf4953 100644 --- a/docs/tutorials/unit-conversion.py +++ b/docs/tutorials/unit-conversion.py @@ -29,7 +29,7 @@ import pandas_indexing as pix import pint -from pandas_openscm import register_pandas_accessor +from pandas_openscm import register_pandas_accessors from pandas_openscm.testing import create_test_df from pandas_openscm.unit_conversion import ( AmbiguousTargetUnitError, @@ -45,7 +45,7 @@ # Register the openscm accessor for pandas objects # (we don't do this on import # as we have had bad experiences with implicit behaviour like that) -register_pandas_accessor() +register_pandas_accessors() # %% [markdown] # ## Basics diff --git a/src/pandas_openscm/__init__.py b/src/pandas_openscm/__init__.py index ab1c52c..f6915aa 100644 --- a/src/pandas_openscm/__init__.py +++ b/src/pandas_openscm/__init__.py @@ -4,8 +4,8 @@ import importlib.metadata -from pandas_openscm.accessors import register_pandas_accessor +from pandas_openscm.accessors import register_pandas_accessors __version__ = importlib.metadata.version("pandas_openscm") -__all__ = ["register_pandas_accessor"] +__all__ = ["register_pandas_accessors"] diff --git a/src/pandas_openscm/accessors/__init__.py b/src/pandas_openscm/accessors/__init__.py new file mode 100644 index 0000000..7156034 --- /dev/null +++ b/src/pandas_openscm/accessors/__init__.py @@ -0,0 +1,76 @@ +""" +API for [pandas][] accessors. + +Accessors for [pd.DataFrame][pandas.DataFrame]'s, +[pd.Series][pandas.Series]'s +and [pd.Index][pandas.Index]'s. + +**Notes for developers** + +We try and keep the accessors as a super-thin layer. 
+This makes it easier to re-use functionality in a functional way, +which is beneficial +(particularly if we one day need to switch to +a different kind of dataframe e.g. dask). + +As a result, we effectively duplicate our API in the accessor layer. +This is ok for now, because this repo is not so big. +Pandas and pandas-indexing use pandas' `pandas.util._decorators.docs` decorator +(see https://github.com/pandas-dev/pandas/blob/05de25381f71657bd425d2c4045d81a46b2d3740/pandas/util/_decorators.py#L342) +to avoid duplicating the docs. +We could use the same pattern, but I have found that this magic +almost always goes wrong so I would stay away from this as long as we can. + +We would like to move to a less error-prone, less manual solution. +We tried using mix-ins, but this is just a yuck pattern +that makes it really hard to see where functionality comes from +(a common issue with inheritance) +and makes the type hinting hard. +As a result, we aren't using it. + +Probably the next thing to try is auto-generating the code from some template. +This is basically the same idea as using a macro in C. +It likely wouldn't be that hard, and would be much more robust. +""" + +from __future__ import annotations + +import pandas as pd + +from pandas_openscm.accessors.dataframe import PandasDataFrameOpenSCMAccessor + + +# TODO: note change in name (now has trailing s) in changelog +def register_pandas_accessors(namespace: str = "openscm") -> None: + """ + Register the pandas accessors + + This registers accessors + for [DataFrame][pandas.DataFrame]'s, [Series][pandas.Series]'s + and [Index][pandas.Index]'s. + If you only want to register accessors for one of these, + we leave it up to you to copy the line(s) you need. + + For details of how these accessors work, see + [pandas' docs](https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors). 
+ + We provide this as a separate function + because we have had really bad experiences with imports having side effects + (which seems to be the more normal pattern) + and don't want to pass those bad experiences on. + + Parameters + ---------- + namespace + Namespace to use for the accessor + + E.g. if namespace is 'custom' + then the pandas-openscm API will be available under + `pd.DataFrame.custom.pandas_openscm_function` + e.g. `pd.DataFrame.custom.convert_unit`. + """ + pd.api.extensions.register_dataframe_accessor(namespace)( + PandasDataFrameOpenSCMAccessor + ) + # pd.api.extensions.register_series_accessor(namespace)(PandasSeriesOpenSCMAccessor) + # pd.api.extensions.register_index_accessor(namespace)(PandasIndexOpenSCMAccessor) diff --git a/src/pandas_openscm/accessors.py b/src/pandas_openscm/accessors/dataframe.py similarity index 95% rename from src/pandas_openscm/accessors.py rename to src/pandas_openscm/accessors/dataframe.py index 22075a7..d7808f7 100644 --- a/src/pandas_openscm/accessors.py +++ b/src/pandas_openscm/accessors/dataframe.py @@ -1,20 +1,5 @@ """ -API for [pandas][] accessors. - -As a general note to developers, -we try and keep the accessors as a super-thin layer. -This makes it easier to re-use functionality in a more functional way, -which is beneficial -(particularly if we one day need to switch to -a different kind of dataframe e.g. dask). - -As a result, we effectively duplicate our API. -This is fine, because this repo is not so big. -Pandas and pandas-indexing use pandas' `pandas.util._decorators.docs` decorator -(see https://github.com/pandas-dev/pandas/blob/05de25381f71657bd425d2c4045d81a46b2d3740/pandas/util/_decorators.py#L342) -to avoid duplicating the docs. -We could use the same pattern, but I have found that this magic -almost always goes wrong so I would stay away from this as long as we can. 
+Accessor for [pd.DataFrame][pandas.DataFrame] """ from __future__ import annotations @@ -55,9 +40,9 @@ ) -class DataFramePandasOpenSCMAccessor: +class PandasDataFrameOpenSCMAccessor: """ - [pd.DataFrame][pandas.DataFrame] accessors + [pd.DataFrame][pandas.DataFrame] accessor For details, see [pandas' docs](https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors). @@ -716,9 +701,9 @@ def to_long_data(self, time_col_name: str = "time") -> pd.DataFrame: >>> import numpy as np >>> import pandas as pd >>> - >>> from pandas_openscm.accessors import register_pandas_accessor + >>> from pandas_openscm.accessors import register_pandas_accessors >>> - >>> register_pandas_accessor() + >>> register_pandas_accessors() >>> >>> df = pd.DataFrame( ... [ @@ -895,24 +880,3 @@ def update_index_levels_from_other( copy=copy, remove_unused_levels=remove_unused_levels, ) - - -def register_pandas_accessor(namespace: str = "openscm") -> None: - """ - Register the pandas accessors - - For details, see - [pandas' docs](https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors). - - We provide this as a separate function - because we have had really bad experiences with imports having side effects - and don't want to pass those on to our users. 
- - Parameters - ---------- - namespace - Namespace to use for the accessor - """ - pd.api.extensions.register_dataframe_accessor(namespace)( - DataFramePandasOpenSCMAccessor - ) diff --git a/tests/conftest.py b/tests/conftest.py index 9e58a57..fe0af3f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -31,9 +31,9 @@ def pandas_terminal_width(): @pytest.fixture() -def setup_pandas_accessor() -> None: +def setup_pandas_accessors() -> None: # Not parallel safe, but good enough - pandas_openscm.register_pandas_accessor() + pandas_openscm.register_pandas_accessors() yield None diff --git a/tests/integration/database/test_integration_database_portability.py b/tests/integration/database/test_integration_database_portability.py index e0537f8..06e79af 100644 --- a/tests/integration/database/test_integration_database_portability.py +++ b/tests/integration/database/test_integration_database_portability.py @@ -50,7 +50,7 @@ def test_move_db( # noqa: PLR0913 backend_data, backend_index, tmpdir, - setup_pandas_accessor, + setup_pandas_accessors, ): initial_db_dir = Path(tmpdir) / "initial" other_db_dir = Path(tmpdir) / "other" diff --git a/tests/integration/index_manipulation/test_integration_index_manipulation_ensure_index_is_multiindex.py b/tests/integration/index_manipulation/test_integration_index_manipulation_ensure_index_is_multiindex.py index 7fd96a6..4d81928 100644 --- a/tests/integration/index_manipulation/test_integration_index_manipulation_ensure_index_is_multiindex.py +++ b/tests/integration/index_manipulation/test_integration_index_manipulation_ensure_index_is_multiindex.py @@ -71,7 +71,7 @@ def test_ensure_index_is_multiindex_no_op(copy, copy_exp): @pytest.mark.parametrize("copy, copy_exp", ((None, True), (True, True), (False, False))) -def test_accessor(setup_pandas_accessor, copy, copy_exp): +def test_accessor(setup_pandas_accessors, copy, copy_exp): start = pd.DataFrame( [[1, 2], [3, 4]], columns=[10, 20], diff --git 
a/tests/integration/index_manipulation/test_integration_index_manipulation_set_levels.py b/tests/integration/index_manipulation/test_integration_index_manipulation_set_levels.py index f09771e..5f7cbb2 100644 --- a/tests/integration/index_manipulation/test_integration_index_manipulation_set_levels.py +++ b/tests/integration/index_manipulation/test_integration_index_manipulation_set_levels.py @@ -221,7 +221,7 @@ def test_set_levels_raises_value_error(): set_levels(start, levels_to_set=levels_to_set) -def test_accessor(setup_pandas_accessor): +def test_accessor(setup_pandas_accessors): start = pd.DataFrame( np.arange(2 * 4).reshape((4, 2)), columns=[2010, 2020], diff --git a/tests/integration/index_manipulation/test_integration_index_manipulation_to_category_index.py b/tests/integration/index_manipulation/test_integration_index_manipulation_to_category_index.py index b21540b..d833924 100644 --- a/tests/integration/index_manipulation/test_integration_index_manipulation_to_category_index.py +++ b/tests/integration/index_manipulation/test_integration_index_manipulation_to_category_index.py @@ -61,7 +61,7 @@ def test_to_category_index_series(): run_checks(res, start) -def test_accessor(setup_pandas_accessor): +def test_accessor(setup_pandas_accessors): units = ["Mt", "kg", "W"] # Biggish DataFrame diff --git a/tests/integration/index_manipulation/test_integration_index_manipulation_update_levels.py b/tests/integration/index_manipulation/test_integration_index_manipulation_update_levels.py index afec0fe..3e98f23 100644 --- a/tests/integration/index_manipulation/test_integration_index_manipulation_update_levels.py +++ b/tests/integration/index_manipulation/test_integration_index_manipulation_update_levels.py @@ -123,7 +123,7 @@ def test_update_levels_missing_level(): update_levels(start, updates=updates) -def test_doesnt_trip_over_droped_levels(setup_pandas_accessor): +def test_doesnt_trip_over_droped_levels(setup_pandas_accessors): def update_func(in_v: int) -> int: if 
in_v < 0: msg = f"Value must be greater than zero, received {in_v}" @@ -191,7 +191,7 @@ def update_func(in_v: int) -> int: ) -def test_accessor(setup_pandas_accessor): +def test_accessor(setup_pandas_accessors): start = pd.DataFrame( np.arange(2 * 4).reshape((4, 2)), columns=[2010, 2020], @@ -233,7 +233,7 @@ def test_accessor(setup_pandas_accessor): pd.testing.assert_frame_equal(res, exp) -def test_accessor_not_multiindex(setup_pandas_accessor): +def test_accessor_not_multiindex(setup_pandas_accessors): start = pd.DataFrame(np.arange(2 * 4).reshape((4, 2))) error_msg = re.escape( diff --git a/tests/integration/index_manipulation/test_integration_index_manipulation_update_levels_from_other.py b/tests/integration/index_manipulation/test_integration_index_manipulation_update_levels_from_other.py index 89439e1..b320414 100644 --- a/tests/integration/index_manipulation/test_integration_index_manipulation_update_levels_from_other.py +++ b/tests/integration/index_manipulation/test_integration_index_manipulation_update_levels_from_other.py @@ -181,7 +181,7 @@ def test_update_levels_from_other_missing_level(): update_levels_from_other(start, update_sources=update_sources) -def test_doesnt_trip_over_droped_levels(setup_pandas_accessor): +def test_doesnt_trip_over_droped_levels(setup_pandas_accessors): def update_func(in_v: int) -> int: if in_v < 0: msg = f"Value must be greater than zero, received {in_v}" @@ -256,7 +256,7 @@ def update_func(in_v: int) -> int: ) -def test_accessor(setup_pandas_accessor): +def test_accessor(setup_pandas_accessors): start = pd.DataFrame( np.arange(2 * 4).reshape((4, 2)), columns=[2010, 2020], @@ -298,7 +298,7 @@ def test_accessor(setup_pandas_accessor): pd.testing.assert_frame_equal(res, exp) -def test_accessor_not_multiindex(setup_pandas_accessor): +def test_accessor_not_multiindex(setup_pandas_accessors): start = pd.DataFrame(np.arange(2 * 4).reshape((4, 2))) error_msg = re.escape( diff --git a/tests/integration/test_grouping_integration.py 
b/tests/integration/test_grouping_integration.py index 07f5267..7dd81bc 100644 --- a/tests/integration/test_grouping_integration.py +++ b/tests/integration/test_grouping_integration.py @@ -20,7 +20,7 @@ (["run", "scenario"], ["variable", "unit"]), ), ) -def test_groupby_except(non_groupers, expected_groups, setup_pandas_accessor): +def test_groupby_except(non_groupers, expected_groups, setup_pandas_accessors): df = create_test_df( variables=(("variable_1", "K"), ("variable_2", "W")), n_scenarios=5, @@ -46,7 +46,7 @@ def test_groupby_except(non_groupers, expected_groups, setup_pandas_accessor): "new_name, quantile_exp", ((None, "quantile"), ("percentile", "percentile")) ) def test_fix_index_name_after_groupby_quantile( - new_name, quantile_exp, setup_pandas_accessor + new_name, quantile_exp, setup_pandas_accessors ): fix_kwargs = {} if new_name is not None: diff --git a/tests/integration/test_indexing_integration.py b/tests/integration/test_indexing_integration.py index 46ad841..0c308a2 100644 --- a/tests/integration/test_indexing_integration.py +++ b/tests/integration/test_indexing_integration.py @@ -287,7 +287,7 @@ def test_index_name_aware_lookup(): ), ), ) -def test_mi_loc_same_as_pandas(locator, setup_pandas_accessor): +def test_mi_loc_same_as_pandas(locator, setup_pandas_accessors): """ Test pass through in the cases where pass through should happen diff --git a/tests/integration/test_plotting_integration.py b/tests/integration/test_plotting_integration.py index 83fae3d..f45b34c 100644 --- a/tests/integration/test_plotting_integration.py +++ b/tests/integration/test_plotting_integration.py @@ -112,7 +112,7 @@ def check_plots_incl_quantile_calculation( plt.close() -def test_plot_plume_default(tmp_path, image_regression, setup_pandas_accessor): +def test_plot_plume_default(tmp_path, image_regression, setup_pandas_accessors): df = create_test_df( variables=(("variable_1", "K"), ("variable_2", "K")), n_scenarios=5, @@ -131,7 +131,7 @@ def 
test_plot_plume_default(tmp_path, image_regression, setup_pandas_accessor): ) -def test_default_ax_auto_creation(tmp_path, image_regression, setup_pandas_accessor): +def test_default_ax_auto_creation(tmp_path, image_regression, setup_pandas_accessors): df = create_test_df( variables=(("variable_1", "K"), ("variable_2", "K")), n_scenarios=5, @@ -154,7 +154,7 @@ def test_default_ax_auto_creation(tmp_path, image_regression, setup_pandas_acces image_regression.check(out_file.read_bytes(), diff_threshold=0.01) -def test_plot_plume_no_labels(tmp_path, image_regression, setup_pandas_accessor): +def test_plot_plume_no_labels(tmp_path, image_regression, setup_pandas_accessors): df = create_test_df( variables=(("variable_1", "K"),), n_scenarios=5, @@ -178,7 +178,7 @@ def test_plot_plume_no_labels(tmp_path, image_regression, setup_pandas_accessor) def test_plot_plume_with_other_plot_calls( - tmp_path, image_regression, setup_pandas_accessor + tmp_path, image_regression, setup_pandas_accessors ): fig, ax = plt.subplots() @@ -280,7 +280,7 @@ def test_plot_plume_with_other_plot_calls( ), ) def test_plot_plume_quantiles( - quantiles_plumes, tmp_path, image_regression, setup_pandas_accessor + quantiles_plumes, tmp_path, image_regression, setup_pandas_accessors ): df = create_test_df( variables=(("variable_1", "K"), ("variable_2", "K")), @@ -328,7 +328,7 @@ def test_plot_plume_quantile_over( # noqa: PLR0913 kwargs, tmp_path, image_regression, - setup_pandas_accessor, + setup_pandas_accessors, ): df = create_test_df( variables=(("variable_1", "K"), ("variable_2", "W")), @@ -357,7 +357,7 @@ def test_plot_plume_quantile_over( # noqa: PLR0913 def test_plot_plume_extra_palette( tmp_path, image_regression, - setup_pandas_accessor, + setup_pandas_accessors, ): df = create_test_df( variables=(("variable_1", "K"), ("variable_2", "W")), @@ -393,7 +393,7 @@ def test_plot_plume_extra_palette( def test_plot_plume_missing_from_palette( tmp_path, image_regression, - setup_pandas_accessor, + 
setup_pandas_accessors, ): df = create_test_df( variables=(("variable_1", "K"),), @@ -434,7 +434,7 @@ def test_plot_plume_missing_from_palette( def test_plot_plume_extra_dashes( tmp_path, image_regression, - setup_pandas_accessor, + setup_pandas_accessors, ): df = create_test_df( variables=(("variable_1", "W"), ("variable_2", "W")), @@ -468,7 +468,7 @@ def test_plot_plume_extra_dashes( def test_plot_plume_missing_from_dashes( tmp_path, image_regression, - setup_pandas_accessor, + setup_pandas_accessors, ): df = create_test_df( variables=(("variable_1", "W"), ("variable_2", "W")), @@ -530,7 +530,7 @@ def test_plot_plume_missing_from_dashes( ), ) def test_plot_plume_missing_quantiles( # noqa: PLR0913 - quantiles, quantiles_plumes, exp, setup_pandas_accessor, image_regression, tmp_path + quantiles, quantiles_plumes, exp, setup_pandas_accessors, image_regression, tmp_path ): df = create_test_df( variables=(("variable_1", "K"), ("variable_2", "K")), @@ -552,7 +552,7 @@ def test_plot_plume_missing_quantiles( # noqa: PLR0913 def test_plot_plume_missing_multiple_quantiles( - setup_pandas_accessor, + setup_pandas_accessors, image_regression, tmp_path, recwarn, @@ -587,7 +587,7 @@ def test_plot_plume_missing_multiple_quantiles( ) -def test_plot_plume_option_passing(setup_pandas_accessor, image_regression, tmp_path): +def test_plot_plume_option_passing(setup_pandas_accessors, image_regression, tmp_path): openscm_units = pytest.importorskip("openscm_units") openscm_units.unit_registry.setup_matplotlib(enable=True) @@ -658,7 +658,7 @@ def create_legend(ax, handles) -> None: def test_plot_plume_after_calculating_quantiles_option_passing( - setup_pandas_accessor, image_regression, tmp_path + setup_pandas_accessors, image_regression, tmp_path ): openscm_units = pytest.importorskip("openscm_units") openscm_units.unit_registry.setup_matplotlib(enable=True) @@ -742,7 +742,7 @@ def create_legend(ax, handles) -> None: ), ) def test_plot_plume_unit_aware( - unit_aware, variables, 
setup_pandas_accessor, image_regression, tmp_path + unit_aware, variables, setup_pandas_accessors, image_regression, tmp_path ): """ Make sure that we can do unit-aware plots @@ -796,7 +796,7 @@ def test_plot_plume_unit_aware( ur.setup_matplotlib(enable=False) -def test_plot_plume_unit_aware_incompatible_units(setup_pandas_accessor): +def test_plot_plume_unit_aware_incompatible_units(setup_pandas_accessors): """ Make sure that we can do unit-aware plots and errors are caught @@ -931,7 +931,7 @@ def test_get_values_line_unit_aware_no_pint(): get_values_line(df, unit_aware=True, unit_var="unit", time_units="yr") -def test_get_values_plume_unit_aware_no_pint(setup_pandas_accessor): +def test_get_values_plume_unit_aware_no_pint(setup_pandas_accessors): df = ( create_test_df( variables=(("variable_1", "K"), ("variable_2", "K")), diff --git a/tests/integration/test_reshaping.py b/tests/integration/test_reshaping.py index 83e8368..69bf439 100644 --- a/tests/integration/test_reshaping.py +++ b/tests/integration/test_reshaping.py @@ -20,7 +20,7 @@ ("year", "year"), ), ) -def test_to_long_data_basic(setup_pandas_accessor, time_col_name, time_col_name_exp): +def test_to_long_data_basic(setup_pandas_accessors, time_col_name, time_col_name_exp): kwargs = {} if time_col_name is not None: kwargs["time_col_name"] = time_col_name_exp @@ -39,7 +39,7 @@ def test_to_long_data_basic(setup_pandas_accessor, time_col_name, time_col_name_ pd.testing.assert_frame_equal(res, exp, check_like=True) -def test_to_long_data_nan_handling(setup_pandas_accessor): +def test_to_long_data_nan_handling(setup_pandas_accessors): df = pd.DataFrame( [[1, np.nan, 1.2], [2.1, 10.2, np.nan]], columns=[2010.0, 2015.0, 2025.0], @@ -61,7 +61,7 @@ def test_to_long_data_nan_handling(setup_pandas_accessor): pd.testing.assert_frame_equal(res, exp) -def test_to_long_data_nan_handling_index(setup_pandas_accessor): +def test_to_long_data_nan_handling_index(setup_pandas_accessors): df = pd.DataFrame( [[1.1, 0.8, 1.2], 
[2.1, 10.2, 8.4]], columns=[2010.0, 2015.0, 2025.0], diff --git a/tests/integration/test_unit_conversion.py b/tests/integration/test_unit_conversion.py index 960842a..60d66c7 100644 --- a/tests/integration/test_unit_conversion.py +++ b/tests/integration/test_unit_conversion.py @@ -87,13 +87,13 @@ def test_convert_unit_unknown_mapping_type(): @check_auto_index_casting_df @pytest.mark.parametrize( - "unit, exp_unit", + "unit_level, exp_unit_level", ( pytest.param(None, "unit", id="default"), ("units", "units"), ), ) -def test_convert_unit_single_unit(unit, exp_unit, only_two_index_levels_df): +def test_convert_unit_single_unit(unit_level, exp_unit_level, only_two_index_levels_df): pytest.importorskip("pint") start = create_test_df( @@ -113,17 +113,13 @@ def test_convert_unit_single_unit(unit, exp_unit, only_two_index_levels_df): ].reset_index(["scenario", "run"], drop=True) call_kwargs = {} - if unit is not None: - start = ( - start.reset_index("unit") - .rename({"unit": unit}, axis="columns") - .set_index(unit, append=True) - ) - call_kwargs["unit_level"] = unit + if unit_level is not None: + start = start.rename_axis(index={"unit": unit_level}) + call_kwargs["unit_level"] = unit_level res = convert_unit(start, "K", **call_kwargs) - assert (res.index.get_level_values(exp_unit) == "K").all() + assert (res.index.get_level_values(exp_unit_level) == "K").all() np.testing.assert_equal( res.loc[res.index.get_level_values("variable") == "Cold", :].values, @@ -747,34 +743,18 @@ def test_convert_unit_like_unit_level_handling( call_kwargs = {} if df_unit_level is not None: - start = ( - start.reset_index("unit") - .rename({"unit": df_unit_level}, axis="columns") - .set_index(df_unit_level, append=True) - ) + start = start.rename_axis(index={"unit": df_unit_level}) call_kwargs["df_unit_level"] = df_unit_level if target_unit_level is not None: - target = ( - target.reset_index("unit") - .rename({"unit": target_unit_level}, axis="columns") - .set_index(target_unit_level, 
append=True) - ) + target = target.rename_axis(index={"unit": target_unit_level}) call_kwargs["target_unit_level"] = target_unit_level else: - target = ( - target.reset_index("unit") - .rename({"unit": df_unit_level}, axis="columns") - .set_index(df_unit_level, append=True) - ) + target = target.rename_axis(index={"unit": df_unit_level}) elif target_unit_level is not None: - target = ( - target.reset_index("unit") - .rename({"unit": target_unit_level}, axis="columns") - .set_index(target_unit_level, append=True) - ) + target = target.rename_axis(index={"unit": target_unit_level}) call_kwargs["target_unit_level"] = target_unit_level res = convert_unit_like(start, target, **call_kwargs) @@ -848,3 +828,101 @@ def test_convert_unit_from_target_series_no_pint_error(): ), ): convert_unit_from_target_series(start, desired_unit) + + +def test_accessor_convert_unit(setup_pandas_accessors): + # Do most complex case: supply a series with different unit level + # and required unit registry + openscm_units = pytest.importorskip("openscm_units") + + start = create_test_df( + variables=[ + ("temperature", "K"), + ("erf", "W / m^2"), + ("ohc", "ZJ"), + ("emissions", "GtC"), + ], + n_scenarios=2, + n_runs=2, + timepoints=np.array([1850.0, 2000.0, 2050.0, 2100.0]), + ).rename_axis(index={"unit": "units"}) + + desired_units = ( + start.loc[start.index.get_level_values("variable") != "temperature"] + .reset_index("units")["units"] + .replace({"W / m^2": "ZJ / yr / m^2", "ZJ": "PJ", "GtC": "MtCO2"}) + ) + + res = start.openscm.convert_unit( + desired_units, unit_level="units", ur=openscm_units.unit_registry + ) + + np.testing.assert_allclose( + res.loc[res.index.get_level_values("variable") == "temperature", :].values, + start.loc[start.index.get_level_values("variable") == "temperature", :].values, + ) + + np.testing.assert_allclose( + res.loc[res.index.get_level_values("variable") == "erf", :].values, + (60.0 * 60.0 * 24.0 * 365.25) + * 1e-21 + * 
start.loc[start.index.get_level_values("variable") == "erf", :].values, + ) + + np.testing.assert_allclose( + res.loc[res.index.get_level_values("variable") == "ohc", :].values, + 1e6 * start.loc[start.index.get_level_values("variable") == "ohc", :].values, + ) + + np.testing.assert_allclose( + res.loc[res.index.get_level_values("variable") == "emissions", :].values, + 44.0 + / 12.0 + * 1000.0 + * start.loc[start.index.get_level_values("variable") == "emissions", :].values, + ) + + +def test_accessor_convert_unit_like(setup_pandas_accessors): + # Do most complex case: supply a series with different unit level + # and required unit registry + openscm_units = pytest.importorskip("openscm_units") + + start = create_test_df( + variables=[ + ("temperature", "K"), + ("erf", "W / m^2"), + ("ohc", "ZJ"), + ("emissions", "GtC"), + ], + n_scenarios=2, + n_runs=2, + timepoints=np.array([1850.0, 2000.0, 2050.0, 2100.0]), + ).rename_axis(index={"unit": "units"}) + + target = create_test_df( + variables=[ + ("temperature", "mK"), + ("erf", "W / m^2"), + ("ohc", "PJ"), + ("emissions", "MtC"), + ], + n_scenarios=2, + n_runs=2, + timepoints=np.array([1850.0, 2000.0, 2050.0, 2100.0]), + ).rename_axis(index={"unit": "unit_level"}) + + res = start.openscm.convert_unit_like( + target, + df_unit_level="units", + target_unit_level="unit_level", + ur=openscm_units.unit_registry, + ) + + exp = start.openscm.convert_unit( + {"K": "mK", "ZJ": "PJ", "GtC": "MtC"}, + unit_level="units", + ur=openscm_units.unit_registry, + ) + + assert_frame_alike(res, exp) From f1de5fd3e3d1c61503691d47426d6891169e5721 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sun, 3 Aug 2025 18:05:56 +0200 Subject: [PATCH 02/11] Add better support for series --- src/pandas_openscm/unit_conversion.py | 101 +++++++++------- tests/integration/test_unit_conversion.py | 140 +++++++++++++++------- 2 files changed, 153 insertions(+), 88 deletions(-) diff --git a/src/pandas_openscm/unit_conversion.py 
b/src/pandas_openscm/unit_conversion.py index 3aa1659..b3fee83 100644 --- a/src/pandas_openscm/unit_conversion.py +++ b/src/pandas_openscm/unit_conversion.py @@ -40,13 +40,14 @@ def __init__(self, missing_ts: pd.MultiIndex) -> None: def convert_unit_from_target_series( - df: pd.DataFrame, + # TODO: update type hint to pd.Series[supported numerical types here] + pobj: pd.DataFrame, desired_units: pd.Series[str], unit_level: str = "unit", ur: pint.facets.PlainRegistry | None = None, ) -> pd.DataFrame: """ - Convert `df`'s units based on a [pd.Series][pandas.Series] + Convert `pobj`'s units based on a [pd.Series][pandas.Series] `desired_uni` defines the units to convert to. This is a relatively low-level function, @@ -54,17 +55,17 @@ def convert_unit_from_target_series( Parameters ---------- - df - [pd.DataFrame][pandas.DataFrame] whose units should be converted + pobj + Supported [pandas][] object whose units should be converted desired_units - Desired unit(s) for `df` + Desired unit(s) for `pobj` This must be a [pd.Series][pandas.Series] - with an index that contains all the rows in `df`. + with an index that contains all the rows in `pobj`. unit_level - Level in `df`'s index which holds unit information + Level in `pobj`'s index which holds unit information ur Unit registry to use for the conversion. @@ -74,12 +75,12 @@ def convert_unit_from_target_series( Returns ------- : - `df` with converted units + `pobj` with converted units Raises ------ AssertionError - `desired_units`'s index does not contain all the rows in `df` + `desired_units`'s index does not contain all the rows in `pobj` MissingOptionalDependencyError `ur` is `None` and [pint](https://pint.readthedocs.io/) is not available. 
@@ -123,29 +124,30 @@ def convert_unit_from_target_series( """ desired_units = ensure_index_is_multiindex(desired_units) - df_rows_checker = ensure_is_multiindex(df.index.droplevel(unit_level)) + df_rows_checker = ensure_is_multiindex(pobj.index.droplevel(unit_level)) missing_rows = df_rows_checker.difference( # type: ignore # pandas-stubs missing API desired_units.index.reorder_levels(df_rows_checker.names) # type: ignore # pandas-stubs missing API ) if not missing_rows.empty: raise MissingDesiredUnitError(missing_rows) - df_reset_unit = ensure_index_is_multiindex(df.reset_index(unit_level), copy=False) - - df_units = df_reset_unit[unit_level] + pobj_units = pd.Series( + pobj.index.get_level_values(unit_level), + index=ensure_is_multiindex(pobj.index.droplevel(unit_level)), + ) - desired_units_in_df = multi_index_lookup(desired_units, df_units.index) # type: ignore # already checked that df_units.index is MultiIndex + desired_units_in_pobj = multi_index_lookup(desired_units, pobj_units.index) # type: ignore # already checked that df_units.index is MultiIndex # Don't need to align, pandas does that for us. 
# If you want to check, compare the below with # unit_map = pd.DataFrame([df_units, desired_units_in_df.sample(frac=1)]).T unit_map = pd.DataFrame( - [df_units.rename("df_unit"), desired_units_in_df.rename("target_unit")] + [pobj_units.rename("pobj_unit"), desired_units_in_pobj.rename("target_unit")] ).T - unit_changes = unit_map["df_unit"] != unit_map["target_unit"] + unit_changes = unit_map["pobj_unit"] != unit_map["target_unit"] if not unit_changes.any(): # Already all in desired unit - return df + return pobj if ur is None: try: @@ -157,35 +159,36 @@ def convert_unit_from_target_series( "convert_unit_from_target_series(..., ur=None, ...)", "pint" ) - df_no_unit = df_reset_unit.drop(unit_level, axis="columns") - for (df_unit, target_unit), conversion_df in unit_map[unit_changes].groupby( - ["df_unit", "target_unit"] + pobj_no_unit = ensure_index_is_multiindex(pobj.reset_index(unit_level, drop=True)) + for (pobj_unit, target_unit), conversion_df in unit_map[unit_changes].groupby( + ["pobj_unit", "target_unit"] ): - to_alter_loc = multi_index_match(df_no_unit.index, conversion_df.index) # type: ignore - df_no_unit.loc[to_alter_loc, :] = ( - ur.Quantity(df_no_unit.loc[to_alter_loc, :].values, df_unit) + to_alter_loc = multi_index_match(pobj_no_unit.index, conversion_df.index) # type: ignore + pobj_no_unit.loc[to_alter_loc, :] = ( + ur.Quantity(pobj_no_unit.loc[to_alter_loc, :].values, pobj_unit) .to(target_unit) .m ) - new_units = (unit_map.reorder_levels(df_no_unit.index.names).loc[df_no_unit.index])[ - "target_unit" - ] - res = set_index_levels_func(df_no_unit, {unit_level: new_units}).reorder_levels( - df.index.names + new_units = ( + unit_map.reorder_levels(pobj_no_unit.index.names).loc[pobj_no_unit.index] + )["target_unit"] + res = set_index_levels_func(pobj_no_unit, {unit_level: new_units}).reorder_levels( + pobj.index.names ) return res def convert_unit( - df: pd.DataFrame, + # TODO: update type hint to pd.Series[supported numerical types here] + pobj: 
pd.DataFrame, desired_units: str | Mapping[str, str] | pd.Series[str], unit_level: str = "unit", ur: pint.facets.PlainRegistry | None = None, ) -> pd.DataFrame: """ - Convert a [pd.DataFrame][pandas.DataFrame]'s units + Convert a supported [pandas][] object's units This uses [convert_unit_from_target_series][(m).]. If you want to understand the details of how the conversion works, @@ -193,31 +196,31 @@ def convert_unit( Parameters ---------- - df - [pd.DataFrame][pandas.DataFrame] whose units should be converted + pobj + Supported [pandas][] object whose units should be converted desired_units - Desired unit(s) for `df` + Desired unit(s) for `pobj` If this is a string, - we attempt to convert all timeseries in `df` to the given unit. + we attempt to convert all timeseries in `pobj` to the given unit. If this is a mapping, we convert the given units to the target units. - Be careful using this form - you need to be certain of the units in `df`. - If any of your keys don't match the units in `df` + Be careful using this form - you need to be certain of the units in `pobj`. + If any of your keys don't match the units in `pobj` (even by a single whitespace character) then the unit conversion will not happen. If this is a [pd.Series][pandas.Series], then it will be passed to [convert_unit_from_target_series][(m).] - after filling any rows in `df` that are not in `desired_units` - with the unit from `df` (i.e. unspecified rows are not converted). + after filling any rows in `pobj` that are not in `desired_units` + with the unit from `pobj` (i.e. unspecified rows are not converted). For further details, see examples unit_level - Level in `df`'s index which holds unit information + Level in `pobj`'s index which holds unit information Passed to [convert_unit_from_target_series][(m).]. 
@@ -229,7 +232,7 @@ def convert_unit( Returns ------- : - `df` with converted units + `pobj` with converted units Examples -------- @@ -256,6 +259,14 @@ def convert_unit( sb temperature K 1.100 1.200 1.300 body temperature K 310.150 311.250 311.050 >>> + >>> # Same thing with a series as input + >>> convert_unit(start[2030], "K") + 2030 + scenario variable unit + sa temperature K 0.002 + sb temperature K 1.200 + body temperature K 311.250 + >>> >>> # Convert using a mapping. >>> # Units that aren't specified in the mapping aren't converted. >>> convert_unit(start, {"mK": "K", "K": "kK"}) @@ -296,8 +307,8 @@ def convert_unit( body temperature degF 98.600 100.580 100.220 """ df_units_s = ensure_index_is_multiindex( - df.index.get_level_values(unit_level).to_series( - index=df.index.droplevel(unit_level), name="df_unit" + pobj.index.get_level_values(unit_level).to_series( + index=pobj.index.droplevel(unit_level), name="df_unit" ) ) @@ -307,7 +318,7 @@ def convert_unit( # hence I am ok with it. 
if isinstance(desired_units, str): desired_units_s = pd.Series( - [desired_units] * df.shape[0], + [desired_units] * pobj.shape[0], index=df_units_s.index, ) @@ -329,7 +340,7 @@ def convert_unit( raise NotImplementedError(type(desired_units)) res = convert_unit_from_target_series( - df=df, desired_units=desired_units_s, unit_level=unit_level, ur=ur + pobj=pobj, desired_units=desired_units_s, unit_level=unit_level, ur=ur ) return res @@ -509,7 +520,7 @@ def convert_unit_like( ).fillna(df_units_s) res = convert_unit_from_target_series( - df=df, desired_units=target_units_s, unit_level=df_unit_level, ur=ur + pobj=df, desired_units=target_units_s, unit_level=df_unit_level, ur=ur ) return res diff --git a/tests/integration/test_unit_conversion.py b/tests/integration/test_unit_conversion.py index 60d66c7..8720eb0 100644 --- a/tests/integration/test_unit_conversion.py +++ b/tests/integration/test_unit_conversion.py @@ -6,6 +6,7 @@ import re import sys +from typing import TYPE_CHECKING, Any, Literal, TypeVar, overload from unittest.mock import patch import numpy as np @@ -25,8 +26,11 @@ convert_unit_like, ) -check_auto_index_casting_df = pytest.mark.parametrize( - "only_two_index_levels_df", +if TYPE_CHECKING: + P = TypeVar("P", pd.DataFrame | pd.Series[Any]) + +check_auto_index_casting_pobj = pytest.mark.parametrize( + "only_two_index_levels_pobj", ( pytest.param(True, id="only_two_index_levels"), pytest.param(False, id="more_than_two_index_levels"), @@ -39,9 +43,52 @@ This parameterisation ensures that we check this edge case. """ +pobj_type = pytest.mark.parametrize( + "pobj_type", + ("DataFrame", "Series"), +) +""" +Parameterisation to use to check handling of both DataFrame and Series +""" + + +@overload +def convert_to_desired_type( + pobj: pd.DataFrame, pobj_type: Literal["DataFrame"] +) -> pd.DataFrame: ... + + +@overload +def convert_to_desired_type( + pobj: pd.DataFrame, pobj_type: Literal["Series"] +) -> pd.Series[Any]: ... 
+ + +def convert_to_desired_type( + df: pd.DataFrame, pobj_type: Literal["DataFrame", "Series"] +) -> pd.DataFrame | pd.Series[Any]: + if pobj_type == "DataFrame": + return df -@check_auto_index_casting_df -def test_convert_unit_no_op(only_two_index_levels_df): + if pobj_type == "Series": + res = df[df.columns[0]] + return res + + raise NotImplementedError(pobj_type) + + +def check_result(res: P, exp: P) -> None: + if isinstance(res, pd.DataFrame): + pd.testing.assert_frame_equal(res, exp) + elif isinstance(res, pd.Series): + pd.testing.assert_series_equal(res, exp) + else: + raise NotImplementedError(type(res)) + + +@pobj_type +@check_auto_index_casting_pobj +def test_convert_unit_no_op(only_two_index_levels_pobj, pobj_type): start = create_test_df( variables=[ ("Cold", "mK"), @@ -52,17 +99,19 @@ def test_convert_unit_no_op(only_two_index_levels_df): n_runs=3, timepoints=np.array([1.0, 2.0, 3.0]), ) - if only_two_index_levels_df: + if only_two_index_levels_pobj: start = start.loc[ (start.index.get_level_values("scenario") == "scenario_0") & (start.index.get_level_values("run") == 0) ].reset_index(["scenario", "run"], drop=True) + start = convert_to_desired_type(start, pobj_type) + res = convert_unit( start, start.index.to_frame()["unit"].reset_index("unit", drop=True) ) - pd.testing.assert_frame_equal(res, start) + check_result(res, start) def test_convert_unit_unknown_mapping_type(): @@ -85,7 +134,8 @@ def test_convert_unit_unknown_mapping_type(): ) -@check_auto_index_casting_df +@pobj_type +@check_auto_index_casting_pobj @pytest.mark.parametrize( "unit_level, exp_unit_level", ( @@ -93,7 +143,9 @@ def test_convert_unit_unknown_mapping_type(): ("units", "units"), ), ) -def test_convert_unit_single_unit(unit_level, exp_unit_level, only_two_index_levels_df): +def test_convert_unit_single_unit( + unit_level, exp_unit_level, only_two_index_levels_pobj, pobj_type +): pytest.importorskip("pint") start = create_test_df( @@ -106,12 +158,14 @@ def 
test_convert_unit_single_unit(unit_level, exp_unit_level, only_two_index_lev n_runs=3, timepoints=np.array([1.0, 2.0, 3.0]), ) - if only_two_index_levels_df: + if only_two_index_levels_pobj: start = start.loc[ (start.index.get_level_values("scenario") == "scenario_0") & (start.index.get_level_values("run") == 0) ].reset_index(["scenario", "run"], drop=True) + start = convert_to_desired_type(start, pobj_type) + call_kwargs = {} if unit_level is not None: start = start.rename_axis(index={"unit": unit_level}) @@ -166,8 +220,8 @@ def test_convert_unit_ur_injection(): ) -@check_auto_index_casting_df -def test_convert_unit_mapping(only_two_index_levels_df): +@check_auto_index_casting_pobj +def test_convert_unit_mapping(only_two_index_levels_pobj): pytest.importorskip("pint") start = create_test_df( @@ -180,7 +234,7 @@ def test_convert_unit_mapping(only_two_index_levels_df): n_runs=2, timepoints=np.array([1850.0, 2000.0, 2050.0, 2100.0]), ) - if only_two_index_levels_df: + if only_two_index_levels_pobj: start = start.loc[ (start.index.get_level_values("scenario") == "scenario_0") & (start.index.get_level_values("run") == 0) @@ -208,8 +262,8 @@ def test_convert_unit_mapping(only_two_index_levels_df): ) -@check_auto_index_casting_df -def test_convert_series(only_two_index_levels_df): +@check_auto_index_casting_pobj +def test_convert_series(only_two_index_levels_pobj): pytest.importorskip("pint") # Check that conversion works if user supplies a Series of target units @@ -223,7 +277,7 @@ def test_convert_series(only_two_index_levels_df): n_runs=2, timepoints=np.array([1850.0, 2000.0, 2050.0, 2100.0]), ) - if only_two_index_levels_df: + if only_two_index_levels_pobj: start = start.loc[ (start.index.get_level_values("scenario") == "scenario_0") & (start.index.get_level_values("run") == 0) @@ -255,8 +309,8 @@ def test_convert_series(only_two_index_levels_df): ) -@check_auto_index_casting_df -def test_convert_series_all_rows(only_two_index_levels_df): 
+@check_auto_index_casting_pobj +def test_convert_series_all_rows(only_two_index_levels_pobj): pytest.importorskip("pint") start = create_test_df( @@ -269,7 +323,7 @@ def test_convert_series_all_rows(only_two_index_levels_df): n_runs=2, timepoints=np.array([1850.0, 2000.0, 2050.0, 2100.0]), ) - if only_two_index_levels_df: + if only_two_index_levels_pobj: start = start.loc[ (start.index.get_level_values("scenario") == "scenario_0") & (start.index.get_level_values("run") == 0) @@ -299,8 +353,8 @@ def test_convert_series_all_rows(only_two_index_levels_df): ) -@check_auto_index_casting_df -def test_convert_series_extra_rows(only_two_index_levels_df): +@check_auto_index_casting_pobj +def test_convert_series_extra_rows(only_two_index_levels_pobj): pytest.importorskip("pint") start = create_test_df( @@ -313,7 +367,7 @@ def test_convert_series_extra_rows(only_two_index_levels_df): n_runs=2, timepoints=np.array([1850.0, 2000.0, 2050.0, 2100.0]), ) - if only_two_index_levels_df: + if only_two_index_levels_pobj: start = start.loc[ (start.index.get_level_values("scenario") == "scenario_0") & (start.index.get_level_values("run") == 0) @@ -323,7 +377,7 @@ def test_convert_series_extra_rows(only_two_index_levels_df): {"W / m^2": "ZJ / yr / m^2", "ZJ": "PJ"} ) # Extra rows that aren't in start, should be ignored and not cause failures - if only_two_index_levels_df: + if only_two_index_levels_pobj: desired_units.loc[("carbon")] = "GtC" else: @@ -349,8 +403,8 @@ def test_convert_series_extra_rows(only_two_index_levels_df): ) -@check_auto_index_casting_df -def test_convert_unit_like_no_op(only_two_index_levels_df): +@check_auto_index_casting_pobj +def test_convert_unit_like_no_op(only_two_index_levels_pobj): start = create_test_df( variables=[ ("Cold", "mK"), @@ -361,7 +415,7 @@ def test_convert_unit_like_no_op(only_two_index_levels_df): n_runs=3, timepoints=np.array([1.0, 2.0, 3.0]), ) - if only_two_index_levels_df: + if only_two_index_levels_pobj: start = start.loc[ 
(start.index.get_level_values("scenario") == "scenario_0") & (start.index.get_level_values("run") == 0) @@ -387,10 +441,10 @@ def test_convert_unit_like_no_op(only_two_index_levels_df): """ -@check_auto_index_casting_df +@check_auto_index_casting_pobj @check_auto_index_casting_target def test_convert_unit_like( - only_two_index_levels_df, + only_two_index_levels_pobj, only_two_index_levels_target, ): pytest.importorskip("pint") @@ -408,7 +462,7 @@ def test_convert_unit_like( ], **create_kwargs, ) - if only_two_index_levels_df: + if only_two_index_levels_pobj: start = start.loc[ (start.index.get_level_values("scenario") == "scenario_0") & (start.index.get_level_values("run") == 0) @@ -435,10 +489,10 @@ def test_convert_unit_like( assert_frame_alike(res, exp) -@check_auto_index_casting_df +@check_auto_index_casting_pobj @check_auto_index_casting_target def test_convert_unit_like_missing_levels( - only_two_index_levels_df, + only_two_index_levels_pobj, only_two_index_levels_target, ): pytest.importorskip("pint") @@ -453,7 +507,7 @@ def test_convert_unit_like_missing_levels( n_runs=2, timepoints=np.array([2020.0, 2030.0, 2040.0]), ) - if only_two_index_levels_df: + if only_two_index_levels_pobj: start = start.loc[ (start.index.get_level_values("scenario") == "scenario_0") & (start.index.get_level_values("run") == 0) @@ -479,10 +533,10 @@ def test_convert_unit_like_missing_levels( assert_frame_alike(res, exp) -@check_auto_index_casting_df +@check_auto_index_casting_pobj @check_auto_index_casting_target def test_convert_unit_like_missing_specs( - only_two_index_levels_df, + only_two_index_levels_pobj, only_two_index_levels_target, ): """ @@ -500,7 +554,7 @@ def test_convert_unit_like_missing_specs( n_runs=2, timepoints=np.array([2020.0, 2030.0, 2040.0]), ) - if only_two_index_levels_df: + if only_two_index_levels_pobj: start = start.loc[ (start.index.get_level_values("scenario") == "scenario_0") & (start.index.get_level_values("run") == 0) @@ -526,10 +580,10 @@ def 
test_convert_unit_like_missing_specs( assert_frame_alike(res, exp) -@check_auto_index_casting_df +@check_auto_index_casting_pobj @check_auto_index_casting_target def test_convert_unit_like_extra_levels_ok( - only_two_index_levels_df, + only_two_index_levels_pobj, only_two_index_levels_target, ): pytest.importorskip("pint") @@ -544,7 +598,7 @@ def test_convert_unit_like_extra_levels_ok( n_runs=2, timepoints=np.array([2020.0, 2030.0, 2040.0]), ) - if only_two_index_levels_df: + if only_two_index_levels_pobj: start = start.loc[ (start.index.get_level_values("scenario") == "scenario_0") & (start.index.get_level_values("run") == 0) @@ -573,10 +627,10 @@ def test_convert_unit_like_extra_levels_ok( assert_frame_alike(res, exp) -@check_auto_index_casting_df +@check_auto_index_casting_pobj @check_auto_index_casting_target def test_convert_unit_like_extra_levels_ambiguous_error( - only_two_index_levels_df, + only_two_index_levels_pobj, only_two_index_levels_target, ): start = create_test_df( @@ -589,7 +643,7 @@ def test_convert_unit_like_extra_levels_ambiguous_error( n_runs=2, timepoints=np.array([2020.0, 2030.0, 2040.0]), ) - if only_two_index_levels_df: + if only_two_index_levels_pobj: start = start.loc[ (start.index.get_level_values("scenario") == "scenario_0") & (start.index.get_level_values("run") == 0) @@ -623,10 +677,10 @@ def test_convert_unit_like_extra_levels_ambiguous_error( convert_unit_like(start, target) -@check_auto_index_casting_df +@check_auto_index_casting_pobj @check_auto_index_casting_target def test_convert_unit_like_extra_specs( - only_two_index_levels_df, + only_two_index_levels_pobj, only_two_index_levels_target, ): """ @@ -644,7 +698,7 @@ def test_convert_unit_like_extra_specs( n_runs=2, timepoints=np.array([2020.0, 2030.0, 2040.0]), ) - if only_two_index_levels_df: + if only_two_index_levels_pobj: start = start.loc[ (start.index.get_level_values("scenario") == "scenario_0") & (start.index.get_level_values("run") == 0) From 
574942bd9aab82eae1505253927ceb58d91e2811 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sun, 3 Aug 2025 18:09:55 +0200 Subject: [PATCH 03/11] Update convert_unit --- tests/integration/test_unit_conversion.py | 27 ++++++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_unit_conversion.py b/tests/integration/test_unit_conversion.py index 8720eb0..00a85b9 100644 --- a/tests/integration/test_unit_conversion.py +++ b/tests/integration/test_unit_conversion.py @@ -220,8 +220,9 @@ def test_convert_unit_ur_injection(): ) +@pobj_type @check_auto_index_casting_pobj -def test_convert_unit_mapping(only_two_index_levels_pobj): +def test_convert_unit_mapping(only_two_index_levels_pobj, pobj_type): pytest.importorskip("pint") start = create_test_df( @@ -240,6 +241,8 @@ def test_convert_unit_mapping(only_two_index_levels_pobj): & (start.index.get_level_values("run") == 0) ].reset_index(["scenario", "run"], drop=True) + start = convert_to_desired_type(start, pobj_type) + # Don't convert W / m^2 res = convert_unit(start, {"K": "degC", "ZJ": "J"}) @@ -262,8 +265,9 @@ def test_convert_unit_mapping(only_two_index_levels_pobj): ) +@pobj_type @check_auto_index_casting_pobj -def test_convert_series(only_two_index_levels_pobj): +def test_convert_series(only_two_index_levels_pobj, pobj_type): pytest.importorskip("pint") # Check that conversion works if user supplies a Series of target units @@ -283,6 +287,8 @@ def test_convert_series(only_two_index_levels_pobj): & (start.index.get_level_values("run") == 0) ].reset_index(["scenario", "run"], drop=True) + start = convert_to_desired_type(start, pobj_type) + desired_units = ( start.loc[start.index.get_level_values("variable") != "temperature"] .reset_index("unit")["unit"] @@ -309,8 +315,9 @@ def test_convert_series(only_two_index_levels_pobj): ) +@pobj_type @check_auto_index_casting_pobj -def test_convert_series_all_rows(only_two_index_levels_pobj): +def 
test_convert_series_all_rows(only_two_index_levels_pobj, pobj_type): pytest.importorskip("pint") start = create_test_df( @@ -329,6 +336,8 @@ def test_convert_series_all_rows(only_two_index_levels_pobj): & (start.index.get_level_values("run") == 0) ].reset_index(["scenario", "run"], drop=True) + start = convert_to_desired_type(start, pobj_type) + desired_units = start.reset_index("unit")["unit"].replace( {"W / m^2": "ZJ / yr / m^2", "ZJ": "PJ"} ) @@ -353,8 +362,9 @@ def test_convert_series_all_rows(only_two_index_levels_pobj): ) +@pobj_type @check_auto_index_casting_pobj -def test_convert_series_extra_rows(only_two_index_levels_pobj): +def test_convert_series_extra_rows(only_two_index_levels_pobj, pobj_type): pytest.importorskip("pint") start = create_test_df( @@ -373,6 +383,8 @@ def test_convert_series_extra_rows(only_two_index_levels_pobj): & (start.index.get_level_values("run") == 0) ].reset_index(["scenario", "run"], drop=True) + start = convert_to_desired_type(start, pobj_type) + desired_units = start.reset_index("unit")["unit"].replace( {"W / m^2": "ZJ / yr / m^2", "ZJ": "PJ"} ) @@ -403,8 +415,9 @@ def test_convert_series_extra_rows(only_two_index_levels_pobj): ) +@pobj_type @check_auto_index_casting_pobj -def test_convert_unit_like_no_op(only_two_index_levels_pobj): +def test_convert_unit_like_no_op(only_two_index_levels_pobj, pobj_type): start = create_test_df( variables=[ ("Cold", "mK"), @@ -421,9 +434,11 @@ def test_convert_unit_like_no_op(only_two_index_levels_pobj): & (start.index.get_level_values("run") == 0) ].reset_index(["scenario", "run"], drop=True) + start = convert_to_desired_type(start, pobj_type) + res = convert_unit_like(start, start) - pd.testing.assert_frame_equal(res, start) + check_result(res, start) check_auto_index_casting_target = pytest.mark.parametrize( From 3647524f332f0553fb6ae2e9b660d2bd22110cd9 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sun, 3 Aug 2025 18:35:26 +0200 Subject: [PATCH 04/11] Up to series accessor --- 
tests/integration/test_unit_conversion.py | 117 +++++++++++++++++++--- 1 file changed, 101 insertions(+), 16 deletions(-) diff --git a/tests/integration/test_unit_conversion.py b/tests/integration/test_unit_conversion.py index 00a85b9..2ba41dc 100644 --- a/tests/integration/test_unit_conversion.py +++ b/tests/integration/test_unit_conversion.py @@ -79,7 +79,7 @@ def convert_to_desired_type( def check_result(res: P, exp: P) -> None: if isinstance(res, pd.DataFrame): - pd.testing.assert_frame_equal(res, exp) + assert_frame_alike(res, exp) elif isinstance(res, pd.Series): pd.testing.assert_series_equal(res, exp) else: @@ -455,12 +455,27 @@ def test_convert_unit_like_no_op(only_two_index_levels_pobj, pobj_type): This parameterisation ensures that we check this edge case. """ +target_type = pytest.mark.parametrize( + "target_type", + ( + pytest.param("DataFrame", id="target_DataFrame"), + pytest.param("Series", id="target_Series"), + ), +) +""" +Parameterisation to use to check handling of both DataFrame and Series as the target +""" + +@pobj_type +@target_type @check_auto_index_casting_pobj @check_auto_index_casting_target def test_convert_unit_like( only_two_index_levels_pobj, only_two_index_levels_target, + target_type, + pobj_type, ): pytest.importorskip("pint") @@ -483,6 +498,8 @@ def test_convert_unit_like( & (start.index.get_level_values("run") == 0) ].reset_index(["scenario", "run"], drop=True) + start = convert_to_desired_type(start, pobj_type) + target = create_test_df( variables=[ ("Cold", "microK"), @@ -497,18 +514,24 @@ def test_convert_unit_like( & (target.index.get_level_values("run") == 0) ].reset_index(["scenario", "run"], drop=True) + target = convert_to_desired_type(target, target_type) + res = convert_unit_like(start, target) exp = convert_unit(start, {"mK": "microK", "kK": "MK", "degC": "degF"}) - assert_frame_alike(res, exp) + check_result(res, exp) +@pobj_type +@target_type @check_auto_index_casting_pobj @check_auto_index_casting_target def 
test_convert_unit_like_missing_levels( only_two_index_levels_pobj, only_two_index_levels_target, + target_type, + pobj_type, ): pytest.importorskip("pint") @@ -528,6 +551,8 @@ def test_convert_unit_like_missing_levels( & (start.index.get_level_values("run") == 0) ].reset_index(["scenario", "run"], drop=True) + start = convert_to_desired_type(start, pobj_type) + target = create_test_df( variables=[ ("Cold", "K"), @@ -541,18 +566,24 @@ def test_convert_unit_like_missing_levels( if only_two_index_levels_target: target = target.reset_index("scenario", drop=True) + target = convert_to_desired_type(target, target_type) + res = convert_unit_like(start, target) exp = convert_unit(start, {"mK": "K", "kK": "K", "degC": "degF"}) - assert_frame_alike(res, exp) + check_result(res, exp) +@pobj_type +@target_type @check_auto_index_casting_pobj @check_auto_index_casting_target def test_convert_unit_like_missing_specs( only_two_index_levels_pobj, only_two_index_levels_target, + target_type, + pobj_type, ): """ Test conversion when the target doesn't specify a unit for all rows in start @@ -575,6 +606,8 @@ def test_convert_unit_like_missing_specs( & (start.index.get_level_values("run") == 0) ].reset_index(["scenario", "run"], drop=True) + start = convert_to_desired_type(start, pobj_type) + target = create_test_df( variables=[ ("Cold", "K"), @@ -588,18 +621,24 @@ def test_convert_unit_like_missing_specs( if only_two_index_levels_target: target = target.reset_index("scenario", drop=True) + target = convert_to_desired_type(target, target_type) + res = convert_unit_like(start, target) exp = convert_unit(start, {"mK": "K", "degC": "degF"}) - assert_frame_alike(res, exp) + check_result(res, exp) +@pobj_type +@target_type @check_auto_index_casting_pobj @check_auto_index_casting_target def test_convert_unit_like_extra_levels_ok( only_two_index_levels_pobj, only_two_index_levels_target, + target_type, + pobj_type, ): pytest.importorskip("pint") @@ -619,6 +658,8 @@ def 
test_convert_unit_like_extra_levels_ok( & (start.index.get_level_values("run") == 0) ].reset_index(["scenario", "run"], drop=True) + start = convert_to_desired_type(start, pobj_type) + target = set_index_levels_func( create_test_df( variables=[ @@ -635,18 +676,24 @@ def test_convert_unit_like_extra_levels_ok( if only_two_index_levels_target: target = target.reset_index("scenario", drop=True) + target = convert_to_desired_type(target, target_type) + res = convert_unit_like(start, target) exp = convert_unit(start, {"mK": "K", "kK": "K", "degC": "degF"}) - assert_frame_alike(res, exp) + check_result(res, exp) +@pobj_type +@target_type @check_auto_index_casting_pobj @check_auto_index_casting_target def test_convert_unit_like_extra_levels_ambiguous_error( only_two_index_levels_pobj, only_two_index_levels_target, + target_type, + pobj_type, ): start = create_test_df( variables=[ @@ -664,6 +711,8 @@ def test_convert_unit_like_extra_levels_ambiguous_error( & (start.index.get_level_values("run") == 0) ].reset_index(["scenario", "run"], drop=True) + start = convert_to_desired_type(start, pobj_type) + target = pd.DataFrame( np.arange(3 * 6).reshape((6, 3)), columns=np.array([1.0, 10.0, 100.0]), @@ -688,15 +737,21 @@ def test_convert_unit_like_extra_levels_ambiguous_error( :, ].reset_index(["model", "scenario"], drop=True) + target = convert_to_desired_type(target, target_type) + with pytest.raises(AmbiguousTargetUnitError): convert_unit_like(start, target) +@pobj_type +@target_type @check_auto_index_casting_pobj @check_auto_index_casting_target def test_convert_unit_like_extra_specs( only_two_index_levels_pobj, only_two_index_levels_target, + target_type, + pobj_type, ): """ Test conversion when the target has a unit for rows that aren't in start @@ -719,6 +774,8 @@ def test_convert_unit_like_extra_specs( & (start.index.get_level_values("run") == 0) ].reset_index(["scenario", "run"], drop=True) + start = convert_to_desired_type(start, pobj_type) + target = create_test_df( 
variables=[ ("Cold", "K"), @@ -733,11 +790,13 @@ def test_convert_unit_like_extra_specs( if only_two_index_levels_target: target = target.reset_index("scenario", drop=True) + target = convert_to_desired_type(target, target_type) + res = convert_unit_like(start, target) exp = convert_unit(start, {"mK": "K", "kK": "K", "degC": "degF"}) - assert_frame_alike(res, exp) + check_result(res, exp) def test_convert_unit_like_ur_injection(): @@ -782,17 +841,24 @@ def test_convert_unit_like_ur_injection(): assert_frame_alike(res, exp) +@pobj_type +@target_type @pytest.mark.parametrize( "df_unit_level, df_unit_level_exp, target_unit_level, target_unit_level_exp", ( pytest.param(None, "unit", None, "unit", id="default"), - pytest.param("units", "units", None, "units", id="target-inferred-from-df"), - pytest.param("units", "units", "unit", "unit", id="target-df-differ"), + pytest.param("units", "units", None, "units", id="target-inferred-from-other"), + pytest.param("units", "units", "unit", "unit", id="target-other-differ"), pytest.param(None, "unit", "units", "units", id="target-specified-only"), ), ) -def test_convert_unit_like_unit_level_handling( - df_unit_level, df_unit_level_exp, target_unit_level, target_unit_level_exp +def test_convert_unit_like_unit_level_handling( # noqa: PLR0913 + df_unit_level, + df_unit_level_exp, + target_unit_level, + target_unit_level_exp, + target_type, + pobj_type, ): pytest.importorskip("pint") @@ -803,6 +869,8 @@ def test_convert_unit_like_unit_level_handling( timepoints=np.array([1.0, 2.0, 3.0]), ) + start = convert_to_desired_type(start, pobj_type) + target = create_test_df( variables=[(f"variable_{i}", "g") for i in range(2)], n_scenarios=2, @@ -810,6 +878,8 @@ def test_convert_unit_like_unit_level_handling( timepoints=np.array([10.0, 11.0, 12.0]), ) + target = convert_to_desired_type(target, target_type) + call_kwargs = {} if df_unit_level is not None: start = start.rename_axis(index={"unit": df_unit_level}) @@ -830,10 +900,11 @@ def 
test_convert_unit_like_unit_level_handling( exp = convert_unit(start, "g", unit_level=df_unit_level_exp) - assert_frame_alike(res, exp) + check_result(res, exp) -def test_convert_unit_from_target_series_missing_desired_unit_error(): +@pobj_type +def test_convert_unit_from_target_series_missing_desired_unit_error(pobj_type): start = pd.DataFrame( np.arange(2 * 3).reshape((2, 3)), columns=np.array([1.0, 10.0, 100.0]), @@ -846,6 +917,8 @@ def test_convert_unit_from_target_series_missing_desired_unit_error(): ), ) + start = convert_to_desired_type(start, pobj_type) + desired_unit = pd.Series( [ "K", @@ -864,7 +937,8 @@ def test_convert_unit_from_target_series_missing_desired_unit_error(): convert_unit_from_target_series(start, desired_unit) -def test_convert_unit_from_target_series_no_pint_error(): +@pobj_type +def test_convert_unit_from_target_series_no_pint_error(pobj_type): start = pd.DataFrame( np.arange(2 * 3).reshape((2, 3)), columns=np.array([1.0, 10.0, 100.0]), @@ -877,6 +951,8 @@ def test_convert_unit_from_target_series_no_pint_error(): ), ) + start = convert_to_desired_type(start, pobj_type) + desired_unit = pd.Series( ["K", "K"], index=pd.MultiIndex.from_tuples( @@ -899,7 +975,8 @@ def test_convert_unit_from_target_series_no_pint_error(): convert_unit_from_target_series(start, desired_unit) -def test_accessor_convert_unit(setup_pandas_accessors): +@pobj_type +def test_accessor_convert_unit(setup_pandas_accessors, pobj_type): # Do most complex case: supply a series with different unit level # and required unit registry openscm_units = pytest.importorskip("openscm_units") @@ -916,6 +993,8 @@ def test_accessor_convert_unit(setup_pandas_accessors): timepoints=np.array([1850.0, 2000.0, 2050.0, 2100.0]), ).rename_axis(index={"unit": "units"}) + start = convert_to_desired_type(start, pobj_type) + desired_units = ( start.loc[start.index.get_level_values("variable") != "temperature"] .reset_index("units")["units"] @@ -952,7 +1031,9 @@ def 
test_accessor_convert_unit(setup_pandas_accessors): ) -def test_accessor_convert_unit_like(setup_pandas_accessors): +@pobj_type +@target_type +def test_accessor_convert_unit_like(setup_pandas_accessors, pobj_type, target_type): # Do most complex case: supply a series with different unit level # and required unit registry openscm_units = pytest.importorskip("openscm_units") @@ -969,6 +1050,8 @@ def test_accessor_convert_unit_like(setup_pandas_accessors): timepoints=np.array([1850.0, 2000.0, 2050.0, 2100.0]), ).rename_axis(index={"unit": "units"}) + start = convert_to_desired_type(start, pobj_type) + target = create_test_df( variables=[ ("temperature", "mK"), @@ -988,10 +1071,12 @@ def test_accessor_convert_unit_like(setup_pandas_accessors): ur=openscm_units.unit_registry, ) + target = convert_to_desired_type(target, target_type) + exp = start.openscm.convert_unit( {"K": "mK", "ZJ": "PJ", "GtC": "MtC"}, unit_level="units", ur=openscm_units.unit_registry, ) - assert_frame_alike(res, exp) + check_result(res, exp) From cd2726d92bf225a36cf8eaee631e4db5748da49c Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sun, 3 Aug 2025 19:13:27 +0200 Subject: [PATCH 05/11] Add series accessor for unit conversion --- src/pandas_openscm/accessors/__init__.py | 3 +- src/pandas_openscm/accessors/dataframe.py | 18 +- src/pandas_openscm/accessors/series.py | 436 ++++++++++++++++++++++ src/pandas_openscm/index_manipulation.py | 15 +- src/pandas_openscm/unit_conversion.py | 92 ++--- tests/conftest.py | 8 +- tests/integration/test_unit_conversion.py | 18 +- 7 files changed, 516 insertions(+), 74 deletions(-) create mode 100644 src/pandas_openscm/accessors/series.py diff --git a/src/pandas_openscm/accessors/__init__.py b/src/pandas_openscm/accessors/__init__.py index 7156034..00f7607 100644 --- a/src/pandas_openscm/accessors/__init__.py +++ b/src/pandas_openscm/accessors/__init__.py @@ -38,6 +38,7 @@ import pandas as pd from pandas_openscm.accessors.dataframe import 
PandasDataFrameOpenSCMAccessor +from pandas_openscm.accessors.series import PandasSeriesOpenSCMAccessor # TODO: note change in name (now has trailing s) in changelog @@ -72,5 +73,5 @@ def register_pandas_accessors(namespace: str = "openscm") -> None: pd.api.extensions.register_dataframe_accessor(namespace)( PandasDataFrameOpenSCMAccessor ) - # pd.api.extensions.register_series_accessor(namespace)(PandasSeriesOpenSCMAccessor) + pd.api.extensions.register_series_accessor(namespace)(PandasSeriesOpenSCMAccessor) # pd.api.extensions.register_index_accessor(namespace)(PandasIndexOpenSCMAccessor) diff --git a/src/pandas_openscm/accessors/dataframe.py b/src/pandas_openscm/accessors/dataframe.py index d7808f7..fefefd7 100644 --- a/src/pandas_openscm/accessors/dataframe.py +++ b/src/pandas_openscm/accessors/dataframe.py @@ -48,18 +48,18 @@ class PandasDataFrameOpenSCMAccessor: [pandas' docs](https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors). """ - def __init__(self, pandas_obj: pd.DataFrame): + def __init__(self, df: pd.DataFrame): """ Initialise Parameters ---------- - pandas_obj - Pandas object to use via the accessor + df + [pd.DataFrame][pandas.DataFrame] to use via the accessor """ # It is possible to validate here. # However, it's probably better to do validation closer to the data use. 
- self._df = pandas_obj + self._df = df def convert_unit( self, @@ -120,8 +120,8 @@ def convert_unit( def convert_unit_like( self, - target: pd.DataFrame, - df_unit_level: str = "unit", + target: pd.DataFrame | pd.Series[Any], + unit_level: str = "unit", target_unit_level: str | None = None, ur: pint.facets.PlainRegistry | None = None, ) -> pd.DataFrame: @@ -143,9 +143,9 @@ def convert_unit_like( Parameters ---------- target - [pd.DataFrame][pandas.DataFrame] whose units should be matched + Supported [pandas][] object whose units should be matched - df_unit_level + unit_level Level in the data's index which holds unit information target_unit_level @@ -166,7 +166,7 @@ def convert_unit_like( return convert_unit_like( self._df, target=target, - df_unit_level=df_unit_level, + unit_level=unit_level, target_unit_level=target_unit_level, ur=ur, ) diff --git a/src/pandas_openscm/accessors/series.py b/src/pandas_openscm/accessors/series.py new file mode 100644 index 0000000..7b31c74 --- /dev/null +++ b/src/pandas_openscm/accessors/series.py @@ -0,0 +1,436 @@ +""" +Accessor for [pd.Series][pandas.Series] +""" + +from __future__ import annotations + +from collections.abc import Mapping +from typing import TYPE_CHECKING, Any, Generic, TypeVar + +import pandas as pd + +from pandas_openscm.unit_conversion import convert_unit, convert_unit_like + +if TYPE_CHECKING: + S = TypeVar("S", bound=pd.Series[Any]) + + import pint + +else: + S = TypeVar("S") + + +class PandasSeriesOpenSCMAccessor(Generic[S]): + """ + [pd.Series][pandas.Series] accessor + + For details, see + [pandas' docs](https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors). + """ + + def __init__(self, series: S): + """ + Initialise + + Parameters + ---------- + series + [pd.Series][pandas.Series] to use via the accessor + """ + # It is possible to validate here. + # However, it's probably better to do validation closer to the data use. 
+ self._series = series + + def convert_unit( + self, + desired_units: str | Mapping[str, str] | pd.Series[str], + unit_level: str = "unit", + ur: pint.facets.PlainRegistry | None = None, + ) -> S: + """ + Convert units + + This uses [convert_unit_from_target_series][(p).unit_conversion.]. + If you want to understand the details of how the conversion works, + see that function's docstring. + + Parameters + ---------- + desired_units + Desired unit(s) for `series` + + If this is a string, + we attempt to convert all timeseries to the given unit. + + If this is a mapping, + we convert the given units to the target units. + Be careful using this form - you need to be certain of the units. + If any of your keys don't match the existing units + (even by a single whitespace character) + then the unit conversion will not happen. + + If this is a [pd.Series][pandas.Series], + then it will be passed to + [convert_unit_from_target_series][(p).unit_conversion.] + after filling any rows in the [pd.Series][pandas.Series] + that are not in `desired_units` + with the existing unit (i.e. unspecified rows are not converted). + + For further details, see the examples + in [convert_unit][(p).unit_conversion.]. + + unit_level + Level in the index which holds unit information + + Passed to [convert_unit_from_target_series][(p).unit_conversion.]. + + ur + Unit registry to use for the conversion. + + Passed to [convert_unit_from_target_series][(p).unit_conversion.]. + + Returns + ------- + : + Data with converted units + """ + res = convert_unit( + self._series, desired_units=desired_units, unit_level=unit_level, ur=ur + ) + + # The type hinting is impossible to get right here + # because the casting doesn't work to match the return type + # (the return type is the same as the input, + # but we would have to cast to make sure it's numeric + # and we can't do a runtime check because pd.Series + # is not subscriptable at runtime). 
+ # Hence just ignore the type stuff, + # it's impossible to get right with pandas' accessor pattern. + # If users want correct type hints, they should use the functional form. + return res # type: ignore + + def convert_unit_like( + self, + target: pd.DataFrame | pd.Series[Any], + unit_level: str = "unit", + target_unit_level: str | None = None, + ur: pint.facets.PlainRegistry | None = None, + ) -> S: + """ + Convert units to match another supported pandas object + + For further details, see the examples + in [convert_unit_like][(p).unit_conversion.]. + + This is essentially a helper for + [convert_unit_from_target_series][(p).unit_conversion.]. + It implements one set of logic for extracting desired units + and tries to be clever, handling differences in index levels + between the data and `target` sensibly wherever possible. + + If you want behaviour other than what is implemented here, + use [convert_unit_from_target_series][(p).unit_conversion.] directly. + + Parameters + ---------- + target + Supported [pandas][] object whose units should be matched + + unit_level + Level in the data's index which holds unit information + + target_unit_level + Level in `target`'s index which holds unit information + + If not supplied, we use `unit_level`. + + ur + Unit registry to use for the conversion. + + Passed to [convert_unit_from_target_series][(p).unit_conversion.]. + + Returns + ------- + : + Data with converted units + """ + res = convert_unit_like( + self._series, + target=target, + unit_level=unit_level, + target_unit_level=target_unit_level, + ur=ur, + ) + + # The type hinting is impossible to get right here + # because the casting doesn't work to match the return type + # (the return type is the same as the input, + # but we would have to cast to make sure it's numeric + # and we can't do a runtime check because pd.Series + # is not subscriptable at runtime). + # Hence just ignore the type stuff, + # it's impossible to get right with pandas' accessor pattern. 
+ # If users want correct type hints, they should use the functional form. + return res # type: ignore + + # def ensure_index_is_multiindex(self, copy: bool = True) -> pd.DataFrame: + # """ + # Ensure that the index is a [pd.MultiIndex][pandas.MultiIndex] + # + # Parameters + # ---------- + # copy + # Whether to copy `df` before manipulating the index name + # + # Returns + # ------- + # : + # `df` with a [pd.MultiIndex][pandas.MultiIndex] + # + # If the index was already a [pd.MultiIndex][pandas.MultiIndex], + # this is a no-op (although the value of copy is respected). + # """ + # return ensure_index_is_multiindex(self._df, copy=copy) + # + # def eiim(self, copy: bool = True) -> pd.DataFrame: + # """ + # Ensure that the index is a [pd.MultiIndex][pandas.MultiIndex] + # + # Alias for [ensure_index_is_multiindex][(p).index_manipulation.] + # + # Parameters + # ---------- + # copy + # Whether to copy `df` before manipulating the index name + # + # Returns + # ------- + # : + # `df` with a [pd.MultiIndex][pandas.MultiIndex] + # + # If the index was already a [pd.MultiIndex][pandas.MultiIndex], + # this is a no-op (although the value of copy is respected). + # """ + # return self.ensure_index_is_multiindex(copy=copy) + # + # def fix_index_name_after_groupby_quantile( + # self, new_name: str = "quantile", copy: bool = False + # ) -> pd.DataFrame: + # """ + # Fix the index name after performing a `groupby(...).quantile(...)` operation + # + # By default, pandas doesn't assign a name to the quantile level + # when doing an operation of the form given above. + # This fixes this, but it does assume + # that the quantile level is the only unnamed level in the index. + # + # Parameters + # ---------- + # new_name + # New name to give to the quantile column + # + # copy + # Whether to copy `df` before manipulating the index name + # + # Returns + # ------- + # : + # `df`, with the last level in its index renamed to `new_name`. 
+ # """ + # return fix_index_name_after_groupby_quantile( + # self._df, new_name=new_name, copy=copy + # ) + # + # def groupby_except( + # self, non_groupers: str | list[str], observed: bool = True + # ) -> pd.core.groupby.generic.DataFrameGroupBy[Any]: + # """ + # Group by all index levels except specified levels + # + # This is the inverse of [pd.DataFrame.groupby][pandas.DataFrame.groupby]. + # + # Parameters + # ---------- + # non_groupers + # Columns to exclude from the grouping + # + # observed + # Whether to only return observed combinations or not + # + # Returns + # ------- + # : + # The [pd.DataFrame][pandas.DataFrame], + # grouped by all columns except `non_groupers`. + # """ + # return groupby_except(df=self._df, non_groupers=non_groupers, observed=observed) # noqa: E501 + # + # def mi_loc( + # self, + # locator: pd.Index[Any] | pd.MultiIndex | pix.selectors.Selector, + # ) -> pd.DataFrame: + # """ + # Select data, being slightly smarter than the default [pandas.DataFrame.loc][]. + # + # Parameters + # ---------- + # locator + # Locator to apply + # + # If this is a multi-index, we use + # [multi_index_lookup][(p).indexing.] to ensure correct alignment. + # + # If this is an index that has a name, + # we use the name to ensure correct alignment. + # + # Returns + # ------- + # : + # Selected data + # + # Notes + # ----- + # If you have [pandas_indexing][] installed, + # you can get the same (perhaps even better) functionality + # using something like the following instead + # + # ```python + # ... + # pandas_obj.loc[pandas_indexing.isin(locator)] + # ... 
+ # ``` + # """ + # return mi_loc(self._df, locator) + + # def set_index_levels( + # self, + # levels_to_set: dict[str, Any | Collection[Any]], + # copy: bool = True, + # ) -> pd.DataFrame: + # """ + # Set the index levels + # + # Parameters + # ---------- + # levels_to_set + # Mapping of level names to values to set + # + # copy + # Should the [pd.DataFrame][pandas.DataFrame] be copied before returning? + # + # Returns + # ------- + # : + # [pd.DataFrame][pandas.DataFrame] with updates applied to its index + # """ + # return set_index_levels_func( + # self._df, + # levels_to_set=levels_to_set, + # copy=copy, + # ) + # + # def to_category_index(self) -> pd.DataFrame: + # """ + # Convert the index's values to categories + # + # This can save a lot of memory and improve the speed of processing. + # However, it comes with some pitfalls. + # For a nice discussion of some of them, + # see [this article](https://towardsdatascience.com/staying-sane-while-adopting-pandas-categorical-datatypes-78dbd19dcd8a/). + # + # Returns + # ------- + # : + # [pd.DataFrame][pandas.DataFrame] with all index columns + # converted to category type. + # """ + # return convert_index_to_category_index(self._df) + + # def update_index_levels( + # self, + # updates: dict[Any, Callable[[Any], Any]], + # copy: bool = True, + # remove_unused_levels: bool = True, + # ) -> pd.DataFrame: + # """ + # Update the index levels + # + # Parameters + # ---------- + # updates + # Updates to apply to the index levels + # + # Each key is the index level to which the updates will be applied. + # Each value is a function which updates the levels to their new values. + # + # copy + # Should the [pd.DataFrame][pandas.DataFrame] be copied before returning? + # + # remove_unused_levels + # Remove unused levels before applying the update + # + # Specifically, call + # [pd.MultiIndex.remove_unused_levels][pandas.MultiIndex.remove_unused_levels]. 
# noqa: E501 + # + # This avoids trying to update levels that aren't being used. + # + # Returns + # ------- + # : + # [pd.DataFrame][pandas.DataFrame] with updates applied to its index + # """ + # return update_index_levels_func( + # self._df, + # updates=updates, + # copy=copy, + # remove_unused_levels=remove_unused_levels, + # ) + # + # def update_index_levels_from_other( + # self, + # update_sources: dict[ + # Any, tuple[Any, Callable[[Any], Any] | dict[Any, Any] | pd.Series[Any]] + # ], + # copy: bool = True, + # remove_unused_levels: bool = True, + # ) -> pd.DataFrame: + # """ + # Update the index levels based on other index levels + # + # Parameters + # ---------- + # update_sources + # Updates to apply to `df`'s index + # + # Each key is the level to which the updates will be applied + # (or the level that will be created if it doesn't already exist). + # + # Each value is a tuple of which the first element + # is the level to use to generate the values (the 'source level') + # and the second is mapper of the form used by + # [pd.Index.map][pandas.Index.map] + # which will be applied to the source level + # to update/create the level of interest. + # + # copy + # Should the [pd.DataFrame][pandas.DataFrame] be copied before returning? + # + # remove_unused_levels + # Remove unused levels before applying the update + # + # Specifically, call + # [pd.MultiIndex.remove_unused_levels][pandas.MultiIndex.remove_unused_levels]. # noqa: E501 + # + # This avoids trying to update levels that aren't being used. 
+ # + # Returns + # ------- + # : + # [pd.DataFrame][pandas.DataFrame] with updates applied to its index + # """ + # return update_index_levels_from_other_func( + # self._df, + # update_sources=update_sources, + # copy=copy, + # remove_unused_levels=remove_unused_levels, + # ) diff --git a/src/pandas_openscm/index_manipulation.py b/src/pandas_openscm/index_manipulation.py index dd57f13..d753399 100644 --- a/src/pandas_openscm/index_manipulation.py +++ b/src/pandas_openscm/index_manipulation.py @@ -910,10 +910,11 @@ def set_levels( def set_index_levels_func( - df: pd.DataFrame, + # TODO: check support for series and add accessors + pobj: P, levels_to_set: dict[str, Any | Collection[Any]], copy: bool = True, -) -> pd.DataFrame: +) -> P: """ Set the index levels of a [pd.DataFrame][pandas.DataFrame] @@ -934,17 +935,17 @@ def set_index_levels_func( : `df` with updates applied to its index """ - if not isinstance(df.index, pd.MultiIndex): + if not isinstance(pobj.index, pd.MultiIndex): msg = ( "This function is only intended to be used " "when `df`'s index is an instance of `MultiIndex`. 
" - f"Received {type(df.index)=}" + f"Received {type(pobj.index)=}" ) raise TypeError(msg) if copy: - df = df.copy() + pobj = pobj.copy() - df.index = set_levels(df.index, levels_to_set=levels_to_set) # type: ignore + pobj.index = set_levels(pobj.index, levels_to_set=levels_to_set) # type: ignore - return df + return pobj diff --git a/src/pandas_openscm/unit_conversion.py b/src/pandas_openscm/unit_conversion.py index b3fee83..2ba68b6 100644 --- a/src/pandas_openscm/unit_conversion.py +++ b/src/pandas_openscm/unit_conversion.py @@ -5,7 +5,7 @@ from __future__ import annotations from collections.abc import Mapping -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, TypeVar import pandas as pd @@ -18,6 +18,8 @@ from pandas_openscm.indexing import multi_index_lookup, multi_index_match if TYPE_CHECKING: + P = TypeVar("P", pd.Series[float], pd.Series[int], pd.DataFrame) + import pint.facets @@ -40,12 +42,13 @@ def __init__(self, missing_ts: pd.MultiIndex) -> None: def convert_unit_from_target_series( - # TODO: update type hint to pd.Series[supported numerical types here] - pobj: pd.DataFrame, + # # TODO: update type hint to pd.Series[supported numerical types here] + # pobj: pd.DataFrame, + pobj: P, desired_units: pd.Series[str], unit_level: str = "unit", ur: pint.facets.PlainRegistry | None = None, -) -> pd.DataFrame: +) -> P: """ Convert `pobj`'s units based on a [pd.Series][pandas.Series] @@ -124,9 +127,9 @@ def convert_unit_from_target_series( """ desired_units = ensure_index_is_multiindex(desired_units) - df_rows_checker = ensure_is_multiindex(pobj.index.droplevel(unit_level)) - missing_rows = df_rows_checker.difference( # type: ignore # pandas-stubs missing API - desired_units.index.reorder_levels(df_rows_checker.names) # type: ignore # pandas-stubs missing API + pobj_rows_checker = ensure_is_multiindex(pobj.index.droplevel(unit_level)) + missing_rows = pobj_rows_checker.difference( # type: ignore # pandas-stubs missing API + 
desired_units.index.reorder_levels(pobj_rows_checker.names) # type: ignore # pandas-stubs missing API ) if not missing_rows.empty: raise MissingDesiredUnitError(missing_rows) @@ -136,11 +139,11 @@ def convert_unit_from_target_series( index=ensure_is_multiindex(pobj.index.droplevel(unit_level)), ) - desired_units_in_pobj = multi_index_lookup(desired_units, pobj_units.index) # type: ignore # already checked that df_units.index is MultiIndex + desired_units_in_pobj = multi_index_lookup(desired_units, pobj_units.index) # type: ignore # already checked that pobj.index is MultiIndex # Don't need to align, pandas does that for us. # If you want to check, compare the below with - # unit_map = pd.DataFrame([df_units, desired_units_in_df.sample(frac=1)]).T + # unit_map = pd.DataFrame([pobj_units, desired_units_in_pobj.sample(frac=1)]).T unit_map = pd.DataFrame( [pobj_units.rename("pobj_unit"), desired_units_in_pobj.rename("target_unit")] ).T @@ -181,12 +184,13 @@ def convert_unit_from_target_series( def convert_unit( - # TODO: update type hint to pd.Series[supported numerical types here] - pobj: pd.DataFrame, + # # TODO: update type hint to pd.Series[supported numerical types here] + # pobj: pd.DataFrame, + pobj: P, desired_units: str | Mapping[str, str] | pd.Series[str], unit_level: str = "unit", ur: pint.facets.PlainRegistry | None = None, -) -> pd.DataFrame: +) -> P: """ Convert a supported [pandas][] object's units @@ -306,9 +310,9 @@ def convert_unit( sb temperature K 1.100 1.200 1.300 body temperature degF 98.600 100.580 100.220 """ - df_units_s = ensure_index_is_multiindex( + pobj_units_s = ensure_index_is_multiindex( pobj.index.get_level_values(unit_level).to_series( - index=pobj.index.droplevel(unit_level), name="df_unit" + index=pobj.index.droplevel(unit_level), name="pobj_unit" ) ) @@ -319,21 +323,21 @@ def convert_unit( if isinstance(desired_units, str): desired_units_s = pd.Series( [desired_units] * pobj.shape[0], - index=df_units_s.index, + 
index=pobj_units_s.index, ) elif isinstance(desired_units, Mapping): - desired_units_s = df_units_s.replace(desired_units) # type: ignore # pandas-stubs missing Mapping option + desired_units_s = pobj_units_s.replace(desired_units) # type: ignore # pandas-stubs missing Mapping option - elif isinstance(desired_units, pd.Series): # type: ignore # isinstance confused by pd.Series without generic type annotation - desired_units = ensure_index_is_multiindex(desired_units) # type: ignore # as above + elif isinstance(desired_units, pd.Series): + desired_units = ensure_index_is_multiindex(desired_units) - missing = df_units_s.index.difference(desired_units.index) + missing = pobj_units_s.index.difference(desired_units.index) if missing.empty: desired_units_s = desired_units else: desired_units_s = pd.concat( - [desired_units, multi_index_lookup(df_units_s, missing)] + [desired_units, multi_index_lookup(pobj_units_s, missing)] ) else: @@ -364,38 +368,38 @@ def __init__(self, msg: str) -> None: def convert_unit_like( - df: pd.DataFrame, - target: pd.DataFrame, - df_unit_level: str = "unit", + pobj: P, + target: pd.DataFrame | pd.Series[Any], + unit_level: str = "unit", target_unit_level: str | None = None, ur: pint.facets.PlainRegistry | None = None, -) -> pd.DataFrame: +) -> P: """ Convert units to match another [pd.DataFrame][pandas.DataFrame] This is essentially a helper function for [convert_unit_from_target_series][(m).]. It implements one set of logic for extracting desired units and tries to be clever, handling differences in index levels - between `df` and `target` sensibly wherever possible. + between `pobj` and `target` sensibly wherever possible. If you want behaviour other than what is implemented here, use [convert_unit_from_target_series][(m).] directly. 
Parameters ---------- - df - [pd.DataFrame][pandas.DataFrame] whose units should be converted + pobj + Supported [pandas][] object whose units should be converted target - [pd.DataFrame][pandas.DataFrame] whose units should be matched + Supported [pandas][] object whose units should be matched - df_unit_level - Level in `df`'s index which holds unit information + unit_level + Level in `pobj`'s index which holds unit information target_unit_level Level in `target`'s index which holds unit information - If not supplied, we use `df_unit_level`. + If not supplied, we use `unit_level`. ur Unit registry to use for the conversion. @@ -405,7 +409,7 @@ def convert_unit_like( Returns ------- : - `df` with converted units + `pobj` with converted units Examples -------- @@ -451,18 +455,18 @@ def convert_unit_like( body temperature degC 36.850000 37.850000 37.148000 """ if target_unit_level is None: - target_unit_level_use = df_unit_level + target_unit_level_use = unit_level else: target_unit_level_use = target_unit_level - df_units_s = ensure_index_is_multiindex( - df.index.get_level_values(df_unit_level).to_series( - index=df.index.droplevel(df_unit_level) + pobj_units_s = ensure_index_is_multiindex( + pobj.index.get_level_values(unit_level).to_series( + index=pobj.index.droplevel(unit_level) ) ) extra_index_levels_target = target.index.names.difference( # type: ignore # pandas-stubs API out of date - [*df.index.names, target_unit_level_use] + [*pobj.index.names, target_unit_level_use] ) if extra_index_levels_target: # Drop out the extra levels and duplicates, @@ -496,9 +500,9 @@ def convert_unit_like( ambiguous_drivers = target.index[multi_index_match(target.index, ambiguous_idx)] msg = ( - f"`df` has {df.index.names=}. " + f"`pobj` has {pobj.index.names=}. " f"`target` has {target.index.names=}. " - "The index levels in `target` that are also in `df` are " + "The index levels in `target` that are also in `pobj` are " f"{target_units_s.index.names}. 
" "When we only look at these levels, the desired unit looks like:\n" f"{target_units_s}\n" @@ -510,17 +514,17 @@ def convert_unit_like( ) raise AmbiguousTargetUnitError(msg) - target_units_s, _ = target_units_s.align(df_units_s) - target_units_s = target_units_s.reorder_levels(df_units_s.index.names) + target_units_s, _ = target_units_s.align(pobj_units_s) + target_units_s = target_units_s.reorder_levels(pobj_units_s.index.names) if target_units_s.isnull().any(): # Fill rows that don't get a spec with their existing units target_units_s = multi_index_lookup( target_units_s, - df_units_s.index, # type: ignore # checked that index is MultiIndex above - ).fillna(df_units_s) + pobj_units_s.index, # type: ignore # checked that index is MultiIndex above + ).fillna(pobj_units_s) res = convert_unit_from_target_series( - pobj=df, desired_units=target_units_s, unit_level=df_unit_level, ur=ur + pobj=pobj, desired_units=target_units_s, unit_level=unit_level, ur=ur ) return res diff --git a/tests/conftest.py b/tests/conftest.py index fe0af3f..a593786 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -41,7 +41,7 @@ def setup_pandas_accessors() -> None: pd.DataFrame._accessors.discard("openscm") if hasattr(pd.DataFrame, "openscm"): del pd.DataFrame.openscm - # In future - # pd.Series._accessors.discard("openscm") - # if hasattr(pd.Series, "openscm"): - # del pd.Series.openscm + + pd.Series._accessors.discard("openscm") + if hasattr(pd.Series, "openscm"): + del pd.Series.openscm diff --git a/tests/integration/test_unit_conversion.py b/tests/integration/test_unit_conversion.py index 2ba41dc..97a9cb8 100644 --- a/tests/integration/test_unit_conversion.py +++ b/tests/integration/test_unit_conversion.py @@ -844,7 +844,7 @@ def test_convert_unit_like_ur_injection(): @pobj_type @target_type @pytest.mark.parametrize( - "df_unit_level, df_unit_level_exp, target_unit_level, target_unit_level_exp", + "unit_level, unit_level_exp, target_unit_level, target_unit_level_exp", ( 
pytest.param(None, "unit", None, "unit", id="default"), pytest.param("units", "units", None, "units", id="target-inferred-from-other"), @@ -853,8 +853,8 @@ def test_convert_unit_like_ur_injection(): ), ) def test_convert_unit_like_unit_level_handling( # noqa: PLR0913 - df_unit_level, - df_unit_level_exp, + unit_level, + unit_level_exp, target_unit_level, target_unit_level_exp, target_type, @@ -881,16 +881,16 @@ def test_convert_unit_like_unit_level_handling( # noqa: PLR0913 target = convert_to_desired_type(target, target_type) call_kwargs = {} - if df_unit_level is not None: - start = start.rename_axis(index={"unit": df_unit_level}) - call_kwargs["df_unit_level"] = df_unit_level + if unit_level is not None: + start = start.rename_axis(index={"unit": unit_level}) + call_kwargs["unit_level"] = unit_level if target_unit_level is not None: target = target.rename_axis(index={"unit": target_unit_level}) call_kwargs["target_unit_level"] = target_unit_level else: - target = target.rename_axis(index={"unit": df_unit_level}) + target = target.rename_axis(index={"unit": unit_level}) elif target_unit_level is not None: target = target.rename_axis(index={"unit": target_unit_level}) @@ -898,7 +898,7 @@ def test_convert_unit_like_unit_level_handling( # noqa: PLR0913 res = convert_unit_like(start, target, **call_kwargs) - exp = convert_unit(start, "g", unit_level=df_unit_level_exp) + exp = convert_unit(start, "g", unit_level=unit_level_exp) check_result(res, exp) @@ -1066,7 +1066,7 @@ def test_accessor_convert_unit_like(setup_pandas_accessors, pobj_type, target_ty res = start.openscm.convert_unit_like( target, - df_unit_level="units", + unit_level="units", target_unit_level="unit_level", ur=openscm_units.unit_registry, ) From e976cd8f817f96e9be45026909d5402eb321f28b Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sun, 3 Aug 2025 19:21:13 +0200 Subject: [PATCH 06/11] Fix up doctest --- src/pandas_openscm/unit_conversion.py | 14 +++++--------- 1 file changed, 5 insertions(+), 
9 deletions(-) diff --git a/src/pandas_openscm/unit_conversion.py b/src/pandas_openscm/unit_conversion.py index 2ba68b6..021b1a5 100644 --- a/src/pandas_openscm/unit_conversion.py +++ b/src/pandas_openscm/unit_conversion.py @@ -42,8 +42,6 @@ def __init__(self, missing_ts: pd.MultiIndex) -> None: def convert_unit_from_target_series( - # # TODO: update type hint to pd.Series[supported numerical types here] - # pobj: pd.DataFrame, pobj: P, desired_units: pd.Series[str], unit_level: str = "unit", @@ -184,8 +182,6 @@ def convert_unit_from_target_series( def convert_unit( - # # TODO: update type hint to pd.Series[supported numerical types here] - # pobj: pd.DataFrame, pobj: P, desired_units: str | Mapping[str, str] | pd.Series[str], unit_level: str = "unit", @@ -265,11 +261,11 @@ def convert_unit( >>> >>> # Same thing with a series as input >>> convert_unit(start[2030], "K") - 2030 - scenario variable unit - sa temperature K 0.002 - sb temperature K 1.200 - body temperature K 311.250 + scenario variable unit + sa temperature K 0.002 + sb temperature K 1.200 + body temperature K 311.250 + Name: 2030, dtype: float64 >>> >>> # Convert using a mapping. >>> # Units that aren't specified in the mapping aren't converted. From 786ad536569a99c39586867c30e4a05e384a6771 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Mon, 4 Aug 2025 19:46:32 +0200 Subject: [PATCH 07/11] Fix docs --- docs/changelog.md | 14 ++++++------- docs/pandas-accessors.md | 16 +++++++++++---- src/pandas_openscm/accessors/dataframe.py | 25 +++++++++++++---------- src/pandas_openscm/accessors/series.py | 25 +++++++++++++---------- src/pandas_openscm/index_manipulation.py | 7 +++---- 5 files changed, 50 insertions(+), 37 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index ffad678..04c8794 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -25,8 +25,8 @@ from the examples given in that link. 
### 🆕 Features -- - Added unit conversion APIs: [pandas_openscm.unit_conversion.convert_unit] and [pandas_openscm.unit_conversion.convert_unit_like] and the corresponding accessors [pandas_openscm.accessors.DataFramePandasOpenSCMAccessor.convert_unit] and [pandas_openscm.accessors.DataFramePandasOpenSCMAccessor.convert_unit_like] - - Added the helper: [pandas_openscm.index_manipulation.ensure_is_multiindex] and the corresponding accessors [pandas_openscm.accessors.DataFramePandasOpenSCMAccessor.ensure_index_is_multiindex] and [pandas_openscm.accessors.DataFramePandasOpenSCMAccessor.eiim] +- - Added unit conversion APIs: [pandas_openscm.unit_conversion.convert_unit] and [pandas_openscm.unit_conversion.convert_unit_like] and the corresponding accessors [pandas_openscm.accessors.PandasDataFrameOpenSCMAccessor.convert_unit] and [pandas_openscm.accessors.PandasDataFrameOpenSCMAccessor.convert_unit_like] + - Added the helper: [pandas_openscm.index_manipulation.ensure_is_multiindex] and the corresponding accessors [pandas_openscm.accessors.PandasDataFrameOpenSCMAccessor.ensure_index_is_multiindex] and [pandas_openscm.accessors.PandasDataFrameOpenSCMAccessor.eiim] ([#23](https://github.com/openscm/pandas-openscm/pull/23)) @@ -46,7 +46,7 @@ from the examples given in that link. ### 🆕 Features -- Added [pandas_openscm.index_manipulation.set_levels][] and the corresponding accessor [pandas_openscm.accessors.DataFramePandasOpenSCMAccessor.set_index_levels][] ([#18](https://github.com/openscm/pandas-openscm/pull/18)) +- Added [pandas_openscm.index_manipulation.set_levels][] and the corresponding accessor [pandas_openscm.accessors.PandasDataFrameOpenSCMAccessor.set_index_levels][] ([#18](https://github.com/openscm/pandas-openscm/pull/18)) ## Pandas-OpenSCM v0.5.0 (2025-05-10) @@ -84,7 +84,7 @@ from the examples given in that link. ### 🆕 Features - Add compare_close function to compare two dataframes. 
([#16](https://github.com/openscm/pandas-openscm/pull/16)) -- Added [pandas_openscm.index_manipulation.update_levels_from_other][] and the corresponding accessor [pandas_openscm.accessors.DataFramePandasOpenSCMAccessor.update_index_levels_from_other][] ([#17](https://github.com/openscm/pandas-openscm/pull/17)) +- Added [pandas_openscm.index_manipulation.update_levels_from_other][] and the corresponding accessor [pandas_openscm.accessors.PandasDataFrameOpenSCMAccessor.update_index_levels_from_other][] ([#17](https://github.com/openscm/pandas-openscm/pull/17)) ## Pandas-OpenSCM v0.4.1 (2025-04-12) @@ -93,21 +93,21 @@ from the examples given in that link. - Fixed up [pandas_openscm.index_manipulation.update_levels][]. It now drops unused levels by default first, to avoid applying the updates to values that aren't being used. - The same fixes are propagated to [pandas_openscm.accessors.DataFramePandasOpenSCMAccessor.update_index_levels][] and [pandas_openscm.index_manipulation.update_index_levels_func][]. ([#14](https://github.com/openscm/pandas-openscm/pull/14)) + The same fixes are propagated to [pandas_openscm.accessors.PandasDataFrameOpenSCMAccessor.update_index_levels][] and [pandas_openscm.index_manipulation.update_index_levels_func][]. 
([#14](https://github.com/openscm/pandas-openscm/pull/14)) ## Pandas-OpenSCM v0.4.0 (2025-04-11) ### 🆕 Features -- Added [pandas_openscm.index_manipulation.update_levels][] and the corresponding accessor [pandas_openscm.accessors.DataFramePandasOpenSCMAccessor.update_index_levels][] ([#13](https://github.com/openscm/pandas-openscm/pull/13)) +- Added [pandas_openscm.index_manipulation.update_levels][] and the corresponding accessor [pandas_openscm.accessors.PandasDataFrameOpenSCMAccessor.update_index_levels][] ([#13](https://github.com/openscm/pandas-openscm/pull/13)) ## Pandas-OpenSCM v0.3.3 (2025-03-30) ### 🆕 Features -- - Added a method for converting to long data, see [pandas_openscm.accessors.DataFramePandasOpenSCMAccessor.to_long_data][pandas_openscm.accessors.DataFramePandasOpenSCMAccessor.to_long_data] ([#12](https://github.com/openscm/pandas-openscm/pull/12)) +- - Added a method for converting to long data, see [pandas_openscm.accessors.PandasDataFrameOpenSCMAccessor.to_long_data][pandas_openscm.accessors.PandasDataFrameOpenSCMAccessor.to_long_data] ([#12](https://github.com/openscm/pandas-openscm/pull/12)) ## Pandas-OpenSCM v0.3.2 (2025-03-27) diff --git a/docs/pandas-accessors.md b/docs/pandas-accessors.md index be9c0d0..5e35134 100644 --- a/docs/pandas-accessors.md +++ b/docs/pandas-accessors.md @@ -13,7 +13,7 @@ This is done with By default, the accessors are provided under the "openscm" namespace and this is how the accessors are documented below. However, the namespace can be customised when using -[register_pandas_accessor][pandas_openscm.accessors.register_pandas_accessor], +[register_pandas_accessors][pandas_openscm.accessors.register_pandas_accessors], should you wish to use a different namespace for the accessor. 
For the avoidance of doubt, in order to register/activate the accessors, @@ -22,21 +22,29 @@ you will need to run something like: ```python from pandas_openscm.accessors import register_pandas_accessors -# The 'pd.DataFrame.openscm' namespace will not be available at this point. +# The 'pd.DataFrame.openscm' and 'pd.Series.openscm' namespace +# will not be available at this point. # Register the accessors register_pandas_accessors() -# The 'pd.DataFrame.openscm' namespace +# The 'pd.DataFrame.openscm' and 'pd.Series.openscm' namespace # (or whatever other custom namespace you chose to register) # will now be available. ``` The full accessor API is documented below. -::: pandas_openscm.accessors.DataFramePandasOpenSCMAccessor +::: pandas_openscm.accessors.dataframe.PandasDataFrameOpenSCMAccessor handler: python_accessors options: namespace: "pd.DataFrame.openscm" show_root_full_path: false show_root_heading: true + +::: pandas_openscm.accessors.series.PandasSeriesOpenSCMAccessor + handler: python_accessors + options: + namespace: "pd.Series.openscm" + show_root_full_path: false + show_root_heading: true diff --git a/src/pandas_openscm/accessors/dataframe.py b/src/pandas_openscm/accessors/dataframe.py index fefefd7..4344721 100644 --- a/src/pandas_openscm/accessors/dataframe.py +++ b/src/pandas_openscm/accessors/dataframe.py @@ -70,7 +70,7 @@ def convert_unit( """ Convert units - This uses [convert_unit_from_target_series][(p).unit_conversion.]. + This uses [convert_unit_from_target_series][pandas_openscm.unit_conversion.]. If you want to understand the details of how the conversion works, see that function's docstring. @@ -91,23 +91,25 @@ def convert_unit( If this is a [pd.Series][pandas.Series], then it will be passed to - [convert_unit_from_target_series][(p).unit_conversion.] + [convert_unit_from_target_series][pandas_openscm.unit_conversion.] 
after filling any rows in the [pd.DataFrame][pandas.DataFrame] that are not in `desired_units` with the existing unit (i.e. unspecified rows are not converted). For further details, see the examples - in [convert_unit][(p).unit_conversion.]. + in [convert_unit][pandas_openscm.unit_conversion.]. unit_level Level in the index which holds unit information - Passed to [convert_unit_from_target_series][(p).unit_conversion.]. + Passed to + [convert_unit_from_target_series][pandas_openscm.unit_conversion.]. ur Unit registry to use for the conversion. - Passed to [convert_unit_from_target_series][(p).unit_conversion.]. + Passed to + [convert_unit_from_target_series][pandas_openscm.unit_conversion.]. Returns ------- @@ -129,16 +131,16 @@ def convert_unit_like( Convert units to match another [pd.DataFrame][pandas.DataFrame] For further details, see the examples - in [convert_unit_like][(p).unit_conversion.]. + in [convert_unit_like][pandas_openscm.unit_conversion.]. This is essentially a helper for - [convert_unit_from_target_series][(p).unit_conversion.]. + [convert_unit_from_target_series][pandas_openscm.unit_conversion.]. It implements one set of logic for extracting desired units and tries to be clever, handling differences in index levels between the data and `target` sensibly wherever possible. If you want behaviour other than what is implemented here, - use [convert_unit_from_target_series][(p).unit_conversion.] directly. + use [convert_unit_from_target_series][pandas_openscm.unit_conversion.] directly. Parameters ---------- @@ -156,7 +158,8 @@ def convert_unit_like( ur Unit registry to use for the conversion. - Passed to [convert_unit_from_target_series][(p).unit_conversion.]. + Passed to + [convert_unit_from_target_series][pandas_openscm.unit_conversion.]. 
Returns ------- @@ -194,7 +197,7 @@ def eiim(self, copy: bool = True) -> pd.DataFrame: """ Ensure that the index is a [pd.MultiIndex][pandas.MultiIndex] - Alias for [ensure_index_is_multiindex][(p).index_manipulation.] + Alias for [ensure_index_is_multiindex][pandas_openscm.index_manipulation.] Parameters ---------- @@ -276,7 +279,7 @@ def mi_loc( Locator to apply If this is a multi-index, we use - [multi_index_lookup][(p).indexing.] to ensure correct alignment. + [multi_index_lookup][pandas_openscm.indexing.] to ensure correct alignment. If this is an index that has a name, we use the name to ensure correct alignment. diff --git a/src/pandas_openscm/accessors/series.py b/src/pandas_openscm/accessors/series.py index 7b31c74..b29adcd 100644 --- a/src/pandas_openscm/accessors/series.py +++ b/src/pandas_openscm/accessors/series.py @@ -50,7 +50,7 @@ def convert_unit( """ Convert units - This uses [convert_unit_from_target_series][(p).unit_conversion.]. + This uses [convert_unit_from_target_series][pandas_openscm.unit_conversion.]. If you want to understand the details of how the conversion works, see that function's docstring. @@ -71,23 +71,25 @@ def convert_unit( If this is a [pd.Series][pandas.Series], then it will be passed to - [convert_unit_from_target_series][(p).unit_conversion.] + [convert_unit_from_target_series][pandas_openscm.unit_conversion.] after filling any rows in the [pd.Series][pandas.Series] that are not in `desired_units` with the existing unit (i.e. unspecified rows are not converted). For further details, see the examples - in [convert_unit][(p).unit_conversion.]. + in [convert_unit][pandas_openscm.unit_conversion.]. unit_level Level in the index which holds unit information - Passed to [convert_unit_from_target_series][(p).unit_conversion.]. + Passed to + [convert_unit_from_target_series][pandas_openscm.unit_conversion.]. ur Unit registry to use for the conversion. - Passed to [convert_unit_from_target_series][(p).unit_conversion.]. 
+ Passed to + [convert_unit_from_target_series][pandas_openscm.unit_conversion.]. Returns ------- @@ -120,16 +122,16 @@ def convert_unit_like( Convert units to match another supported pandas object For further details, see the examples - in [convert_unit_like][(p).unit_conversion.]. + in [convert_unit_like][pandas_openscm.unit_conversion.]. This is essentially a helper for - [convert_unit_from_target_series][(p).unit_conversion.]. + [convert_unit_from_target_series][pandas_openscm.unit_conversion.]. It implements one set of logic for extracting desired units and tries to be clever, handling differences in index levels between the data and `target` sensibly wherever possible. If you want behaviour other than what is implemented here, - use [convert_unit_from_target_series][(p).unit_conversion.] directly. + use [convert_unit_from_target_series][pandas_openscm.unit_conversion.] directly. Parameters ---------- @@ -147,7 +149,8 @@ def convert_unit_like( ur Unit registry to use for the conversion. - Passed to [convert_unit_from_target_series][(p).unit_conversion.]. + Passed to + [convert_unit_from_target_series][pandas_openscm.unit_conversion.]. Returns ------- @@ -196,7 +199,7 @@ def convert_unit_like( # """ # Ensure that the index is a [pd.MultiIndex][pandas.MultiIndex] # - # Alias for [ensure_index_is_multiindex][(p).index_manipulation.] + # Alias for [ensure_index_is_multiindex][pandas_openscm.index_manipulation.] # # Parameters # ---------- @@ -278,7 +281,7 @@ def convert_unit_like( # Locator to apply # # If this is a multi-index, we use - # [multi_index_lookup][(p).indexing.] to ensure correct alignment. + # [multi_index_lookup][pandas_openscm.indexing.] to ensure correct alignment. # noqa: E501 # # If this is an index that has a name, # we use the name to ensure correct alignment. 
diff --git a/src/pandas_openscm/index_manipulation.py b/src/pandas_openscm/index_manipulation.py index d753399..ffa6ade 100644 --- a/src/pandas_openscm/index_manipulation.py +++ b/src/pandas_openscm/index_manipulation.py @@ -920,20 +920,19 @@ def set_index_levels_func( Parameters ---------- - df + pobj [pd.DataFrame][pandas.DataFrame] to update levels_to_set Mapping of level names to values to set copy - Should `df` be copied before returning? - + Should `pobj` be copied before returning? Returns ------- : - `df` with updates applied to its index + `pobj` with updates applied to its index """ if not isinstance(pobj.index, pd.MultiIndex): msg = ( From 723c49f0612f24c4b6994ca59056c187fac07f8a Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Mon, 4 Aug 2025 21:55:42 +0200 Subject: [PATCH 08/11] Add accessor for set levels --- src/pandas_openscm/accessors/series.py | 59 ++++++++------- src/pandas_openscm/index_manipulation.py | 1 - src/pandas_openscm/testing.py | 74 ++++++++++++++++++- ...tegration_index_manipulation_set_levels.py | 68 +++++++++++++++-- tests/integration/test_unit_conversion.py | 59 ++++----------- 5 files changed, 181 insertions(+), 80 deletions(-) diff --git a/src/pandas_openscm/accessors/series.py b/src/pandas_openscm/accessors/series.py index b29adcd..5496551 100644 --- a/src/pandas_openscm/accessors/series.py +++ b/src/pandas_openscm/accessors/series.py @@ -4,11 +4,14 @@ from __future__ import annotations -from collections.abc import Mapping +from collections.abc import Collection, Mapping from typing import TYPE_CHECKING, Any, Generic, TypeVar import pandas as pd +from pandas_openscm.index_manipulation import ( + set_index_levels_func, +) from pandas_openscm.unit_conversion import convert_unit, convert_unit_like if TYPE_CHECKING: @@ -305,33 +308,33 @@ def convert_unit_like( # """ # return mi_loc(self._df, locator) - # def set_index_levels( - # self, - # levels_to_set: dict[str, Any | Collection[Any]], - # copy: bool = True, - # ) -> 
pd.DataFrame: - # """ - # Set the index levels - # - # Parameters - # ---------- - # levels_to_set - # Mapping of level names to values to set - # - # copy - # Should the [pd.DataFrame][pandas.DataFrame] be copied before returning? - # - # Returns - # ------- - # : - # [pd.DataFrame][pandas.DataFrame] with updates applied to its index - # """ - # return set_index_levels_func( - # self._df, - # levels_to_set=levels_to_set, - # copy=copy, - # ) - # + def set_index_levels( + self, + levels_to_set: dict[str, Any | Collection[Any]], + copy: bool = True, + ) -> S: + """ + Set the index levels + + Parameters + ---------- + levels_to_set + Mapping of level names to values to set + + copy + Should the [pd.Series][pandas.Series] be copied before returning? + + Returns + ------- + : + [pd.Series][pandas.Series] with updates applied to its index + """ + return set_index_levels_func( + self._series, + levels_to_set=levels_to_set, + copy=copy, + ) + # def to_category_index(self) -> pd.DataFrame: # """ # Convert the index's values to categories diff --git a/src/pandas_openscm/index_manipulation.py b/src/pandas_openscm/index_manipulation.py index ffa6ade..fc4267c 100644 --- a/src/pandas_openscm/index_manipulation.py +++ b/src/pandas_openscm/index_manipulation.py @@ -910,7 +910,6 @@ def set_levels( def set_index_levels_func( - # TODO: check support for series and add accessors pobj: P, levels_to_set: dict[str, Any | Collection[Any]], copy: bool = True, diff --git a/src/pandas_openscm/testing.py b/src/pandas_openscm/testing.py index c00a81a..857e647 100644 --- a/src/pandas_openscm/testing.py +++ b/src/pandas_openscm/testing.py @@ -10,7 +10,7 @@ import itertools from collections.abc import Collection -from typing import TYPE_CHECKING, Any, cast +from typing import TYPE_CHECKING, Any, Literal, TypeVar, cast, overload import numpy as np import pandas as pd @@ -22,6 +22,8 @@ if TYPE_CHECKING: import pytest + P = TypeVar("P", pd.DataFrame | pd.Series[Any]) + def get_db_data_backends() -> 
tuple[type[object], ...]: return tuple(v[1] for v in DATA_BACKENDS.options) @@ -97,6 +99,76 @@ def assert_frame_alike( ) +@overload +def convert_to_desired_type( + pobj: pd.DataFrame, pobj_type: Literal["DataFrame"] +) -> pd.DataFrame: ... + + +@overload +def convert_to_desired_type( + pobj: pd.DataFrame, pobj_type: Literal["Series"] +) -> pd.Series[Any]: ... + + +def convert_to_desired_type( + df: pd.DataFrame, pobj_type: Literal["DataFrame", "Series"] +) -> pd.DataFrame | pd.Series[Any]: + """ + Convert a `df` to the desired type for testing + + Parameters + ---------- + df + [pd.DataFrame][pandas.DataFrame] to convert + + pobj_type + Type to convert to + + If "DataFrame", then `df` is simply returned. + If "Series", then the first column of `df` is returned. + + Returns + ------- + : + `df` converted to the desired type + """ + if pobj_type == "DataFrame": + return df + + if pobj_type == "Series": + res = df[df.columns[0]] + return res + + raise NotImplementedError(pobj_type) + + +def check_result(res: P, exp: P) -> None: + """ + Check result in the case where it could be multiple types + + Specifically, [pd.DataFrame][pandas.DataFrame] + or [pd.Series][pandas.Series]. + + This is a thin wrapper, if you want specific functionality, + use the underlying function. 
+ + Parameters + ---------- + res + Result + + exp + Expected + """ + if isinstance(res, pd.DataFrame): + assert_frame_alike(res, exp) + elif isinstance(res, pd.Series): + pd.testing.assert_series_equal(res, exp) + else: + raise NotImplementedError(type(res)) + + def create_test_df( *, variables: Collection[tuple[str, str]], diff --git a/tests/integration/index_manipulation/test_integration_index_manipulation_set_levels.py b/tests/integration/index_manipulation/test_integration_index_manipulation_set_levels.py index 5f7cbb2..ef3b505 100644 --- a/tests/integration/index_manipulation/test_integration_index_manipulation_set_levels.py +++ b/tests/integration/index_manipulation/test_integration_index_manipulation_set_levels.py @@ -9,6 +9,15 @@ import pytest from pandas_openscm.index_manipulation import set_index_levels_func, set_levels +from pandas_openscm.testing import convert_to_desired_type + +pobj_type = pytest.mark.parametrize( + "pobj_type", + ("DataFrame", "Series"), +) +""" +Parameterisation to use to check handling of both DataFrame and Series +""" @pytest.mark.parametrize( @@ -160,7 +169,8 @@ def test_set_levels(start, levels_to_set, exp): pd.testing.assert_index_equal(res, exp) -def test_set_levels_with_a_dataframe(): +@pobj_type +def test_set_levels_with_a_dataframe(pobj_type): start = pd.MultiIndex.from_tuples( [ ("sa", "va", "kg", 0), @@ -170,11 +180,14 @@ def test_set_levels_with_a_dataframe(): ], names=["scenario", "variable", "unit", "run_id"], ) - start_df = pd.DataFrame( - np.zeros((start.shape[0], 3)), columns=[2010, 2020, 2030], index=start + start_pobj = convert_to_desired_type( + pd.DataFrame( + np.zeros((start.shape[0], 3)), columns=[2010, 2020, 2030], index=start + ), + pobj_type, ) - res = set_index_levels_func(start_df, levels_to_set={"new_variable": "test"}) + res = set_index_levels_func(start_pobj, levels_to_set={"new_variable": "test"}) exp = pd.MultiIndex.from_tuples( [ @@ -189,11 +202,13 @@ def test_set_levels_with_a_dataframe(): 
pd.testing.assert_index_equal(res.index, exp) -def test_set_levels_raises_type_error(): +@pobj_type +def test_set_levels_raises_type_error(pobj_type): start = pd.DataFrame( np.arange(2 * 4).reshape((4, 2)), columns=[2010, 2020], ) + start = convert_to_desired_type(start, pobj_type) levels_to_set = {"new_variable": "test"} @@ -221,7 +236,7 @@ def test_set_levels_raises_value_error(): set_levels(start, levels_to_set=levels_to_set) -def test_accessor(setup_pandas_accessors): +def test_accessor_df(setup_pandas_accessors): start = pd.DataFrame( np.arange(2 * 4).reshape((4, 2)), columns=[2010, 2020], @@ -262,3 +277,44 @@ def test_accessor(setup_pandas_accessors): # Test function too res = set_index_levels_func(start, levels_to_set=levels_to_set) pd.testing.assert_frame_equal(res, exp) + + +def test_accessor_series(setup_pandas_accessors): + start = pd.Series( + np.arange(4), + index=pd.MultiIndex.from_tuples( + [ + ("sa", "va", "kg", 0), + ("sb", "vb", "m", -1), + ("sa", "va", "kg", -2), + ("sa", "vb", "kg", 2), + ], + names=["scenario", "variable", "unit", "run_id"], + ), + ) + + levels_to_set = { + "model_id": "674", + "unit": ["t", "km", "g", "kg"], + "scenario": 1, + } + + exp = pd.Series( + start.values, + index=pd.MultiIndex.from_tuples( + [ + (1, "va", "t", 0, "674"), + (1, "vb", "km", -1, "674"), + (1, "va", "g", -2, "674"), + (1, "vb", "kg", 2, "674"), + ], + names=["scenario", "variable", "unit", "run_id", "model_id"], + ), + ) + + res = start.openscm.set_index_levels(levels_to_set=levels_to_set) + pd.testing.assert_series_equal(res, exp) + + # Test function too + res = set_index_levels_func(start, levels_to_set=levels_to_set) + pd.testing.assert_series_equal(res, exp) diff --git a/tests/integration/test_unit_conversion.py b/tests/integration/test_unit_conversion.py index 97a9cb8..e0cd9a2 100644 --- a/tests/integration/test_unit_conversion.py +++ b/tests/integration/test_unit_conversion.py @@ -6,7 +6,7 @@ import re import sys -from typing import TYPE_CHECKING, 
Any, Literal, TypeVar, overload +from typing import TYPE_CHECKING, Any, TypeVar from unittest.mock import patch import numpy as np @@ -17,7 +17,12 @@ from pandas_openscm.index_manipulation import ( set_index_levels_func, ) -from pandas_openscm.testing import assert_frame_alike, create_test_df +from pandas_openscm.testing import ( + assert_frame_alike, + check_result, + convert_to_desired_type, + create_test_df, +) from pandas_openscm.unit_conversion import ( AmbiguousTargetUnitError, MissingDesiredUnitError, @@ -29,6 +34,14 @@ if TYPE_CHECKING: P = TypeVar("P", pd.DataFrame | pd.Series[Any]) +pobj_type = pytest.mark.parametrize( + "pobj_type", + ("DataFrame", "Series"), +) +""" +Parameterisation to use to check handling of both DataFrame and Series +""" + check_auto_index_casting_pobj = pytest.mark.parametrize( "only_two_index_levels_pobj", ( @@ -43,48 +56,6 @@ This parameterisation ensures that we check this edge case. """ -pobj_type = pytest.mark.parametrize( - "pobj_type", - ("DataFrame", "Series"), -) -""" -Parameterisation to use to check handling of both DataFrame and Series -""" - - -@overload -def convert_to_desired_type( - pobj: pd.DataFrame, pobj_type: Literal["DataFrame"] -) -> pd.DataFrame: ... - - -@overload -def convert_to_desired_type( - pobj: pd.DataFrame, pobj_type: Literal["Series"] -) -> pd.Series[Any]: ... 
- - -def convert_to_desired_type( - df: pd.DataFrame, pobj_type: Literal["DataFrame", "Series"] -) -> pd.DataFrame | pd.Series[Any]: - if pobj_type == "DataFrame": - return df - - if pobj_type == "Series": - res = df[df.columns[0]] - return res - - raise NotImplementedError(pobj_type) - - -def check_result(res: P, exp: P) -> None: - if isinstance(res, pd.DataFrame): - assert_frame_alike(res, exp) - elif isinstance(res, pd.Series): - pd.testing.assert_series_equal(res, exp) - else: - raise NotImplementedError(type(res)) - @pobj_type @check_auto_index_casting_pobj From f8b82cf85bdbab24e2ecd144973af4be39abc199 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Mon, 4 Aug 2025 22:01:36 +0200 Subject: [PATCH 09/11] mypy --- src/pandas_openscm/accessors/series.py | 9 ++++++++- src/pandas_openscm/testing.py | 6 +++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/pandas_openscm/accessors/series.py b/src/pandas_openscm/accessors/series.py index 5496551..91c5708 100644 --- a/src/pandas_openscm/accessors/series.py +++ b/src/pandas_openscm/accessors/series.py @@ -15,6 +15,8 @@ from pandas_openscm.unit_conversion import convert_unit, convert_unit_like if TYPE_CHECKING: + # Hmm this is somehow not correct. + # Figuring it out is a job for another day S = TypeVar("S", bound=pd.Series[Any]) import pint @@ -329,12 +331,17 @@ def set_index_levels( : [pd.Series][pandas.Series] with updates applied to its index """ - return set_index_levels_func( + res = set_index_levels_func( self._series, levels_to_set=levels_to_set, copy=copy, ) + # Ignore return type + # because I've done something wrong with how I've set this up. 
+ # Figuring this out is a job for another day + return res # type: ignore + # def to_category_index(self) -> pd.DataFrame: # """ # Convert the index's values to categories diff --git a/src/pandas_openscm/testing.py b/src/pandas_openscm/testing.py index 857e647..8b44cdd 100644 --- a/src/pandas_openscm/testing.py +++ b/src/pandas_openscm/testing.py @@ -22,7 +22,7 @@ if TYPE_CHECKING: import pytest - P = TypeVar("P", pd.DataFrame | pd.Series[Any]) + P = TypeVar("P", pd.DataFrame, pd.Series[Any]) def get_db_data_backends() -> tuple[type[object], ...]: @@ -101,13 +101,13 @@ def assert_frame_alike( @overload def convert_to_desired_type( - pobj: pd.DataFrame, pobj_type: Literal["DataFrame"] + df: pd.DataFrame, pobj_type: Literal["DataFrame"] ) -> pd.DataFrame: ... @overload def convert_to_desired_type( - pobj: pd.DataFrame, pobj_type: Literal["Series"] + df: pd.DataFrame, pobj_type: Literal["Series"] ) -> pd.Series[Any]: ... From 392a2a94d9c8a79b5727915a1e49072058146c2d Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Tue, 5 Aug 2025 08:14:49 +0200 Subject: [PATCH 10/11] Add no cover statements --- src/pandas_openscm/testing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pandas_openscm/testing.py b/src/pandas_openscm/testing.py index 8b44cdd..3071626 100644 --- a/src/pandas_openscm/testing.py +++ b/src/pandas_openscm/testing.py @@ -140,7 +140,7 @@ def convert_to_desired_type( res = df[df.columns[0]] return res - raise NotImplementedError(pobj_type) + raise NotImplementedError(pobj_type) # pragma: no cover def check_result(res: P, exp: P) -> None: @@ -165,7 +165,7 @@ def check_result(res: P, exp: P) -> None: assert_frame_alike(res, exp) elif isinstance(res, pd.Series): pd.testing.assert_series_equal(res, exp) - else: + else: # pragma: no cover raise NotImplementedError(type(res)) From e84d1b0d54f3be7814a3d9796db720b58dcf2567 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Tue, 5 Aug 2025 10:29:55 +0200 Subject: [PATCH 11/11] 
CHANGELOG --- changelog/24.breaking.md | 4 ++++ changelog/24.feature.md | 1 + changelog/24.improvement.md | 1 + changelog/24.trivial.md | 1 + src/pandas_openscm/accessors/__init__.py | 1 - 5 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 changelog/24.breaking.md create mode 100644 changelog/24.feature.md create mode 100644 changelog/24.improvement.md create mode 100644 changelog/24.trivial.md diff --git a/changelog/24.breaking.md b/changelog/24.breaking.md new file mode 100644 index 0000000..7379617 --- /dev/null +++ b/changelog/24.breaking.md @@ -0,0 +1,4 @@ +- Renamed pandas_openscm.register_pandas_accessor to [pandas_openscm.register_pandas_accessors][] (with a trailing 's') as accessors are now also registered for [pandas Series][pandas.Series] +- Renamed pandas_openscm.accessors.DataFramePandasOpenSCMAccessor to [pandas_openscm.accessors.PandasDataFrameOpenSCMAccessor][] +- Renamed `df_unit_level` to `unit_level` in [pandas_openscm.accessors.PandasDataFrameOpenSCMAccessor.convert_unit_like][] +- Renamed `df` to `pobj` in [pandas_openscm.index_manipulation.set_index_levels_func][], [pandas_openscm.unit_conversion.convert_unit_from_target_series][], [pandas_openscm.unit_conversion.convert_unit][] and [pandas_openscm.unit_conversion.convert_unit_like][] diff --git a/changelog/24.feature.md b/changelog/24.feature.md new file mode 100644 index 0000000..0e0e4f9 --- /dev/null +++ b/changelog/24.feature.md @@ -0,0 +1 @@ +Added some accessors for [pandas Series][pandas.Series] via [pandas_openscm.accessors.PandasSeriesOpenSCMAccessor][]. 
Note that this is not feature complete yet, tracking in [#25](https://github.com/openscm/pandas-openscm/issues/25) diff --git a/changelog/24.improvement.md b/changelog/24.improvement.md new file mode 100644 index 0000000..f50fbbe --- /dev/null +++ b/changelog/24.improvement.md @@ -0,0 +1 @@ +[pandas_openscm.index_manipulation.set_index_levels_func][], [pandas_openscm.unit_conversion.convert_unit_from_target_series][], [pandas_openscm.unit_conversion.convert_unit][] and [pandas_openscm.unit_conversion.convert_unit_like][] now explicitly support [pd.Series][pandas.Series] diff --git a/changelog/24.trivial.md b/changelog/24.trivial.md new file mode 100644 index 0000000..d4f944a --- /dev/null +++ b/changelog/24.trivial.md @@ -0,0 +1 @@ +Added [pandas_openscm.testing.convert_to_desired_type][] and [pandas_openscm.testing.check_result][] to help with testing support for [pd.DataFrame][pandas.DataFrame] and [pd.Series][pandas.Series] diff --git a/src/pandas_openscm/accessors/__init__.py b/src/pandas_openscm/accessors/__init__.py index 00f7607..4dbb880 100644 --- a/src/pandas_openscm/accessors/__init__.py +++ b/src/pandas_openscm/accessors/__init__.py @@ -41,7 +41,6 @@ from pandas_openscm.accessors.series import PandasSeriesOpenSCMAccessor -# TODO: note change in name (now has trailing s) in changelog def register_pandas_accessors(namespace: str = "openscm") -> None: """ Register the pandas accessors