From f492ba9888fc05ff7c224db8a22faac8c0106a4b Mon Sep 17 00:00:00 2001
From: Billy2551
Date: Thu, 27 Oct 2022 12:05:53 -0700
Subject: [PATCH] FIX-#5112: allows empty partition to be passed into `query_compiler.dt_prop_map` (#5133)

Signed-off-by: Bill Wang
---
 .../storage_formats/pandas/query_compiler.py     |  6 +++++-
 modin/pandas/test/test_series.py                 | 16 ++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
index a3650bc26e8..254e4de9145 100644
--- a/modin/core/storage_formats/pandas/query_compiler.py
+++ b/modin/core/storage_formats/pandas/query_compiler.py
@@ -152,7 +152,11 @@ def _dt_prop_map(property_name):
 
     def dt_op_builder(df, *args, **kwargs):
         """Access specified date-time property of the passed frame."""
-        prop_val = getattr(df.squeeze(axis=1).dt, property_name)
+        squeezed_df = df.squeeze(axis=1)
+        if isinstance(squeezed_df, pandas.DataFrame) and len(squeezed_df.columns) == 0:
+            return squeezed_df
+        assert isinstance(squeezed_df, pandas.Series)
+        prop_val = getattr(squeezed_df.dt, property_name)
         if isinstance(prop_val, pandas.Series):
             return prop_val.to_frame()
         elif isinstance(prop_val, pandas.DataFrame):
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index 4e4c3ec37d3..22e551d6a7a 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -1733,6 +1733,22 @@ def test_dt(timezone):
     df_equals(modin_series.dt.end_time, pandas_series.dt.end_time)
     df_equals(modin_series.dt.to_timestamp(), pandas_series.dt.to_timestamp())
 
+    def dt_with_empty_partition(lib):
+        # For context, see https://github.com/modin-project/modin/issues/5112
+        df_a = lib.DataFrame({"A": [lib.to_datetime("26/10/2020")]})
+        df_b = lib.DataFrame({"B": [lib.to_datetime("27/10/2020")]})
+        df = lib.concat([df_a, df_b], axis=1)
+        eval_result = df.eval("B - A", engine="python")
+        # BaseOnPython had a single partition after the concat, and it
+        # maintains that partition after eval. In other execution modes,
+        # eval() should re-split the result into two column partitions,
+        # one of which is empty.
+        if isinstance(df, pd.DataFrame) and get_current_execution() != "BaseOnPython":
+            assert eval_result._query_compiler._modin_frame._partitions.shape == (1, 2)
+        return eval_result.dt.days
+
+    eval_general(pd, pandas, dt_with_empty_partition)
+
 
 @pytest.mark.parametrize(
     "data", test_data_with_duplicates_values, ids=test_data_with_duplicates_keys