Skip to content

Commit

Permalink
FIX-#5112: allows empty partition to be passed into `query_compiler.d…
Browse files Browse the repository at this point in the history
…t_prop_map` (#5133)

Signed-off-by: Bill Wang <billiam@ponder.io>
  • Loading branch information
billiam-wang committed Oct 27, 2022
1 parent 87c8f70 commit f492ba9
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 1 deletion.
6 changes: 5 additions & 1 deletion modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,11 @@ def _dt_prop_map(property_name):

def dt_op_builder(df, *args, **kwargs):
"""Access specified date-time property of the passed frame."""
prop_val = getattr(df.squeeze(axis=1).dt, property_name)
squeezed_df = df.squeeze(axis=1)
if isinstance(squeezed_df, pandas.DataFrame) and len(squeezed_df.columns) == 0:
return squeezed_df
assert isinstance(squeezed_df, pandas.Series)
prop_val = getattr(squeezed_df.dt, property_name)
if isinstance(prop_val, pandas.Series):
return prop_val.to_frame()
elif isinstance(prop_val, pandas.DataFrame):
Expand Down
16 changes: 16 additions & 0 deletions modin/pandas/test/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1733,6 +1733,22 @@ def test_dt(timezone):
df_equals(modin_series.dt.end_time, pandas_series.dt.end_time)
df_equals(modin_series.dt.to_timestamp(), pandas_series.dt.to_timestamp())

def dt_with_empty_partition(lib):
    """Return ``.dt.days`` of a datetime difference computed through ``eval``.

    Two single-column, single-row datetime frames are concatenated along
    ``axis=1`` and ``"B - A"`` is evaluated on the result with the python
    engine; the ``days`` component of that difference is returned.

    Parameters
    ----------
    lib : module
        Module providing ``DataFrame``, ``to_datetime`` and ``concat``
        (presumably either modin.pandas or pandas -- confirm against the
        ``eval_general`` call site).
    """
    # For context, see https://github.com/modin-project/modin/issues/5112
    frames = [
        lib.DataFrame({"A": [lib.to_datetime("26/10/2020")]}),
        lib.DataFrame({"B": [lib.to_datetime("27/10/2020")]}),
    ]
    df = lib.concat(frames, axis=1)
    delta = df.eval("B - A", engine="python")
    # Under BaseOnPython the concat yields a single partition and eval keeps
    # it that way. In the other execution modes eval() is expected to
    # re-split the result into two column partitions, one of which is empty,
    # so the partition grid is checked only when `df` is a `pd.DataFrame`
    # and the execution is not BaseOnPython.
    is_pd_frame = isinstance(df, pd.DataFrame)
    if is_pd_frame and get_current_execution() != "BaseOnPython":
        partition_grid = delta._query_compiler._modin_frame._partitions
        assert partition_grid.shape == (1, 2)
    return delta.dt.days

eval_general(pd, pandas, dt_with_empty_partition)


@pytest.mark.parametrize(
"data", test_data_with_duplicates_values, ids=test_data_with_duplicates_keys
Expand Down

0 comments on commit f492ba9

Please sign in to comment.