Skip to content

Commit

Permalink
dealing with empty partitions in broadcast_apply_full_axis
Browse files Browse the repository at this point in the history
  • Loading branch information
arunjose696 committed Feb 19, 2024
1 parent 323ad9c commit 1202f53
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 3 deletions.
13 changes: 12 additions & 1 deletion modin/core/dataframe/pandas/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3348,10 +3348,21 @@ def broadcast_apply_full_axis(
PandasDataframe
New Modin DataFrame.
"""

def get_partitions(df):
    """
    Return the partitions of `df`, substituting a placeholder if it has none.

    Parameters
    ----------
    df : object
        Frame exposing ``_partitions``, ``index`` and ``columns``
        (presumably a Modin PandasDataframe; confirm against the caller).

    Returns
    -------
    array-like
        ``df._partitions`` when its ``size`` is greater than zero; otherwise
        the result of ``create_partition_from_data`` called on an empty
        pandas DataFrame that carries the index and columns of `df`.
    """
    if df._partitions.size > 0:
        return df._partitions

    # Corner case: the frame has no partitions at all, so wrap an empty
    # pandas frame with matching labels into a freshly created partition.
    placeholder_frame = pandas.DataFrame(index=df.index, columns=df.columns)
    return self._partition_mgr_cls.create_partition_from_data(placeholder_frame)

if other is not None:
if not isinstance(other, list):
other = [other]
other = [o._partitions for o in other] if len(other) else None
other = [get_partitions(o) for o in other] if len(other) else None

if apply_indices is not None:
numeric_indices = self.get_axis(axis ^ 1).get_indexer_for(apply_indices)
Expand Down
21 changes: 19 additions & 2 deletions modin/core/dataframe/pandas/partitioning/partition_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,23 @@ def preprocess_func(cls, map_func):

# END Abstract Methods

@classmethod
def create_partition_from_data(cls, data):
    """
    Create a NumPy array of partitions that wraps the given data.

    Parameters
    ----------
    data : pandas.DataFrame or pandas.Series
        Data that has to be wrapped in a partition.

    Returns
    -------
    np.ndarray
        A NumPy 2D array of a single partition which contains the data.
    """
    # Hand the data to the partition class, then nest the resulting
    # partition twice so callers receive it in a 2D layout.
    wrapped = cls._partition_class.put(data)
    return np.array([[wrapped]])

@classmethod
def column_partitions(cls, partitions, full_axis=True):
"""
Expand Down Expand Up @@ -1120,8 +1137,8 @@ def to_pandas_remote(df, partition_shape, *dfs):
(df,) + dfs, partition_shape, called_from_remote=True
)

if partitions.size == 0:
return np.array([[]])
if partitions.size <= 1:
return partitions

preprocessed_func = cls.preprocess_func(to_pandas_remote)
partition_shape = partitions.shape
Expand Down
14 changes: 14 additions & 0 deletions modin/pandas/test/dataframe/test_join_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,20 @@ def test_merge(test_data, test_data2):
modin_df.merge("Non-valid type")


def test_merge_empty():
    """Merging a frame with its zero-row slice must give the same result as pandas."""
    values = np.random.uniform(0, 100, size=(2**6, 2**6))

    # Build the pandas pair: a full frame and its zero-row slice.
    pandas_full = pandas.DataFrame(values)
    pandas_empty = pandas_full.iloc[:0]

    # Build the equivalent pair from the same data.
    modin_full = pd.DataFrame(values)
    modin_empty = modin_full.iloc[:0]

    modin_merged = pd.merge(modin_full, modin_empty)
    pandas_merged = pandas.merge(pandas_full, pandas_empty)
    df_equals(modin_merged, pandas_merged)


def test_merge_with_mi_columns():
modin_df1, pandas_df1 = create_test_dfs(
{
Expand Down

0 comments on commit 1202f53

Please sign in to comment.