Skip to content

Commit

Permalink
FIX-modin-project#6935: Fix Merge failed when right operand is an emp…
Browse files Browse the repository at this point in the history
…ty dataframe (modin-project#6941)

Co-authored-by: Iaroslav Igoshev <Poolliver868@mail.ru>
Signed-off-by: arunjose696 <arunjose696@gmail.com>
  • Loading branch information
2 people authored and tochigiv committed Feb 22, 2024
1 parent 9eaedb9 commit d190e1e
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 1 deletion.
21 changes: 20 additions & 1 deletion modin/core/dataframe/pandas/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3203,6 +3203,25 @@ def _prepare_frame_to_broadcast(self, axis, indices, broadcast_all):
passed_len += len(internal)
return result_dict

def _extract_partitions(self):
    """
    Return a partition array for this frame, even when it has no partitions.

    When ``self._partitions`` contains at least one element it is returned
    as is. Otherwise the partition manager class is asked to build a
    stand-in from this frame's ``index`` and ``columns``.

    Returns
    -------
    np.ndarray
        The existing partition array, or the result of
        ``create_partition_from_metadata(index=self.index, columns=self.columns)``
        when the existing array has no elements.
    """
    partitions = self._partitions
    if partitions.size > 0:
        return partitions
    # No partitions to hand out: build a placeholder carrying only the labels.
    manager = self._partition_mgr_cls
    return manager.create_partition_from_metadata(
        index=self.index, columns=self.columns
    )

@lazy_metadata_decorator(apply_axis="both")
def broadcast_apply_select_indices(
self,
Expand Down Expand Up @@ -3351,7 +3370,7 @@ def broadcast_apply_full_axis(
if other is not None:
if not isinstance(other, list):
other = [other]
other = [o._partitions for o in other] if len(other) else None
other = [o._extract_partitions() for o in other] if len(other) else None

if apply_indices is not None:
numeric_indices = self.get_axis(axis ^ 1).get_indexer_for(apply_indices)
Expand Down
20 changes: 20 additions & 0 deletions modin/core/dataframe/pandas/partitioning/partition_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,24 @@ def preprocess_func(cls, map_func):

# END Abstract Methods

@classmethod
def create_partition_from_metadata(cls, **metadata):
    """
    Wrap a pandas DataFrame built from `metadata` into a 1x1 partition array.

    Parameters
    ----------
    **metadata : dict
        Keyword arguments forwarded unchanged to ``pandas.DataFrame``
        (callers in this change pass ``index`` and ``columns``).

    Returns
    -------
    np.ndarray
        NumPy array created from ``[[partition]]``, where ``partition`` is
        what ``cls._partition_class.put`` returned for the new DataFrame.
    """
    frame = pandas.DataFrame(**metadata)
    partition = cls._partition_class.put(frame)
    return np.array([[partition]])

@classmethod
def column_partitions(cls, partitions, full_axis=True):
"""
Expand Down Expand Up @@ -1113,6 +1131,8 @@ def combine(cls, partitions):
np.ndarray
A NumPy 2D array of a single partition.
"""
if partitions.size <= 1:
return partitions

def to_pandas_remote(df, partition_shape, *dfs):
"""Copy of ``cls.to_pandas()`` method adapted for a remote function."""
Expand Down
7 changes: 7 additions & 0 deletions modin/pandas/test/dataframe/test_join_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,13 @@ def test_merge(test_data, test_data2):
modin_df.merge("Non-valid type")


def test_merge_empty():
    """Compare ``merge`` against a zero-row slice of the same frame for both libraries."""
    side = 2**6
    values = np.random.uniform(0, 100, size=(side, side))
    # NOTE(review): `pd` is presumably modin.pandas in this test module - confirm.
    modin_df = pd.DataFrame(values)
    pandas_df = pandas.DataFrame(values)

    def merge_with_empty_slice(df):
        # `df.iloc[:0]` keeps the columns but has no rows.
        return df.merge(df.iloc[:0])

    eval_general(modin_df, pandas_df, merge_with_empty_slice)


def test_merge_with_mi_columns():
modin_df1, pandas_df1 = create_test_dfs(
{
Expand Down

0 comments on commit d190e1e

Please sign in to comment.