Skip to content

Commit

Permalink
FIX-#3702: add proper check for none-slice at df.__getitem__ (#3786)
Browse files Browse the repository at this point in the history
Signed-off-by: Dmitry Chigarev <dmitry.chigarev@intel.com>
Co-authored-by: Devin Petersohn <devin-petersohn@users.noreply.github.com>
  • Loading branch information
dchigarev and devin-petersohn committed Dec 10, 2021
1 parent cf426c4 commit 7e85c5d
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 7 deletions.
8 changes: 2 additions & 6 deletions modin/core/dataframe/pandas/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
find_common_type_cat as find_common_type,
)
from modin.pandas.indexing import is_range_like
from modin.pandas.utils import is_full_grab_slice


class PandasDataframe(object):
Expand Down Expand Up @@ -954,12 +955,7 @@ def _get_dict_of_block_index(self, axis, indices):
if isinstance(indices, slice) or (is_range_like(indices) and indices.step == 1):
# Converting range-like indexer to slice
indices = slice(indices.start, indices.stop, indices.step)
# Detecting full-axis grab
if (
indices.start in (None, 0)
and indices.step in (None, 1)
and (indices.stop is None or indices.stop >= len(self.axes[axis]))
):
if is_full_grab_slice(indices, sequence_len=len(self.axes[axis])):
return OrderedDict(
zip(
range(self._partitions.shape[axis]),
Expand Down
7 changes: 6 additions & 1 deletion modin/pandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import warnings
import pickle as pkl

from .utils import is_full_grab_slice
from modin.utils import try_cast_to_pandas, _inherit_docstrings
from modin.error_message import ErrorMessage
from modin.pandas.utils import is_scalar
Expand Down Expand Up @@ -2996,7 +2997,11 @@ def _getitem_slice(self, key: slice):
modin.pandas.BasePandasDataset
Selected rows.
"""
if key.start is None and key.stop is None:
if is_full_grab_slice(
key,
# Avoid triggering shape computation for lazy executions
sequence_len=(None if self._query_compiler.lazy_execution else len(self)),
):
return self.copy()
return self.iloc[key]

Expand Down
1 change: 1 addition & 0 deletions modin/pandas/test/dataframe/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1446,6 +1446,7 @@ def test___getitem__(data):
(-3, -1),
(1, -1, 2),
(-1, 1, -1),
(None, None, 2),
]

# slice test
Expand Down
1 change: 1 addition & 0 deletions modin/pandas/test/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ def test___getitem__(data):
pandas_series = pandas.Series(list(range(1000)))
df_equals(modin_series[:30], pandas_series[:30])
df_equals(modin_series[modin_series > 500], pandas_series[pandas_series > 500])
df_equals(modin_series[::2], pandas_series[::2])

# Test empty series
df_equals(pd.Series([])[:30], pandas.Series([])[:30])
Expand Down
29 changes: 29 additions & 0 deletions modin/pandas/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,35 @@ def is_scalar(obj):
return not isinstance(obj, BasePandasDataset) and pandas_is_scalar(obj)


def is_full_grab_slice(slc, sequence_len=None):
    """
    Tell whether indexing with `slc` selects an entire sequence.

    Parameters
    ----------
    slc : slice
        The slice to inspect.
    sequence_len : int, optional
        Length of the sequence that `slc` would be applied to.
        When omitted, a bounded ``slc.stop`` cannot be compared against
        the sequence length, so only slices whose ``.stop`` is ``None``
        can qualify as a full grab.

    Returns
    -------
    bool
        Truthy when `slc` starts at the beginning (``None`` or ``0``),
        has a unit step (``None`` or ``1``) and its stop is either
        unbounded or not less than `sequence_len`.
    """
    assert isinstance(slc, slice), "slice object required"

    # Anything that skips leading elements is not a full grab.
    if slc.start not in (None, 0):
        return False
    # A non-unit step either drops elements or reverses the order.
    if slc.step not in (None, 1):
        return False
    # An open-ended stop always reaches the end of the sequence.
    if slc.stop is None:
        return True
    # A bounded stop can't be judged without knowing the length.
    if sequence_len is None:
        return False
    return slc.stop >= sequence_len


def from_modin_frame_to_mi(df, sortorder=None, names=None):
"""
Make a pandas.MultiIndex from a DataFrame.
Expand Down

0 comments on commit 7e85c5d

Please sign in to comment.