Remove deprecated boolean masks for DataFrame.__getitem__ (#4342)

pola-rs · Aug 9, 2022 · c03fbd3
1 parent 7af0082
commit c03fbd3
Show file tree

Hide file tree

Showing 2 changed files with 13 additions and 53 deletions.
diff --git a/py-polars/polars/internals/frame.py b/py-polars/polars/internals/frame.py
@@ -24,7 +24,6 @@
 from polars import internals as pli
 from polars._html import NotebookFormatter
 from polars.datatypes import (
-    Boolean,
     ColumnsType,
     DataType,
     Int8,
@@ -53,7 +52,6 @@
     _process_null_values,
     format_path,
     handle_projection_columns,
-    is_bool_sequence,
     is_int_sequence,
     is_str_sequence,
     range_to_slice,
@@ -117,10 +115,8 @@
     # MultiRowSelector indexes into the vertical axis and
     # MultiColSelector indexes into the horizontal axis
     # NOTE: wrapping these as strings is necessary for Python <3.10
-    MultiRowSelector: TypeAlias = "slice | range | list[int] | list[bool] | pli.Series"
-    MultiColSelector: TypeAlias = (
-        "slice | range | list[int] | list[bool] | list[str] | pli.Series"
-    )
+    MultiRowSelector: TypeAlias = "slice | range | list[int] | pli.Series"
+    MultiColSelector: TypeAlias = "slice | range | list[int] | list[str] | pli.Series"
 
 
 def wrap_df(df: PyDataFrame) -> DataFrame:
@@ -1714,21 +1710,6 @@ def __getitem__(
                     df = self.__getitem__(self.columns[col_selection])
                     return df[row_selection]
 
-                # slice and boolean mask
-                # df[:2, [True, False, True]]
-                if isinstance(col_selection, (Sequence, pli.Series)):
-                    if (
-                        isinstance(col_selection[0], bool)
-                        or isinstance(col_selection, pli.Series)
-                        and col_selection.dtype() == Boolean
-                    ):
-                        df = self.__getitem__(row_selection)
-                        select = []
-                        for col, valid in zip(df.columns, col_selection):
-                            if valid:
-                                select.append(col)
-                        return df.select(select)
-
                 # single slice
                 # df[:, unknown]
                 series = self.__getitem__(col_selection)
@@ -1795,27 +1776,18 @@ def __getitem__(
                 )
             if isinstance(item[0], str):
                 return self._from_pydf(self._df.select(item))
-            if item.dtype == bool:
-                warnings.warn(
-                    "index notation '[]' is deprecated for boolean masks. Consider"
-                    " using 'filter'.",
-                    DeprecationWarning,
-                )
-                return self._from_pydf(self._df.filter(pli.Series("", item).inner()))
 
         if is_str_sequence(item, allow_str=False):
             # select multiple columns
             # df[["foo", "bar"]]
             return self._from_pydf(self._df.select(item))
-        elif is_bool_sequence(item) or is_int_sequence(item):
+        elif is_int_sequence(item):
             item = pli.Series("", item)  # fall through to next if isinstance
 
         if isinstance(item, pli.Series):
             dtype = item.dtype
             if dtype == Utf8:
                 return self._from_pydf(self._df.select(item))
-            if dtype == Boolean:
-                return self._from_pydf(self._df.filter(item.inner()))
             if dtype == UInt32:
                 return self._from_pydf(self._df.take_with_series(item.inner()))
             if dtype in {UInt8, UInt16, UInt64, Int8, Int16, Int32, Int64}:
@@ -2668,7 +2640,7 @@ def drop_nulls(self: DF, subset: str | list[str] | None = None) -> DF:
 
         Drop a column if all values are null:
 
-        >>> df[:, [not (s.null_count() == df.height) for s in df]]
+        >>> df[[s.name for s in df if not (s.null_count() == df.height)]]
         shape: (4, 2)
         ┌──────┬──────┐
         │ b    ┆ c    │

diff --git a/py-polars/tests/test_df.py b/py-polars/tests/test_df.py
@@ -87,8 +87,8 @@ def test_selection() -> None:
     assert df.get_column("a").to_list() == [1, 2, 3]
 
     # select columns by mask
-    assert df[:2, [True, False, False]].shape == (2, 1)
-    assert df[:2, pl.Series([True, False, False])].shape == (2, 1)
+    assert df[:2, :1].shape == (2, 1)
+    assert df[:2, "a"].shape == (2, 1)
 
     # column selection by string(s) in first dimension
     assert df["a"].to_list() == [1, 2, 3]
@@ -111,9 +111,6 @@ def test_selection() -> None:
         pl.DataFrame({"a": [3, 2], "b": [3.0, 2.0], "c": ["c", "b"]})
     )
 
-    assert df[[True, False, True]].frame_equal(
-        pl.DataFrame({"a": [1, 3], "b": [1.0, 3.0], "c": ["a", "c"]})
-    )
     assert df[["a", "b"]].columns == ["a", "b"]
     assert df[[1, 2], [1, 2]].frame_equal(
         pl.DataFrame({"b": [2.0, 3.0], "c": ["b", "c"]})
@@ -122,9 +119,6 @@ def test_selection() -> None:
     assert df[1, 1] == 2.0
     assert df[2, 0] == 3
 
-    assert df[[True, False, True], "b"].shape == (2, 1)
-    assert df[[True, False, False], ["a", "b"]].shape == (1, 2)
-
     assert df[[0, 1], "b"].shape == (2, 1)
     assert df[[2], ["a", "b"]].shape == (1, 2)
     assert df.to_series(0).name == "a"
@@ -1644,12 +1638,6 @@ def test_get_item() -> None:
     with pytest.raises(ValueError):
         _ = df[np.array([1.0])]
 
-    # using boolean masks with numpy is deprecated
-    with pytest.deprecated_call():
-        assert df[np.array([True, False, False, True])].frame_equal(
-            pl.DataFrame({"a": [1.0, 4.0], "b": [3, 6]})
-        )
-
     # sequences (lists or tuples; tuple only if length != 2)
     # if strings or list of expressions, assumed to be column names
     # if bools, assumed to be a row mask
@@ -1659,9 +1647,6 @@ def test_get_item() -> None:
     assert df[[1, -4, -1, 2, 1]].frame_equal(
         pl.DataFrame({"a": [2.0, 1.0, 4.0, 3.0, 2.0], "b": [4, 3, 6, 5, 4]})
     )
-    assert df[[False, True, True, False]].frame_equal(
-        pl.DataFrame({"a": [2.0, 3.0], "b": [4, 5]})
-    )
 
     # pl.Series: strings for column selections.
     assert df[pl.Series("", ["a", "b"])].frame_equal(df)
@@ -1687,10 +1672,13 @@ def test_get_item() -> None:
             pl.DataFrame({"a": [4.0, 1.0, 2.0, 3.0, 4.0, 1.0], "b": [6, 3, 4, 5, 6, 3]})
         )
 
-    # pl.Series: boolean masks for row selection.
-    assert df[pl.Series("", [False, True, True, False])].frame_equal(
-        pl.DataFrame({"a": [2.0, 3.0], "b": [4, 5]})
-    )
+    # Boolean masks not supported
+    with pytest.raises(ValueError):
+        df[np.array([True, False, True])]
+    with pytest.raises(ValueError):
+        df[[True, False, True], [False, True]]  # type: ignore[index]
+    with pytest.raises(ValueError):
+        df[pl.Series([True, False, True]), "b"]
 
 
 @pytest.mark.parametrize("as_series,inner_dtype", [(True, pl.Series), (False, list)])