Backport PR #41266: API: make hide_columns and hide_index have a consistent signature and function in `Styler` (#42041)

Co-authored-by: attack68 <24256554+attack68@users.noreply.github.com>
pandas-dev · Jun 16, 2021 · 7488d8e · 7488d8e
1 parent 05cfa0d
commit 7488d8e
Show file tree

Hide file tree

Showing 3 changed files with 235 additions and 68 deletions.
diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
@@ -31,7 +31,10 @@
 from pandas.util._decorators import doc
 
 import pandas as pd
-from pandas import RangeIndex
+from pandas import (
+    IndexSlice,
+    RangeIndex,
+)
 from pandas.api.types import is_list_like
 from pandas.core import generic
 import pandas.core.common as com
@@ -726,7 +729,7 @@ def to_latex(
             self.data.columns = RangeIndex(stop=len(self.data.columns))
             numeric_cols = self.data._get_numeric_data().columns.to_list()
             self.data.columns = _original_columns
-            column_format = "" if self.hidden_index else "l" * self.data.index.nlevels
+            column_format = "" if self.hide_index_ else "l" * self.data.index.nlevels
             for ci, _ in enumerate(self.data.columns):
                 if ci not in self.hidden_columns:
                     column_format += (
@@ -971,7 +974,7 @@ def _copy(self, deepcopy: bool = False) -> Styler:
         )
 
         styler.uuid = self.uuid
-        styler.hidden_index = self.hidden_index
+        styler.hide_index_ = self.hide_index_
 
         if deepcopy:
             styler.ctx = copy.deepcopy(self.ctx)
@@ -1010,7 +1013,7 @@ def clear(self) -> None:
         self.cell_context.clear()
         self._todo.clear()
 
-        self.hidden_index = False
+        self.hide_index_ = False
         self.hidden_columns = []
         # self.format and self.table_styles may be dependent on user
         # input in self.__init__()
@@ -1141,7 +1144,7 @@ def _applymap(
     ) -> Styler:
         func = partial(func, **kwargs)  # applymap doesn't take kwargs?
         if subset is None:
-            subset = pd.IndexSlice[:]
+            subset = IndexSlice[:]
         subset = non_reducing_slice(subset)
         result = self.data.loc[subset].applymap(func)
         self._update_ctx(result)
@@ -1556,37 +1559,169 @@ def set_na_rep(self, na_rep: str) -> StylerRenderer:
         self.na_rep = na_rep
         return self.format(na_rep=na_rep, precision=self.precision)
 
-    def hide_index(self) -> Styler:
+    def hide_index(self, subset: Subset | None = None) -> Styler:
         """
-        Hide any indices from rendering.
+        Hide the entire index, or specific keys in the index from rendering.
+
+        This method has dual functionality:
+
+          - if ``subset`` is ``None`` then the entire index will be hidden whilst
+            displaying all data-rows.
+          - if a ``subset`` is given then those specific rows will be hidden whilst the
+            index itself remains visible.
+
+        .. versionchanged:: 1.3.0
+
+        Parameters
+        ----------
+        subset : label, array-like, IndexSlice, optional
+            A valid 1d input or single key along the index axis within
+            `DataFrame.loc[<subset>, :]`, identifying the rows to hide. If
+            omitted, the index itself is hidden instead.
 
         Returns
         -------
         self : Styler
+
+        See Also
+        --------
+        Styler.hide_columns: Hide the entire column headers row, or specific columns.
+
+        Examples
+        --------
+        Simple application hiding specific rows:
+
+        >>> df = pd.DataFrame([[1,2], [3,4], [5,6]], index=["a", "b", "c"])
+        >>> df.style.hide_index(["a", "b"])
+             0    1
+        c    5    6
+
+        Hide the index and retain the data values:
+
+        >>> midx = pd.MultiIndex.from_product([["x", "y"], ["a", "b", "c"]])
+        >>> df = pd.DataFrame(np.random.randn(6,6), index=midx, columns=midx)
+        >>> df.style.format("{:.1f}").hide_index()
+                         x                    y
+           a      b      c      a      b      c
+         0.1    0.0    0.4    1.3    0.6   -1.4
+         0.7    1.0    1.3    1.5   -0.0   -0.2
+         1.4   -0.8    1.6   -0.2   -0.4   -0.3
+         0.4    1.0   -0.2   -0.8   -1.2    1.1
+        -0.6    1.2    1.8    1.9    0.3    0.3
+         0.8    0.5   -0.3    1.2    2.2   -0.8
+
+        Hide specific rows but retain the index:
+
+        >>> df.style.format("{:.1f}").hide_index(subset=(slice(None), ["a", "c"]))
+                                 x                    y
+                   a      b      c      a      b      c
+        x   b    0.7    1.0    1.3    1.5   -0.0   -0.2
+        y   b   -0.6    1.2    1.8    1.9    0.3    0.3
+
+        Hide specific rows and the index:
+
+        >>> styler = df.style.format("{:.1f}")
+        >>> styler.hide_index(subset=(slice(None), ["a", "c"])).hide_index()
+                         x                    y
+           a      b      c      a      b      c
+         0.7    1.0    1.3    1.5   -0.0   -0.2
+        -0.6    1.2    1.8    1.9    0.3    0.3
         """
-        self.hidden_index = True
+        if subset is None:
+            self.hide_index_ = True
+        else:
+            subset_ = IndexSlice[subset, :]  # new var so mypy reads not Optional
+            subset = non_reducing_slice(subset_)
+            hide = self.data.loc[subset]
+            hrows = self.index.get_indexer_for(hide.index)
+            # error: Incompatible types in assignment (expression has type
+            # "ndarray", variable has type "Sequence[int]")
+            self.hidden_rows = hrows  # type: ignore[assignment]
         return self
 
-    def hide_columns(self, subset: Subset) -> Styler:
+    def hide_columns(self, subset: Subset | None = None) -> Styler:
         """
-        Hide columns from rendering.
+        Hide the column headers or specific keys in the columns from rendering.
+
+        This method has dual functionality:
+
+          - if ``subset`` is ``None`` then the entire column headers row will be hidden
+            whilst the data-values remain visible.
+          - if a ``subset`` is given then those specific columns, including the
+            data-values will be hidden, whilst the column headers row remains visible.
+
+        .. versionchanged:: 1.3.0
 
         Parameters
         ----------
-        subset : label, array-like, IndexSlice
-            A valid 1d input or single key along the appropriate axis within
-            `DataFrame.loc[]`, to limit ``data`` to *before* applying the function.
+        subset : label, array-like, IndexSlice, optional
+            A valid 1d input or single key along the columns axis within
+            `DataFrame.loc[:, <subset>]`, identifying the columns to hide. If
+            omitted, the column headers row is hidden instead.
 
         Returns
         -------
         self : Styler
+
+        See Also
+        --------
+        Styler.hide_index: Hide the entire index, or specific keys in the index.
+
+        Examples
+        --------
+        Simple application hiding specific columns:
+
+        >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"])
+        >>> df.style.hide_columns(["a", "b"])
+             c
+        0    3
+        1    6
+
+        Hide column headers and retain the data values:
+
+        >>> midx = pd.MultiIndex.from_product([["x", "y"], ["a", "b", "c"]])
+        >>> df = pd.DataFrame(np.random.randn(6,6), index=midx, columns=midx)
+        >>> df.style.format("{:.1f}").hide_columns()
+        x   a    0.1    0.0    0.4    1.3    0.6   -1.4
+            b    0.7    1.0    1.3    1.5   -0.0   -0.2
+            c    1.4   -0.8    1.6   -0.2   -0.4   -0.3
+        y   a    0.4    1.0   -0.2   -0.8   -1.2    1.1
+            b   -0.6    1.2    1.8    1.9    0.3    0.3
+            c    0.8    0.5   -0.3    1.2    2.2   -0.8
+
+        Hide specific columns but retain the column headers:
+
+        >>> df.style.format("{:.1f}").hide_columns(subset=(slice(None), ["a", "c"]))
+                   x      y
+                   b      b
+        x   a    0.0    0.6
+            b    1.0   -0.0
+            c   -0.8   -0.4
+        y   a    1.0   -1.2
+            b    1.2    0.3
+            c    0.5    2.2
+
+        Hide specific columns and the column headers:
+
+        >>> styler = df.style.format("{:.1f}")
+        >>> styler.hide_columns(subset=(slice(None), ["a", "c"])).hide_columns()
+        x   a    0.0    0.6
+            b    1.0   -0.0
+            c   -0.8   -0.4
+        y   a    1.0   -1.2
+            b    1.2    0.3
+            c    0.5    2.2
         """
-        subset = non_reducing_slice(subset)
-        hidden_df = self.data.loc[subset]
-        hcols = self.columns.get_indexer_for(hidden_df.columns)
-        # error: Incompatible types in assignment (expression has type
-        # "ndarray", variable has type "Sequence[int]")
-        self.hidden_columns = hcols  # type: ignore[assignment]
+        if subset is None:
+            self.hide_columns_ = True
+        else:
+            subset_ = IndexSlice[:, subset]  # new var so mypy reads not Optional
+            subset = non_reducing_slice(subset_)
+            hide = self.data.loc[subset]
+            hcols = self.columns.get_indexer_for(hide.columns)
+            # error: Incompatible types in assignment (expression has type
+            # "ndarray", variable has type "Sequence[int]")
+            self.hidden_columns = hcols  # type: ignore[assignment]
         return self
 
     # -----------------------------------------------------------------------

diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py
@@ -98,7 +98,9 @@ def __init__(
         self.cell_ids = cell_ids
 
         # add rendering variables
-        self.hidden_index: bool = False
+        self.hide_index_: bool = False  # bools for hiding col/row headers
+        self.hide_columns_: bool = False
+        self.hidden_rows: Sequence[int] = []  # sequence for specific hidden rows/cols
         self.hidden_columns: Sequence[int] = []
         self.ctx: DefaultDict[tuple[int, int], CSSList] = defaultdict(list)
         self.cell_context: DefaultDict[tuple[int, int], str] = defaultdict(str)
@@ -298,55 +300,56 @@ def _translate_header(
 
         head = []
         # 1) column headers
-        for r in range(self.data.columns.nlevels):
-            index_blanks = [
-                _element("th", blank_class, blank_value, not self.hidden_index)
-            ] * (self.data.index.nlevels - 1)
-
-            name = self.data.columns.names[r]
-            column_name = [
-                _element(
-                    "th",
-                    f"{blank_class if name is None else index_name_class} level{r}",
-                    name if name is not None else blank_value,
-                    not self.hidden_index,
-                )
-            ]
-
-            if clabels:
-                column_headers = [
+        if not self.hide_columns_:
+            for r in range(self.data.columns.nlevels):
+                index_blanks = [
+                    _element("th", blank_class, blank_value, not self.hide_index_)
+                ] * (self.data.index.nlevels - 1)
+
+                name = self.data.columns.names[r]
+                column_name = [
                     _element(
                         "th",
-                        f"{col_heading_class} level{r} col{c}",
-                        value,
-                        _is_visible(c, r, col_lengths),
-                        attributes=(
-                            f'colspan="{col_lengths.get((r, c), 0)}"'
-                            if col_lengths.get((r, c), 0) > 1
-                            else ""
-                        ),
+                        f"{blank_class if name is None else index_name_class} level{r}",
+                        name if name is not None else blank_value,
+                        not self.hide_index_,
                     )
-                    for c, value in enumerate(clabels[r])
                 ]
 
-                if len(self.data.columns) > max_cols:
-                    # add an extra column with `...` value to indicate trimming
-                    column_headers.append(
+                if clabels:
+                    column_headers = [
                         _element(
                             "th",
-                            f"{col_heading_class} level{r} {trimmed_col_class}",
-                            "...",
-                            True,
-                            attributes="",
+                            f"{col_heading_class} level{r} col{c}",
+                            value,
+                            _is_visible(c, r, col_lengths),
+                            attributes=(
+                                f'colspan="{col_lengths.get((r, c), 0)}"'
+                                if col_lengths.get((r, c), 0) > 1
+                                else ""
+                            ),
                         )
-                    )
-                head.append(index_blanks + column_name + column_headers)
+                        for c, value in enumerate(clabels[r])
+                    ]
+
+                    if len(self.data.columns) > max_cols:
+                        # add an extra column with `...` value to indicate trimming
+                        column_headers.append(
+                            _element(
+                                "th",
+                                f"{col_heading_class} level{r} {trimmed_col_class}",
+                                "...",
+                                True,
+                                attributes="",
+                            )
+                        )
+                    head.append(index_blanks + column_name + column_headers)
 
         # 2) index names
         if (
             self.data.index.names
             and com.any_not_none(*self.data.index.names)
-            and not self.hidden_index
+            and not self.hide_index_
         ):
             index_names = [
                 _element(
@@ -412,7 +415,9 @@ def _translate_body(
             The associated HTML elements needed for template rendering.
         """
         # for sparsifying a MultiIndex
-        idx_lengths = _get_level_lengths(self.index, sparsify_index, max_rows)
+        idx_lengths = _get_level_lengths(
+            self.index, sparsify_index, max_rows, self.hidden_rows
+        )
 
         rlabels = self.data.index.tolist()[:max_rows]  # slice to allow trimming
         if self.data.index.nlevels == 1:
@@ -426,7 +431,7 @@ def _translate_body(
                         "th",
                         f"{row_heading_class} level{c} {trimmed_row_class}",
                         "...",
-                        not self.hidden_index,
+                        not self.hide_index_,
                         attributes="",
                     )
                     for c in range(self.data.index.nlevels)
@@ -463,7 +468,7 @@ def _translate_body(
                     "th",
                     f"{row_heading_class} level{c} row{r}",
                     value,
-                    (_is_visible(r, c, idx_lengths) and not self.hidden_index),
+                    (_is_visible(r, c, idx_lengths) and not self.hide_index_),
                     id=f"level{c}_row{r}",
                     attributes=(
                         f'rowspan="{idx_lengths.get((c, r), 0)}"'
@@ -497,7 +502,7 @@ def _translate_body(
                     "td",
                     f"{data_class} row{r} col{c}{cls}",
                     value,
-                    (c not in self.hidden_columns),
+                    (c not in self.hidden_columns and r not in self.hidden_rows),
                     attributes="",
                     display_value=self._display_funcs[(r, c)](value),
                 )
@@ -528,7 +533,7 @@ def _translate_latex(self, d: dict) -> None:
         d["head"] = [[col for col in row if col["is_visible"]] for row in d["head"]]
         body = []
         for r, row in enumerate(d["body"]):
-            if self.hidden_index:
+            if self.hide_index_:
                 row_body_headers = []
             else:
                 row_body_headers = [
@@ -843,7 +848,13 @@ def _get_level_lengths(
                 last_label = j
                 lengths[(i, last_label)] = 0
             elif j not in hidden_elements:
-                lengths[(i, last_label)] += 1
+                if lengths[(i, last_label)] == 0:
+                    # if the previous iteration was first-of-kind but hidden then offset
+                    last_label = j
+                    lengths[(i, last_label)] = 1
+                else:
+                    # else add to previous iteration
+                    lengths[(i, last_label)] += 1
 
     non_zero_lengths = {
         element: length for element, length in lengths.items() if length >= 1