From 280da385ff69755b924b3508c3b5a458c35164bb Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 24 Aug 2020 02:50:02 +0700 Subject: [PATCH 01/22] Refactor to_latex using polymorphism Previously there was complicated logic in multiple methods for handling either a longtable or a regular table. This commit implements - ``LatexTableFormatter``, - ``LatexTabularFormatter``, - ``LatexLongTableFormatter``, derived from ``LatexFormatter``, based on ``LatexFormatterAbstract``. Each of the derived classes implements its own methods for writing - beginning of the table; - caption and labels; - separators; - end of the table. LatexFormatter changes ---------------------- - Make the process of creating tables more readable. - Drop the escape and bold_rows attributes --- pandas/io/formats/format.py | 28 ++- pandas/io/formats/latex.py | 456 ++++++++++++++++++++++-------------- 2 files changed, 307 insertions(+), 177 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 81990b3d505e1..7b200869fcfb4 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -936,19 +936,35 @@ def to_latex( """ Render a DataFrame to a LaTeX tabular/longtable environment output.
""" - from pandas.io.formats.latex import LatexFormatter - - return LatexFormatter( - self, - column_format=column_format, + latex_formatter = self._create_latex_formatter( longtable=longtable, + column_format=column_format, multicolumn=multicolumn, multicolumn_format=multicolumn_format, multirow=multirow, caption=caption, label=label, position=position, - ).get_result(buf=buf, encoding=encoding) + ) + return latex_formatter.get_result(buf=buf, encoding=encoding) + + def _create_latex_formatter(self, **kwargs): + """Create concrete instance of LatexFormatter.""" + from pandas.io.formats.latex import ( + LatexLongTableFormatter, + LatexTableFormatter, + LatexTabularFormatter, + ) + + is_longtable = kwargs.pop("longtable") + is_table = any( + [kwargs.get("caption"), kwargs.get("label"), kwargs.get("position")] + ) + if is_longtable: + return LatexLongTableFormatter(self, **kwargs) + if is_table: + return LatexTableFormatter(self, **kwargs) + return LatexTabularFormatter(self, **kwargs) def _format_col(self, i: int) -> List[str]: frame = self.tr_frame diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 715b8bbdf5672..23a29a39e133e 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -1,6 +1,7 @@ """ Module for formatting output data in Latex. 
""" +from abc import ABC, abstractmethod from typing import IO, List, Optional, Tuple import numpy as np @@ -10,7 +11,51 @@ from pandas.io.formats.format import DataFrameFormatter, TableFormatter -class LatexFormatter(TableFormatter): +class LatexFormatterAbstract(ABC): + def _compose_string(self) -> str: + elements = [ + self._compose_env_begin(), + self._compose_top_separator(), + self._compose_header(), + self._compose_middle_separator(), + self._compose_env_body(), + self._compose_bottom_separator(), + self._compose_env_end(), + ] + result = "\n".join([item for item in elements if item]) + trailing_newline = "\n" + return result + trailing_newline + + @abstractmethod + def _compose_env_begin(self): + pass + + @abstractmethod + def _compose_top_separator(self): + pass + + @abstractmethod + def _compose_header(self): + pass + + @abstractmethod + def _compose_middle_separator(self): + pass + + @abstractmethod + def _compose_env_body(self): + pass + + @abstractmethod + def _compose_bottom_separator(self): + pass + + @abstractmethod + def _compose_env_end(self): + pass + + +class LatexFormatter(TableFormatter, LatexFormatterAbstract): """ Used to render a DataFrame to a LaTeX tabular/longtable environment output. @@ -20,8 +65,6 @@ class LatexFormatter(TableFormatter): column_format : str, default None The columns format as specified in `LaTeX table format `__ e.g 'rcl' for 3 columns - longtable : boolean, default False - Use a longtable environment instead of tabular. 
See Also -------- @@ -32,7 +75,6 @@ def __init__( self, formatter: DataFrameFormatter, column_format: Optional[str] = None, - longtable: bool = False, multicolumn: bool = False, multicolumn_format: Optional[str] = None, multirow: bool = False, @@ -42,24 +84,24 @@ def __init__( ): self.fmt = formatter self.frame = self.fmt.frame - self.bold_rows = self.fmt.bold_rows self.column_format = column_format - self.longtable = longtable self.multicolumn = multicolumn self.multicolumn_format = multicolumn_format self.multirow = multirow self.caption = caption self.label = label - self.escape = self.fmt.escape self.position = position - self._table_float = any(p is not None for p in (caption, label, position)) def write_result(self, buf: IO[str]) -> None: """ Render a DataFrame to a LaTeX tabular, longtable, or table/tabular environment output. """ - # string representation of the columns + buf.write(self._compose_string()) + + @property + def strcols(self): + """String representation of the columns.""" if len(self.frame.columns) == 0 or len(self.frame.index) == 0: info_line = ( f"Empty {type(self.frame).__name__}\n" @@ -70,12 +112,6 @@ def write_result(self, buf: IO[str]) -> None: else: strcols = self.fmt._to_str_columns() - def get_col_type(dtype): - if issubclass(dtype.type, np.number): - return "r" - else: - return "l" - # reestablish the MultiIndex that has been joined by _to_str_column if self.fmt.index and isinstance(self.frame.index, ABCMultiIndex): out = self.frame.index.format( @@ -107,89 +143,171 @@ def pad_empties(x): # Get rid of old multiindex column and add new ones strcols = out + strcols[1:] + return strcols - if self.column_format is None: - dtypes = self.frame.dtypes._values - column_format = "".join(map(get_col_type, dtypes)) - if self.fmt.index: - index_format = "l" * self.frame.index.nlevels - column_format = index_format + column_format - elif not isinstance(self.column_format, str): # pragma: no cover + @property + def strrows(self): + return 
list(zip(*self.strcols)) + + @property + def column_format(self): + return self._column_format + + @column_format.setter + def column_format(self, input_column_format): + if input_column_format is None: + self._column_format = ( + self._get_index_format() + self._get_column_format_based_on_dtypes() + ) + elif not isinstance(input_column_format, str): # pragma: no cover raise AssertionError( - f"column_format must be str or unicode, not {type(column_format)}" + f"column_format must be str or unicode, " + f"not {type(input_column_format)}" ) else: - column_format = self.column_format + self._column_format = input_column_format + + def _get_column_format_based_on_dtypes(self): + def get_col_type(dtype): + if issubclass(dtype.type, np.number): + return "r" + return "l" + + dtypes = self.frame.dtypes._values + return "".join(map(get_col_type, dtypes)) + + def _get_index_format(self): + if self.fmt.index: + return "l" * self.frame.index.nlevels + return "" + + @property + def position_macro(self): + return f"[{self.position}]" if self.position else "" + + @property + def caption_macro(self): + return f"\\caption{{{self.caption}}}" if self.caption else "" + + @property + def label_macro(self): + return f"\\label{{{self.label}}}" if self.label else "" + + def _compose_header(self): + if self.fmt.header: + return "\n".join( + [self._compose_row(row_num=row_num) for row_num in range(self._nlevels)] + ) + return "" + + def _compose_top_separator(self): + return "\\toprule" - self._write_tabular_begin(buf, column_format) + def _compose_middle_separator(self): + return "\\midrule" if self._is_separator_required() else "" - buf.write("\\toprule\n") + def _is_header_present(self): + return self.fmt.header - ilevels = self.frame.index.nlevels - clevels = self.frame.columns.nlevels - nlevels = clevels + def _is_body_present(self): + return len(self.strrows) > self._nlevels + + def _is_separator_required(self): + return self._is_header_present() and self._is_body_present() + + 
@property + def _ilevels(self): + return self.frame.index.nlevels + + @property + def _clevels(self): + return self.frame.columns.nlevels + + @property + def _nlevels(self): + nlevels = self._clevels if self.fmt.has_index_names and self.fmt.show_index_names: nlevels += 1 - strrows = list(zip(*strcols)) + return nlevels + + def _compose_env_body(self): self.clinebuf: List[List[int]] = [] - for i, row in enumerate(strrows): - if i == nlevels and self.fmt.header: - buf.write("\\midrule\n") # End of header - if self.longtable: - buf.write("\\endhead\n") - buf.write("\\midrule\n") - buf.write( - f"\\multicolumn{{{len(row)}}}{{r}}" - "{{Continued on next page}} \\\\\n" - ) - buf.write("\\midrule\n") - buf.write("\\endfoot\n\n") - buf.write("\\bottomrule\n") - buf.write("\\endlastfoot\n") - if self.escape: - # escape backslashes first - crow = [ - ( - x.replace("\\", "\\textbackslash ") - .replace("_", "\\_") - .replace("%", "\\%") - .replace("$", "\\$") - .replace("#", "\\#") - .replace("{", "\\{") - .replace("}", "\\}") - .replace("~", "\\textasciitilde ") - .replace("^", "\\textasciicircum ") - .replace("&", "\\&") - if (x and x != "{}") - else "{}" - ) - for x in row - ] - else: - crow = [x if x else "{}" for x in row] - if self.bold_rows and self.fmt.index: - # bold row labels - crow = [ - f"\\textbf{{{x}}}" - if j < ilevels and x.strip() not in ["", "{}"] - else x - for j, x in enumerate(crow) - ] - if i < clevels and self.fmt.header and self.multicolumn: - # sum up columns to multicolumns - crow = self._format_multicolumn(crow, ilevels) - if i >= nlevels and self.fmt.index and self.multirow and ilevels > 1: - # sum up rows to multirows - crow = self._format_multirow(crow, ilevels, i, strrows) - buf.write(" & ".join(crow)) - buf.write(" \\\\\n") - if self.multirow and i < len(strrows) - 1: - self._print_cline(buf, i, len(strcols)) - - self._write_tabular_end(buf) - - def _format_multicolumn(self, row: List[str], ilevels: int) -> List[str]: + header_row_num = 
self._nlevels if self._is_header_present() else 0 + body_list = [ + self._compose_row(num) + for num, row in enumerate(self.strrows) + if num >= header_row_num + ] + return "\n".join(body_list) + + def _compose_row(self, row_num): + strrows = self.strrows + row = strrows[row_num] + + is_multicol = row_num < self._clevels and self.fmt.header and self.multicolumn + + is_multirow = ( + row_num >= self._nlevels + and self.fmt.index + and self.multirow + and self._ilevels > 1 + ) + + is_cline_maybe_required = is_multirow and row_num < len(self.strrows) - 1 + + crow = self._preprocess_row(row) + + if is_multicol: + crow = self._format_multicolumn(crow) + if is_multirow: + crow = self._format_multirow(crow, row_num, strrows) + + lst = [] + lst.append(" & ".join(crow)) + lst.append(" \\\\") + if is_cline_maybe_required: + cline = self._compose_cline(row_num, len(self.strcols)) + lst.append(cline) + return "".join(lst) + + def _preprocess_row(self, row): + if self.fmt.escape: + crow = self._escape_backslashes(row) + else: + crow = [x if x else "{}" for x in row] + if self.fmt.bold_rows and self.fmt.index: + crow = self._convert_to_bold(crow, self._ilevels) + return crow + + @staticmethod + def _escape_backslashes(row): + return [ + ( + x.replace("\\", "\\textbackslash ") + .replace("_", "\\_") + .replace("%", "\\%") + .replace("$", "\\$") + .replace("#", "\\#") + .replace("{", "\\{") + .replace("}", "\\}") + .replace("~", "\\textasciitilde ") + .replace("^", "\\textasciicircum ") + .replace("&", "\\&") + if (x and x != "{}") + else "{}" + ) + for x in row + ] + + @staticmethod + def _convert_to_bold(crow, ilevels): + return [ + f"\\textbf{{{x}}}" if j < ilevels and x.strip() not in ["", "{}"] else x + for j, x in enumerate(crow) + ] + + def _format_multicolumn(self, row: List[str]) -> List[str]: r""" Combine columns belonging to a group to a single multicolumn entry according to self.multicolumn_format @@ -199,7 +317,7 @@ def _format_multicolumn(self, row: List[str], 
ilevels: int) -> List[str]: will become \multicolumn{3}{l}{a} & b & \multicolumn{2}{l}{c} """ - row2 = list(row[:ilevels]) + row2 = list(row[: self._ilevels]) ncol = 1 coltext = "" @@ -214,7 +332,7 @@ def append_col(): else: row2.append(coltext) - for c in row[ilevels:]: + for c in row[self._ilevels :]: # if next col has text, write the previous if c.strip(): if coltext: @@ -230,7 +348,7 @@ def append_col(): return row2 def _format_multirow( - self, row: List[str], ilevels: int, i: int, rows: List[Tuple[str, ...]] + self, row: List[str], i: int, rows: List[Tuple[str, ...]] ) -> List[str]: r""" Check following rows, whether row should be a multirow @@ -241,7 +359,7 @@ def _format_multirow( b & 0 & \cline{1-2} b & 0 & """ - for j in range(ilevels): + for j in range(self._ilevels): if row[j].strip(): nrow = 1 for r in rows[i + 1 :]: @@ -256,88 +374,84 @@ def _format_multirow( self.clinebuf.append([i + nrow - 1, j + 1]) return row - def _print_cline(self, buf: IO[str], i: int, icol: int) -> None: + def _compose_cline(self, i: int, icol: int) -> str: """ Print clines after multirow-blocks are finished. """ + lst = [] for cl in self.clinebuf: if cl[0] == i: - buf.write(f"\\cline{{{cl[1]:d}-{icol:d}}}\n") - # remove entries that have been written to buffer - self.clinebuf = [x for x in self.clinebuf if x[0] != i] - - def _write_tabular_begin(self, buf, column_format: str): - """ - Write the beginning of a tabular environment or - nested table/tabular environments including caption and label. - - Parameters - ---------- - buf : string or file handle - File path or object. If not specified, the result is returned as - a string. 
- column_format : str - The columns format as specified in `LaTeX table format - `__ e.g 'rcl' - for 3 columns - """ - if self._table_float: - # then write output in a nested table/tabular or longtable environment - if self.caption is None: - caption_ = "" - else: - caption_ = f"\n\\caption{{{self.caption}}}" - - if self.label is None: - label_ = "" - else: - label_ = f"\n\\label{{{self.label}}}" - - if self.position is None: - position_ = "" - else: - position_ = f"[{self.position}]" - - if self.longtable: - table_ = f"\\begin{{longtable}}{position_}{{{column_format}}}" - tabular_ = "\n" - else: - table_ = f"\\begin{{table}}{position_}\n\\centering" - tabular_ = f"\n\\begin{{tabular}}{{{column_format}}}\n" - - if self.longtable and (self.caption is not None or self.label is not None): - # a double-backslash is required at the end of the line - # as discussed here: - # https://tex.stackexchange.com/questions/219138 - backlash_ = "\\\\" - else: - backlash_ = "" - buf.write(f"{table_}{caption_}{label_}{backlash_}{tabular_}") - else: - if self.longtable: - tabletype_ = "longtable" - else: - tabletype_ = "tabular" - buf.write(f"\\begin{{{tabletype_}}}{{{column_format}}}\n") - - def _write_tabular_end(self, buf): - """ - Write the end of a tabular environment or nested table/tabular - environment. - - Parameters - ---------- - buf : string or file handle - File path or object. If not specified, the result is returned as - a string. 
- - """ - if self.longtable: - buf.write("\\end{longtable}\n") - else: - buf.write("\\bottomrule\n") - buf.write("\\end{tabular}\n") - if self._table_float: - buf.write("\\end{table}\n") - else: - pass + lst.append(f"\n\\cline{{{cl[1]:d}-{icol:d}}}") + # remove entries that have been written to buffer + self.clinebuf = [x for x in self.clinebuf if x[0] != i] + return "".join(lst) + + +class LatexTableFormatter(LatexFormatter): + def _compose_env_begin(self): + elements = [ + f"\\begin{{table}}{self.position_macro}", + f"\\centering", + f"{self.caption_macro}", + f"{self.label_macro}", + f"\\begin{{tabular}}{{{self.column_format}}}", + ] + return "\n".join([item for item in elements if item]) + + def _compose_bottom_separator(self): + return "\\bottomrule" + + def _compose_env_end(self): + return "\n".join(["\\end{tabular}", "\\end{table}"]) + + +class LatexTabularFormatter(LatexFormatter): + def _compose_env_begin(self): + return f"\\begin{{tabular}}{{{self.column_format}}}" + + def _compose_bottom_separator(self): + return "\\bottomrule" + + def _compose_env_end(self): + return "\\end{tabular}" + + +class LatexLongTableFormatter(LatexFormatter): + def _compose_env_begin(self): + first_row = ( + f"\\begin{{longtable}}{self.position_macro}" f"{{{self.column_format}}}" + ) + elements = [first_row, f"{self._caption_and_label()}"] + return "\n".join([item for item in elements if item]) + + def _caption_and_label(self): + if not self.caption and not self.label: + return "" + elif self.caption or self.label: + double_backslash = "\\\\" + elements = [f"{self.caption_macro}", f"{self.label_macro}"] + caption_and_label = "\n".join([item for item in elements if item]) + caption_and_label += double_backslash + return caption_and_label + + def _compose_middle_separator(self): + elements = [ + "\\midrule", + "\\endhead", + "\\midrule", + f"\\multicolumn{{{len(self.strcols)}}}{{r}}" + "{{Continued on next page}} \\\\", + "\\midrule", + "\\endfoot\n", + "\\bottomrule", + 
"\\endlastfoot", + ] + if self._is_separator_required(): + return "\n".join(elements) + return "" + + def _compose_bottom_separator(self): + return "" + + def _compose_env_end(self): + return "\\end{longtable}" From 104f968cb99052378f3b954005d1c4c11b8c6ac2 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 24 Aug 2020 15:00:08 +0700 Subject: [PATCH 02/22] Move strcols from property to attr for performance --- pandas/io/formats/latex.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 23a29a39e133e..fcd59b7b20bbd 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -92,6 +92,9 @@ def __init__( self.label = label self.position = position + self.strcols = self._get_strcols() + self.strrows = list(zip(*self.strcols)) + def write_result(self, buf: IO[str]) -> None: """ Render a DataFrame to a LaTeX tabular, longtable, or table/tabular @@ -99,8 +102,7 @@ def write_result(self, buf: IO[str]) -> None: """ buf.write(self._compose_string()) - @property - def strcols(self): + def _get_strcols(self): """String representation of the columns.""" if len(self.frame.columns) == 0 or len(self.frame.index) == 0: info_line = ( @@ -145,10 +147,6 @@ def pad_empties(x): strcols = out + strcols[1:] return strcols - @property - def strrows(self): - return list(zip(*self.strcols)) - @property def column_format(self): return self._column_format @@ -242,8 +240,7 @@ def _compose_env_body(self): return "\n".join(body_list) def _compose_row(self, row_num): - strrows = self.strrows - row = strrows[row_num] + row = self.strrows[row_num] is_multicol = row_num < self._clevels and self.fmt.header and self.multicolumn @@ -261,7 +258,7 @@ def _compose_row(self, row_num): if is_multicol: crow = self._format_multicolumn(crow) if is_multirow: - crow = self._format_multirow(crow, row_num, strrows) + crow = self._format_multirow(crow, row_num) lst = [] lst.append(" & ".join(crow)) 
@@ -347,9 +344,7 @@ def append_col(): append_col() return row2 - def _format_multirow( - self, row: List[str], i: int, rows: List[Tuple[str, ...]] - ) -> List[str]: + def _format_multirow(self, row: List[str], i: int) -> List[str]: r""" Check following rows, whether row should be a multirow @@ -362,7 +357,7 @@ def _format_multirow( for j in range(self._ilevels): if row[j].strip(): nrow = 1 - for r in rows[i + 1 :]: + for r in self.strrows[i + 1 :]: if not r[j].strip(): nrow += 1 else: From 18bc3bfd68cafb63177d5118471229f7e6f00224 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 24 Aug 2020 15:57:55 +0700 Subject: [PATCH 03/22] Separate row format from LatexFormatting New classes for iterations over headers and body: - RowHeaderIterator; - RowBodyIterator, based on RowCreator. --- pandas/io/formats/latex.py | 429 +++++++++++++++++++------------------ 1 file changed, 226 insertions(+), 203 deletions(-) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index fcd59b7b20bbd..a3cacda4a74e5 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -2,7 +2,7 @@ Module for formatting output data in Latex. 
""" from abc import ABC, abstractmethod -from typing import IO, List, Optional, Tuple +from typing import IO, List, Optional import numpy as np @@ -11,96 +11,63 @@ from pandas.io.formats.format import DataFrameFormatter, TableFormatter -class LatexFormatterAbstract(ABC): - def _compose_string(self) -> str: - elements = [ - self._compose_env_begin(), - self._compose_top_separator(), - self._compose_header(), - self._compose_middle_separator(), - self._compose_env_body(), - self._compose_bottom_separator(), - self._compose_env_end(), - ] - result = "\n".join([item for item in elements if item]) - trailing_newline = "\n" - return result + trailing_newline - - @abstractmethod - def _compose_env_begin(self): - pass - - @abstractmethod - def _compose_top_separator(self): - pass - - @abstractmethod - def _compose_header(self): - pass - - @abstractmethod - def _compose_middle_separator(self): - pass +class RowCreator: + def __init__(self, *, fmt, multicolumn, multicolumn_format, multirow): + self.fmt = fmt + self.frame = self.fmt.frame + self.multicolumn = multicolumn + self.multicolumn_format = multicolumn_format + self.multirow = multirow + self.clinebuf: List[List[int]] = [] + self.strcols = self._get_strcols() + self.strrows = list(zip(*self.strcols)) - @abstractmethod - def _compose_env_body(self): - pass + def get_strrow(self, row_num, row): + """Get string representation of the row.""" + is_multicol = row_num < self._clevels and self.fmt.header and self.multicolumn - @abstractmethod - def _compose_bottom_separator(self): - pass + is_multirow = ( + row_num >= self._nlevels + and self.fmt.index + and self.multirow + and self._ilevels > 1 + ) - @abstractmethod - def _compose_env_end(self): - pass + is_cline_maybe_required = is_multirow and row_num < len(self.strrows) - 1 + crow = self._preprocess_row(row) -class LatexFormatter(TableFormatter, LatexFormatterAbstract): - """ - Used to render a DataFrame to a LaTeX tabular/longtable environment output. 
+ if is_multicol: + crow = self._format_multicolumn(crow) + if is_multirow: + crow = self._format_multirow(crow, row_num) - Parameters - ---------- - formatter : `DataFrameFormatter` - column_format : str, default None - The columns format as specified in `LaTeX table format - `__ e.g 'rcl' for 3 columns + lst = [] + lst.append(" & ".join(crow)) + lst.append(" \\\\") + if is_cline_maybe_required: + cline = self._compose_cline(row_num, len(self.strcols)) + lst.append(cline) + return "".join(lst) - See Also - -------- - HTMLFormatter - """ + @property + def _header_row_num(self): + return self._nlevels if self.fmt.header else 0 - def __init__( - self, - formatter: DataFrameFormatter, - column_format: Optional[str] = None, - multicolumn: bool = False, - multicolumn_format: Optional[str] = None, - multirow: bool = False, - caption: Optional[str] = None, - label: Optional[str] = None, - position: Optional[str] = None, - ): - self.fmt = formatter - self.frame = self.fmt.frame - self.column_format = column_format - self.multicolumn = multicolumn - self.multicolumn_format = multicolumn_format - self.multirow = multirow - self.caption = caption - self.label = label - self.position = position + @property + def _ilevels(self): + return self.frame.index.nlevels - self.strcols = self._get_strcols() - self.strrows = list(zip(*self.strcols)) + @property + def _clevels(self): + return self.frame.columns.nlevels - def write_result(self, buf: IO[str]) -> None: - """ - Render a DataFrame to a LaTeX tabular, longtable, or table/tabular - environment output. 
- """ - buf.write(self._compose_string()) + @property + def _nlevels(self): + nlevels = self._clevels + if self.fmt.has_index_names and self.fmt.show_index_names: + nlevels += 1 + return nlevels def _get_strcols(self): """String representation of the columns.""" @@ -147,127 +114,6 @@ def pad_empties(x): strcols = out + strcols[1:] return strcols - @property - def column_format(self): - return self._column_format - - @column_format.setter - def column_format(self, input_column_format): - if input_column_format is None: - self._column_format = ( - self._get_index_format() + self._get_column_format_based_on_dtypes() - ) - elif not isinstance(input_column_format, str): # pragma: no cover - raise AssertionError( - f"column_format must be str or unicode, " - f"not {type(input_column_format)}" - ) - else: - self._column_format = input_column_format - - def _get_column_format_based_on_dtypes(self): - def get_col_type(dtype): - if issubclass(dtype.type, np.number): - return "r" - return "l" - - dtypes = self.frame.dtypes._values - return "".join(map(get_col_type, dtypes)) - - def _get_index_format(self): - if self.fmt.index: - return "l" * self.frame.index.nlevels - return "" - - @property - def position_macro(self): - return f"[{self.position}]" if self.position else "" - - @property - def caption_macro(self): - return f"\\caption{{{self.caption}}}" if self.caption else "" - - @property - def label_macro(self): - return f"\\label{{{self.label}}}" if self.label else "" - - def _compose_header(self): - if self.fmt.header: - return "\n".join( - [self._compose_row(row_num=row_num) for row_num in range(self._nlevels)] - ) - return "" - - def _compose_top_separator(self): - return "\\toprule" - - def _compose_middle_separator(self): - return "\\midrule" if self._is_separator_required() else "" - - def _is_header_present(self): - return self.fmt.header - - def _is_body_present(self): - return len(self.strrows) > self._nlevels - - def _is_separator_required(self): - return 
self._is_header_present() and self._is_body_present() - - @property - def _ilevels(self): - return self.frame.index.nlevels - - @property - def _clevels(self): - return self.frame.columns.nlevels - - @property - def _nlevels(self): - nlevels = self._clevels - if self.fmt.has_index_names and self.fmt.show_index_names: - nlevels += 1 - return nlevels - - def _compose_env_body(self): - self.clinebuf: List[List[int]] = [] - - header_row_num = self._nlevels if self._is_header_present() else 0 - body_list = [ - self._compose_row(num) - for num, row in enumerate(self.strrows) - if num >= header_row_num - ] - return "\n".join(body_list) - - def _compose_row(self, row_num): - row = self.strrows[row_num] - - is_multicol = row_num < self._clevels and self.fmt.header and self.multicolumn - - is_multirow = ( - row_num >= self._nlevels - and self.fmt.index - and self.multirow - and self._ilevels > 1 - ) - - is_cline_maybe_required = is_multirow and row_num < len(self.strrows) - 1 - - crow = self._preprocess_row(row) - - if is_multicol: - crow = self._format_multicolumn(crow) - if is_multirow: - crow = self._format_multirow(crow, row_num) - - lst = [] - lst.append(" & ".join(crow)) - lst.append(" \\\\") - if is_cline_maybe_required: - cline = self._compose_cline(row_num, len(self.strcols)) - lst.append(cline) - return "".join(lst) - def _preprocess_row(self, row): if self.fmt.escape: crow = self._escape_backslashes(row) @@ -382,6 +228,182 @@ def _compose_cline(self, i: int, icol: int) -> str: return "".join(lst) +class RowHeaderIterator(RowCreator): + def __iter__(self): + for row_num, row in enumerate(self.strrows): + if row_num < self._header_row_num: + yield self.get_strrow(row_num, row) + + +class RowBodyIterator(RowCreator): + def __iter__(self): + for row_num, row in enumerate(self.strrows): + if row_num >= self._header_row_num: + yield self.get_strrow(row_num, row) + + +class LatexFormatterAbstract(ABC): + def _compose_string(self) -> str: + elements = [ + 
self._compose_env_begin(), + self._compose_top_separator(), + self._compose_header(), + self._compose_middle_separator(), + self._compose_env_body(), + self._compose_bottom_separator(), + self._compose_env_end(), + ] + result = "\n".join([item for item in elements if item]) + trailing_newline = "\n" + return result + trailing_newline + + @abstractmethod + def _compose_env_begin(self): + pass + + @abstractmethod + def _compose_top_separator(self): + pass + + @abstractmethod + def _compose_header(self): + pass + + @abstractmethod + def _compose_middle_separator(self): + pass + + @abstractmethod + def _compose_env_body(self): + pass + + @abstractmethod + def _compose_bottom_separator(self): + pass + + @abstractmethod + def _compose_env_end(self): + pass + + +class LatexFormatter(TableFormatter, LatexFormatterAbstract): + """ + Used to render a DataFrame to a LaTeX tabular/longtable environment output. + + Parameters + ---------- + formatter : `DataFrameFormatter` + column_format : str, default None + The columns format as specified in `LaTeX table format + `__ e.g 'rcl' for 3 columns + + See Also + -------- + HTMLFormatter + """ + + def __init__( + self, + formatter: DataFrameFormatter, + column_format: Optional[str] = None, + multicolumn: bool = False, + multicolumn_format: Optional[str] = None, + multirow: bool = False, + caption: Optional[str] = None, + label: Optional[str] = None, + position: Optional[str] = None, + ): + self.fmt = formatter + self.frame = self.fmt.frame + self.column_format = column_format + self.multicolumn = multicolumn + self.multicolumn_format = multicolumn_format + self.multirow = multirow + self.caption = caption + self.label = label + self.position = position + + def write_result(self, buf: IO[str]) -> None: + """ + Render a DataFrame to a LaTeX tabular, longtable, or table/tabular + environment output. 
+ """ + buf.write(self._compose_string()) + + @property + def column_format(self): + return self._column_format + + @column_format.setter + def column_format(self, input_column_format): + if input_column_format is None: + self._column_format = ( + self._get_index_format() + self._get_column_format_based_on_dtypes() + ) + elif not isinstance(input_column_format, str): # pragma: no cover + raise AssertionError( + f"column_format must be str or unicode, " + f"not {type(input_column_format)}" + ) + else: + self._column_format = input_column_format + + def _get_column_format_based_on_dtypes(self): + def get_col_type(dtype): + if issubclass(dtype.type, np.number): + return "r" + return "l" + + dtypes = self.frame.dtypes._values + return "".join(map(get_col_type, dtypes)) + + def _get_index_format(self): + if self.fmt.index: + return "l" * self.frame.index.nlevels + return "" + + @property + def position_macro(self): + return f"[{self.position}]" if self.position else "" + + @property + def caption_macro(self): + return f"\\caption{{{self.caption}}}" if self.caption else "" + + @property + def label_macro(self): + return f"\\label{{{self.label}}}" if self.label else "" + + def _create_row_iterator(self, over): + kwargs = dict( + fmt=self.fmt, + multicolumn=self.multicolumn, + multicolumn_format=self.multicolumn_format, + multirow=self.multirow, + ) + if over == "header": + return RowHeaderIterator(**kwargs) + elif over == "body": + return RowBodyIterator(**kwargs) + + def _compose_header(self): + iterator = self._create_row_iterator(over="header") + return "\n".join(list(iterator)) + + def _compose_top_separator(self): + return "\\toprule" + + def _compose_middle_separator(self): + return "\\midrule" if self._is_separator_required() else "" + + def _compose_env_body(self): + iterator = self._create_row_iterator(over="body") + return "\n".join(list(iterator)) + + def _is_separator_required(self): + return self._compose_header() and self._compose_env_body() + + class 
LatexTableFormatter(LatexFormatter): def _compose_env_begin(self): elements = [ @@ -430,11 +452,12 @@ def _caption_and_label(self): return caption_and_label def _compose_middle_separator(self): + iterator = self._create_row_iterator(over="header") elements = [ "\\midrule", "\\endhead", "\\midrule", - f"\\multicolumn{{{len(self.strcols)}}}{{r}}" + f"\\multicolumn{{{len(iterator.strcols)}}}{{r}}" "{{Continued on next page}} \\\\", "\\midrule", "\\endfoot\n", From 83fae7d3993aac43b609f8bc02b37859b6584d48 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 24 Aug 2020 16:06:24 +0700 Subject: [PATCH 04/22] Use property for table components --- pandas/io/formats/latex.py | 79 ++++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 29 deletions(-) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index a3cacda4a74e5..97da95fea3f29 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -245,44 +245,51 @@ def __iter__(self): class LatexFormatterAbstract(ABC): def _compose_string(self) -> str: elements = [ - self._compose_env_begin(), - self._compose_top_separator(), - self._compose_header(), - self._compose_middle_separator(), - self._compose_env_body(), - self._compose_bottom_separator(), - self._compose_env_end(), + self.env_begin, + self.top_separator, + self.header, + self.middle_separator, + self.env_body, + self.bottom_separator, + self.env_end, ] result = "\n".join([item for item in elements if item]) trailing_newline = "\n" return result + trailing_newline + @property @abstractmethod - def _compose_env_begin(self): + def env_begin(self): pass + @property @abstractmethod - def _compose_top_separator(self): + def top_separator(self): pass + @property @abstractmethod - def _compose_header(self): + def header(self): pass + @property @abstractmethod - def _compose_middle_separator(self): + def middle_separator(self): pass + @property @abstractmethod - def _compose_env_body(self): + def env_body(self): pass + 
@property @abstractmethod - def _compose_bottom_separator(self): + def bottom_separator(self): pass + @property @abstractmethod - def _compose_env_end(self): + def env_end(self): pass @@ -386,26 +393,31 @@ def _create_row_iterator(self, over): elif over == "body": return RowBodyIterator(**kwargs) - def _compose_header(self): + @property + def header(self): iterator = self._create_row_iterator(over="header") return "\n".join(list(iterator)) - def _compose_top_separator(self): + @property + def top_separator(self): return "\\toprule" - def _compose_middle_separator(self): + @property + def middle_separator(self): return "\\midrule" if self._is_separator_required() else "" - def _compose_env_body(self): + @property + def env_body(self): iterator = self._create_row_iterator(over="body") return "\n".join(list(iterator)) def _is_separator_required(self): - return self._compose_header() and self._compose_env_body() + return self.header and self.env_body class LatexTableFormatter(LatexFormatter): - def _compose_env_begin(self): + @property + def env_begin(self): elements = [ f"\\begin{{table}}{self.position_macro}", f"\\centering", @@ -415,26 +427,32 @@ def _compose_env_begin(self): ] return "\n".join([item for item in elements if item]) - def _compose_bottom_separator(self): + @property + def bottom_separator(self): return "\\bottomrule" - def _compose_env_end(self): + @property + def env_end(self): return "\n".join(["\\end{tabular}", "\\end{table}"]) class LatexTabularFormatter(LatexFormatter): - def _compose_env_begin(self): + @property + def env_begin(self): return f"\\begin{{tabular}}{{{self.column_format}}}" - def _compose_bottom_separator(self): + @property + def bottom_separator(self): return "\\bottomrule" - def _compose_env_end(self): + @property + def env_end(self): return "\\end{tabular}" class LatexLongTableFormatter(LatexFormatter): - def _compose_env_begin(self): + @property + def env_begin(self): first_row = ( f"\\begin{{longtable}}{self.position_macro}" 
f"{{{self.column_format}}}" ) @@ -451,7 +469,8 @@ def _caption_and_label(self): caption_and_label += double_backslash return caption_and_label - def _compose_middle_separator(self): + @property + def middle_separator(self): iterator = self._create_row_iterator(over="header") elements = [ "\\midrule", @@ -468,8 +487,10 @@ def _compose_middle_separator(self): return "\n".join(elements) return "" - def _compose_bottom_separator(self): + @property + def bottom_separator(self): return "" - def _compose_env_end(self): + @property + def env_end(self): return "\\end{longtable}" From 286f600eba290e35d946ba0cf21da460c48ce3f7 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 24 Aug 2020 21:06:30 +0700 Subject: [PATCH 05/22] Enable builder pattern for various types of tables This reverts ``pandas/io/formats/format.py`` to its nearly original state and enables polymorphism under the hood inside ``pandas/io/formats/latex.py``. Add ``pandas/tests/io/formats/test_latex.py`` to test lower-level functions/classes declared in ``pandas/io/formats/latex.py`` --- pandas/io/formats/format.py | 23 +- pandas/io/formats/latex.py | 298 +++++++++++++++----------- pandas/tests/io/formats/test_latex.py | 94 ++++++++ 3 files changed, 272 insertions(+), 143 deletions(-) create mode 100644 pandas/tests/io/formats/test_latex.py diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 7b200869fcfb4..1a1b126c32b48 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -936,7 +936,10 @@ def to_latex( """ Render a DataFrame to a LaTeX tabular/longtable environment output. 
""" - latex_formatter = self._create_latex_formatter( + from pandas.io.formats.latex import LatexFormatter + + latex_formatter = LatexFormatter( + self, longtable=longtable, column_format=column_format, multicolumn=multicolumn, @@ -948,24 +951,6 @@ def to_latex( ) return latex_formatter.get_result(buf=buf, encoding=encoding) - def _create_latex_formatter(self, **kwargs): - """Create concrete instance of LatexFormatter.""" - from pandas.io.formats.latex import ( - LatexLongTableFormatter, - LatexTableFormatter, - LatexTabularFormatter, - ) - - is_longtable = kwargs.pop("longtable") - is_table = any( - [kwargs.get("caption"), kwargs.get("label"), kwargs.get("position")] - ) - if is_longtable: - return LatexLongTableFormatter(self, **kwargs) - if is_table: - return LatexTableFormatter(self, **kwargs) - return LatexTabularFormatter(self, **kwargs) - def _format_col(self, i: int) -> List[str]: frame = self.tr_frame formatter = self._get_formatter(i) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 97da95fea3f29..38b36dda95783 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -11,9 +11,11 @@ from pandas.io.formats.format import DataFrameFormatter, TableFormatter -class RowCreator: - def __init__(self, *, fmt, multicolumn, multicolumn_format, multirow): - self.fmt = fmt +class RowStringConverter: + def __init__( + self, formatter, multicolumn=False, multicolumn_format=None, multirow=False, + ): + self.fmt = formatter self.frame = self.fmt.frame self.multicolumn = multicolumn self.multicolumn_format = multicolumn_format @@ -22,8 +24,10 @@ def __init__(self, *, fmt, multicolumn, multicolumn_format, multirow): self.strcols = self._get_strcols() self.strrows = list(zip(*self.strcols)) - def get_strrow(self, row_num, row): + def get_strrow(self, row_num): """Get string representation of the row.""" + row = self.strrows[row_num] + is_multicol = row_num < self._clevels and self.fmt.header and self.multicolumn is_multirow = ( @@ 
-116,7 +120,7 @@ def pad_empties(x): def _preprocess_row(self, row): if self.fmt.escape: - crow = self._escape_backslashes(row) + crow = self._escape_symbols(row) else: crow = [x if x else "{}" for x in row] if self.fmt.bold_rows and self.fmt.index: @@ -124,7 +128,7 @@ def _preprocess_row(self, row): return crow @staticmethod - def _escape_backslashes(row): + def _escape_symbols(row): return [ ( x.replace("\\", "\\textbackslash ") @@ -228,22 +232,23 @@ def _compose_cline(self, i: int, icol: int) -> str: return "".join(lst) -class RowHeaderIterator(RowCreator): +class RowHeaderIterator(RowStringConverter): def __iter__(self): - for row_num, row in enumerate(self.strrows): + for row_num in range(len(self.strrows)): if row_num < self._header_row_num: - yield self.get_strrow(row_num, row) + yield self.get_strrow(row_num) -class RowBodyIterator(RowCreator): +class RowBodyIterator(RowStringConverter): def __iter__(self): - for row_num, row in enumerate(self.strrows): + for row_num in range(len(self.strrows)): if row_num >= self._header_row_num: - yield self.get_strrow(row_num, row) + yield self.get_strrow(row_num) -class LatexFormatterAbstract(ABC): - def _compose_string(self) -> str: +class TableBuilderAbstract(ABC): + @property + def product(self) -> str: elements = [ self.env_begin, self.top_separator, @@ -255,7 +260,8 @@ def _compose_string(self) -> str: ] result = "\n".join([item for item in elements if item]) trailing_newline = "\n" - return result + trailing_newline + result += trailing_newline + return result @property @abstractmethod @@ -293,22 +299,7 @@ def env_end(self): pass -class LatexFormatter(TableFormatter, LatexFormatterAbstract): - """ - Used to render a DataFrame to a LaTeX tabular/longtable environment output. 
- - Parameters - ---------- - formatter : `DataFrameFormatter` - column_format : str, default None - The columns format as specified in `LaTeX table format - `__ e.g 'rcl' for 3 columns - - See Also - -------- - HTMLFormatter - """ - +class TableBuilder(TableBuilderAbstract): def __init__( self, formatter: DataFrameFormatter, @@ -321,7 +312,6 @@ def __init__( position: Optional[str] = None, ): self.fmt = formatter - self.frame = self.fmt.frame self.column_format = column_format self.multicolumn = multicolumn self.multicolumn_format = multicolumn_format @@ -330,60 +320,42 @@ def __init__( self.label = label self.position = position - def write_result(self, buf: IO[str]) -> None: - """ - Render a DataFrame to a LaTeX tabular, longtable, or table/tabular - environment output. - """ - buf.write(self._compose_string()) - @property - def column_format(self): - return self._column_format + def header(self): + iterator = self._create_row_iterator(over="header") + return "\n".join(list(iterator)) - @column_format.setter - def column_format(self, input_column_format): - if input_column_format is None: - self._column_format = ( - self._get_index_format() + self._get_column_format_based_on_dtypes() - ) - elif not isinstance(input_column_format, str): # pragma: no cover - raise AssertionError( - f"column_format must be str or unicode, " - f"not {type(input_column_format)}" - ) - else: - self._column_format = input_column_format + @property + def top_separator(self): + return "\\toprule" - def _get_column_format_based_on_dtypes(self): - def get_col_type(dtype): - if issubclass(dtype.type, np.number): - return "r" - return "l" + @property + def middle_separator(self): + return "\\midrule" if self._is_separator_required() else "" - dtypes = self.frame.dtypes._values - return "".join(map(get_col_type, dtypes)) + @property + def env_body(self): + iterator = self._create_row_iterator(over="body") + return "\n".join(list(iterator)) - def _get_index_format(self): - if self.fmt.index: - 
return "l" * self.frame.index.nlevels - return "" + def _is_separator_required(self): + return self.header and self.env_body @property - def position_macro(self): + def _position_macro(self): return f"[{self.position}]" if self.position else "" @property - def caption_macro(self): + def _caption_macro(self): return f"\\caption{{{self.caption}}}" if self.caption else "" @property - def label_macro(self): + def _label_macro(self): return f"\\label{{{self.label}}}" if self.label else "" def _create_row_iterator(self, over): kwargs = dict( - fmt=self.fmt, + formatter=self.fmt, multicolumn=self.multicolumn, multicolumn_format=self.multicolumn_format, multirow=self.multirow, @@ -392,37 +364,65 @@ def _create_row_iterator(self, over): return RowHeaderIterator(**kwargs) elif over == "body": return RowBodyIterator(**kwargs) + else: + msg = f"'over' must be either 'header' or 'body', but {over} was provided" + raise ValueError(msg) - @property - def header(self): - iterator = self._create_row_iterator(over="header") - return "\n".join(list(iterator)) +class LongTableBuilder(TableBuilder): @property - def top_separator(self): - return "\\toprule" + def env_begin(self): + first_row = ( + f"\\begin{{longtable}}{self._position_macro}" f"{{{self.column_format}}}" + ) + elements = [first_row, f"{self._caption_and_label()}"] + return "\n".join([item for item in elements if item]) + + def _caption_and_label(self): + if not self.caption and not self.label: + return "" + elif self.caption or self.label: + double_backslash = "\\\\" + elements = [f"{self._caption_macro}", f"{self._label_macro}"] + caption_and_label = "\n".join([item for item in elements if item]) + caption_and_label += double_backslash + return caption_and_label @property def middle_separator(self): - return "\\midrule" if self._is_separator_required() else "" + iterator = self._create_row_iterator(over="header") + elements = [ + "\\midrule", + "\\endhead", + "\\midrule", + 
f"\\multicolumn{{{len(iterator.strcols)}}}{{r}}" + "{{Continued on next page}} \\\\", + "\\midrule", + "\\endfoot\n", + "\\bottomrule", + "\\endlastfoot", + ] + if self._is_separator_required(): + return "\n".join(elements) + return "" @property - def env_body(self): - iterator = self._create_row_iterator(over="body") - return "\n".join(list(iterator)) + def bottom_separator(self): + return "" - def _is_separator_required(self): - return self.header and self.env_body + @property + def env_end(self): + return "\\end{longtable}" -class LatexTableFormatter(LatexFormatter): +class RegularTableBuilder(TableBuilder): @property def env_begin(self): elements = [ - f"\\begin{{table}}{self.position_macro}", + f"\\begin{{table}}{self._position_macro}", f"\\centering", - f"{self.caption_macro}", - f"{self.label_macro}", + f"{self._caption_macro}", + f"{self._label_macro}", f"\\begin{{tabular}}{{{self.column_format}}}", ] return "\n".join([item for item in elements if item]) @@ -436,7 +436,7 @@ def env_end(self): return "\n".join(["\\end{tabular}", "\\end{table}"]) -class LatexTabularFormatter(LatexFormatter): +class TabularBuilder(TableBuilder): @property def env_begin(self): return f"\\begin{{tabular}}{{{self.column_format}}}" @@ -450,47 +450,97 @@ def env_end(self): return "\\end{tabular}" -class LatexLongTableFormatter(LatexFormatter): - @property - def env_begin(self): - first_row = ( - f"\\begin{{longtable}}{self.position_macro}" f"{{{self.column_format}}}" - ) - elements = [first_row, f"{self._caption_and_label()}"] - return "\n".join([item for item in elements if item]) +class LatexFormatter(TableFormatter): + """ + Used to render a DataFrame to a LaTeX tabular/longtable environment output. 
- def _caption_and_label(self): - if not self.caption and not self.label: - return "" - elif self.caption or self.label: - double_backslash = "\\\\" - elements = [f"{self.caption_macro}", f"{self.label_macro}"] - caption_and_label = "\n".join([item for item in elements if item]) - caption_and_label += double_backslash - return caption_and_label + Parameters + ---------- + formatter : `DataFrameFormatter` + column_format : str, default None + The columns format as specified in `LaTeX table format + `__ e.g 'rcl' for 3 columns - @property - def middle_separator(self): - iterator = self._create_row_iterator(over="header") - elements = [ - "\\midrule", - "\\endhead", - "\\midrule", - f"\\multicolumn{{{len(iterator.strcols)}}}{{r}}" - "{{Continued on next page}} \\\\", - "\\midrule", - "\\endfoot\n", - "\\bottomrule", - "\\endlastfoot", - ] - if self._is_separator_required(): - return "\n".join(elements) - return "" + See Also + -------- + HTMLFormatter + """ + + def __init__( + self, + formatter: DataFrameFormatter, + longtable: bool = False, + column_format: Optional[str] = None, + multicolumn: bool = False, + multicolumn_format: Optional[str] = None, + multirow: bool = False, + caption: Optional[str] = None, + label: Optional[str] = None, + position: Optional[str] = None, + ): + self.fmt = formatter + self.frame = self.fmt.frame + self.longtable = longtable + self.column_format = column_format + self.multicolumn = multicolumn + self.multicolumn_format = multicolumn_format + self.multirow = multirow + self.caption = caption + self.label = label + self.position = position + + def write_result(self, buf: IO[str]) -> None: + """ + Render a DataFrame to a LaTeX tabular, longtable, or table/tabular + environment output. 
+ """ + table_string = self.builder.product + buf.write(table_string) @property - def bottom_separator(self): - return "" + def builder(self): + kwargs = dict( + formatter=self.fmt, + column_format=self.column_format, + multicolumn=self.multicolumn, + multicolumn_format=self.multicolumn_format, + multirow=self.multirow, + caption=self.caption, + label=self.label, + position=self.position, + ) + if self.longtable: + return LongTableBuilder(**kwargs) + if any([self.caption, self.label, self.position]): + return RegularTableBuilder(**kwargs) + return TabularBuilder(**kwargs) @property - def env_end(self): - return "\\end{longtable}" + def column_format(self): + return self._column_format + + @column_format.setter + def column_format(self, input_column_format): + if input_column_format is None: + self._column_format = ( + self._get_index_format() + self._get_column_format_based_on_dtypes() + ) + elif not isinstance(input_column_format, str): # pragma: no cover + raise AssertionError( + f"column_format must be str or unicode, " + f"not {type(input_column_format)}" + ) + else: + self._column_format = input_column_format + + def _get_column_format_based_on_dtypes(self): + def get_col_type(dtype): + if issubclass(dtype.type, np.number): + return "r" + return "l" + + dtypes = self.frame.dtypes._values + return "".join(map(get_col_type, dtypes)) + + def _get_index_format(self): + return "l" * self.frame.index.nlevels if self.fmt.index else "" diff --git a/pandas/tests/io/formats/test_latex.py b/pandas/tests/io/formats/test_latex.py new file mode 100644 index 0000000000000..7b629562ff1be --- /dev/null +++ b/pandas/tests/io/formats/test_latex.py @@ -0,0 +1,94 @@ +import pytest + +from pandas import DataFrame +from pandas.io.formats.format import DataFrameFormatter +from pandas.io.formats.latex import ( + RegularTableBuilder, + RowHeaderIterator, + RowBodyIterator, + RowStringConverter, +) + + +class TestTableBuilder: + @pytest.fixture + def dataframe(self): + return 
DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + + @pytest.fixture + def table_builder(self, dataframe): + return RegularTableBuilder(formatter=DataFrameFormatter(dataframe)) + + def test_create_row_iterator(self, table_builder): + iterator = table_builder._create_row_iterator(over="header") + assert isinstance(iterator, RowHeaderIterator) + + def test_create_body_iterator(self, table_builder): + iterator = table_builder._create_row_iterator(over="body") + assert isinstance(iterator, RowBodyIterator) + + def test_create_body_wrong_kwarg_raises(self, table_builder): + with pytest.raises(ValueError, match="must be either 'header' or 'body'"): + table_builder._create_row_iterator(over="SOMETHING BAD") + + +class TestRowStringConverter: + @pytest.mark.parametrize( + "row_num, expected", + [ + (0, r"{} & Design & ratio & xy \\"), + (1, r"0 & 1 & 4 & 10 \\"), + (2, r"1 & 2 & 5 & 11 \\"), + ], + ) + def test_get_strrow_normal_without_escape(self, row_num, expected): + df = DataFrame({r"Design": [1, 2, 3], r"ratio": [4, 5, 6], r"xy": [10, 11, 12]}) + row_string_converter = RowStringConverter( + formatter=DataFrameFormatter(df, escape=True), + ) + assert row_string_converter.get_strrow(row_num=row_num) == expected + + @pytest.mark.parametrize( + "row_num, expected", + [ + (0, r"{} & Design \# & ratio, \% & x\&y \\"), + (1, r"0 & 1 & 4 & 10 \\"), + (2, r"1 & 2 & 5 & 11 \\"), + ], + ) + def test_get_strrow_normal_with_escape(self, row_num, expected): + df = DataFrame( + {r"Design #": [1, 2, 3], r"ratio, %": [4, 5, 6], r"x&y": [10, 11, 12]} + ) + row_string_converter = RowStringConverter( + formatter=DataFrameFormatter(df, escape=True), + ) + assert row_string_converter.get_strrow(row_num=row_num) == expected + + @pytest.mark.parametrize( + "row_num, expected", + [ + (0, r"{} & \multicolumn{2}{r}{c1} & \multicolumn{2}{r}{c2} & c3 \\"), + (1, r"{} & 0 & 1 & 0 & 1 & 0 \\"), + (2, r"0 & 0 & 5 & 0 & 5 & 0 \\"), + ], + ) + def test_get_strrow_multindex_multicolumn(self, row_num, 
expected): + df = DataFrame( + { + ("c1", 0): {x: x for x in range(5)}, + ("c1", 1): {x: x + 5 for x in range(5)}, + ("c2", 0): {x: x for x in range(5)}, + ("c2", 1): {x: x + 5 for x in range(5)}, + ("c3", 0): {x: x for x in range(5)}, + } + ) + + row_string_converter = RowStringConverter( + formatter=DataFrameFormatter(df), + multicolumn=True, + multicolumn_format="r", + multirow=True, + ) + + assert row_string_converter.get_strrow(row_num=row_num) == expected From f60159818bb99b87e9c0d34652d8ae71822614de Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 24 Aug 2020 15:04:59 +0000 Subject: [PATCH 06/22] Fix linting --- pandas/io/formats/latex.py | 4 ++-- pandas/tests/io/formats/test_latex.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 38b36dda95783..dd3be20fc8e9b 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -373,7 +373,7 @@ class LongTableBuilder(TableBuilder): @property def env_begin(self): first_row = ( - f"\\begin{{longtable}}{self._position_macro}" f"{{{self.column_format}}}" + f"\\begin{{longtable}}{self._position_macro}{{{self.column_format}}}" ) elements = [first_row, f"{self._caption_and_label()}"] return "\n".join([item for item in elements if item]) @@ -420,7 +420,7 @@ class RegularTableBuilder(TableBuilder): def env_begin(self): elements = [ f"\\begin{{table}}{self._position_macro}", - f"\\centering", + "\\centering", f"{self._caption_macro}", f"{self._label_macro}", f"\\begin{{tabular}}{{{self.column_format}}}", diff --git a/pandas/tests/io/formats/test_latex.py b/pandas/tests/io/formats/test_latex.py index 7b629562ff1be..7ada6e229cbb8 100644 --- a/pandas/tests/io/formats/test_latex.py +++ b/pandas/tests/io/formats/test_latex.py @@ -1,11 +1,12 @@ import pytest from pandas import DataFrame + from pandas.io.formats.format import DataFrameFormatter from pandas.io.formats.latex import ( RegularTableBuilder, - RowHeaderIterator, 
RowBodyIterator, + RowHeaderIterator, RowStringConverter, ) From 3d5d658253276a48e183303de0e474d9a522f539 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 24 Aug 2020 15:25:27 +0000 Subject: [PATCH 07/22] REF: simplify logic in _caption_and_label --- pandas/io/formats/latex.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index dd3be20fc8e9b..9f3e55c61538c 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -379,14 +379,14 @@ def env_begin(self): return "\n".join([item for item in elements if item]) def _caption_and_label(self): - if not self.caption and not self.label: - return "" - elif self.caption or self.label: + if self.caption or self.label: double_backslash = "\\\\" elements = [f"{self._caption_macro}", f"{self._label_macro}"] caption_and_label = "\n".join([item for item in elements if item]) caption_and_label += double_backslash return caption_and_label + else: + return "" @property def middle_separator(self): From 7e18adc2a5b00556c9ad29051080a044366d9728 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Tue, 25 Aug 2020 22:10:09 +0000 Subject: [PATCH 08/22] Add docstrings --- pandas/io/formats/latex.py | 113 ++++++++++++++++++++++++++++++++++--- 1 file changed, 105 insertions(+), 8 deletions(-) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 9f3e55c61538c..1d1340d570b3a 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -12,6 +12,21 @@ class RowStringConverter: + r"""Converter for dataframe rows into LaTeX strings. + + Parameters + ---------- + formatter : `DataFrameFormatter` + Instance of `DataFrameFormatter`. + multicolumn: bool, optional + Whether to use \multicolumn macro. + multicolumn_format: str, optional + Multicolumn format. + multirow: bool, optional + Whether to use \multirow macro. 
+ + """ + def __init__( self, formatter, multicolumn=False, multicolumn_format=None, multirow=False, ): @@ -56,10 +71,12 @@ def get_strrow(self, row_num): @property def _header_row_num(self): + """Number of rows in header.""" return self._nlevels if self.fmt.header else 0 @property def _ilevels(self): + """Integer number of levels in index.""" return self.frame.index.nlevels @property @@ -119,6 +136,7 @@ def pad_empties(x): return strcols def _preprocess_row(self, row): + """Preprocess elements of the row.""" if self.fmt.escape: crow = self._escape_symbols(row) else: @@ -129,6 +147,18 @@ def _preprocess_row(self, row): @staticmethod def _escape_symbols(row): + """Carry out string replacements for special symbols. + + Parameters + ---------- + row : list + List of string, that may contain special symbols. + + Returns + ------- + list + list of strings with the special symbols replaced. + """ return [ ( x.replace("\\", "\\textbackslash ") @@ -149,6 +179,7 @@ def _escape_symbols(row): @staticmethod def _convert_to_bold(crow, ilevels): + """Convert elements in ``crow`` to bold.""" return [ f"\\textbf{{{x}}}" if j < ilevels and x.strip() not in ["", "{}"] else x for j, x in enumerate(crow) @@ -221,7 +252,7 @@ def _format_multirow(self, row: List[str], i: int) -> List[str]: def _compose_cline(self, i: int, icol: int) -> str: """ - Print clines after multirow-blocks are finished. + Create clines after multirow-blocks are finished. 
""" lst = [] for cl in self.clinebuf: @@ -233,6 +264,8 @@ def _compose_cline(self, i: int, icol: int) -> str: class RowHeaderIterator(RowStringConverter): + """Iterator for the table header rows.""" + def __iter__(self): for row_num in range(len(self.strrows)): if row_num < self._header_row_num: @@ -240,6 +273,8 @@ def __iter__(self): class RowBodyIterator(RowStringConverter): + """Iterator for the table body rows.""" + def __iter__(self): for row_num in range(len(self.strrows)): if row_num >= self._header_row_num: @@ -247,8 +282,13 @@ def __iter__(self): class TableBuilderAbstract(ABC): + """ + Abstract table builder producing string representation of LaTeX table. + """ + @property def product(self) -> str: + """String representation of LaTeX table.""" elements = [ self.env_begin, self.top_separator, @@ -266,40 +306,62 @@ def product(self) -> str: @property @abstractmethod def env_begin(self): - pass + """Beginning of the environment.""" @property @abstractmethod def top_separator(self): - pass + """Top level separator.""" @property @abstractmethod def header(self): - pass + """Header lines.""" @property @abstractmethod def middle_separator(self): - pass + """Middle level separator.""" @property @abstractmethod def env_body(self): - pass + """Environment body.""" @property @abstractmethod def bottom_separator(self): - pass + """Bottom level separator.""" @property @abstractmethod def env_end(self): - pass + """End of the environment.""" class TableBuilder(TableBuilderAbstract): + """Table builder producing string representation of LaTeX table. + + Parameters + ---------- + formatter : `DataFrameFormatter` + Instance of `DataFrameFormatter`. + column_format: str, optional + Column format, for example, 'rcl' for three columns. + multicolumn: bool, optional + Use multicolumn to enhance MultiIndex columns. + multicolumn_format: str, optional + The alignment for multicolumns, similar to column_format. 
+ multirow: bool, optional + Use multirow to enhance MultiIndex rows. + caption: str, optional + Table caption. + label: str, optional + LaTeX label. + position: str, optional + Float placement specifier, for example, 'htb'. + """ + def __init__( self, formatter: DataFrameFormatter, @@ -343,17 +405,32 @@ def _is_separator_required(self): @property def _position_macro(self): + r"""Position macro, extracted from self.position, like [h].""" return f"[{self.position}]" if self.position else "" @property def _caption_macro(self): + r"""Caption macro, extracted from self.caption, like \caption{cap}.""" return f"\\caption{{{self.caption}}}" if self.caption else "" @property def _label_macro(self): + r"""Label macro, extracted from self.label, like \label{ref}.""" return f"\\label{{{self.label}}}" if self.label else "" def _create_row_iterator(self, over): + """Create iterator over header or body of the table. + + Parameters + ---------- + over : {'body', 'header'} + Over what to iterate. + + Returns + ------- + RowStringConverter + Iterator over body or header. + """ kwargs = dict( formatter=self.fmt, multicolumn=self.multicolumn, @@ -370,6 +447,8 @@ def _create_row_iterator(self, over): class LongTableBuilder(TableBuilder): + """Concrete table builder for longtable.""" + @property def env_begin(self): first_row = ( @@ -416,6 +495,8 @@ def env_end(self): class RegularTableBuilder(TableBuilder): + """Concrete table builder for regular table.""" + @property def env_begin(self): elements = [ @@ -437,6 +518,8 @@ def env_end(self): class TabularBuilder(TableBuilder): + """Concrete table builder for tabular environment.""" + @property def env_begin(self): return f"\\begin{{tabular}}{{{self.column_format}}}" @@ -499,6 +582,12 @@ def write_result(self, buf: IO[str]) -> None: @property def builder(self): + """Concrete table builder. 
+ + Returns + ------- + TableBuilder + """ kwargs = dict( formatter=self.fmt, column_format=self.column_format, @@ -517,10 +606,12 @@ def builder(self): @property def column_format(self): + """Column format.""" return self._column_format @column_format.setter def column_format(self, input_column_format): + """Setter for column format.""" if input_column_format is None: self._column_format = ( self._get_index_format() + self._get_column_format_based_on_dtypes() @@ -534,6 +625,11 @@ def column_format(self, input_column_format): self._column_format = input_column_format def _get_column_format_based_on_dtypes(self): + """Get column format based on data type. + + Right alignment for numbers and left - for strings. + """ + def get_col_type(dtype): if issubclass(dtype.type, np.number): return "r" @@ -543,4 +639,5 @@ def get_col_type(dtype): return "".join(map(get_col_type, dtypes)) def _get_index_format(self): + """Get index column format.""" return "l" * self.frame.index.nlevels if self.fmt.index else "" From d200c708320c4fe5244549959e3acd724f704616 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Wed, 26 Aug 2020 04:40:10 +0000 Subject: [PATCH 09/22] Add type hints to RowStringConverter --- pandas/io/formats/latex.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 1d1340d570b3a..bea6b3fa8f209 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -28,7 +28,11 @@ class RowStringConverter: """ def __init__( - self, formatter, multicolumn=False, multicolumn_format=None, multirow=False, + self, + formatter: DataFrameFormatter, + multicolumn: bool = False, + multicolumn_format: Optional[str] = None, + multirow: bool = False, ): self.fmt = formatter self.frame = self.fmt.frame @@ -39,7 +43,7 @@ def __init__( self.strcols = self._get_strcols() self.strrows = list(zip(*self.strcols)) - def get_strrow(self, row_num): + def get_strrow(self, row_num: int) -> str: """Get 
string representation of the row.""" row = self.strrows[row_num] From 47416cff1cef31eafd43f2f9514dae82fdb934e5 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Wed, 26 Aug 2020 04:48:05 +0000 Subject: [PATCH 10/22] Move tests from test_latex to test_to_latex --- pandas/tests/io/formats/test_latex.py | 95 ------------------------ pandas/tests/io/formats/test_to_latex.py | 92 +++++++++++++++++++++++ 2 files changed, 92 insertions(+), 95 deletions(-) delete mode 100644 pandas/tests/io/formats/test_latex.py diff --git a/pandas/tests/io/formats/test_latex.py b/pandas/tests/io/formats/test_latex.py deleted file mode 100644 index 7ada6e229cbb8..0000000000000 --- a/pandas/tests/io/formats/test_latex.py +++ /dev/null @@ -1,95 +0,0 @@ -import pytest - -from pandas import DataFrame - -from pandas.io.formats.format import DataFrameFormatter -from pandas.io.formats.latex import ( - RegularTableBuilder, - RowBodyIterator, - RowHeaderIterator, - RowStringConverter, -) - - -class TestTableBuilder: - @pytest.fixture - def dataframe(self): - return DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) - - @pytest.fixture - def table_builder(self, dataframe): - return RegularTableBuilder(formatter=DataFrameFormatter(dataframe)) - - def test_create_row_iterator(self, table_builder): - iterator = table_builder._create_row_iterator(over="header") - assert isinstance(iterator, RowHeaderIterator) - - def test_create_body_iterator(self, table_builder): - iterator = table_builder._create_row_iterator(over="body") - assert isinstance(iterator, RowBodyIterator) - - def test_create_body_wrong_kwarg_raises(self, table_builder): - with pytest.raises(ValueError, match="must be either 'header' or 'body'"): - table_builder._create_row_iterator(over="SOMETHING BAD") - - -class TestRowStringConverter: - @pytest.mark.parametrize( - "row_num, expected", - [ - (0, r"{} & Design & ratio & xy \\"), - (1, r"0 & 1 & 4 & 10 \\"), - (2, r"1 & 2 & 5 & 11 \\"), - ], - ) - def test_get_strrow_normal_without_escape(self, 
row_num, expected): - df = DataFrame({r"Design": [1, 2, 3], r"ratio": [4, 5, 6], r"xy": [10, 11, 12]}) - row_string_converter = RowStringConverter( - formatter=DataFrameFormatter(df, escape=True), - ) - assert row_string_converter.get_strrow(row_num=row_num) == expected - - @pytest.mark.parametrize( - "row_num, expected", - [ - (0, r"{} & Design \# & ratio, \% & x\&y \\"), - (1, r"0 & 1 & 4 & 10 \\"), - (2, r"1 & 2 & 5 & 11 \\"), - ], - ) - def test_get_strrow_normal_with_escape(self, row_num, expected): - df = DataFrame( - {r"Design #": [1, 2, 3], r"ratio, %": [4, 5, 6], r"x&y": [10, 11, 12]} - ) - row_string_converter = RowStringConverter( - formatter=DataFrameFormatter(df, escape=True), - ) - assert row_string_converter.get_strrow(row_num=row_num) == expected - - @pytest.mark.parametrize( - "row_num, expected", - [ - (0, r"{} & \multicolumn{2}{r}{c1} & \multicolumn{2}{r}{c2} & c3 \\"), - (1, r"{} & 0 & 1 & 0 & 1 & 0 \\"), - (2, r"0 & 0 & 5 & 0 & 5 & 0 \\"), - ], - ) - def test_get_strrow_multindex_multicolumn(self, row_num, expected): - df = DataFrame( - { - ("c1", 0): {x: x for x in range(5)}, - ("c1", 1): {x: x + 5 for x in range(5)}, - ("c2", 0): {x: x for x in range(5)}, - ("c2", 1): {x: x + 5 for x in range(5)}, - ("c3", 0): {x: x for x in range(5)}, - } - ) - - row_string_converter = RowStringConverter( - formatter=DataFrameFormatter(df), - multicolumn=True, - multicolumn_format="r", - multirow=True, - ) - - assert row_string_converter.get_strrow(row_num=row_num) == expected diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 96a9ed2b86cf4..85c7750dbcadc 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -7,6 +7,14 @@ from pandas import DataFrame, Series import pandas._testing as tm +from pandas.io.formats.format import DataFrameFormatter +from pandas.io.formats.latex import ( + RegularTableBuilder, + RowBodyIterator, + RowHeaderIterator, + 
RowStringConverter, +) + class TestToLatex: def test_to_latex_filename(self, float_frame): @@ -930,3 +938,87 @@ def test_to_latex_multindex_header(self): \end{tabular} """ assert observed == expected + + +class TestTableBuilder: + @pytest.fixture + def dataframe(self): + return DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + + @pytest.fixture + def table_builder(self, dataframe): + return RegularTableBuilder(formatter=DataFrameFormatter(dataframe)) + + def test_create_row_iterator(self, table_builder): + iterator = table_builder._create_row_iterator(over="header") + assert isinstance(iterator, RowHeaderIterator) + + def test_create_body_iterator(self, table_builder): + iterator = table_builder._create_row_iterator(over="body") + assert isinstance(iterator, RowBodyIterator) + + def test_create_body_wrong_kwarg_raises(self, table_builder): + with pytest.raises(ValueError, match="must be either 'header' or 'body'"): + table_builder._create_row_iterator(over="SOMETHING BAD") + + +class TestRowStringConverter: + @pytest.mark.parametrize( + "row_num, expected", + [ + (0, r"{} & Design & ratio & xy \\"), + (1, r"0 & 1 & 4 & 10 \\"), + (2, r"1 & 2 & 5 & 11 \\"), + ], + ) + def test_get_strrow_normal_without_escape(self, row_num, expected): + df = DataFrame({r"Design": [1, 2, 3], r"ratio": [4, 5, 6], r"xy": [10, 11, 12]}) + row_string_converter = RowStringConverter( + formatter=DataFrameFormatter(df, escape=True), + ) + assert row_string_converter.get_strrow(row_num=row_num) == expected + + @pytest.mark.parametrize( + "row_num, expected", + [ + (0, r"{} & Design \# & ratio, \% & x\&y \\"), + (1, r"0 & 1 & 4 & 10 \\"), + (2, r"1 & 2 & 5 & 11 \\"), + ], + ) + def test_get_strrow_normal_with_escape(self, row_num, expected): + df = DataFrame( + {r"Design #": [1, 2, 3], r"ratio, %": [4, 5, 6], r"x&y": [10, 11, 12]} + ) + row_string_converter = RowStringConverter( + formatter=DataFrameFormatter(df, escape=True), + ) + assert row_string_converter.get_strrow(row_num=row_num) == 
expected + + @pytest.mark.parametrize( + "row_num, expected", + [ + (0, r"{} & \multicolumn{2}{r}{c1} & \multicolumn{2}{r}{c2} & c3 \\"), + (1, r"{} & 0 & 1 & 0 & 1 & 0 \\"), + (2, r"0 & 0 & 5 & 0 & 5 & 0 \\"), + ], + ) + def test_get_strrow_multindex_multicolumn(self, row_num, expected): + df = DataFrame( + { + ("c1", 0): {x: x for x in range(5)}, + ("c1", 1): {x: x + 5 for x in range(5)}, + ("c2", 0): {x: x for x in range(5)}, + ("c2", 1): {x: x + 5 for x in range(5)}, + ("c3", 0): {x: x for x in range(5)}, + } + ) + + row_string_converter = RowStringConverter( + formatter=DataFrameFormatter(df), + multicolumn=True, + multicolumn_format="r", + multirow=True, + ) + + assert row_string_converter.get_strrow(row_num=row_num) == expected From 2aa05cc0afc9b8448aaf0491267b3a69b74d3f45 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 27 Aug 2020 12:11:27 +0700 Subject: [PATCH 11/22] Rename _ilevels, _nlevels, _clevels as reviewed --- pandas/io/formats/latex.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index bea6b3fa8f209..28c690358781c 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -47,13 +47,17 @@ def get_strrow(self, row_num: int) -> str: """Get string representation of the row.""" row = self.strrows[row_num] - is_multicol = row_num < self._clevels and self.fmt.header and self.multicolumn + is_multicol = ( + row_num < self.index_clevels + and self.fmt.header + and self.multicolumn + ) is_multirow = ( - row_num >= self._nlevels + row_num >= self.index_nlevels and self.fmt.index and self.multirow - and self._ilevels > 1 + and self.index_ilevels > 1 ) is_cline_maybe_required = is_multirow and row_num < len(self.strrows) - 1 @@ -74,22 +78,22 @@ def get_strrow(self, row_num: int) -> str: return "".join(lst) @property - def _header_row_num(self): + def _header_row_num(self) -> int: """Number of rows in header.""" - return 
self._nlevels if self.fmt.header else 0 + return self.index_nlevels if self.fmt.header else 0 @property - def _ilevels(self): + def index_ilevels(self) -> int: """Integer number of levels in index.""" return self.frame.index.nlevels @property - def _clevels(self): + def index_clevels(self) -> int: return self.frame.columns.nlevels @property - def _nlevels(self): - nlevels = self._clevels + def index_nlevels(self) -> int: + nlevels = self.index_clevels if self.fmt.has_index_names and self.fmt.show_index_names: nlevels += 1 return nlevels @@ -146,7 +150,7 @@ def _preprocess_row(self, row): else: crow = [x if x else "{}" for x in row] if self.fmt.bold_rows and self.fmt.index: - crow = self._convert_to_bold(crow, self._ilevels) + crow = _convert_to_bold(crow, self.index_ilevels) return crow @staticmethod @@ -199,7 +203,7 @@ def _format_multicolumn(self, row: List[str]) -> List[str]: will become \multicolumn{3}{l}{a} & b & \multicolumn{2}{l}{c} """ - row2 = list(row[: self._ilevels]) + row2 = list(row[: self.index_ilevels]) ncol = 1 coltext = "" @@ -214,7 +218,7 @@ def append_col(): else: row2.append(coltext) - for c in row[self._ilevels :]: + for c in row[self.index_ilevels :]: # if next col has text, write the previous if c.strip(): if coltext: @@ -239,7 +243,7 @@ def _format_multirow(self, row: List[str], i: int) -> List[str]: b & 0 & \cline{1-2} b & 0 & """ - for j in range(self._ilevels): + for j in range(self.index_ilevels): if row[j].strip(): nrow = 1 for r in self.strrows[i + 1 :]: From b06846c52afe53f13b0e4d322096ff405451cbe1 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 27 Aug 2020 12:12:44 +0700 Subject: [PATCH 12/22] Move static methods to module level funcs --- pandas/io/formats/latex.py | 80 +++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 28c690358781c..3b2bc0e4d9da7 100644 --- a/pandas/io/formats/latex.py +++ 
b/pandas/io/formats/latex.py @@ -153,46 +153,6 @@ def _preprocess_row(self, row): crow = _convert_to_bold(crow, self.index_ilevels) return crow - @staticmethod - def _escape_symbols(row): - """Carry out string replacements for special symbols. - - Parameters - ---------- - row : list - List of string, that may contain special symbols. - - Returns - ------- - list - list of strings with the special symbols replaced. - """ - return [ - ( - x.replace("\\", "\\textbackslash ") - .replace("_", "\\_") - .replace("%", "\\%") - .replace("$", "\\$") - .replace("#", "\\#") - .replace("{", "\\{") - .replace("}", "\\}") - .replace("~", "\\textasciitilde ") - .replace("^", "\\textasciicircum ") - .replace("&", "\\&") - if (x and x != "{}") - else "{}" - ) - for x in row - ] - - @staticmethod - def _convert_to_bold(crow, ilevels): - """Convert elements in ``crow`` to bold.""" - return [ - f"\\textbf{{{x}}}" if j < ilevels and x.strip() not in ["", "{}"] else x - for j, x in enumerate(crow) - ] - def _format_multicolumn(self, row: List[str]) -> List[str]: r""" Combine columns belonging to a group to a single multicolumn entry @@ -649,3 +609,43 @@ def get_col_type(dtype): def _get_index_format(self): """Get index column format.""" return "l" * self.frame.index.nlevels if self.fmt.index else "" + + +def _escape_symbols(row: List[str]) -> List[str]: + """Carry out string replacements for special symbols. + + Parameters + ---------- + row : list + List of string, that may contain special symbols. + + Returns + ------- + list + list of strings with the special symbols replaced. 
+ """ + return [ + ( + x.replace("\\", "\\textbackslash ") + .replace("_", "\\_") + .replace("%", "\\%") + .replace("$", "\\$") + .replace("#", "\\#") + .replace("{", "\\{") + .replace("}", "\\}") + .replace("~", "\\textasciitilde ") + .replace("^", "\\textasciicircum ") + .replace("&", "\\&") + if (x and x != "{}") + else "{}" + ) + for x in row + ] + + +def _convert_to_bold(crow: List[str], ilevels: int) -> List[str]: + """Convert elements in ``crow`` to bold.""" + return [ + f"\\textbf{{{x}}}" if j < ilevels and x.strip() not in ["", "{}"] else x + for j, x in enumerate(crow) + ] From a753ed59967a81942099bf93e410df25e01beb99 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 27 Aug 2020 12:13:23 +0700 Subject: [PATCH 13/22] Add test for incorrect column_format --- pandas/io/formats/latex.py | 2 +- pandas/tests/io/formats/test_to_latex.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 3b2bc0e4d9da7..5eecd3a2b91ba 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -585,7 +585,7 @@ def column_format(self, input_column_format): self._get_index_format() + self._get_column_format_based_on_dtypes() ) elif not isinstance(input_column_format, str): # pragma: no cover - raise AssertionError( + raise ValueError( f"column_format must be str or unicode, " f"not {type(input_column_format)}" ) diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 85c7750dbcadc..a2cb8f52dfd5b 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -68,6 +68,16 @@ def test_to_latex(self, float_frame): assert withoutindex_result == withoutindex_expected + @pytest.mark.parametrize( + "bad_column_format", + [5, 1.2, ["l", "r"], ("r", "c"), {"r", "c", "l"}, dict(a="r", b="l")], + ) + def test_to_latex_bad_column_format(self, bad_column_format): + df = DataFrame({"a": [1, 2], "b": 
["b1", "b2"]}) + msg = r"column_format must be str or unicode" + with pytest.raises(ValueError, match=msg): + df.to_latex(column_format=bad_column_format) + def test_to_latex_format(self, float_frame): # GH Bug #9402 float_frame.to_latex(column_format="ccc") From 84b29ad57adfa30cbaba79290384ff606003f602 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 27 Aug 2020 14:26:24 +0700 Subject: [PATCH 14/22] Add type annotations Problems -------- Could not figure out how to deal with the following. 1. Property setters 2. Inability of mypy to figure out that list can get iterator as argument rather than iterable only. pandas/io/formats/latex.py:44: error: Argument 1 to "list" has incompatible type "Iterator[Tuple[Any, ...]]"; expected "Iterable[List[str]]" [arg-type] pandas/io/formats/latex.py:544: error: Incompatible types in assignment (expression has type "Optional[str]", variable has type "str") [assignment] --- pandas/io/formats/latex.py | 203 ++++++++++++++++++++----------------- 1 file changed, 108 insertions(+), 95 deletions(-) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 5eecd3a2b91ba..8541a083d33a6 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -2,7 +2,7 @@ Module for formatting output data in Latex. """ from abc import ABC, abstractmethod -from typing import IO, List, Optional +from typing import IO, Iterator, List, Optional, Type import numpy as np @@ -11,7 +11,7 @@ from pandas.io.formats.format import DataFrameFormatter, TableFormatter -class RowStringConverter: +class RowStringConverter(ABC): r"""Converter for dataframe rows into LaTeX strings. 
Parameters @@ -41,16 +41,14 @@ def __init__( self.multirow = multirow self.clinebuf: List[List[int]] = [] self.strcols = self._get_strcols() - self.strrows = list(zip(*self.strcols)) + self.strrows: List[List[str]] = list(zip(*self.strcols)) def get_strrow(self, row_num: int) -> str: """Get string representation of the row.""" row = self.strrows[row_num] is_multicol = ( - row_num < self.index_clevels - and self.fmt.header - and self.multicolumn + row_num < self.index_clevels and self.fmt.header and self.multicolumn ) is_multirow = ( @@ -98,7 +96,7 @@ def index_nlevels(self) -> int: nlevels += 1 return nlevels - def _get_strcols(self): + def _get_strcols(self) -> List[List[str]]: """String representation of the columns.""" if len(self.frame.columns) == 0 or len(self.frame.index) == 0: info_line = ( @@ -143,10 +141,10 @@ def pad_empties(x): strcols = out + strcols[1:] return strcols - def _preprocess_row(self, row): + def _preprocess_row(self, row: List[str]) -> List[str]: """Preprocess elements of the row.""" if self.fmt.escape: - crow = self._escape_symbols(row) + crow = _escape_symbols(row) else: crow = [x if x else "{}" for x in row] if self.fmt.bold_rows and self.fmt.index: @@ -231,19 +229,27 @@ def _compose_cline(self, i: int, icol: int) -> str: return "".join(lst) -class RowHeaderIterator(RowStringConverter): +class RowStringIterator(RowStringConverter): + """Iterator over rows of the header or the body of the table.""" + + @abstractmethod + def __iter__(self) -> Iterator[str]: + """Iterate over LaTeX string representations of rows.""" + + +class RowHeaderIterator(RowStringIterator): """Iterator for the table header rows.""" - def __iter__(self): + def __iter__(self) -> Iterator[str]: for row_num in range(len(self.strrows)): if row_num < self._header_row_num: yield self.get_strrow(row_num) -class RowBodyIterator(RowStringConverter): +class RowBodyIterator(RowStringIterator): """Iterator for the table body rows.""" - def __iter__(self): + def __iter__(self) -> 
Iterator[str]: for row_num in range(len(self.strrows)): if row_num >= self._header_row_num: yield self.get_strrow(row_num) @@ -252,8 +258,47 @@ def __iter__(self): class TableBuilderAbstract(ABC): """ Abstract table builder producing string representation of LaTeX table. + + Parameters + ---------- + formatter : `DataFrameFormatter` + Instance of `DataFrameFormatter`. + column_format: str, optional + Column format, for example, 'rcl' for three columns. + multicolumn: bool, optional + Use multicolumn to enhance MultiIndex columns. + multicolumn_format: str, optional + The alignment for multicolumns, similar to column_format. + multirow: bool, optional + Use multirow to enhance MultiIndex rows. + caption: str, optional + Table caption. + label: str, optional + LaTeX label. + position: str, optional + Float placement specifier, for example, 'htb'. """ + def __init__( + self, + formatter: DataFrameFormatter, + column_format: Optional[str] = None, + multicolumn: bool = False, + multicolumn_format: Optional[str] = None, + multirow: bool = False, + caption: Optional[str] = None, + label: Optional[str] = None, + position: Optional[str] = None, + ): + self.fmt = formatter + self.column_format = column_format + self.multicolumn = multicolumn + self.multicolumn_format = multicolumn_format + self.multirow = multirow + self.caption = caption + self.label = label + self.position = position + @property def product(self) -> str: """String representation of LaTeX table.""" @@ -273,120 +318,80 @@ def product(self) -> str: @property @abstractmethod - def env_begin(self): + def env_begin(self) -> str: """Beginning of the environment.""" @property @abstractmethod - def top_separator(self): + def top_separator(self) -> str: """Top level separator.""" @property @abstractmethod - def header(self): + def header(self) -> str: """Header lines.""" @property @abstractmethod - def middle_separator(self): + def middle_separator(self) -> str: """Middle level separator.""" @property 
@abstractmethod - def env_body(self): + def env_body(self) -> str: """Environment body.""" @property @abstractmethod - def bottom_separator(self): + def bottom_separator(self) -> str: """Bottom level separator.""" @property @abstractmethod - def env_end(self): + def env_end(self) -> str: """End of the environment.""" class TableBuilder(TableBuilderAbstract): - """Table builder producing string representation of LaTeX table. - - Parameters - ---------- - formatter : `DataFrameFormatter` - Instance of `DataFrameFormatter`. - column_format: str, optional - Column format, for example, 'rcl' for three columns. - multicolumn: bool, optional - Use multicolumn to enhance MultiIndex columns. - multicolumn_format: str, optional - The alignment for multicolumns, similar to column_format. - multirow: bool, optional - Use multirow to enhance MultiIndex rows. - caption: str, optional - Table caption. - label: str, optional - LaTeX label. - position: str, optional - Float placement specifier, for example, 'htb'. 
- """ - - def __init__( - self, - formatter: DataFrameFormatter, - column_format: Optional[str] = None, - multicolumn: bool = False, - multicolumn_format: Optional[str] = None, - multirow: bool = False, - caption: Optional[str] = None, - label: Optional[str] = None, - position: Optional[str] = None, - ): - self.fmt = formatter - self.column_format = column_format - self.multicolumn = multicolumn - self.multicolumn_format = multicolumn_format - self.multirow = multirow - self.caption = caption - self.label = label - self.position = position + """Table builder producing string representation of LaTeX table.""" @property - def header(self): + def header(self) -> str: iterator = self._create_row_iterator(over="header") return "\n".join(list(iterator)) @property - def top_separator(self): + def top_separator(self) -> str: return "\\toprule" @property - def middle_separator(self): + def middle_separator(self) -> str: return "\\midrule" if self._is_separator_required() else "" @property - def env_body(self): + def env_body(self) -> str: iterator = self._create_row_iterator(over="body") return "\n".join(list(iterator)) - def _is_separator_required(self): - return self.header and self.env_body + def _is_separator_required(self) -> bool: + return bool(self.header and self.env_body) @property - def _position_macro(self): + def _position_macro(self) -> str: r"""Position macro, extracted from self.position, like [h].""" return f"[{self.position}]" if self.position else "" @property - def _caption_macro(self): + def _caption_macro(self) -> str: r"""Caption macro, extracted from self.caption, like \caption{cap}.""" return f"\\caption{{{self.caption}}}" if self.caption else "" @property - def _label_macro(self): + def _label_macro(self) -> str: r"""Label macro, extracted from self.label, like \label{ref}.""" return f"\\label{{{self.label}}}" if self.label else "" - def _create_row_iterator(self, over): + def _create_row_iterator(self, over: str) -> RowStringIterator: """Create 
iterator over header or body of the table. Parameters @@ -396,19 +401,23 @@ def _create_row_iterator(self, over): Returns ------- - RowStringConverter + RowStringIterator Iterator over body or header. """ - kwargs = dict( + iterator_kind = self._select_iterator(over) + return iterator_kind( formatter=self.fmt, multicolumn=self.multicolumn, multicolumn_format=self.multicolumn_format, multirow=self.multirow, ) + + def _select_iterator(self, over: str) -> Type[RowStringIterator]: + """Select proper iterator over table rows.""" if over == "header": - return RowHeaderIterator(**kwargs) + return RowHeaderIterator elif over == "body": - return RowBodyIterator(**kwargs) + return RowBodyIterator else: msg = f"'over' must be either 'header' or 'body', but {over} was provided" raise ValueError(msg) @@ -418,14 +427,14 @@ class LongTableBuilder(TableBuilder): """Concrete table builder for longtable.""" @property - def env_begin(self): + def env_begin(self) -> str: first_row = ( f"\\begin{{longtable}}{self._position_macro}{{{self.column_format}}}" ) elements = [first_row, f"{self._caption_and_label()}"] return "\n".join([item for item in elements if item]) - def _caption_and_label(self): + def _caption_and_label(self) -> str: if self.caption or self.label: double_backslash = "\\\\" elements = [f"{self._caption_macro}", f"{self._label_macro}"] @@ -436,7 +445,7 @@ def _caption_and_label(self): return "" @property - def middle_separator(self): + def middle_separator(self) -> str: iterator = self._create_row_iterator(over="header") elements = [ "\\midrule", @@ -454,11 +463,11 @@ def middle_separator(self): return "" @property - def bottom_separator(self): + def bottom_separator(self) -> str: return "" @property - def env_end(self): + def env_end(self) -> str: return "\\end{longtable}" @@ -466,7 +475,7 @@ class RegularTableBuilder(TableBuilder): """Concrete table builder for regular table.""" @property - def env_begin(self): + def env_begin(self) -> str: elements = [ 
f"\\begin{{table}}{self._position_macro}", "\\centering", @@ -477,11 +486,11 @@ def env_begin(self): return "\n".join([item for item in elements if item]) @property - def bottom_separator(self): + def bottom_separator(self) -> str: return "\\bottomrule" @property - def env_end(self): + def env_end(self) -> str: return "\n".join(["\\end{tabular}", "\\end{table}"]) @@ -489,15 +498,15 @@ class TabularBuilder(TableBuilder): """Concrete table builder for tabular environment.""" @property - def env_begin(self): + def env_begin(self) -> str: return f"\\begin{{tabular}}{{{self.column_format}}}" @property - def bottom_separator(self): + def bottom_separator(self) -> str: return "\\bottomrule" @property - def env_end(self): + def env_end(self) -> str: return "\\end{tabular}" @@ -549,14 +558,15 @@ def write_result(self, buf: IO[str]) -> None: buf.write(table_string) @property - def builder(self): + def builder(self) -> TableBuilderAbstract: """Concrete table builder. Returns ------- TableBuilder """ - kwargs = dict( + builder = self._select_builder() + return builder( formatter=self.fmt, column_format=self.column_format, multicolumn=self.multicolumn, @@ -566,19 +576,22 @@ def builder(self): label=self.label, position=self.position, ) + + def _select_builder(self) -> Type[TableBuilderAbstract]: + """Select proper table builder.""" if self.longtable: - return LongTableBuilder(**kwargs) + return LongTableBuilder if any([self.caption, self.label, self.position]): - return RegularTableBuilder(**kwargs) - return TabularBuilder(**kwargs) + return RegularTableBuilder + return TabularBuilder @property - def column_format(self): + def column_format(self) -> str: """Column format.""" return self._column_format @column_format.setter - def column_format(self, input_column_format): + def column_format(self, input_column_format: Optional[str]) -> None: """Setter for column format.""" if input_column_format is None: self._column_format = ( @@ -592,7 +605,7 @@ def column_format(self, 
input_column_format): else: self._column_format = input_column_format - def _get_column_format_based_on_dtypes(self): + def _get_column_format_based_on_dtypes(self) -> str: """Get column format based on data type. Right alignment for numbers and left - for strings. @@ -606,7 +619,7 @@ def get_col_type(dtype): dtypes = self.frame.dtypes._values return "".join(map(get_col_type, dtypes)) - def _get_index_format(self): + def _get_index_format(self) -> str: """Get index column format.""" return "l" * self.frame.index.nlevels if self.fmt.index else "" From c9dd27edf1ae89af0f75942c7ad37da6693af807 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 27 Aug 2020 14:42:28 +0700 Subject: [PATCH 15/22] Remove unnecessary list call --- pandas/io/formats/latex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 8541a083d33a6..6cfbd6e82d9a0 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -161,7 +161,7 @@ def _format_multicolumn(self, row: List[str]) -> List[str]: will become \multicolumn{3}{l}{a} & b & \multicolumn{2}{l}{c} """ - row2 = list(row[: self.index_ilevels]) + row2 = row[: self.index_ilevels] ncol = 1 coltext = "" From bbcd9305a0f106addfb8d672ce71b58e1df46ffa Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 27 Aug 2020 14:43:48 +0700 Subject: [PATCH 16/22] Drop comment --- pandas/io/formats/latex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 6cfbd6e82d9a0..33835f18b62e8 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -597,7 +597,7 @@ def column_format(self, input_column_format: Optional[str]) -> None: self._column_format = ( self._get_index_format() + self._get_column_format_based_on_dtypes() ) - elif not isinstance(input_column_format, str): # pragma: no cover + elif not isinstance(input_column_format, str): raise ValueError( f"column_format 
must be str or unicode, " f"not {type(input_column_format)}" From 115ed841f6ea58509c37a0f74a24dd9e8f24dae0 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 27 Aug 2020 15:42:47 +0700 Subject: [PATCH 17/22] Set more descriptive _xlevels names --- pandas/io/formats/latex.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 33835f18b62e8..6558fd9cd299e 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -48,14 +48,14 @@ def get_strrow(self, row_num: int) -> str: row = self.strrows[row_num] is_multicol = ( - row_num < self.index_clevels and self.fmt.header and self.multicolumn + row_num < self.column_levels and self.fmt.header and self.multicolumn ) is_multirow = ( - row_num >= self.index_nlevels + row_num >= self.header_levels and self.fmt.index and self.multirow - and self.index_ilevels > 1 + and self.index_levels > 1 ) is_cline_maybe_required = is_multirow and row_num < len(self.strrows) - 1 @@ -78,20 +78,20 @@ def get_strrow(self, row_num: int) -> str: @property def _header_row_num(self) -> int: """Number of rows in header.""" - return self.index_nlevels if self.fmt.header else 0 + return self.header_levels if self.fmt.header else 0 @property - def index_ilevels(self) -> int: + def index_levels(self) -> int: """Integer number of levels in index.""" return self.frame.index.nlevels @property - def index_clevels(self) -> int: + def column_levels(self) -> int: return self.frame.columns.nlevels @property - def index_nlevels(self) -> int: - nlevels = self.index_clevels + def header_levels(self) -> int: + nlevels = self.column_levels if self.fmt.has_index_names and self.fmt.show_index_names: nlevels += 1 return nlevels @@ -148,7 +148,7 @@ def _preprocess_row(self, row: List[str]) -> List[str]: else: crow = [x if x else "{}" for x in row] if self.fmt.bold_rows and self.fmt.index: - crow = _convert_to_bold(crow, self.index_ilevels) + crow = 
_convert_to_bold(crow, self.index_levels) return crow def _format_multicolumn(self, row: List[str]) -> List[str]: @@ -161,7 +161,7 @@ def _format_multicolumn(self, row: List[str]) -> List[str]: will become \multicolumn{3}{l}{a} & b & \multicolumn{2}{l}{c} """ - row2 = row[: self.index_ilevels] + row2 = row[: self.index_levels] ncol = 1 coltext = "" @@ -176,7 +176,7 @@ def append_col(): else: row2.append(coltext) - for c in row[self.index_ilevels :]: + for c in row[self.index_levels :]: # if next col has text, write the previous if c.strip(): if coltext: @@ -201,7 +201,7 @@ def _format_multirow(self, row: List[str], i: int) -> List[str]: b & 0 & \cline{1-2} b & 0 & """ - for j in range(self.index_ilevels): + for j in range(self.index_levels): if row[j].strip(): nrow = 1 for r in self.strrows[i + 1 :]: From 8e3c7294a8c0d5a62897a58bcdd6d26302f83849 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 27 Aug 2020 16:05:10 +0700 Subject: [PATCH 18/22] Ignore two problematic type checks --- pandas/io/formats/latex.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 6558fd9cd299e..ed99366669db7 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -41,7 +41,7 @@ def __init__( self.multirow = multirow self.clinebuf: List[List[int]] = [] self.strcols = self._get_strcols() - self.strrows: List[List[str]] = list(zip(*self.strcols)) + self.strrows: List[List[str]] = list(zip(*self.strcols)) # type: ignore def get_strrow(self, row_num: int) -> str: """Get string representation of the row.""" @@ -541,7 +541,7 @@ def __init__( self.fmt = formatter self.frame = self.fmt.frame self.longtable = longtable - self.column_format = column_format + self.column_format = column_format # type: ignore self.multicolumn = multicolumn self.multicolumn_format = multicolumn_format self.multirow = multirow From 1892b155c7f5b170db8b5c7c72d72e8330fabc00 Mon Sep 17 00:00:00 2001 From: Maxim 
Ivanov Date: Sun, 6 Sep 2020 22:34:18 +0700 Subject: [PATCH 19/22] REF: rename product property to get_result() --- pandas/io/formats/latex.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index ed99366669db7..2ee6b0f8a2480 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -299,8 +299,7 @@ def __init__( self.label = label self.position = position - @property - def product(self) -> str: + def get_result(self) -> str: """String representation of LaTeX table.""" elements = [ self.env_begin, @@ -554,7 +553,7 @@ def write_result(self, buf: IO[str]) -> None: Render a DataFrame to a LaTeX tabular, longtable, or table/tabular environment output. """ - table_string = self.builder.product + table_string = self.builder.get_result() buf.write(table_string) @property From 3e6a829811ffafde875f07551ae203c893b22e2e Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Sun, 6 Sep 2020 23:19:58 +0700 Subject: [PATCH 20/22] REF: rename base builder, add doctests - TableBuilder -> GenericTableBuilder - Add doctests explaining what each table builder does --- pandas/io/formats/latex.py | 90 +++++++++++++++++++++++++++++++++++--- 1 file changed, 83 insertions(+), 7 deletions(-) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 2ee6b0f8a2480..76d871a92e178 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -351,7 +351,7 @@ def env_end(self) -> str: """End of the environment.""" -class TableBuilder(TableBuilderAbstract): +class GenericTableBuilder(TableBuilderAbstract): """Table builder producing string representation of LaTeX table.""" @property @@ -422,8 +422,36 @@ def _select_iterator(self, over: str) -> Type[RowStringIterator]: raise ValueError(msg) -class LongTableBuilder(TableBuilder): - """Concrete table builder for longtable.""" +class LongTableBuilder(GenericTableBuilder): + """Concrete table builder for longtable. 
+ + >>> from pandas import DataFrame + >>> from pandas.io.formats import format as fmt + >>> df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + >>> formatter = fmt.DataFrameFormatter(df) + >>> builder = LongTableBuilder(formatter, caption='caption', label='lab', + ... column_format='lrl') + >>> table = builder.get_result() + >>> print(table) + \\begin{longtable}{lrl} + \\caption{caption} + \\label{lab}\\\\ + \\toprule + {} & a & b \\\\ + \\midrule + \\endhead + \\midrule + \\multicolumn{3}{r}{{Continued on next page}} \\\\ + \\midrule + \\endfoot + + \\bottomrule + \\endlastfoot + 0 & 1 & b1 \\\\ + 1 & 2 & b2 \\\\ + \\end{longtable} + + """ @property def env_begin(self) -> str: @@ -470,8 +498,32 @@ def env_end(self) -> str: return "\\end{longtable}" -class RegularTableBuilder(TableBuilder): - """Concrete table builder for regular table.""" +class RegularTableBuilder(GenericTableBuilder): + """Concrete table builder for regular table. + + >>> from pandas import DataFrame + >>> from pandas.io.formats import format as fmt + >>> df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + >>> formatter = fmt.DataFrameFormatter(df) + >>> builder = RegularTableBuilder(formatter, caption='caption', label='lab', + ... column_format='lrc') + >>> table = builder.get_result() + >>> print(table) + \\begin{table} + \\centering + \\caption{caption} + \\label{lab} + \\begin{tabular}{lrc} + \\toprule + {} & a & b \\\\ + \\midrule + 0 & 1 & b1 \\\\ + 1 & 2 & b2 \\\\ + \\bottomrule + \\end{tabular} + \\end{table} + + """ @property def env_begin(self) -> str: @@ -493,8 +545,26 @@ def env_end(self) -> str: return "\n".join(["\\end{tabular}", "\\end{table}"]) -class TabularBuilder(TableBuilder): - """Concrete table builder for tabular environment.""" +class TabularBuilder(GenericTableBuilder): + """Concrete table builder for tabular environment. 
+ + >>> from pandas import DataFrame + >>> from pandas.io.formats import format as fmt + >>> df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + >>> formatter = fmt.DataFrameFormatter(df) + >>> builder = TabularBuilder(formatter, column_format='lrc') + >>> table = builder.get_result() + >>> print(table) + \\begin{tabular}{lrc} + \\toprule + {} & a & b \\\\ + \\midrule + 0 & 1 & b1 \\\\ + 1 & 2 & b2 \\\\ + \\bottomrule + \\end{tabular} + + """ @property def env_begin(self) -> str: @@ -661,3 +731,9 @@ def _convert_to_bold(crow: List[str], ilevels: int) -> List[str]: f"\\textbf{{{x}}}" if j < ilevels and x.strip() not in ["", "{}"] else x for j, x in enumerate(crow) ] + + +if __name__ == "__main__": + import doctest + + doctest.testmod() From 94d8dc72ea2e3dd4d8d0ef6fbeac4e1e3900ac77 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 7 Sep 2020 00:03:18 +0700 Subject: [PATCH 21/22] TYP: ignore specific mypy errors --- pandas/io/formats/latex.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 76d871a92e178..e3c5a2cafbb6b 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -41,7 +41,7 @@ def __init__( self.multirow = multirow self.clinebuf: List[List[int]] = [] self.strcols = self._get_strcols() - self.strrows: List[List[str]] = list(zip(*self.strcols)) # type: ignore + self.strrows: List[List[str]] = list(zip(*self.strcols)) # type: ignore[arg-type] def get_strrow(self, row_num: int) -> str: """Get string representation of the row.""" @@ -610,7 +610,7 @@ def __init__( self.fmt = formatter self.frame = self.fmt.frame self.longtable = longtable - self.column_format = column_format # type: ignore + self.column_format = column_format # type: ignore[assignment] self.multicolumn = multicolumn self.multicolumn_format = multicolumn_format self.multirow = multirow From c4da0a2de12fd25fe4e604da2bac86415a5b4728 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 7 Sep 
2020 00:55:14 +0700 Subject: [PATCH 22/22] FIX: long line with type ignore --- pandas/io/formats/latex.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index e3c5a2cafbb6b..8080d953da308 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -41,7 +41,9 @@ def __init__( self.multirow = multirow self.clinebuf: List[List[int]] = [] self.strcols = self._get_strcols() - self.strrows: List[List[str]] = list(zip(*self.strcols)) # type: ignore[arg-type] + self.strrows: List[List[str]] = ( + list(zip(*self.strcols)) # type: ignore[arg-type] + ) def get_strrow(self, row_num: int) -> str: """Get string representation of the row."""