Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: handling of max_colwidth parameter #25977

Closed
Closed
73 changes: 52 additions & 21 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from functools import partial
from io import StringIO
from shutil import get_terminal_size
from typing import Optional, Type, Union
from unicodedata import east_asian_width

import numpy as np
Expand All @@ -32,6 +33,7 @@
is_timedelta64_dtype,
)
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCIndexClass,
ABCMultiIndex,
ABCSeries,
Expand Down Expand Up @@ -402,6 +404,12 @@ class TableFormatter:

is_truncated = False
show_dimensions = None
tr_frame = None # type: ABCDataFrame
float_format = None
na_rep = None
col_space = None
decimal = None
max_colwidth = None

@property
def should_show_dimensions(self):
Expand All @@ -420,6 +428,24 @@ def _get_formatter(self, i):
i = self.columns[i]
return self.formatters.get(i, None)

def _format_col(self, i: int):
"""
Calls `format_array` for column `i` of truncated DataFrame with
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you say "positional column `i`" here?

optional `formatter`
"""
frame = self.tr_frame
jreback marked this conversation as resolved.
Show resolved Hide resolved
formatter = self._get_formatter(i)
values_to_format = frame.iloc[:, i]._formatting_values()
return format_array(
values_to_format,
formatter,
float_format=self.float_format,
na_rep=self.na_rep,
space=self.col_space,
decimal=self.decimal,
max_colwidth=self.max_colwidth,
)


class DataFrameFormatter(TableFormatter):
"""
Expand Down Expand Up @@ -501,6 +527,7 @@ def __init__(

self._chk_truncate()
self.adj = _get_adjustment()
self.max_colwidth = None # use display.max_colwidth setting

def _chk_truncate(self):
"""
Expand Down Expand Up @@ -804,19 +831,6 @@ def to_latex(
else:
raise TypeError("buf is not a file name and it has no write " "method")

def _format_col(self, i):
    """
    Format positional column `i` of the truncated frame.

    The column is taken from ``self.tr_frame`` by position and passed to
    ``format_array`` together with the formatter returned by
    ``self._get_formatter(i)`` and this object's float/NA/spacing/decimal
    settings.

    Parameters
    ----------
    i : int
        Position of the column in ``self.tr_frame``.

    Returns
    -------
    The value returned by ``format_array`` for that column.
    """
    truncated = self.tr_frame
    col_formatter = self._get_formatter(i)
    column = truncated.iloc[:, i]
    format_kwargs = {
        "float_format": self.float_format,
        "na_rep": self.na_rep,
        "space": self.col_space,
        "decimal": self.decimal,
    }
    return format_array(
        column._formatting_values(), col_formatter, **format_kwargs
    )

def to_html(self, classes=None, notebook=False, border=None):
"""
Render a DataFrame to a html table.
Expand Down Expand Up @@ -966,6 +980,7 @@ def format_array(
justify="right",
decimal=".",
leading_space=None,
max_colwidth: Optional[Union[bool, int]] = None,
):
"""
Format an array for printing.
Expand All @@ -988,14 +1003,21 @@ def format_array(
When formatting an Index subclass
(e.g. IntervalIndex._format_native_types), we don't want the
leading space since it should be left-aligned.
max_colwidth: False, int or None, optional, default None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a `versionadded` tag?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not a public function, but I have added the tag anyway.

Whether the array should be formatted with strings truncated.
* False: do not truncate strings
* int: the maximum width of strings
* None: use display.max_colwidth setting

.. versionadded:: 1.0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can just remove the `versionadded` tag (or make it 0.25.0).


Returns
-------
List[str]
"""

if is_datetime64_dtype(values.dtype):
fmt_klass = Datetime64Formatter
fmt_klass = Datetime64Formatter # type: Type[GenericArrayFormatter]
elif is_datetime64tz_dtype(values):
fmt_klass = Datetime64TZFormatter
elif is_timedelta64_dtype(values.dtype):
Expand All @@ -1018,6 +1040,9 @@ def format_array(
if digits is None:
digits = get_option("display.precision")

if max_colwidth is None:
max_colwidth = get_option("display.max_colwidth")

fmt_obj = fmt_klass(
values,
digits=digits,
Expand All @@ -1028,6 +1053,7 @@ def format_array(
justify=justify,
decimal=decimal,
leading_space=leading_space,
max_colwidth=max_colwidth,
)

return fmt_obj.get_result()
Expand All @@ -1047,6 +1073,7 @@ def __init__(
quoting=None,
fixed_width=True,
leading_space=None,
max_colwidth=None,
):
self.values = values
self.digits = digits
Expand All @@ -1059,10 +1086,13 @@ def __init__(
self.quoting = quoting
self.fixed_width = fixed_width
self.leading_space = leading_space
self.max_colwidth = max_colwidth

def get_result(self):
fmt_values = self._format_strings()
return _make_fixed_width(fmt_values, self.justify)
return _make_fixed_width(
fmt_values, self.justify, max_colwidth=self.max_colwidth
)

def _format_strings(self):
if self.float_format is None:
Expand Down Expand Up @@ -1552,7 +1582,9 @@ def _formatter(x):
return _formatter


def _make_fixed_width(strings, justify="right", minimum=None, adj=None):
def _make_fixed_width(
strings, justify="right", minimum=None, adj=None, max_colwidth=None
):

if len(strings) == 0 or justify == "all":
return strings
Expand All @@ -1565,13 +1597,12 @@ def _make_fixed_width(strings, justify="right", minimum=None, adj=None):
if minimum is not None:
max_len = max(minimum, max_len)

conf_max = get_option("display.max_colwidth")
if conf_max is not None and max_len > conf_max:
max_len = conf_max
if max_colwidth is not None and max_len > max_colwidth:
max_len = max_colwidth

def just(x):
if conf_max is not None:
if (conf_max > 3) & (adj.len(x) > max_len):
if max_colwidth is not None:
if (max_colwidth > 3) & (adj.len(x) > max_len):
x = x[: max_len - 3] + "..."
return x

Expand Down
18 changes: 11 additions & 7 deletions pandas/io/formats/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@

from pandas.core.dtypes.generic import ABCMultiIndex

from pandas import option_context

from pandas.io.common import _is_url
from pandas.io.formats.format import TableFormatter, get_level_lengths
from pandas.io.formats.printing import pprint_thing
Expand Down Expand Up @@ -43,8 +41,15 @@ def __init__(self, formatter, classes=None, border=None):
self.border = border
self.table_id = self.fmt.table_id
self.render_links = self.fmt.render_links
self.max_colwidth = False # do not truncate strings
self.tr_frame = self.fmt.tr_frame
self.formatters = self.fmt.formatters
self.float_format = self.fmt.float_format
self.na_rep = self.fmt.na_rep
self.decimal = self.fmt.decimal
if isinstance(self.fmt.col_space, int):
self.fmt.col_space = "{colspace}px".format(colspace=self.fmt.col_space)
self.col_space = self.fmt.col_space

@property
def show_row_idx_names(self):
Expand Down Expand Up @@ -356,9 +361,7 @@ def _write_header(self, indent):
self.write("</thead>", indent)

def _get_formatted_values(self):
with option_context("display.max_colwidth", 999999):
fmt_values = {i: self.fmt._format_col(i) for i in range(self.ncols)}
return fmt_values
return {i: self._format_col(i) for i in range(self.ncols)}

def _write_body(self, indent):
self.write("<tbody>", indent)
Expand Down Expand Up @@ -546,8 +549,9 @@ class NotebookFormatter(HTMLFormatter):
DataFrame._repr_html_() and DataFrame.to_html(notebook=True)
"""

def _get_formatted_values(self):
    """
    Build the formatted values for every column.

    Returns
    -------
    dict
        Maps each column position in ``range(self.ncols)`` to the result
        of ``self.fmt._format_col`` for that position.
    """
    formatted = {}
    for position in range(self.ncols):
        formatted[position] = self.fmt._format_col(position)
    return formatted
def __init__(self, formatter, classes=None, border=None):
    """
    Initialise the notebook formatter.

    Runs the parent initialiser with the same arguments, then resets
    ``max_colwidth`` to ``None``.
    """
    super().__init__(formatter, classes=classes, border=border)
    # None means: use display.max_colwidth setting
    self.max_colwidth = None

def _get_columns_formatted_values(self):
return self.columns.format()
Expand Down