Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: dataframe formatters/outputs #36510

Merged
merged 37 commits into from Oct 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
c3568a2
REF: drop TableFormatter
ivanovmg Sep 20, 2020
837858f
REF: extract ConsoleFormatter
ivanovmg Sep 20, 2020
08e899f
CLN: remove ConsoleFormatter to_string method
ivanovmg Sep 20, 2020
602c984
REF: move table_id & render_links to HTMLFormatter
ivanovmg Sep 20, 2020
bd5cb87
REF: move _join_multiline to ConsoleFormatter
ivanovmg Sep 20, 2020
6e8d4d8
REF: separate dataframe formatting from rendering
ivanovmg Sep 20, 2020
cbd3c76
REF: extract _empty_info_line property in latex
ivanovmg Sep 20, 2020
5c30924
DOC: docstrings for DataFrame & String Formatters
ivanovmg Sep 20, 2020
af8fe98
REF: make _get_buffer private
ivanovmg Sep 20, 2020
6e9fb3c
REF: pass DataFrameFormatter to CSVFormatter
ivanovmg Sep 20, 2020
878eed2
REF: create to_csv in DataFrameRenderer
ivanovmg Sep 20, 2020
d87638b
LINT: imports and line breaks
ivanovmg Sep 20, 2020
1292be5
REF: move StringFormatter to separate module
ivanovmg Sep 20, 2020
41553f6
TYP: handle mypy errors after enabling composition
ivanovmg Sep 20, 2020
a66ca5e
REF: remove non-existent parent in LatexFormatter
ivanovmg Sep 20, 2020
3fbe4ba
REF: move line_width to StringFormatter
ivanovmg Sep 21, 2020
733fa34
REF: move need_to_wrap to StringFormatter
ivanovmg Sep 21, 2020
bfb37d7
DOC: add docstrings to DataFrame.to_xxx methods
ivanovmg Sep 21, 2020
f1b494e
CLN: to_string on top in HTMLFormatter
ivanovmg Sep 21, 2020
75daa74
LINT: black conv multiline -> oneline
ivanovmg Sep 21, 2020
6e39277
DOC: add docstring to DataFrameRenderer
ivanovmg Sep 22, 2020
df3b5c6
REF: move _get_result, _get_buffer to module level
ivanovmg Sep 22, 2020
5a18386
REF: replace setters with initializer methods
ivanovmg Sep 22, 2020
fc68fa5
REF: extract properties to make composition clear
ivanovmg Sep 22, 2020
19d2156
REF: eliminate inheritance for HTMLFormatter
ivanovmg Sep 22, 2020
271ef5c
LINT: new black
ivanovmg Sep 22, 2020
b1018ad
Merge branch 'master' into refactor/fmt
ivanovmg Sep 23, 2020
22d0982
TYP: type properties in CSVFormatter
ivanovmg Sep 23, 2020
94dbadd
TYP: type strcols
ivanovmg Sep 23, 2020
914981b
TYP: _join_multiline
ivanovmg Sep 23, 2020
482ccd1
LINT: sort in one line
ivanovmg Sep 23, 2020
7b57fc8
REF: _join_multiline to accept single arg
ivanovmg Sep 23, 2020
1e2969f
REF: eliminate mutation in _join_multiline
ivanovmg Sep 23, 2020
90977fd
Merge branch 'master' into refactor/fmt
ivanovmg Sep 29, 2020
fc7a091
Merge branch 'master' into refactor/fmt
ivanovmg Oct 20, 2020
1335a11
TYP: extract imports for typing only in csvs
ivanovmg Oct 20, 2020
b67b481
TYP: move FloatFormatType alias to _typing
ivanovmg Oct 20, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions pandas/_typing.py
Expand Up @@ -38,6 +38,8 @@
from pandas.core.indexes.base import Index
from pandas.core.series import Series

from pandas.io.formats.format import EngFormatter

# array-like

AnyArrayLike = TypeVar("AnyArrayLike", "ExtensionArray", "Index", "Series", np.ndarray)
Expand Down Expand Up @@ -127,6 +129,10 @@
EncodingVar = TypeVar("EncodingVar", str, None, Optional[str])


# type of float formatter in DataFrameFormatter
FloatFormatType = Union[str, Callable, "EngFormatter"]


@dataclass
class IOargs(Generic[ModeVar, EncodingVar]):
"""
Expand Down
23 changes: 12 additions & 11 deletions pandas/core/frame.py
Expand Up @@ -788,10 +788,8 @@ def _repr_html_(self) -> Optional[str]:
max_cols=max_cols,
show_dimensions=show_dimensions,
decimal=".",
table_id=None,
render_links=False,
)
return formatter.to_html(notebook=True)
return fmt.DataFrameRenderer(formatter).to_html(notebook=True)
else:
return None

Expand Down Expand Up @@ -874,9 +872,12 @@ def to_string(
max_cols=max_cols,
show_dimensions=show_dimensions,
decimal=decimal,
)
return fmt.DataFrameRenderer(formatter).to_string(
buf=buf,
encoding=encoding,
line_width=line_width,
)
return formatter.to_string(buf=buf, encoding=encoding)

# ----------------------------------------------------------------------

Expand Down Expand Up @@ -2476,29 +2477,29 @@ def to_html(
columns=columns,
col_space=col_space,
na_rep=na_rep,
header=header,
index=index,
formatters=formatters,
float_format=float_format,
bold_rows=bold_rows,
sparsify=sparsify,
justify=justify,
index_names=index_names,
header=header,
index=index,
bold_rows=bold_rows,
escape=escape,
decimal=decimal,
max_rows=max_rows,
max_cols=max_cols,
show_dimensions=show_dimensions,
decimal=decimal,
table_id=table_id,
render_links=render_links,
)
# TODO: a generic formatter would be in DataFrameFormatter
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this TODO still needed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know what it actually means here... :)

return formatter.to_html(
return fmt.DataFrameRenderer(formatter).to_html(
buf=buf,
classes=classes,
notebook=notebook,
border=border,
encoding=encoding,
table_id=table_id,
render_links=render_links,
)

# ----------------------------------------------------------------------
Expand Down
37 changes: 17 additions & 20 deletions pandas/core/generic.py
Expand Up @@ -4,7 +4,6 @@
from datetime import timedelta
import functools
import gc
from io import StringIO
import json
import operator
import pickle
Expand Down Expand Up @@ -109,7 +108,11 @@
from pandas.core.window import Expanding, ExponentialMovingWindow, Rolling, Window

from pandas.io.formats import format as fmt
from pandas.io.formats.format import DataFrameFormatter, format_percentiles
from pandas.io.formats.format import (
DataFrameFormatter,
DataFrameRenderer,
format_percentiles,
)
from pandas.io.formats.printing import pprint_thing

if TYPE_CHECKING:
Expand Down Expand Up @@ -3149,7 +3152,7 @@ def to_latex(
escape=escape,
decimal=decimal,
)
return formatter.to_latex(
return DataFrameRenderer(formatter).to_latex(
buf=buf,
column_format=column_format,
longtable=longtable,
Expand Down Expand Up @@ -3182,7 +3185,7 @@ def to_csv(
date_format: Optional[str] = None,
doublequote: bool_t = True,
escapechar: Optional[str] = None,
decimal: Optional[str] = ".",
decimal: str = ".",
errors: str = "strict",
storage_options: StorageOptions = None,
) -> Optional[str]:
Expand Down Expand Up @@ -3340,39 +3343,33 @@ def to_csv(
"""
df = self if isinstance(self, ABCDataFrame) else self.to_frame()

from pandas.io.formats.csvs import CSVFormatter
formatter = DataFrameFormatter(
frame=df,
header=header,
index=index,
na_rep=na_rep,
float_format=float_format,
decimal=decimal,
)

formatter = CSVFormatter(
df,
return DataFrameRenderer(formatter).to_csv(
path_or_buf,
line_terminator=line_terminator,
sep=sep,
encoding=encoding,
errors=errors,
compression=compression,
quoting=quoting,
na_rep=na_rep,
float_format=float_format,
cols=columns,
header=header,
index=index,
columns=columns,
index_label=index_label,
mode=mode,
chunksize=chunksize,
quotechar=quotechar,
date_format=date_format,
doublequote=doublequote,
escapechar=escapechar,
decimal=decimal,
storage_options=storage_options,
)
formatter.save()

if path_or_buf is None:
assert isinstance(formatter.path_or_buf, StringIO)
return formatter.path_or_buf.getvalue()

return None

# ----------------------------------------------------------------------
# Lookup Caching
Expand Down
103 changes: 47 additions & 56 deletions pandas/io/formats/csvs.py
Expand Up @@ -5,14 +5,15 @@
import csv as csvlib
from io import StringIO, TextIOWrapper
import os
from typing import Any, Dict, Hashable, Iterator, List, Optional, Sequence, Union
from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Sequence, Union

import numpy as np

from pandas._libs import writers as libwriters
from pandas._typing import (
CompressionOptions,
FilePathOrBuffer,
FloatFormatType,
IndexLabel,
Label,
StorageOptions,
Expand All @@ -30,18 +31,17 @@

from pandas.io.common import get_filepath_or_buffer, get_handle

if TYPE_CHECKING:
from pandas.io.formats.format import DataFrameFormatter


class CSVFormatter:
def __init__(
self,
obj,
formatter: "DataFrameFormatter",
path_or_buf: Optional[FilePathOrBuffer[str]] = None,
sep: str = ",",
na_rep: str = "",
float_format: Optional[str] = None,
cols: Optional[Sequence[Label]] = None,
header: Union[bool, Sequence[Hashable]] = True,
index: bool = True,
index_label: Optional[IndexLabel] = None,
mode: str = "w",
encoding: Optional[str] = None,
Expand All @@ -54,10 +54,11 @@ def __init__(
date_format: Optional[str] = None,
doublequote: bool = True,
escapechar: Optional[str] = None,
decimal=".",
storage_options: StorageOptions = None,
):
self.obj = obj
self.fmt = formatter

self.obj = self.fmt.frame

self.encoding = encoding or "utf-8"

Expand All @@ -79,35 +80,45 @@ def __init__(
self.mode = ioargs.mode

self.sep = sep
self.na_rep = na_rep
self.float_format = float_format
self.decimal = decimal
self.header = header
self.index = index
self.index_label = index_label
self.index_label = self._initialize_index_label(index_label)
self.errors = errors
self.quoting = quoting or csvlib.QUOTE_MINIMAL
self.quotechar = quotechar
self.quotechar = self._initialize_quotechar(quotechar)
self.doublequote = doublequote
self.escapechar = escapechar
self.line_terminator = line_terminator or os.linesep
self.date_format = date_format
self.cols = cols # type: ignore[assignment]
self.chunksize = chunksize # type: ignore[assignment]
self.cols = self._initialize_columns(cols)
self.chunksize = self._initialize_chunksize(chunksize)

@property
def na_rep(self) -> str:
return self.fmt.na_rep

@property
def float_format(self) -> Optional["FloatFormatType"]:
return self.fmt.float_format

@property
def index_label(self) -> IndexLabel:
return self._index_label
def decimal(self) -> str:
return self.fmt.decimal

@index_label.setter
def index_label(self, index_label: Optional[IndexLabel]) -> None:
@property
def header(self) -> Union[bool, Sequence[str]]:
return self.fmt.header

@property
def index(self) -> bool:
return self.fmt.index

def _initialize_index_label(self, index_label: Optional[IndexLabel]) -> IndexLabel:
if index_label is not False:
if index_label is None:
index_label = self._get_index_label_from_obj()
return self._get_index_label_from_obj()
elif not isinstance(index_label, (list, tuple, np.ndarray, ABCIndexClass)):
# given a string for a DF with Index
index_label = [index_label]
self._index_label = index_label
return [index_label]
return index_label

def _get_index_label_from_obj(self) -> List[str]:
if isinstance(self.obj.index, ABCMultiIndex):
Expand All @@ -122,30 +133,17 @@ def _get_index_label_flat(self) -> List[str]:
index_label = self.obj.index.name
return [""] if index_label is None else [index_label]

@property
def quotechar(self) -> Optional[str]:
def _initialize_quotechar(self, quotechar: Optional[str]) -> Optional[str]:
if self.quoting != csvlib.QUOTE_NONE:
# prevents crash in _csv
return self._quotechar
return quotechar
return None

@quotechar.setter
def quotechar(self, quotechar: Optional[str]) -> None:
self._quotechar = quotechar

@property
def has_mi_columns(self) -> bool:
return bool(isinstance(self.obj.columns, ABCMultiIndex))

@property
def cols(self) -> Sequence[Label]:
return self._cols

@cols.setter
def cols(self, cols: Optional[Sequence[Label]]) -> None:
self._cols = self._refine_cols(cols)

def _refine_cols(self, cols: Optional[Sequence[Label]]) -> Sequence[Label]:
def _initialize_columns(self, cols: Optional[Sequence[Label]]) -> Sequence[Label]:
# validate mi options
if self.has_mi_columns:
if cols is not None:
Expand All @@ -161,12 +159,16 @@ def _refine_cols(self, cols: Optional[Sequence[Label]]) -> Sequence[Label]:

# update columns to include possible multiplicity of dupes
# and make sure cols is just a list of labels
cols = self.obj.columns
if isinstance(cols, ABCIndexClass):
return cols._format_native_types(**self._number_format)
new_cols = self.obj.columns
if isinstance(new_cols, ABCIndexClass):
return new_cols._format_native_types(**self._number_format)
else:
assert isinstance(cols, Sequence)
return list(cols)
return list(new_cols)

def _initialize_chunksize(self, chunksize: Optional[int]) -> int:
if chunksize is None:
return (100000 // (len(self.cols) or 1)) or 1
return int(chunksize)

@property
def _number_format(self) -> Dict[str, Any]:
Expand All @@ -179,17 +181,6 @@ def _number_format(self) -> Dict[str, Any]:
decimal=self.decimal,
)

@property
def chunksize(self) -> int:
return self._chunksize

@chunksize.setter
def chunksize(self, chunksize: Optional[int]) -> None:
if chunksize is None:
chunksize = (100000 // (len(self.cols) or 1)) or 1
assert chunksize is not None
self._chunksize = int(chunksize)

@property
def data_index(self) -> Index:
data_index = self.obj.index
Expand Down