-
-
Notifications
You must be signed in to change notification settings - Fork 1.7k
/
_html.py
176 lines (153 loc) · 5.61 KB
/
_html.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
"""Module for formatting output data in HTML."""
from __future__ import annotations
import os
from textwrap import dedent
from typing import TYPE_CHECKING, Iterable
from polars.dependencies import html
if TYPE_CHECKING:
from types import TracebackType
from polars import DataFrame
class Tag:
    """
    Context manager that wraps emitted HTML in an opening/closing tag pair.

    Entering the context appends the opening tag (including any attributes) to
    the shared ``elements`` list; leaving it appends the matching closing tag.
    Nothing is bound by ``with ... as``: ``__enter__`` returns ``None``.
    """

    def __init__(
        self,
        elements: list[str],
        tag: str,
        attributes: dict[str, str] | None = None,
    ):
        self.tag = tag
        self.elements = elements
        self.attributes = attributes

    def __enter__(self) -> None:
        """Append the opening tag to ``elements``."""
        if self.attributes is None:
            self.elements.append(f"<{self.tag}>")
            return

        rendered = " ".join(
            f'{name}="{value}"' for name, value in self.attributes.items()
        )
        # An empty mapping leaves a dangling space after the tag name;
        # rstrip removes it so the result is a plain "<tag>".
        opening = f"<{self.tag} {rendered}".rstrip()
        self.elements.append(f"{opening}>")

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """Append the closing tag to ``elements``; exceptions are not suppressed."""
        self.elements.append(f"</{self.tag}>")
class HTMLFormatter:
    """
    Build the HTML fragments that render a DataFrame as a ``<table>``.

    Parameters
    ----------
    df
        Frame to render.
    max_cols
        Column budget. A wider frame keeps ``max_cols // 2`` columns from each
        edge and collapses everything in between into a single "…" column.
    max_rows
        Row budget. A taller frame keeps the leading and trailing rows (the
        leading part receives the extra row when the budget is odd) and
        collapses everything in between into a single "…" row.
    from_series
        Write the shape line in one-dimensional ``(n,)`` form.
    """

    def __init__(
        self,
        df: DataFrame,
        *,
        max_cols: int = 75,
        max_rows: int = 40,
        from_series: bool = False,
    ):
        self.df = df
        self.elements: list[str] = []
        self.max_cols = max_cols
        self.max_rows = max_rows
        self.from_series = from_series
        # Positions to render; -1 is the placeholder for the elided middle part.
        self.row_idx: Iterable[int]
        self.col_idx: Iterable[int]

        if max_rows < df.height:
            half, rest = divmod(max_rows, 2)
            self.row_idx = [
                *list(range(half + rest)),
                -1,
                *list(range(df.height - half, df.height)),
            ]
        else:
            self.row_idx = range(df.height)

        if max_cols < df.width:
            self.col_idx = [
                *list(range(max_cols // 2)),
                -1,
                *list(range(df.width - max_cols // 2, df.width)),
            ]
        else:
            self.col_idx = range(df.width)

    @staticmethod
    def _env_flag(name: str) -> bool:
        """Return whether the integer-valued environment variable is non-zero."""
        return bool(int(os.environ.get(name, "0")))

    def write_header(self) -> None:
        """Write the header of an HTML table."""
        with Tag(self.elements, "thead"):
            if not self._env_flag("POLARS_FMT_TABLE_HIDE_COLUMN_NAMES"):
                with Tag(self.elements, "tr"):
                    columns = self.df.columns
                    for c in self.col_idx:
                        with Tag(self.elements, "th"):
                            if c == -1:
                                self.elements.append("…")
                            else:
                                self.elements.append(html.escape(columns[c]))
            if not self._env_flag("POLARS_FMT_TABLE_HIDE_COLUMN_DATA_TYPES"):
                with Tag(self.elements, "tr"):
                    dtypes = self.df._df.dtype_strings()
                    for c in self.col_idx:
                        with Tag(self.elements, "td"):
                            if c == -1:
                                self.elements.append("…")
                            else:
                                # Escaped like every other piece of table text.
                                self.elements.append(html.escape(dtypes[c]))

    def write_body(self) -> None:
        """Write the body of an HTML table."""
        str_lengths = int(os.environ.get("POLARS_FMT_STR_LEN", "15"))
        # Look each visible column up once rather than once per rendered cell.
        columns = {c: self.df[:, c] for c in self.col_idx if c != -1}
        with Tag(self.elements, "tbody"):
            for r in self.row_idx:
                with Tag(self.elements, "tr"):
                    for c in self.col_idx:
                        with Tag(self.elements, "td"):
                            if r == -1 or c == -1:
                                self.elements.append("…")
                            else:
                                cell = columns[c]._s.get_fmt(r, str_lengths)
                                self.elements.append(html.escape(cell))

    def write(self, inner: str) -> None:
        """Append a raw string to the inner HTML."""
        self.elements.append(inner)

    def render(self) -> list[str]:
        """
        Return the lines needed to render a HTML table.

        The fragments are appended to ``self.elements`` and that same list is
        returned, so rendering twice on one instance emits the table twice.
        """
        if not self._env_flag("POLARS_FMT_TABLE_HIDE_DATAFRAME_SHAPE_INFORMATION"):
            # format frame/series shape with '_' thousand-separators
            s = self.df.shape
            shape = f"({s[0]:_},)" if self.from_series else f"({s[0]:_}, {s[1]:_})"
            self.elements.append(f"<small>shape: {shape}</small>")

        with Tag(
            # be careful changing the CSS class ref here...
            # ref: https://github.com/pola-rs/polars/issues/7443
            self.elements,
            "table",
            {"border": "1", "class": "dataframe"},
        ):
            self.write_header()
            self.write_body()

        return self.elements
class NotebookFormatter(HTMLFormatter):
    """
    HTML formatter for displaying output data in Jupyter Notebooks.

    Holds the functionality that is specific to DataFrame._repr_html_().
    """

    def write_style(self) -> None:
        """Append the ``<style>`` block that styles the table's rows."""
        css = dedent(
            """\
            <style>
            .dataframe > thead > tr,
            .dataframe > tbody > tr {
            text-align: right;
            white-space: pre-wrap;
            }
            </style>
            """
        )
        self.write(css)

    def render(self) -> list[str]:
        """Return the lines needed to render a HTML table."""
        wrapper = Tag(self.elements, "div")
        with wrapper:
            # The style block goes first, then the regular table output.
            self.write_style()
            super().render()
        return self.elements