Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

Already on GitHub? Sign in to your account

ENH: add escape parameter to to_html() #2919

Merged
merged 2 commits into from Apr 10, 2013
Jump to file or symbol
Failed to load files and symbols.
+55 −7
Split
View
@@ -131,6 +131,9 @@ pandas 0.11.0
- Add ``time()`` method to DatetimeIndex (GH3180_)
- Return NA when using Series.str[...] for values that are not long enough
(GH3223_)
+ - to_html() now accepts an optional ``escape`` argument that controls whether
+ reserved HTML characters are escaped (enabled by default), and it now escapes
+ ``&`` in addition to ``<`` and ``>``. (GH2919_)
**API Changes**
@@ -390,6 +393,7 @@ pandas 0.11.0
.. _GH3238: https://github.com/pydata/pandas/issues/3238
.. _GH3258: https://github.com/pydata/pandas/issues/3258
.. _GH3283: https://github.com/pydata/pandas/issues/3283
+.. _GH2919: https://github.com/pydata/pandas/issues/2919
pandas 0.10.1
=============
View
@@ -325,6 +325,10 @@ Enhancements
- Treat boolean values as integers (values 1 and 0) for numeric
operations. (GH2641_)
+ - to_html() now accepts an optional ``escape`` argument that controls whether
+ reserved HTML characters are escaped (enabled by default), and it now escapes
+ ``&`` in addition to ``<`` and ``>``. (GH2919_)
+
See the `full release notes
<https://github.com/pydata/pandas/blob/master/RELEASE.rst>`__ or issue tracker
on GitHub for a complete list.
@@ -350,3 +354,4 @@ on GitHub for a complete list.
.. _GH3070: https://github.com/pydata/pandas/issues/3070
.. _GH3075: https://github.com/pydata/pandas/issues/3075
.. _GH2641: https://github.com/pydata/pandas/issues/2641
+.. _GH2919: https://github.com/pydata/pandas/issues/2919
View
@@ -495,6 +495,7 @@ def __init__(self, formatter, classes=None):
self.columns = formatter.columns
self.elements = []
self.bold_rows = self.fmt.kwds.get('bold_rows', False)
+ self.escape = self.fmt.kwds.get('escape', True)
def write(self, s, indent=0):
rs = com.pprint_thing(s)
@@ -517,7 +518,10 @@ def _write_cell(self, s, kind='td', indent=0, tags=None):
else:
start_tag = '<%s>' % kind
- esc = {'<' : r'&lt;', '>' : r'&gt;'}
+ if self.escape:
+ esc = {'<' : r'&lt;', '>' : r'&gt;', '&' : r'&amp;'}
+ else:
+ esc = {}
rs = com.pprint_thing(s, escape_chars=esc)
self.write(
'%s%s</%s>' % (start_tag, rs, kind), indent)
View
@@ -1459,13 +1459,15 @@ def to_html(self, buf=None, columns=None, col_space=None, colSpace=None,
header=True, index=True, na_rep='NaN', formatters=None,
float_format=None, sparsify=None, index_names=True,
justify=None, force_unicode=None, bold_rows=True,
- classes=None):
+ classes=None, escape=True):
"""
to_html-specific options
bold_rows : boolean, default True
Make the row labels bold in the output
classes : str or list or tuple, default None
CSS class(es) to apply to the resulting html table
+ escape : boolean, default True
+ Convert the characters <, >, and & to HTML-safe sequences.
Render a DataFrame to an html table.
"""
@@ -1488,7 +1490,8 @@ def to_html(self, buf=None, columns=None, col_space=None, colSpace=None,
justify=justify,
index_names=index_names,
header=header, index=index,
- bold_rows=bold_rows)
+ bold_rows=bold_rows,
+ escape=escape)
formatter.to_html(classes=classes)
if buf is None:
@@ -275,8 +275,8 @@ def test_to_html_unicode(self):
df.to_html()
def test_to_html_escaped(self):
- a = 'str<ing1'
- b = 'stri>ng2'
+ a = 'str<ing1 &amp;'
+ b = 'stri>ng2 &amp;'
test_dict = {'co<l1': {a: "<type 'str'>",
b: "<type 'str'>"},
@@ -293,19 +293,51 @@ def test_to_html_escaped(self):
</thead>
<tbody>
<tr>
- <th>str&lt;ing1</th>
+ <th>str&lt;ing1 &amp;amp;</th>
<td> &lt;type 'str'&gt;</td>
<td> &lt;type 'str'&gt;</td>
</tr>
<tr>
- <th>stri&gt;ng2</th>
+ <th>stri&gt;ng2 &amp;amp;</th>
<td> &lt;type 'str'&gt;</td>
<td> &lt;type 'str'&gt;</td>
</tr>
</tbody>
</table>"""
self.assertEqual(xp, rs)
+ def test_to_html_escape_disabled(self):
+ a = 'str<ing1 &amp;'
+ b = 'stri>ng2 &amp;'
+
+ test_dict = {'co<l1': {a: "<b>bold</b>",
+ b: "<b>bold</b>"},
+ 'co>l2': {a: "<b>bold</b>",
+ b: "<b>bold</b>"}}
+ rs = pd.DataFrame(test_dict).to_html(escape=False)
+ xp = """<table border="1" class="dataframe">
+ <thead>
+ <tr style="text-align: right;">
+ <th></th>
+ <th>co<l1</th>
+ <th>co>l2</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <th>str<ing1 &amp;</th>
+ <td> <b>bold</b></td>
+ <td> <b>bold</b></td>
+ </tr>
+ <tr>
+ <th>stri>ng2 &amp;</th>
+ <td> <b>bold</b></td>
+ <td> <b>bold</b></td>
+ </tr>
+ </tbody>
+</table>"""
+ self.assertEqual(xp, rs)
+
def test_to_html_multiindex_sparsify(self):
index = pd.MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]],
names=['foo', None])