Skip to content

Commit

Permalink
refactor: replace bleach with nh3 (ammonia) (#295)
Browse files Browse the repository at this point in the history
  • Loading branch information
miketheman committed Sep 5, 2023
1 parent b509830 commit e5221c2
Show file tree
Hide file tree
Showing 20 changed files with 75 additions and 118 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ authors = [
]
readme = "README.rst"
license = {text = "Apache License, Version 2.0"}
dependencies = ["bleach>=2.1.0", "docutils>=0.13.1", "Pygments>=2.5.1"]
dependencies = ["nh3>=0.2.14", "docutils>=0.13.1", "Pygments>=2.5.1"]
classifiers = [
"Intended Audience :: Developers",
"License :: OSI Approved :: Apache Software License",
Expand Down
128 changes: 42 additions & 86 deletions readme_renderer/clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import functools
from typing import Any, Dict, Iterator, List, Optional
from typing import Dict, Optional, Set

import bleach
import bleach.callbacks
import bleach.linkifier
import bleach.sanitizer
import nh3


ALLOWED_TAGS = [
ALLOWED_TAGS = {
# Bleach Defaults
"a", "abbr", "acronym", "b", "blockquote", "code", "em", "i", "li", "ol",
"strong", "ul",
Expand All @@ -32,102 +28,62 @@
"span", "sub", "summary", "sup", "table", "tbody", "td", "th", "thead",
"tr", "tt", "kbd", "var", "input", "section", "aside", "nav", "s", "figure",
"figcaption",
]
}

# Per-tag attribute allow-list handed to the sanitizer by ``clean()``.
# Keys are tag names; values are the *set* of attribute names kept on that
# tag. Each key appears exactly once: a dict literal silently keeps only the
# last duplicate, so repeating keys would leave dead, misleading entries.
ALLOWED_ATTRIBUTES = {
    # Bleach Defaults
    "a": {"href", "title"},
    "abbr": {"title"},
    "acronym": {"title"},

    # Custom Additions
    "*": {"id"},
    "hr": {"class"},
    "img": {"src", "width", "height", "alt", "align", "class"},
    "span": {"class"},
    "th": {"align", "class"},
    "td": {"align", "colspan", "rowspan"},
    "div": {"align", "class"},
    "h1": {"align"},
    "h2": {"align"},
    "h3": {"align"},
    "h4": {"align"},
    "h5": {"align"},
    "h6": {"align"},
    "code": {"class"},
    "p": {"align", "class"},
    "pre": {"lang"},
    "ol": {"start"},
    "input": {"type", "checked", "disabled"},
    "aside": {"class"},
    "dd": {"class"},
    "dl": {"class"},
    "dt": {"class"},
    "ul": {"class"},
    "nav": {"class"},
    "figure": {"class"},
}


class DisabledCheckboxInputsFilter:
    """Token-stream filter that drops every ``input`` token except disabled
    checkboxes.

    Wraps an iterable of token mappings. Tokens whose ``"name"`` is not
    ``"input"`` pass through untouched. An ``input`` token is re-emitted only
    when its attributes include ``type="checkbox"`` and ``disabled``, and
    contain nothing else apart from an optional ``checked``.
    """

    # The typeshed for bleach (html5lib) filters is incomplete, so the wrapped
    # source is typed as `typing.Any`.
    # See https://github.com/python/typeshed/blob/505ea726415016e53638c8b584b8fdc9c722cac1/stubs/bleach/bleach/html5lib_shim.pyi#L7-L8 # noqa E501
    def __init__(self, source: Any) -> None:
        self.source = source

    def __iter__(self) -> Iterator[Dict[str, Optional[str]]]:
        for token in self.source:
            if token.get("name") != "input":
                # Not an <input>: nothing to police, hand it on unchanged.
                yield token
                continue

            checkbox_seen = False
            disabled_seen = False
            # Attribute keys are 2-tuples; only the second element (the
            # attribute name) matters here.
            for (_, attr_name), attr_value in token.get("data", {}).items():
                if attr_name == "type" and attr_value == "checkbox":
                    checkbox_seen = True
                elif attr_name == "disabled":
                    disabled_seen = True
                elif attr_name != "checked":
                    # Any other attribute (including a non-checkbox `type`)
                    # disqualifies the token outright.
                    break
            else:
                # Reached only when no disqualifying attribute was found.
                if checkbox_seen and disabled_seen:
                    yield token

    def __getattr__(self, name: str) -> Any:
        # Anything not defined on the filter is looked up on the wrapped source.
        return getattr(self.source, name)


def clean(
    html: str,
    tags: Optional[Set[str]] = None,
    attributes: Optional[Dict[str, Set[str]]] = None
) -> Optional[str]:
    """Sanitize an HTML fragment with ``nh3``.

    :param html: The HTML markup to sanitize.
    :param tags: Tag names to keep. Defaults to ``ALLOWED_TAGS``.
    :param attributes: Mapping of tag name to the attribute names kept on
        that tag. Defaults to ``ALLOWED_ATTRIBUTES``.
    :returns: The sanitized markup, or ``None`` if sanitizing raised
        ``ValueError``.
    """
    if tags is None:
        tags = ALLOWED_TAGS
    if attributes is None:
        attributes = ALLOWED_ATTRIBUTES

    # nh3 is typed to take sets. Coerce so callers that still pass lists
    # (the previous, list-based signature) keep working.
    allowed_tags = set(tags)
    allowed_attributes = {
        tag: set(names) for tag, names in attributes.items()
    }

    # NOTE(review): no per-token filtering of <input> elements happens on this
    # path; confirm that allowing non-disabled checkboxes is intended.
    try:
        # Pass the caller's allow-lists, not the module defaults, so that
        # `tags=` / `attributes=` overrides actually take effect.
        return nh3.clean(
            html,
            tags=allowed_tags,
            attributes=allowed_attributes,
            link_rel="nofollow",
            url_schemes={"http", "https", "mailto"},
        )
    except ValueError:
        return None
2 changes: 1 addition & 1 deletion readme_renderer/txt.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@

def render(raw: str, **kwargs: Any) -> Optional[str]:
    """Render plain text as sanitized HTML.

    The text is passed through ``html_escape``, newlines are turned into
    ``<br>`` tags, and the result is sanitized with ``<br>`` as the only
    allowed tag.

    :param raw: The plain-text input.
    :param kwargs: Accepted but not used by this renderer.
    :returns: Whatever ``clean`` returns for the escaped text (``None`` when
        sanitizing fails).
    """
    rendered = html_escape(raw).replace("\n", "<br>")
    # `clean` takes a set of tag names; a single return keeps the call
    # unambiguous (no unreachable second `return`).
    return clean(rendered, tags={"br"})
2 changes: 1 addition & 1 deletion tests/fixtures/test_CommonMark_006.html
Original file line number Diff line number Diff line change
@@ -1 +1 @@
&lt;iframe src="http://mymalicioussite.com/"&gt;Click here&lt;/iframe&gt;
Click here
4 changes: 1 addition & 3 deletions tests/fixtures/test_CommonMark_007.html
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
<p>Something naughty this way comes</p>
&lt;script&gt;
alert("Hello");
&lt;/script&gt;

8 changes: 4 additions & 4 deletions tests/fixtures/test_CommonMark_008.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
<span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="n">name</span>

<span class="k">def</span> <span class="nf">make_sound</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;Ruff!&#39;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">'Ruff!'</span><span class="p">)</span>

<span class="n">dog</span> <span class="o">=</span> <span class="n">Dog</span><span class="p">(</span><span class="s1">&#39;Fido&#39;</span><span class="p">)</span>
<span class="n">dog</span> <span class="o">=</span> <span class="n">Dog</span><span class="p">(</span><span class="s1">'Fido'</span><span class="p">)</span>
</pre>
<p>and then here is some bash:</p>
<pre lang="bash"><span class="k">if</span><span class="w"> </span><span class="o">[</span><span class="w"> </span><span class="s2">&quot;</span><span class="nv">$1</span><span class="s2">&quot;</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s2">&quot;--help&quot;</span><span class="w"> </span><span class="o">]</span><span class="p">;</span><span class="w"> </span><span class="k">then</span>
<span class="w"> </span><span class="nb">echo</span><span class="w"> </span><span class="s2">&quot;OK&quot;</span>
<pre lang="bash"><span class="k">if</span><span class="w"> </span><span class="o">[</span><span class="w"> </span><span class="s2">"</span><span class="nv">$1</span><span class="s2">"</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s2">"--help"</span><span class="w"> </span><span class="o">]</span><span class="p">;</span><span class="w"> </span><span class="k">then</span>
<span class="w"> </span><span class="nb">echo</span><span class="w"> </span><span class="s2">"OK"</span>
<span class="k">fi</span>
</pre>
<p>or click <a href="http://www.surveymonkey.com" rel="nofollow">SurveyMonkey</a></p>
2 changes: 1 addition & 1 deletion tests/fixtures/test_GFM_019.html
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
<p><a href="http://commonmark.org" rel="nofollow">http://commonmark.org</a></p>
<p>(Visit <a href="https://encrypted.google.com/search?q=Markup+(business)" rel="nofollow">https://encrypted.google.com/search?q=Markup+(business)</a>)</p>
<p>Anonymous FTP is available at <a>ftp://foo.bar.baz</a>.</p>
<p>Anonymous FTP is available at <a rel="nofollow">ftp://foo.bar.baz</a>.</p>
2 changes: 1 addition & 1 deletion tests/fixtures/test_GFM_020.html
Original file line number Diff line number Diff line change
@@ -1 +1 @@
<p><a href="mailto:foo@bar.baz">foo@bar.baz</a></p>
<p><a href="mailto:foo@bar.baz" rel="nofollow">foo@bar.baz</a></p>
2 changes: 1 addition & 1 deletion tests/fixtures/test_GFM_021.html
Original file line number Diff line number Diff line change
@@ -1 +1 @@
<p>hello@mail+xyz.example isn't valid, but <a href="mailto:hello+xyz@mail.example">hello+xyz@mail.example</a> is.</p>
<p>hello@mail+xyz.example isn't valid, but <a href="mailto:hello+xyz@mail.example" rel="nofollow">hello+xyz@mail.example</a> is.</p>
4 changes: 2 additions & 2 deletions tests/fixtures/test_GFM_022.html
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<p><a href="mailto:a.b-c_d@a.b">a.b-c_d@a.b</a></p>
<p><a href="mailto:a.b-c_d@a.b">a.b-c_d@a.b</a>.</p>
<p><a href="mailto:a.b-c_d@a.b" rel="nofollow">a.b-c_d@a.b</a></p>
<p><a href="mailto:a.b-c_d@a.b" rel="nofollow">a.b-c_d@a.b</a>.</p>
<p>a.b-c_d@a.b-</p>
<p>a.b-c_d@a.b_</p>
14 changes: 7 additions & 7 deletions tests/fixtures/test_GFM_024.html
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
<ul>
<li><input type="checkbox" disabled> Valid unchecked checkbox</li>
<li><input type="checkbox" checked disabled> Valid checked checkbox</li>
<li> Invalid enabled checkbox</li>
<li><input type="checkbox" disabled=""> Valid unchecked checkbox</li>
<li><input type="checkbox" checked="" disabled=""> Valid checked checkbox</li>
<li><input type="checkbox"> Invalid enabled checkbox</li>
<li>

<input>
</li>
<li>

<input type="submit">
</li>
<li>

<input>
</li>
<li>

<input type="checkbox" checked="">
</li>
</ul>
2 changes: 1 addition & 1 deletion tests/fixtures/test_GFM_doublequotes.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<pre><code>This is code text.
</code></pre>
<pre lang="python3"><span class="k">def</span> <span class="nf">this_is_python</span><span class="p">():</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;This is a docstring.&quot;&quot;&quot;</span>
<span class="w"> </span><span class="sd">"""This is a docstring."""</span>
<span class="k">pass</span>
</pre>
<pre lang="go"><span class="kd">func</span><span class="w"> </span><span class="nx">ThisIsGo</span><span class="p">(){</span>
Expand Down
4 changes: 2 additions & 2 deletions tests/fixtures/test_GFM_malicious_pre.html
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<p>This is normal text.</p>
<pre lang="python3"><span class="k">def</span> <span class="nf">this_is_python</span><span class="p">():</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;This is a docstring.&quot;&quot;&quot;</span>
<span class="w"> </span><span class="sd">"""This is a docstring."""</span>
<span class="k">pass</span>
<span class="o">&lt;</span><span class="n">script</span> <span class="nb">type</span><span class="o">=</span><span class="s2">&quot;text/javascript&quot;</span><span class="o">&gt;</span><span class="n">alert</span><span class="p">(</span><span class="s1">&#39;I am evil.&#39;</span><span class="p">);</span><span class="o">&lt;/</span><span class="n">script</span><span class="o">&gt;</span>
<span class="o">&lt;</span><span class="n">script</span> <span class="nb">type</span><span class="o">=</span><span class="s2">"text/javascript"</span><span class="o">&gt;</span><span class="n">alert</span><span class="p">(</span><span class="s1">'I am evil.'</span><span class="p">);</span><span class="o">&lt;/</span><span class="n">script</span><span class="o">&gt;</span>
</pre>
4 changes: 2 additions & 2 deletions tests/fixtures/test_rst_008.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@

</span><span class="n">dog</span> <span class="o">=</span> <span class="n">Dog</span><span class="p">(</span><span class="s1">'Fido'</span><span class="p">)</span></code></pre>
<p>and then here is some bash:</p>
<pre><code><span class="k">if</span><span class="w"> </span><span class="o">[</span><span class="w"> </span><span class="s2">&quot;</span><span class="nv">$1</span><span class="s2">&quot;</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s2">&quot;--help&quot;</span><span class="w"> </span><span class="o">]</span><span class="p">;</span><span class="w"> </span><span class="k">then</span><span class="w">
</span><span class="nb">echo</span><span class="w"> </span><span class="s2">&quot;OK&quot;</span><span class="w">
<pre><code><span class="k">if</span><span class="w"> </span><span class="o">[</span><span class="w"> </span><span class="s2">"</span><span class="nv">$1</span><span class="s2">"</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s2">"--help"</span><span class="w"> </span><span class="o">]</span><span class="p">;</span><span class="w"> </span><span class="k">then</span><span class="w">
</span><span class="nb">echo</span><span class="w"> </span><span class="s2">"OK"</span><span class="w">
</span><span class="k">fi</span></code></pre>
<p>or click <a href="http://www.surveymonkey.com" rel="nofollow">SurveyMonkey</a></p>
<pre><code>An unknown code fence block</code></pre>
2 changes: 1 addition & 1 deletion tests/fixtures/test_rst_bibtex.html
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
<pre><code><span class="nc">&#64;article</span><span class="p">{</span><span class="nl">the_impact_of_pygments_docutils_config_and_html5</span><span class="p">,</span><span class="w">
<pre><code><span class="nc">@article</span><span class="p">{</span><span class="nl">the_impact_of_pygments_docutils_config_and_html5</span><span class="p">,</span><span class="w">
</span><span class="na">year</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><s>{2022}</s><span class="p">,</span></code></pre>
2 changes: 1 addition & 1 deletion tests/fixtures/test_rst_docinfo.html
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<dd class="created"><p>mer 02 ago 2017 14:49:24 CEST</p>
</dd>
<dt class="author">Author<span class="colon">:</span></dt>
<dd class="author"><p>Lele Gaifax &lt;<a href="mailto:lele&#37;&#52;&#48;metapensiero&#46;it">lele<span>&#64;</span>metapensiero<span>&#46;</span>it</a>&gt;</p></dd>
<dd class="author"><p>Lele Gaifax &lt;<a href="mailto:lele%40metapensiero.it" rel="nofollow">lele<span>@</span>metapensiero<span>.</span>it</a>&gt;</p></dd>
<dt class="license">License<span class="colon">:</span></dt>
<dd class="license"><p>GNU General Public License version 3 or later</p>
</dd>
Expand Down
2 changes: 1 addition & 1 deletion tests/fixtures/test_rst_linkify.html
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ <h2>Development</h2>
<dd><p><a href="http://multigtfs.readthedocs.org/" rel="nofollow">http://multigtfs.readthedocs.org/</a></p>
</dd>
<dt>IRC<span class="colon">:</span></dt>
<dd><p><a>irc://irc.freenode.net/tulsawebdevs</a></p>
<dd><p><a rel="nofollow">irc://irc.freenode.net/tulsawebdevs</a></p>
</dd>
</dl>
</section>
Loading

0 comments on commit e5221c2

Please sign in to comment.