Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sanitize mdstrip #1351

Merged
merged 4 commits into from
Jan 8, 2018
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
24 changes: 16 additions & 8 deletions udata/frontend/markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from flask import current_app, Markup
from werkzeug.local import LocalProxy
from jinja2.filters import do_truncate, do_striptags
from jinja2.utils import escape

from udata.i18n import _

Expand Down Expand Up @@ -64,14 +65,7 @@ def __init__(self, app, parser, renderer):
def __call__(self, stream, source_tooltip=False):
if not stream:
return ''
# Sanitize malicious attempts but keep the `EXCERPT_TOKEN`.
# By default, only keeps `bleach.ALLOWED_TAGS`.
stream = bleach.clean(
stream,
tags=current_app.config['MD_ALLOWED_TAGS'],
attributes=current_app.config['MD_ALLOWED_ATTRIBUTES'],
styles=current_app.config['MD_ALLOWED_STYLES'],
strip_comments=False)
stream = bleach_clean(stream)
# Turn markdown to HTML.
ast = self.parser().parse(stream)
html = self.renderer.render(ast)
Expand All @@ -87,6 +81,19 @@ def __call__(self, stream, source_tooltip=False):
return Markup(html)


def bleach_clean(stream):
"""
Sanitize malicious attempts but keep the `EXCERPT_TOKEN`.
By default, only keeps `bleach.ALLOWED_TAGS`.
"""
return bleach.clean(
stream,
tags=current_app.config['MD_ALLOWED_TAGS'],
attributes=current_app.config['MD_ALLOWED_ATTRIBUTES'],
styles=current_app.config['MD_ALLOWED_STYLES'],
strip_comments=False)


def mdstrip(value, length=None, end='…'):
'''
Truncate and strip tags from a markdown source
Expand All @@ -100,6 +107,7 @@ def mdstrip(value, length=None, end='…'):
value = value.split(EXCERPT_TOKEN, 1)[0]
rendered = md(value)
text = do_striptags(rendered)
text = bleach_clean(text)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems to be working. I'm just wondering whether this should after or before the do_striptags

Copy link
Contributor Author

@abulte abulte Jan 8, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Somehow do_striptags would reintroduce the injection. I tested because it seemed more logical but...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fine to me, I was just wondering the reasonning 👍

if length > 0:
text = do_truncate(None, text, length, end=end, leeway=2)
return text
Expand Down
11 changes: 10 additions & 1 deletion udata/tests/frontend/test_dataset_frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def test_json_ld(self):
self.assertEquals(json_ld['@context'], 'http://schema.org')
self.assertEquals(json_ld['@type'], 'Dataset')
self.assertEquals(json_ld['@id'], str(dataset.id))
self.assertEquals(json_ld['description'], 'a&éèëù$£')
self.assertEquals(json_ld['description'], 'a&éèëù$£')
self.assertEquals(json_ld['alternateName'], dataset.slug)
self.assertEquals(json_ld['dateCreated'][:16],
dataset.created_at.isoformat()[:16])
Expand Down Expand Up @@ -179,6 +179,15 @@ def test_json_ld(self):
self.assertEquals(json_ld['license'], 'http://www.datagouv.fr/licence')
self.assertEquals(json_ld['author']['@type'], 'Person')

def test_json_ld_sanitize(self):
'''Json-ld should be sanitized'''
dataset = DatasetFactory(description='an <script>evil()</script>')
url = url_for('datasets.show', dataset=dataset)
response = self.get(url)
json_ld = self.get_json_ld(response)
self.assertEquals(json_ld['description'],
'an &lt;script&gt;evil()&lt;/script&gt;')

def test_raise_404_if_private(self):
'''It should raise a 404 if the dataset is private'''
dataset = DatasetFactory(private=True)
Expand Down