Skip to content

Commit

Permalink
Sanitize mdstrip (#1351)
Browse files Browse the repository at this point in the history
  • Loading branch information
abulte committed Jan 8, 2018
1 parent b6fd499 commit f53ce8f
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 10 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

## Current (in progress)

- Nothing yet
- Sanitize mdstrip [#1351](https://github.com/opendatateam/udata/pull/1351)

## 1.2.6 (2018-01-04)

Expand Down
24 changes: 16 additions & 8 deletions udata/frontend/markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from flask import current_app, Markup
from werkzeug.local import LocalProxy
from jinja2.filters import do_truncate, do_striptags
from jinja2.utils import escape

from udata.i18n import _

Expand Down Expand Up @@ -64,14 +65,7 @@ def __init__(self, app, parser, renderer):
def __call__(self, stream, source_tooltip=False):
if not stream:
return ''
# Sanitize malicious attempts but keep the `EXCERPT_TOKEN`.
# By default, only keeps `bleach.ALLOWED_TAGS`.
stream = bleach.clean(
stream,
tags=current_app.config['MD_ALLOWED_TAGS'],
attributes=current_app.config['MD_ALLOWED_ATTRIBUTES'],
styles=current_app.config['MD_ALLOWED_STYLES'],
strip_comments=False)
stream = bleach_clean(stream)
# Turn markdown to HTML.
ast = self.parser().parse(stream)
html = self.renderer.render(ast)
Expand All @@ -87,6 +81,19 @@ def __call__(self, stream, source_tooltip=False):
return Markup(html)


def bleach_clean(stream):
"""
Sanitize malicious attempts but keep the `EXCERPT_TOKEN`.
By default, only keeps `bleach.ALLOWED_TAGS`.
"""
return bleach.clean(
stream,
tags=current_app.config['MD_ALLOWED_TAGS'],
attributes=current_app.config['MD_ALLOWED_ATTRIBUTES'],
styles=current_app.config['MD_ALLOWED_STYLES'],
strip_comments=False)


def mdstrip(value, length=None, end='…'):
'''
Truncate and strip tags from a markdown source
Expand All @@ -100,6 +107,7 @@ def mdstrip(value, length=None, end='…'):
value = value.split(EXCERPT_TOKEN, 1)[0]
rendered = md(value)
text = do_striptags(rendered)
text = bleach_clean(text)
if length > 0:
text = do_truncate(None, text, length, end=end, leeway=2)
return text
Expand Down
11 changes: 10 additions & 1 deletion udata/tests/frontend/test_dataset_frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def test_json_ld(self):
self.assertEquals(json_ld['@context'], 'http://schema.org')
self.assertEquals(json_ld['@type'], 'Dataset')
self.assertEquals(json_ld['@id'], str(dataset.id))
self.assertEquals(json_ld['description'], 'a&éèëù$£')
self.assertEquals(json_ld['description'], 'a&éèëù$£')
self.assertEquals(json_ld['alternateName'], dataset.slug)
self.assertEquals(json_ld['dateCreated'][:16],
dataset.created_at.isoformat()[:16])
Expand Down Expand Up @@ -179,6 +179,15 @@ def test_json_ld(self):
self.assertEquals(json_ld['license'], 'http://www.datagouv.fr/licence')
self.assertEquals(json_ld['author']['@type'], 'Person')

def test_json_ld_sanitize(self):
'''Json-ld should be sanitized'''
dataset = DatasetFactory(description='an <script>evil()</script>')
url = url_for('datasets.show', dataset=dataset)
response = self.get(url)
json_ld = self.get_json_ld(response)
self.assertEquals(json_ld['description'],
'an &lt;script&gt;evil()&lt;/script&gt;')

def test_raise_404_if_private(self):
'''It should raise a 404 if the dataset is private'''
dataset = DatasetFactory(private=True)
Expand Down

0 comments on commit f53ce8f

Please sign in to comment.