opendatateam · abulte · Jan 8, 2018 · Jan 8, 2018 · Jan 8, 2018 · Jan 8, 2018
diff --git a/udata/frontend/markdown.py b/udata/frontend/markdown.py
@@ -8,6 +8,7 @@
 from flask import current_app, Markup
 from werkzeug.local import LocalProxy
 from jinja2.filters import do_truncate, do_striptags
+from jinja2.utils import escape
 
 from udata.i18n import _
 
@@ -64,14 +65,7 @@ def __init__(self, app, parser, renderer):
     def __call__(self, stream, source_tooltip=False):
         if not stream:
             return ''
-        # Sanitize malicious attempts but keep the `EXCERPT_TOKEN`.
-        # By default, only keeps `bleach.ALLOWED_TAGS`.
-        stream = bleach.clean(
-            stream,
-            tags=current_app.config['MD_ALLOWED_TAGS'],
-            attributes=current_app.config['MD_ALLOWED_ATTRIBUTES'],
-            styles=current_app.config['MD_ALLOWED_STYLES'],
-            strip_comments=False)
+        stream = bleach_clean(stream)
         # Turn markdown to HTML.
         ast = self.parser().parse(stream)
         html = self.renderer.render(ast)
@@ -87,6 +81,19 @@ def __call__(self, stream, source_tooltip=False):
         return Markup(html)
 
 
+def bleach_clean(stream):
+    """
+    Sanitize potentially malicious input but keep the `EXCERPT_TOKEN`.
+    By default, only the tags in `bleach.ALLOWED_TAGS` are kept.
+    """
+    return bleach.clean(
+        stream,
+        tags=current_app.config['MD_ALLOWED_TAGS'],
+        attributes=current_app.config['MD_ALLOWED_ATTRIBUTES'],
+        styles=current_app.config['MD_ALLOWED_STYLES'],
+        strip_comments=False)
+
+
 def mdstrip(value, length=None, end='…'):
     '''
     Truncate and strip tags from a markdown source
@@ -100,6 +107,7 @@ def mdstrip(value, length=None, end='…'):
         value = value.split(EXCERPT_TOKEN, 1)[0]
     rendered = md(value)
     text = do_striptags(rendered)
+    text = bleach_clean(text)
     if length > 0:
         text = do_truncate(None, text, length, end=end, leeway=2)
     return text

diff --git a/udata/tests/frontend/test_dataset_frontend.py b/udata/tests/frontend/test_dataset_frontend.py
@@ -103,7 +103,7 @@ def test_json_ld(self):
         self.assertEquals(json_ld['@context'], 'http://schema.org')
         self.assertEquals(json_ld['@type'], 'Dataset')
         self.assertEquals(json_ld['@id'], str(dataset.id))
-        self.assertEquals(json_ld['description'], 'a&éèëù$£')
+        self.assertEquals(json_ld['description'], 'a&amp;éèëù$£')
         self.assertEquals(json_ld['alternateName'], dataset.slug)
         self.assertEquals(json_ld['dateCreated'][:16],
                           dataset.created_at.isoformat()[:16])
@@ -179,6 +179,15 @@ def test_json_ld(self):
         self.assertEquals(json_ld['license'], 'http://www.datagouv.fr/licence')
         self.assertEquals(json_ld['author']['@type'], 'Person')
 
+    def test_json_ld_sanitize(self):
+        '''It should escape HTML markup in the JSON-LD description'''
+        dataset = DatasetFactory(description='an <script>evil()</script>')
+        url = url_for('datasets.show', dataset=dataset)
+        response = self.get(url)
+        json_ld = self.get_json_ld(response)
+        self.assertEquals(json_ld['description'],
+                          'an &lt;script&gt;evil()&lt;/script&gt;')
+
     def test_raise_404_if_private(self):
         '''It should raise a 404 if the dataset is private'''
         dataset = DatasetFactory(private=True)