diff --git a/CHANGES.rst b/CHANGES.rst index 545e8140f..ccbdf7186 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -14,7 +14,11 @@ New features: Bug fixes: -- *add item here* +- Use original raw text and mimetype when indexing rich text. + This avoids a double transform (raw source to output mimetype to plain text). + Includes a reindex of the SearchableText index for Collections, Documents and News Items. + `Issue 2066 <https://github.com/plone/Products.CMFPlone/issues/2066>`_. + [maurits] 1.1.3 (2017-07-20) diff --git a/plone/app/contenttypes/indexers.py b/plone/app/contenttypes/indexers.py index d77900dd0..d78b5f55f 100644 --- a/plone/app/contenttypes/indexers.py +++ b/plone/app/contenttypes/indexers.py @@ -42,10 +42,13 @@ def SearchableText(obj): textvalue = richtext.text if IRichTextValue.providedBy(textvalue): transforms = getToolByName(obj, 'portal_transforms') + # Before you think about switching raw/output + # or mimeType/outputMimeType, first read + # https://github.com/plone/Products.CMFPlone/issues/2066 text = transforms.convertTo( 'text/plain', - safe_unicode(textvalue.output).encode('utf8'), - mimetype=textvalue.outputMimeType, + safe_unicode(textvalue.raw).encode('utf-8'), + mimetype=textvalue.mimeType, ).getData().strip() subject = u' '.join( diff --git a/plone/app/contenttypes/profiles/default/metadata.xml b/plone/app/contenttypes/profiles/default/metadata.xml index 920628e82..fb0941388 100644 --- a/plone/app/contenttypes/profiles/default/metadata.xml +++ b/plone/app/contenttypes/profiles/default/metadata.xml @@ -1,5 +1,5 @@ <metadata> - <version>1105</version> + <version>1106</version> <dependencies> <dependency>profile-plone.app.dexterity:default</dependency> <dependency>profile-plone.app.event:default</dependency> diff --git a/plone/app/contenttypes/tests/test_indexes.py b/plone/app/contenttypes/tests/test_indexes.py index 5e8935a99..76889ba8d 100644 --- a/plone/app/contenttypes/tests/test_indexes.py +++ b/plone/app/contenttypes/tests/test_indexes.py @@ -224,6 +224,30 @@ def test_html_stripped_searchable_text_index(self): self.assertEqual(index_data['SearchableText'].count('p'), 0) 
self.assertEqual(index_data['SearchableText'].count('b'), 0) + def test_raw_text_searchable_text_index(self): + """Ensure that raw text is used, instead of output. + + It makes no sense to transform raw text to the output mimetype, + and then transform it again to plain text. + Note that this does mean that javascript may get in the + searchable text, but you will usually have a hard time setting it. + """ + self.document.text = RichTextValue( + u"""<script type="text/javascript">alert('Lorem ipsum')""" + u"""</script>""", + mimeType='text/html', + outputMimeType='text/x-html-safe' + ) + self.document.reindexObject() + brains = self.catalog.searchResults(dict( + SearchableText=u'Lorem ipsum', + )) + self.assertEqual(len(brains), 1) + rid = brains[0].getRID() + index_data = self.catalog.getIndexDataForRID(rid) + self.assertEqual(index_data['SearchableText'].count('script'), 0) + self.assertEqual(index_data['SearchableText'].count('text'), 0) + def test_file_fulltext_in_searchable_text_index_string(self): from plone.namedfile.file import NamedBlobFile data = ("Lorem ipsum. Köln