From bc20977ed096823735cd9e6e4238a33521585f3e Mon Sep 17 00:00:00 2001
From: Maurits van Rees <maurits@vanrees.org>
Date: Thu, 6 May 2021 17:29:19 +0200
Subject: [PATCH 1/4] Refactored test_transforms to not have so much duplicate
 code.

And prepared this file for having another TestCase class.
---
 plone/app/blocks/tests/test_transforms.py | 57 +++++++++++------------
 1 file changed, 26 insertions(+), 31 deletions(-)
diff --git a/plone/app/blocks/tests/test_transforms.py b/plone/app/blocks/tests/test_transforms.py
index a3a56bf9..467e94fa 100644
--- a/plone/app/blocks/tests/test_transforms.py
+++ b/plone/app/blocks/tests/test_transforms.py
@@ -10,41 +10,49 @@
 
 
 @implementer(IBlocksTransformEnabled)
-class TestTransformedView(object):
+class TransformedView(object):
 
     def __init__(self, ret_body):
-        self.__call__ = lambda b=ret_body: b
+        self.body = ret_body
 
+    def __call__(self):
+        return self.body
 
-class TestTransforms(unittest.TestCase):
 
+class BaseTestCase(unittest.TestCase):
     layer = BLOCKS_INTEGRATION_TESTING
 
+    def prepare_request(self, body=None):
+        if body is None:
+            body = """\
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>Empty</title></head>
+<body></body>
+</html>"""
+        request = self.layer['request']
+        request.set('PUBLISHED', TransformedView(body))
+        request.response.setBase(request.getURL())
+        request.response.setHeader('content-type', 'text/html')
+        request.response.setBody(body)
+        alsoProvides(request, IBlocksLayer)
+        return request
+
+
+class TestTransforms(BaseTestCase):
+
     def test_transforms_with_crlf(self):
         """Test fix for issue where layouts with CR[+LF] line-endings are
         somehow turned into having &#13; line-endings and getting their heads
         being dropped
         """
-
-        @implementer(IBlocksTransformEnabled)
-        class TransformedView(object):
-
-            def __init__(self, ret_body):
-                self.__call__ = lambda b=ret_body: b
-
         body = """\
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml">&#13;
 <head></head>&#13;
 <body></body>&#13;
 </html>"""
-        request = self.layer['request']
-        request.set('PUBLISHED', TransformedView(body))
-        request.response.setBase(request.getURL())
-        request.response.setHeader('content-type', 'text/html')
-        request.response.setBody(body)
-
-        alsoProvides(request, IBlocksLayer)
+        request = self.prepare_request(body)
         result = applyTransform(request)
         self.assertIn('<head>', ''.join(str(result)))
 
@@ -52,25 +60,12 @@ def test_transforms_with_cdata(self):
         """Test fix for issue where layouts with inline js got rendered with
         quoted (and therefore broken) <![CDATA[...]]> block
         """
-
-        @implementer(IBlocksTransformEnabled)
-        class TransformedView(object):
-
-            def __init__(self, ret_body):
-                self.__call__ = lambda b=ret_body: b
-
         body = """\
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head><script type="text/javascript"><![CDATA[]]></script></head>
 <body></body>
 </html>"""
-        request = self.layer['request']
-        request.set('PUBLISHED', TransformedView(body))
-        request.response.setBase(request.getURL())
-        request.response.setHeader('content-type', 'text/html')
-        request.response.setBody(body)
-
-        alsoProvides(request, IBlocksLayer)
+        request = self.prepare_request(body)
         result = applyTransform(request)
         self.assertIn('<![CDATA[]]>', ''.join(str(result)))

From 35e4ed2a5da58389e21adb18156b55d935e10db5 Mon Sep 17 00:00:00 2001
From: Maurits van Rees <maurits@vanrees.org>
Date: Thu, 6 May 2021 17:50:57 +0200
Subject: [PATCH 2/4] Nuke trailing white space in transform.py and fix
 indentation.

---
 plone/app/blocks/transform.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/plone/app/blocks/transform.py b/plone/app/blocks/transform.py
index e8fb14fe..094c880f 100644
--- a/plone/app/blocks/transform.py
+++ b/plone/app/blocks/transform.py
@@ -98,7 +98,7 @@ def transformIterable(self, result, encoding):
                     iterable, pretty_print=self.pretty_print, encoding=encoding)
                 # Fix XHTML layouts with where etree.tostring breaks <![CDATA[
                 if any(['<![CDATA[' in item for item in iterable]):
-                    result.serializer = html.tostring                
+                    result.serializer = html.tostring
             else:
                 iterable = [
                     re.sub('&#13;'.encode('utf-8'), '\n'.encode('utf-8'), re.sub('&#13;\n'.encode('utf-8'), '\n'.encode('utf-8'), item))  # noqa
@@ -108,12 +108,13 @@ def transformIterable(self, result, encoding):
                 # Fix XHTML layouts with where etree.tostring breaks <![CDATA[
                 if any([b'<![CDATA[' in item for item in iterable]):
                     result.serializer = html.tostring
-            
+
             self.request['plone.app.blocks.enabled'] = True
             return result
         except (AttributeError, TypeError, etree.ParseError) as e:
-             logger.error(e)
-             return None
+            logger.error(e)
+            return None
+
 
 @implementer(ITransform)
 class MergePanels(object):

From e47b9bbccc8b2e0c76a731e8a24ab9c5d88551a9 Mon Sep 17 00:00:00 2001
From: Maurits van Rees <maurits@vanrees.org>
Date: Thu, 6 May 2021 18:14:57 +0200
Subject: [PATCH 3/4] Added tests for the ParseXML class.

Especially we pass both bytes and unicode to the transformIterable method.
---
 plone/app/blocks/tests/test_transforms.py | 78 +++++++++++++++++++++++
 1 file changed, 78 insertions(+)

diff --git a/plone/app/blocks/tests/test_transforms.py b/plone/app/blocks/tests/test_transforms.py
index 467e94fa..4102f312 100644
--- a/plone/app/blocks/tests/test_transforms.py
+++ b/plone/app/blocks/tests/test_transforms.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 from plone.app.blocks.interfaces import IBlocksLayer
 from plone.app.blocks.interfaces import IBlocksTransformEnabled
+from plone.app.blocks.transform import ParseXML
 from plone.app.blocks.testing import BLOCKS_INTEGRATION_TESTING
 from plone.transformchain.zpublisher import applyTransform
 from zope.interface import alsoProvides
@@ -69,3 +70,80 @@ def test_transforms_with_cdata(self):
         request = self.prepare_request(body)
         result = applyTransform(request)
         self.assertIn('<![CDATA[]]>', ''.join(str(result)))
+
+
+class TestParseXML(BaseTestCase):
+    """Test our ParseXML transform with bytes and unicode input.
+
+    Things can go wrong when there is more than one item in the iterable,
+    especially when one item is bytes and the other is text.
+    Or at least that is a way that I can more or less reproduce some problems.
+    See https://github.com/plone/plone.app.mosaic/issues/480
+
+    The test methods in this class cover various combinations.
+    """
+
+    def test_transformBytes_method(self):
+        one = b"<p>one</p>"
+        request = self.prepare_request()
+        parser = ParseXML(request.get("PUBLISHED"), request)
+        result = parser.transformBytes(one, encoding="utf-8")
+        html = result.serialize()
+        self.assertIn(one, html)
+
+    def test_transformUnicode_method(self):
+        one = b"<p>one</p>"
+        request = self.prepare_request()
+        parser = ParseXML(request.get("PUBLISHED"), request)
+        result = parser.transformUnicode(one.decode("utf-8"), encoding="utf-8")
+        html = result.serialize()
+        self.assertIn(one, html)
+
+    # The rest of the tests use the transformIterable method.
+    # We use a helper method 'transform' to make this easier.
+
+    def transform(self, iterable):
+        request = self.prepare_request()
+        parser = ParseXML(request.get("PUBLISHED"), request)
+        result = parser.transformIterable(iterable, encoding="utf-8")
+        return result.serialize()
+
+    def test_transform_one_byte(self):
+        one = b"<p>one</p>"
+        html = self.transform([one])
+        self.assertIn(one, html)
+
+    def test_transform_one_unicode(self):
+        one = b"<p>one</p>"
+        # Note: decoding creates a unicode (string on PY3).
+        html = self.transform([one.decode("utf-8")])
+        # Note: the html result is always bytes, so we must compare with bytes.
+        self.assertIn(one, html)
+
+    def test_transform_two_bytes(self):
+        one = b"<p>one</p>"
+        two = b"<p>two</p>"
+        html = self.transform([one, two])
+        self.assertIn(one, html)
+        self.assertIn(two, html)
+
+    def test_transform_two_unicodes(self):
+        one = b"<p>one</p>"
+        two = b"<p>two</p>"
+        html = self.transform([one.decode("utf-8"), two.decode("utf-8")])
+        self.assertIn(one, html)
+        self.assertIn(two, html)
+
+    def test_transform_byte_unicode(self):
+        one = b"<p>one</p>"
+        two = b"<p>two</p>"
+        html = self.transform([one, two.decode("utf-8")])
+        self.assertIn(one, html)
+        self.assertIn(two, html)
+
+    def test_transform_unicode_byte(self):
+        one = b"<p>one</p>"
+        two = b"<p>two</p>"
+        html = self.transform([one.decode("utf-8"), two])
+        self.assertIn(one, html)
+        self.assertIn(two, html)

From 82b6a6d29235154a5435595bc6597c9ea77cd865 Mon Sep 17 00:00:00 2001
From: Maurits van Rees <maurits@vanrees.org>
Date: Thu, 6 May 2021 18:17:59 +0200
Subject: [PATCH 4/4] Fixed parsing of unicodes in ParseXML.transformIterable.

Especially fix parsing combinations of bytes and unicode.
I think this improves the fix from https://github.com/plone/plone.app.blocks/pull/85
which fixed https://github.com/plone/plone.app.mosaic/issues/480
---
 plone/app/blocks/transform.py | 39 ++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/plone/app/blocks/transform.py b/plone/app/blocks/transform.py
index 094c880f..178e1bed 100644
--- a/plone/app/blocks/transform.py
+++ b/plone/app/blocks/transform.py
@@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-from Products.CMFPlone.utils import safe_unicode
 from lxml import etree
 from lxml import html
 from plone.app.blocks import panel
@@ -15,6 +14,13 @@
 import re
 import logging
 
+try:
+    # Plone 5.2+
+    from Products.CMFPlone.utils import safe_bytes
+except ImportError:
+    # BBB for Plone 5.1 and lower
+    from Products.CMFPlone.utils import safe_encode as safe_bytes
+
 
 logger = logging.getLogger(__name__)
 
@@ -66,7 +72,6 @@ def __init__(self, published, request):
         self.request = request
 
     def transformBytes(self, result, encoding):
-        result = safe_unicode(result, encoding)
         return self.transformIterable([result], encoding)
 
     def transformUnicode(self, result, encoding):
@@ -89,25 +94,17 @@ def transformIterable(self, result, encoding):
         try:
             # Fix layouts with CR[+LF] line endings not to lose their heads
             # (this has been seen with downloaded themes with CR[+LF] endings)
-
-            if six.PY2:
-                iterable = [
-                    re.sub('&#13;', '\n', re.sub('&#13;\n', '\n', safe_unicode(item)))  # noqa
-                    for item in result if item]
-                result = getHTMLSerializer(
-                    iterable, pretty_print=self.pretty_print, encoding=encoding)
-                # Fix XHTML layouts with where etree.tostring breaks <![CDATA[
-                if any(['<![CDATA[' in item for item in iterable]):
-                    result.serializer = html.tostring
-            else:
-                iterable = [
-                    re.sub('&#13;'.encode('utf-8'), '\n'.encode('utf-8'), re.sub('&#13;\n'.encode('utf-8'), '\n'.encode('utf-8'), item))  # noqa
-                    for item in result if item]
-                result = getHTMLSerializer(
-                    iterable, pretty_print=self.pretty_print, encoding=encoding)
-                # Fix XHTML layouts with where etree.tostring breaks <![CDATA[
-                if any([b'<![CDATA[' in item for item in iterable]):
-                    result.serializer = html.tostring
+            # The html serializer much prefers only bytes, no unicode/text,
+            # and it returns a serializer that returns bytes.
+            # So we start with ensuring all items in the iterable are bytes.
+            iterable = [
+                re.sub(b'&#13;', b'\n', re.sub(b'&#13;\n', b'\n', safe_bytes(item)))
+                for item in result if item]
+            result = getHTMLSerializer(
+                iterable, pretty_print=self.pretty_print, encoding=encoding)
+            # Fix XHTML layouts where etree.tostring breaks <![CDATA[
+            if any([b'<![CDATA[' in item for item in iterable]):
+                result.serializer = html.tostring
 
             self.request['plone.app.blocks.enabled'] = True
             return result