Recursively clean up multipart payloads for encoding.

Should fix Pylons/pyramid_mailer#16.
repoze · May 3, 2012 · fa8d66d · fa8d66d
1 parent 4821855
commit fa8d66d
Show file tree

Hide file tree

Showing 2 changed files with 54 additions and 3 deletions.
diff --git a/repoze/sendmail/encoding.py b/repoze/sendmail/encoding.py
@@ -21,10 +21,10 @@
                  'content-disposition')
 
 
-def encode_message(message,
+def cleanup_message(message,
                    addr_headers=ADDR_HEADERS, param_headers=PARAM_HEADERS):
     """
-    Encode a `Message` handling headers and payloads.
+    Cleanup a `Message` handling header and payload charsets.
 
     Headers are handled in the most sane way possible.  Address names
     are left in `ascii` if possible or encoded to `latin_1` or `utf-8`
@@ -36,7 +36,8 @@ def encode_message(message,
     encoding.  Finally, all other header are left in `ascii` if
     possible or encoded to `latin_1` or `utf-8` as a whole.
 
-    The return is a byte string of the whole message.
+    The message is modified in place and is also returned in such a
+    state that it can be safely encoded to ascii.
     """
     for key, value in message.items():
         if key.lower() in addr_headers:
@@ -72,7 +73,31 @@ def encode_message(message,
         if PY_2:
             payload = encoded
         message.set_payload(payload, charset=best)
+    elif isinstance(payload, list):
+        for part in payload:
+            cleanup_message(part)
+
+    return message
+
+
+def encode_message(message,
+                   addr_headers=ADDR_HEADERS, param_headers=PARAM_HEADERS):
+    """
+    Encode a `Message` handling headers and payloads.
+
+    The message is first cleaned up in place by `cleanup_message`,
+    which receives `addr_headers` and `param_headers` to decide how
+    each header is treated.  Address names are left in `ascii` if
+    possible or encoded to `latin_1` or `utf-8` and then encoded
+    according to RFC 2047 without encoding the address itself.
+    Parameterized headers such as `Content-Disposition` have their
+    parameter values encoded properly.  All other headers are left in
+    `ascii` if possible or encoded to `latin_1` or `utf-8` as a whole.
 
+    The return is a byte string of the whole message.
+    """
+    cleanup_message(message, addr_headers, param_headers)
     return message.as_string().encode('ascii')
 
 

diff --git a/repoze/sendmail/tests/test_encoding.py b/repoze/sendmail/tests/test_encoding.py
@@ -207,3 +207,29 @@ def test_binary_body(self):
         encoded = self._callFUT(message)
 
         self.assertTrue(encodestring(body) in encoded)
+
+    def test_encoding_multipart(self):
+        from email.mime import multipart
+        from email.mime import nonmultipart
+        from repoze.sendmail._compat import encodestring
+        from repoze.sendmail._compat import b
+
+        message = multipart.MIMEMultipart('alternative')
+
+        utf_8_encoded = b('mo \xe2\x82\xac')
+        utf_8 = utf_8_encoded.decode('utf_8')
+        # text/plain alternative with a non-ascii payload and no charset set
+        plain_string = utf_8
+        plain_part = nonmultipart.MIMENonMultipart('text', 'plain')
+        plain_part.set_payload(plain_string)
+        message.attach(plain_part)
+        # text/html alternative wrapping the same non-ascii text
+        html_string = '<p>'+utf_8+'</p>'
+        html_part = nonmultipart.MIMENonMultipart('text', 'html')
+        html_part.set_payload(html_string)
+        message.attach(html_part)
+
+        encoded = self._callFUT(message)
+        # each part must appear in the output in its `encodestring` form
+        self.assertTrue(encodestring(plain_string.encode('utf_8')) in encoded)
+        self.assertTrue(encodestring(html_string.encode('utf_8')) in encoded)