Skip to content

Commit

Permalink
Recursively cleanup multipart payloads for encoding.
Browse files Browse the repository at this point in the history
  • Loading branch information
rpatterson committed May 3, 2012
1 parent 4821855 commit fa8d66d
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 3 deletions.
31 changes: 28 additions & 3 deletions repoze/sendmail/encoding.py
Expand Up @@ -21,10 +21,10 @@
'content-disposition')


def encode_message(message,
def cleanup_message(message,
addr_headers=ADDR_HEADERS, param_headers=PARAM_HEADERS):
"""
Encode a `Message` handling headers and payloads.
Cleanup a `Message` handling header and payload charsets.
Headers are handled in the most sane way possible. Address names
are left in `ascii` if possible or encoded to `latin_1` or `utf-8`
Expand All @@ -36,7 +36,8 @@ def encode_message(message,
encoding. Finally, all other header are left in `ascii` if
possible or encoded to `latin_1` or `utf-8` as a whole.
The return is a byte string of the whole message.
The message is modified in place and is also returned in such a
state that it can be safely encoded to ascii.
"""
for key, value in message.items():
if key.lower() in addr_headers:
Expand Down Expand Up @@ -72,7 +73,31 @@ def encode_message(message,
if PY_2:
payload = encoded
message.set_payload(payload, charset=best)
elif isinstance(payload, list):
for part in payload:
cleanup_message(part)

return message


def encode_message(message,
                   addr_headers=ADDR_HEADERS, param_headers=PARAM_HEADERS):
    """
    Encode a `Message` handling headers and payloads.

    Headers are handled in the most sane way possible.  Address names
    are left in `ascii` if possible or encoded to `latin_1` or `utf-8`
    and finally encoded according to RFC 2047 without encoding the
    address, something the `email` stdlib package doesn't do.
    Parameterized headers such as `filename` in the
    `Content-Disposition` header, have their values encoded properly
    while leaving the rest of the header to be handled without
    encoding.  Finally, all other headers are left in `ascii` if
    possible or encoded to `latin_1` or `utf-8` as a whole.

    `addr_headers` and `param_headers` select which header names get
    the address and parameterized treatment respectively; both are
    forwarded unchanged to `cleanup_message`.

    The message is cleaned up by `cleanup_message` (which modifies it
    in place) before being serialized.

    The return is a byte string of the whole message.
    """
    # Forward the caller's header sets; calling cleanup_message(message)
    # alone would silently fall back to the module defaults.
    cleanup_message(
        message, addr_headers=addr_headers, param_headers=param_headers)
    # After cleanup the serialized message is expected to be pure ascii.
    return message.as_string().encode('ascii')


Expand Down
26 changes: 26 additions & 0 deletions repoze/sendmail/tests/test_encoding.py
Expand Up @@ -207,3 +207,29 @@ def test_binary_body(self):
encoded = self._callFUT(message)

self.assertTrue(encodestring(body) in encoded)

def test_encoding_multipart(self):
    """
    A multipart message has each of its non-ascii parts encoded.

    Builds a `multipart/alternative` message with a plain text part
    and an HTML part, both containing a non-ascii character, and
    checks that the encoded form of each payload appears in the
    output of the function under test.
    """
    from email.mime import multipart
    from email.mime import nonmultipart
    from repoze.sendmail._compat import encodestring
    from repoze.sendmail._compat import b

    message = multipart.MIMEMultipart('alternative')

    # 'mo ' followed by the UTF-8 byte sequence for U+20AC (euro sign).
    utf_8_encoded = b('mo \xe2\x82\xac')
    utf_8 = utf_8_encoded.decode('utf_8')

    plain_string = utf_8
    # Main type must be 'text' so the part is text/plain (the original
    # 'plain', 'plain' produced the invalid content type plain/plain).
    plain_part = nonmultipart.MIMENonMultipart('text', 'plain')
    plain_part.set_payload(plain_string)
    message.attach(plain_part)

    html_string = '<p>'+utf_8+'</p>'
    html_part = nonmultipart.MIMENonMultipart('text', 'html')
    html_part.set_payload(html_string)
    message.attach(html_part)

    encoded = self._callFUT(message)

    # Each sub-part's payload must show up in its transfer-encoded form
    # (presumably base64 via the _compat encodestring helper -- confirm).
    self.assertTrue(encodestring(plain_string.encode('utf_8')) in encoded)
    self.assertTrue(encodestring(html_string.encode('utf_8')) in encoded)

0 comments on commit fa8d66d

Please sign in to comment.