Skip to content

Commit

Permalink
pythongh-76511: Fix email.Message.as_string() for non-ASCII message w…
Browse files Browse the repository at this point in the history
…ith ASCII charset (pythonGH-116125)
  • Loading branch information
serhiy-storchaka committed Mar 5, 2024
1 parent df59401 commit f97f25e
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 2 deletions.
2 changes: 1 addition & 1 deletion Lib/email/generator.py
Expand Up @@ -243,7 +243,7 @@ def _handle_text(self, msg):
# existing message.
msg = deepcopy(msg)
del msg['content-transfer-encoding']
msg.set_payload(payload, charset)
msg.set_payload(msg._payload, charset)
payload = msg.get_payload()
self._munge_cte = (msg['content-transfer-encoding'],
msg['content-type'])
Expand Down
2 changes: 1 addition & 1 deletion Lib/email/message.py
Expand Up @@ -340,7 +340,7 @@ def set_payload(self, payload, charset=None):
return
if not isinstance(charset, Charset):
charset = Charset(charset)
payload = payload.encode(charset.output_charset)
payload = payload.encode(charset.output_charset, 'surrogateescape')
if hasattr(payload, 'decode'):
self._payload = payload.decode('ascii', 'surrogateescape')
else:
Expand Down
15 changes: 15 additions & 0 deletions Lib/test/test_email/test_email.py
Expand Up @@ -337,6 +337,21 @@ def test_nonascii_as_string_without_cte(self):
msg = email.message_from_bytes(source)
self.assertEqual(msg.as_string(), expected)

def test_nonascii_as_string_with_ascii_charset(self):
m = textwrap.dedent("""\
MIME-Version: 1.0
Content-type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 8bit
Test if non-ascii messages with no Content-Transfer-Encoding set
can be as_string'd:
Föö bär
""")
source = m.encode('iso-8859-1')
expected = source.decode('ascii', 'replace')
msg = email.message_from_bytes(source)
self.assertEqual(msg.as_string(), expected)

def test_nonascii_as_string_without_content_type_and_cte(self):
m = textwrap.dedent("""\
MIME-Version: 1.0
Expand Down
@@ -0,0 +1,4 @@
Fix UnicodeEncodeError in :meth:`email.message.Message.as_string` that results
when a message that claims to be in the ASCII character set actually contains
non-ASCII characters. Non-ASCII characters are now replaced with the U+FFFD
replacement character, like in the ``replace`` error handler.

0 comments on commit f97f25e

Please sign in to comment.