diff --git a/Lib/email/generator.py b/Lib/email/generator.py
index ae670c2353c858..37310de13d7424 100644
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@ -403,7 +403,15 @@ class BytesGenerator(Generator):
     """
 
     def write(self, s):
-        self._fp.write(s.encode('ascii', 'surrogateescape'))
+        # 'surrogateescape' turns escaped surrogates back into their original
+        # bytes, but raises for any other non-ASCII character.  In that case
+        # re-encode the whole string with 'replace' (non-ASCII becomes b'?')
+        # rather than propagating UnicodeEncodeError to the caller.
+        try:
+            s = s.encode('ascii', 'surrogateescape')
+        except UnicodeEncodeError:
+            s = s.encode('ascii', 'replace')
+        self._fp.write(s)
 
     def _new_buffer(self):
         return BytesIO()
diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py
index 89e7edeb63a892..d3ddda371d6d97 100644
--- a/Lib/test/test_email/test_generator.py
+++ b/Lib/test/test_email/test_generator.py
@@ -271,6 +271,21 @@ def test_cte_type_7bit_transforms_8bit_cte(self):
         g.flatten(msg)
         self.assertEqual(s.getvalue(), expected)
 
+    def test_flatten_charset_utf8_with_nonascii(self):
+        source = textwrap.dedent("""\
+            Subject: Defective email
+            Content-Type: text/plain; charset=utf-8
+            Content-Transfer-Encoding: 8bit
+
+            I think thatâ**s the way to go.
+            """)
+        expected = source.encode('ascii', 'replace')
+        msg = message_from_string(source)
+        s = io.BytesIO()
+        g = BytesGenerator(s)
+        g.flatten(msg)
+        self.assertEqual(s.getvalue(), expected)
+
     def test_smtputf8_policy(self):
         msg = EmailMessage()
         msg['From'] = "Páolo "
diff --git a/Misc/NEWS.d/next/Library/2020-01-18-23-07-16.bpo-39384.jI9ged.rst b/Misc/NEWS.d/next/Library/2020-01-18-23-07-16.bpo-39384.jI9ged.rst
new file mode 100644
index 00000000000000..d7c609ba597bb5
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-01-18-23-07-16.bpo-39384.jI9ged.rst
@@ -0,0 +1,2 @@
+Fixed an issue in :meth:`email.generator.BytesGenerator.flatten` which would
+raise :exc:`UnicodeEncodeError` for a message with a non-ASCII body.