From b9f52881832c74c004f4a162cda9623d07b69e10 Mon Sep 17 00:00:00 2001 From: Jeffrey Kintscher Date: Mon, 27 May 2019 19:19:04 -0700 Subject: [PATCH 1/5] bpo-36520: reset the encoded word offset when starting a new line during an email header folding operation --- Lib/email/_header_value_parser.py | 1 + Lib/test/test_email/test_message.py | 74 +++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 649f1539fa02ab..18bab90165489e 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2661,6 +2661,7 @@ def _refold_parse_tree(parse_tree, *, policy): newline = _steal_trailing_WSP_if_exists(lines) if newline or part.startswith_fws(): lines.append(newline + tstr) + last_ew = None continue if not hasattr(part, 'encode'): # It's not a terminal, try folding the subparts. diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py index f3a57df9e9cfd0..cf752ffe264a91 100644 --- a/Lib/test/test_email/test_message.py +++ b/Lib/test/test_email/test_message.py @@ -784,6 +784,80 @@ def test_str_defaults_to_utf8(self): m['Subject'] = 'unicöde' self.assertEqual(str(m), 'Subject: unicöde\n\n') + def test_folding_with_utf8_encoding_1(self): + # issue #36520 + m = EmailMessage() + m['Subject'] = 'Hello Wörld! Hello Wörld! '\ + 'Hello Wörld! Hello Wörld!Hello Wörld!' + self.assertEqual(bytes(m), \ + b'Subject: Hello =?utf-8?q?W=C3=B6rld!_Hello_W'\ + b'=C3=B6rld!_Hello_W=C3=B6rld!?=\n'\ + b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n') + + + def test_folding_with_utf8_encoding_2(self): + # issue #36520 + m = EmailMessage() + m['Subject'] = 'Hello Wörld! Hello Wörld! '\ + 'Hello Wörlds123! Hello Wörld!Hello Wörld!' + self.assertEqual(bytes(m), \ + b'Subject: Hello =?utf-8?q?W=C3=B6rld!_Hello_W'\ + b'=C3=B6rld!_Hello_W=C3=B6rlds123!?=\n'\ + b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n') + + def test_folding_with_utf8_encoding_3(self): + # issue #36520 + m = EmailMessage() + m['Subject'] = 'Hello-Wörld!-Hello-Wörld!-Hello-Wörlds123! '\ + 'Hello Wörld!Hello Wörld!' + self.assertEqual(bytes(m), \ + b'Subject: =?utf-8?q?Hello-W=C3=B6rld!-Hello-W'\ + b'=C3=B6rld!-Hello-W=C3=B6rlds123!?=\n'\ + b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n') + + def test_folding_with_utf8_encoding_4(self): + # issue #36520 + m = EmailMessage() + m['Subject'] = 'Hello-Wörld!-Hello-Wörld!-Hello-Wörlds123!-Hello'\ + ' Wörld!Hello Wörld!' + self.assertEqual(bytes(m), \ + b'Subject: =?utf-8?q?Hello-W=C3=B6rld!-Hello-W'\ + b'=C3=B6rld!-Hello-W=C3=B6rlds123!?=\n'\ + b' =?utf-8?q?-Hello_W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n') + + def test_folding_with_utf8_encoding_5(self): + # issue #36520 + m = EmailMessage() + m['Subject'] = '123456789 123456789 123456789 123456789 123456789'\ + ' 123456789 123456789 Hello Wörld!' + self.assertEqual(bytes(m), \ + b'Subject: 123456789 123456789 123456789 123456789'\ + b' 123456789 123456789 123456789\n'\ + b' Hello =?utf-8?q?W=C3=B6rld!?=\n\n') + + def test_folding_with_utf8_encoding_6(self): + # issue #36520 + m = EmailMessage() + m['Subject'] = '123456789 123456789 123456789 123456789 Hello Wörld!'\ + ' 123456789 123456789 123456789 123456789 123456789'\ + ' 123456789' + self.assertEqual(bytes(m), \ + b'Subject: 123456789 123456789 123456789 123456789'\ + b' Hello =?utf-8?q?W=C3=B6rld!?=\n 123456789 '\ + b'123456789 123456789 123456789 123456789 '\ + b'123456789\n\n') + + def test_folding_with_utf8_encoding_7(self): + # issue #36520 + m = EmailMessage() + m['Subject'] = '123456789 123456789 Hello Wörld! Hello Wörld! '\ + '123456789-123456789 123456789 Hello Wörld! 123456789'\ + ' 123456789' + self.assertEqual(bytes(m), \ + b'Subject: 123456789 123456789 Hello =?utf-8?q?'\ + b'W=C3=B6rld!_Hello_W=C3=B6rld!?=\n'\ + b' 123456789-123456789 123456789 Hello '\ + b'=?utf-8?q?W=C3=B6rld!?= 123456789\n 123456789\n\n') class TestMIMEPart(TestEmailMessageBase, TestEmailBase): # Doing the full test run here may seem a bit redundant, since the two From d989f75a2b53cfe1b96c6f118da848d757e91ddf Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" Date: Tue, 28 May 2019 02:37:01 +0000 Subject: [PATCH 2/5] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../NEWS.d/next/Library/2019-05-28-02-37-00.bpo-36520.W4tday.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2019-05-28-02-37-00.bpo-36520.W4tday.rst diff --git a/Misc/NEWS.d/next/Library/2019-05-28-02-37-00.bpo-36520.W4tday.rst b/Misc/NEWS.d/next/Library/2019-05-28-02-37-00.bpo-36520.W4tday.rst new file mode 100644 index 00000000000000..28eee259682295 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-05-28-02-37-00.bpo-36520.W4tday.rst @@ -0,0 +1 @@ +Lengthy email headers with UTF-8 characters are now properly encoded when they are folded. \ No newline at end of file From d4e969ba8a0c156e84ec2a297d7d98b3310f4cbb Mon Sep 17 00:00:00 2001 From: Jeffrey Kintscher Date: Wed, 29 May 2019 16:46:31 -0700 Subject: [PATCH 3/5] bpo-36520: add an additional test case, and provide descriptive comments for the test_folding_with_utf8_encoding_* tests --- Lib/test/test_email/test_message.py | 57 +++++++++++++++++++ .../2019-05-28-02-37-00.bpo-36520.W4tday.rst | 2 +- 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py index cf752ffe264a91..b8d35bc827eac8 100644 --- a/Lib/test/test_email/test_message.py +++ b/Lib/test/test_email/test_message.py @@ -786,6 +786,12 @@ def test_str_defaults_to_utf8(self): def test_folding_with_utf8_encoding_1(self): # issue #36520 + # + # Fold a line that contains UTF-8 words before + # and after the whitespace fold point, where the + # line length limit is reached within an ASCII + # word. + m = EmailMessage() m['Subject'] = 'Hello Wörld! Hello Wörld! '\ 'Hello Wörld! Hello Wörld!Hello Wörld!' @@ -797,6 +803,12 @@ def test_folding_with_utf8_encoding_1(self): def test_folding_with_utf8_encoding_2(self): # issue #36520 + # + # Fold a line that contains UTF-8 words before + # and after the whitespace fold point, where the + # line length limit is reached at the end of an + # encoded word. + m = EmailMessage() m['Subject'] = 'Hello Wörld! Hello Wörld! '\ 'Hello Wörlds123! Hello Wörld!Hello Wörld!' @@ -807,6 +819,12 @@ def test_folding_with_utf8_encoding_2(self): def test_folding_with_utf8_encoding_3(self): # issue #36520 + # + # Fold a line that contains UTF-8 words before + # and after the whitespace fold point, where the + # line length limit is reached at the end of the + # first word. + m = EmailMessage() m['Subject'] = 'Hello-Wörld!-Hello-Wörld!-Hello-Wörlds123! '\ 'Hello Wörld!Hello Wörld!' @@ -817,6 +835,12 @@ def test_folding_with_utf8_encoding_3(self): def test_folding_with_utf8_encoding_4(self): # issue #36520 + # + # Fold a line that contains UTF-8 words before + # and after the fold point, where the first + # word is UTF-8 and the fold point is within + # the word. + m = EmailMessage() m['Subject'] = 'Hello-Wörld!-Hello-Wörld!-Hello-Wörlds123!-Hello'\ ' Wörld!Hello Wörld!' @@ -827,6 +851,10 @@ def test_folding_with_utf8_encoding_4(self): def test_folding_with_utf8_encoding_5(self): # issue #36520 + # + # Fold a line that contains a UTF-8 word after + # the fold point. + m = EmailMessage() m['Subject'] = '123456789 123456789 123456789 123456789 123456789'\ ' 123456789 123456789 Hello Wörld!' @@ -837,6 +865,10 @@ def test_folding_with_utf8_encoding_5(self): def test_folding_with_utf8_encoding_6(self): # issue #36520 + # + # Fold a line that contains a UTF-8 word before + # the fold point and ASCII words after + m = EmailMessage() m['Subject'] = '123456789 123456789 123456789 123456789 Hello Wörld!'\ ' 123456789 123456789 123456789 123456789 123456789'\ @@ -849,6 +881,11 @@ def test_folding_with_utf8_encoding_6(self): def test_folding_with_utf8_encoding_7(self): # issue #36520 + # + # Fold a line twice that contains UTF-8 words before + # and after the first fold point, and ASCII words + # after the second fold point. + m = EmailMessage() m['Subject'] = '123456789 123456789 Hello Wörld! Hello Wörld! '\ '123456789-123456789 123456789 Hello Wörld! 123456789'\ @@ -859,6 +896,26 @@ def test_folding_with_utf8_encoding_7(self): b' 123456789-123456789 123456789 Hello '\ b'=?utf-8?q?W=C3=B6rld!?= 123456789\n 123456789\n\n') + def test_folding_with_utf8_encoding_8(self): + # issue #36520 + # + # Fold a line twice that contains UTF-8 words before + # the first fold point, and ASCII words after the + # first fold point, and UTF-8 words after the second + # fold point. + + m = EmailMessage() + m['Subject'] = '123456789 123456789 Hello Wörld! Hello Wörld! '\ + '123456789 123456789 123456789 123456789 123456789 '\ + '123456789-123456789 123456789 Hello Wörld! 123456789'\ + ' 123456789' + self.assertEqual(bytes(m), \ + b'Subject: 123456789 123456789 Hello '\ + b'=?utf-8?q?W=C3=B6rld!_Hello_W=C3=B6rld!?=\n 123456789 '\ + b'123456789 123456789 123456789 123456789 '\ + b'123456789-123456789\n 123456789 Hello '\ + b'=?utf-8?q?W=C3=B6rld!?= 123456789 123456789\n\n') + class TestMIMEPart(TestEmailMessageBase, TestEmailBase): # Doing the full test run here may seem a bit redundant, since the two # classes are almost identical. But what if they drift apart? So we do diff --git a/Misc/NEWS.d/next/Library/2019-05-28-02-37-00.bpo-36520.W4tday.rst b/Misc/NEWS.d/next/Library/2019-05-28-02-37-00.bpo-36520.W4tday.rst index 28eee259682295..8171bfe9e2df12 100644 --- a/Misc/NEWS.d/next/Library/2019-05-28-02-37-00.bpo-36520.W4tday.rst +++ b/Misc/NEWS.d/next/Library/2019-05-28-02-37-00.bpo-36520.W4tday.rst @@ -1 +1 @@ -Lengthy email headers with UTF-8 characters are now properly encoded when they are folded. \ No newline at end of file +Lengthy email headers with UTF-8 characters are now properly encoded when they are folded. Patch by Jeffrey Kintscher. From a34251995d2d80e8c164a25d5cb8eca95304c977 Mon Sep 17 00:00:00 2001 From: Jeffrey Kintscher Date: Wed, 29 May 2019 17:17:49 -0700 Subject: [PATCH 4/5] bpo-36520: fix whitespace issue --- Lib/test/test_email/test_message.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py index b8d35bc827eac8..b2d5541aa5ff8f 100644 --- a/Lib/test/test_email/test_message.py +++ b/Lib/test/test_email/test_message.py @@ -903,7 +903,7 @@ def test_folding_with_utf8_encoding_8(self): # the first fold point, and ASCII words after the # first fold point, and UTF-8 words after the second # fold point. - + m = EmailMessage() m['Subject'] = '123456789 123456789 Hello Wörld! Hello Wörld! '\ '123456789 123456789 123456789 123456789 123456789 '\ From 8133eeb2cb7a7b3822cb5d9585a08a1b7a77ca1a Mon Sep 17 00:00:00 2001 From: Jeffrey Kintscher Date: Tue, 4 Jun 2019 14:57:49 -0700 Subject: [PATCH 5/5] bpo-36520: changes per reviewer request -- remove extraneous backslashes; add whitespace between terminating quotes and line-continuation backslashes; use "bpo-" instead of "issue #" in comments --- Lib/test/test_email/test_message.py | 94 ++++++++++++++--------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py index b2d5541aa5ff8f..5dc46e1b812c5d 100644 --- a/Lib/test/test_email/test_message.py +++ b/Lib/test/test_email/test_message.py @@ -785,7 +785,7 @@ def test_str_defaults_to_utf8(self): self.assertEqual(str(m), 'Subject: unicöde\n\n') def test_folding_with_utf8_encoding_1(self): - # issue #36520 + # bpo-36520 # # Fold a line that contains UTF-8 words before # and after the whitespace fold point, where the @@ -793,16 +793,16 @@ def test_folding_with_utf8_encoding_1(self): # word. m = EmailMessage() - m['Subject'] = 'Hello Wörld! Hello Wörld! '\ + m['Subject'] = 'Hello Wörld! Hello Wörld! ' \ 'Hello Wörld! Hello Wörld!Hello Wörld!' - self.assertEqual(bytes(m), \ - b'Subject: Hello =?utf-8?q?W=C3=B6rld!_Hello_W'\ - b'=C3=B6rld!_Hello_W=C3=B6rld!?=\n'\ + self.assertEqual(bytes(m), + b'Subject: Hello =?utf-8?q?W=C3=B6rld!_Hello_W' + b'=C3=B6rld!_Hello_W=C3=B6rld!?=\n' b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n') def test_folding_with_utf8_encoding_2(self): - # issue #36520 + # bpo-36520 # # Fold a line that contains UTF-8 words before # and after the whitespace fold point, where the @@ -810,15 +810,15 @@ def test_folding_with_utf8_encoding_2(self): # encoded word. m = EmailMessage() - m['Subject'] = 'Hello Wörld! Hello Wörld! '\ + m['Subject'] = 'Hello Wörld! Hello Wörld! ' \ 'Hello Wörlds123! Hello Wörld!Hello Wörld!' - self.assertEqual(bytes(m), \ - b'Subject: Hello =?utf-8?q?W=C3=B6rld!_Hello_W'\ - b'=C3=B6rld!_Hello_W=C3=B6rlds123!?=\n'\ + self.assertEqual(bytes(m), + b'Subject: Hello =?utf-8?q?W=C3=B6rld!_Hello_W' + b'=C3=B6rld!_Hello_W=C3=B6rlds123!?=\n' b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n') def test_folding_with_utf8_encoding_3(self): - # issue #36520 + # bpo-36520 # # Fold a line that contains UTF-8 words before # and after the whitespace fold point, where the @@ -826,15 +826,15 @@ def test_folding_with_utf8_encoding_3(self): # first word. m = EmailMessage() - m['Subject'] = 'Hello-Wörld!-Hello-Wörld!-Hello-Wörlds123! '\ + m['Subject'] = 'Hello-Wörld!-Hello-Wörld!-Hello-Wörlds123! ' \ 'Hello Wörld!Hello Wörld!' self.assertEqual(bytes(m), \ - b'Subject: =?utf-8?q?Hello-W=C3=B6rld!-Hello-W'\ - b'=C3=B6rld!-Hello-W=C3=B6rlds123!?=\n'\ + b'Subject: =?utf-8?q?Hello-W=C3=B6rld!-Hello-W' + b'=C3=B6rld!-Hello-W=C3=B6rlds123!?=\n' b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n') def test_folding_with_utf8_encoding_4(self): - # issue #36520 + # bpo-36520 # # Fold a line that contains UTF-8 words before # and after the fold point, where the first @@ -842,62 +842,62 @@ def test_folding_with_utf8_encoding_4(self): # the word. m = EmailMessage() - m['Subject'] = 'Hello-Wörld!-Hello-Wörld!-Hello-Wörlds123!-Hello'\ + m['Subject'] = 'Hello-Wörld!-Hello-Wörld!-Hello-Wörlds123!-Hello' \ ' Wörld!Hello Wörld!' - self.assertEqual(bytes(m), \ - b'Subject: =?utf-8?q?Hello-W=C3=B6rld!-Hello-W'\ - b'=C3=B6rld!-Hello-W=C3=B6rlds123!?=\n'\ + self.assertEqual(bytes(m), + b'Subject: =?utf-8?q?Hello-W=C3=B6rld!-Hello-W' + b'=C3=B6rld!-Hello-W=C3=B6rlds123!?=\n' b' =?utf-8?q?-Hello_W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n') def test_folding_with_utf8_encoding_5(self): - # issue #36520 + # bpo-36520 # # Fold a line that contains a UTF-8 word after # the fold point. m = EmailMessage() - m['Subject'] = '123456789 123456789 123456789 123456789 123456789'\ + m['Subject'] = '123456789 123456789 123456789 123456789 123456789' \ ' 123456789 123456789 Hello Wörld!' - self.assertEqual(bytes(m), \ - b'Subject: 123456789 123456789 123456789 123456789'\ - b' 123456789 123456789 123456789\n'\ + self.assertEqual(bytes(m), + b'Subject: 123456789 123456789 123456789 123456789' + b' 123456789 123456789 123456789\n' b' Hello =?utf-8?q?W=C3=B6rld!?=\n\n') def test_folding_with_utf8_encoding_6(self): - # issue #36520 + # bpo-36520 # # Fold a line that contains a UTF-8 word before # the fold point and ASCII words after m = EmailMessage() - m['Subject'] = '123456789 123456789 123456789 123456789 Hello Wörld!'\ - ' 123456789 123456789 123456789 123456789 123456789'\ + m['Subject'] = '123456789 123456789 123456789 123456789 Hello Wörld!' \ + ' 123456789 123456789 123456789 123456789 123456789' \ ' 123456789' - self.assertEqual(bytes(m), \ - b'Subject: 123456789 123456789 123456789 123456789'\ - b' Hello =?utf-8?q?W=C3=B6rld!?=\n 123456789 '\ - b'123456789 123456789 123456789 123456789 '\ + self.assertEqual(bytes(m), + b'Subject: 123456789 123456789 123456789 123456789' + b' Hello =?utf-8?q?W=C3=B6rld!?=\n 123456789 ' + b'123456789 123456789 123456789 123456789 ' b'123456789\n\n') def test_folding_with_utf8_encoding_7(self): - # issue #36520 + # bpo-36520 # # Fold a line twice that contains UTF-8 words before # and after the first fold point, and ASCII words # after the second fold point. m = EmailMessage() - m['Subject'] = '123456789 123456789 Hello Wörld! Hello Wörld! '\ - '123456789-123456789 123456789 Hello Wörld! 123456789'\ + m['Subject'] = '123456789 123456789 Hello Wörld! Hello Wörld! ' \ + '123456789-123456789 123456789 Hello Wörld! 123456789' \ ' 123456789' - self.assertEqual(bytes(m), \ - b'Subject: 123456789 123456789 Hello =?utf-8?q?'\ - b'W=C3=B6rld!_Hello_W=C3=B6rld!?=\n'\ - b' 123456789-123456789 123456789 Hello '\ + self.assertEqual(bytes(m), + b'Subject: 123456789 123456789 Hello =?utf-8?q?' + b'W=C3=B6rld!_Hello_W=C3=B6rld!?=\n' + b' 123456789-123456789 123456789 Hello ' b'=?utf-8?q?W=C3=B6rld!?= 123456789\n 123456789\n\n') def test_folding_with_utf8_encoding_8(self): - # issue #36520 + # bpo-36520 # # Fold a line twice that contains UTF-8 words before # the first fold point, and ASCII words after the @@ -905,15 +905,15 @@ def test_folding_with_utf8_encoding_8(self): # fold point. m = EmailMessage() - m['Subject'] = '123456789 123456789 Hello Wörld! Hello Wörld! '\ - '123456789 123456789 123456789 123456789 123456789 '\ - '123456789-123456789 123456789 Hello Wörld! 123456789'\ + m['Subject'] = '123456789 123456789 Hello Wörld! Hello Wörld! ' \ + '123456789 123456789 123456789 123456789 123456789 ' \ + '123456789-123456789 123456789 Hello Wörld! 123456789' \ ' 123456789' - self.assertEqual(bytes(m), \ - b'Subject: 123456789 123456789 Hello '\ - b'=?utf-8?q?W=C3=B6rld!_Hello_W=C3=B6rld!?=\n 123456789 '\ - b'123456789 123456789 123456789 123456789 '\ - b'123456789-123456789\n 123456789 Hello '\ + self.assertEqual(bytes(m), + b'Subject: 123456789 123456789 Hello ' + b'=?utf-8?q?W=C3=B6rld!_Hello_W=C3=B6rld!?=\n 123456789 ' + b'123456789 123456789 123456789 123456789 ' + b'123456789-123456789\n 123456789 Hello ' b'=?utf-8?q?W=C3=B6rld!?= 123456789 123456789\n\n') class TestMIMEPart(TestEmailMessageBase, TestEmailBase):