Skip to content

Commit

Permalink
bpo-36520: Email header folded incorrectly (#13608)
Browse files Browse the repository at this point in the history
* bpo-36520: reset the encoded word offset when starting a new
line during an email header folding operation

* 📜🤖 Added by blurb_it.

* bpo-36520: add an additional test case, and provide descriptive
comments for the test_folding_with_utf8_encoding_* tests

* bpo-36520: fix whitespace issue

* bpo-36520: changes per reviewer request -- remove extraneous
backslashes; add whitespace between terminating quotes and
line-continuation backslashes; use "bpo-" instead of
"issue #" in comments
  • Loading branch information
websurfer5 authored and warsaw committed Jun 6, 2019
1 parent e0c0c7e commit f6713e8
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 0 deletions.
1 change: 1 addition & 0 deletions Lib/email/_header_value_parser.py
Expand Up @@ -2789,6 +2789,7 @@ def _refold_parse_tree(parse_tree, *, policy):
newline = _steal_trailing_WSP_if_exists(lines)
if newline or part.startswith_fws():
lines.append(newline + tstr)
last_ew = None
continue
if not hasattr(part, 'encode'):
# It's not a terminal, try folding the subparts.
Expand Down
131 changes: 131 additions & 0 deletions Lib/test/test_email/test_message.py
Expand Up @@ -784,6 +784,137 @@ def test_str_defaults_to_utf8(self):
m['Subject'] = 'unicöde'
self.assertEqual(str(m), 'Subject: unicöde\n\n')

def test_folding_with_utf8_encoding_1(self):
# bpo-36520
#
# Fold a line that contains UTF-8 words before
# and after the whitespace fold point, where the
# line length limit is reached within an ASCII
# word.

m = EmailMessage()
m['Subject'] = 'Hello Wörld! Hello Wörld! ' \
'Hello Wörld! Hello Wörld!Hello Wörld!'
self.assertEqual(bytes(m),
b'Subject: Hello =?utf-8?q?W=C3=B6rld!_Hello_W'
b'=C3=B6rld!_Hello_W=C3=B6rld!?=\n'
b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')


def test_folding_with_utf8_encoding_2(self):
# bpo-36520
#
# Fold a line that contains UTF-8 words before
# and after the whitespace fold point, where the
# line length limit is reached at the end of an
# encoded word.

m = EmailMessage()
m['Subject'] = 'Hello Wörld! Hello Wörld! ' \
'Hello Wörlds123! Hello Wörld!Hello Wörld!'
self.assertEqual(bytes(m),
b'Subject: Hello =?utf-8?q?W=C3=B6rld!_Hello_W'
b'=C3=B6rld!_Hello_W=C3=B6rlds123!?=\n'
b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')

def test_folding_with_utf8_encoding_3(self):
# bpo-36520
#
# Fold a line that contains UTF-8 words before
# and after the whitespace fold point, where the
# line length limit is reached at the end of the
# first word.

m = EmailMessage()
m['Subject'] = 'Hello-Wörld!-Hello-Wörld!-Hello-Wörlds123! ' \
'Hello Wörld!Hello Wörld!'
self.assertEqual(bytes(m), \
b'Subject: =?utf-8?q?Hello-W=C3=B6rld!-Hello-W'
b'=C3=B6rld!-Hello-W=C3=B6rlds123!?=\n'
b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')

def test_folding_with_utf8_encoding_4(self):
# bpo-36520
#
# Fold a line that contains UTF-8 words before
# and after the fold point, where the first
# word is UTF-8 and the fold point is within
# the word.

m = EmailMessage()
m['Subject'] = 'Hello-Wörld!-Hello-Wörld!-Hello-Wörlds123!-Hello' \
' Wörld!Hello Wörld!'
self.assertEqual(bytes(m),
b'Subject: =?utf-8?q?Hello-W=C3=B6rld!-Hello-W'
b'=C3=B6rld!-Hello-W=C3=B6rlds123!?=\n'
b' =?utf-8?q?-Hello_W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')

def test_folding_with_utf8_encoding_5(self):
# bpo-36520
#
# Fold a line that contains a UTF-8 word after
# the fold point.

m = EmailMessage()
m['Subject'] = '123456789 123456789 123456789 123456789 123456789' \
' 123456789 123456789 Hello Wörld!'
self.assertEqual(bytes(m),
b'Subject: 123456789 123456789 123456789 123456789'
b' 123456789 123456789 123456789\n'
b' Hello =?utf-8?q?W=C3=B6rld!?=\n\n')

def test_folding_with_utf8_encoding_6(self):
# bpo-36520
#
# Fold a line that contains a UTF-8 word before
# the fold point and ASCII words after

m = EmailMessage()
m['Subject'] = '123456789 123456789 123456789 123456789 Hello Wörld!' \
' 123456789 123456789 123456789 123456789 123456789' \
' 123456789'
self.assertEqual(bytes(m),
b'Subject: 123456789 123456789 123456789 123456789'
b' Hello =?utf-8?q?W=C3=B6rld!?=\n 123456789 '
b'123456789 123456789 123456789 123456789 '
b'123456789\n\n')

def test_folding_with_utf8_encoding_7(self):
# bpo-36520
#
# Fold a line twice that contains UTF-8 words before
# and after the first fold point, and ASCII words
# after the second fold point.

m = EmailMessage()
m['Subject'] = '123456789 123456789 Hello Wörld! Hello Wörld! ' \
'123456789-123456789 123456789 Hello Wörld! 123456789' \
' 123456789'
self.assertEqual(bytes(m),
b'Subject: 123456789 123456789 Hello =?utf-8?q?'
b'W=C3=B6rld!_Hello_W=C3=B6rld!?=\n'
b' 123456789-123456789 123456789 Hello '
b'=?utf-8?q?W=C3=B6rld!?= 123456789\n 123456789\n\n')

def test_folding_with_utf8_encoding_8(self):
# bpo-36520
#
# Fold a line twice that contains UTF-8 words before
# the first fold point, and ASCII words after the
# first fold point, and UTF-8 words after the second
# fold point.

m = EmailMessage()
m['Subject'] = '123456789 123456789 Hello Wörld! Hello Wörld! ' \
'123456789 123456789 123456789 123456789 123456789 ' \
'123456789-123456789 123456789 Hello Wörld! 123456789' \
' 123456789'
self.assertEqual(bytes(m),
b'Subject: 123456789 123456789 Hello '
b'=?utf-8?q?W=C3=B6rld!_Hello_W=C3=B6rld!?=\n 123456789 '
b'123456789 123456789 123456789 123456789 '
b'123456789-123456789\n 123456789 Hello '
b'=?utf-8?q?W=C3=B6rld!?= 123456789 123456789\n\n')

class TestMIMEPart(TestEmailMessageBase, TestEmailBase):
# Doing the full test run here may seem a bit redundant, since the two
Expand Down
@@ -0,0 +1 @@
Lengthy email headers with UTF-8 characters are now properly encoded when they are folded. Patch by Jeffrey Kintscher.

0 comments on commit f6713e8

Please sign in to comment.