Skip to content

Commit

Permalink
Merge pull request #213 from abravalheri/issue-setuptools-4033
Browse files Browse the repository at this point in the history
"Fix" interoperability of `rfc822_escape` with stdlib's `email` package
  • Loading branch information
jaraco committed Jan 7, 2024
2 parents 53bfdb2 + 0ece987 commit fb5c570
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 9 deletions.
41 changes: 41 additions & 0 deletions distutils/tests/test_dist.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
"""Tests for distutils.dist."""
import os
import io
import email
import email.policy
import email.generator
import sys
import warnings
import textwrap
Expand Down Expand Up @@ -510,3 +513,41 @@ def test_read_metadata(self):
assert metadata.platforms is None
assert metadata.obsoletes is None
assert metadata.requires == ['foo']

def test_round_trip_through_email_generator(self):
    """
    PKG-INFO should keep its description when re-generated by ``email``.

    pypa/setuptools#4033 showed that some control characters might cause
    problems once PKG-INFO is re-generated using
    ``email.generator.Generator``.
    """
    # Given a PKG-INFO file ...
    dist = Distribution(
        {
            "name": "package",
            "version": "1.0",
            "long_description": "hello\x0b\nworld\n",
        }
    )
    with io.StringIO() as pkg_info:
        dist.metadata.write_pkg_file(pkg_info)
        original = email.message_from_string(pkg_info.getvalue())

    # ... when it is read and re-written using stdlib's email library,
    regen_policy = email.policy.EmailPolicy(
        utf8=True,
        mangle_from_=False,
        max_line_length=0,
    )
    with io.StringIO() as regenerated:
        generator = email.generator.Generator(regenerated, policy=regen_policy)
        generator.flatten(original)

        regenerated.seek(0)
        reparsed = email.message_from_file(regenerated)

    # ... then it should be the same as the original
    # (except for the specific line break characters)
    def description_lines(message):
        return set(message["Description"].splitlines())

    assert description_lines(reparsed) == description_lines(original)
59 changes: 53 additions & 6 deletions distutils/tests/test_util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
"""Tests for distutils.util."""
import email
import email.policy
import email.generator
import io
import os
import sys
import sysconfig as stdlib_sysconfig
Expand Down Expand Up @@ -184,12 +188,55 @@ def test_strtobool(self):
for n in no:
assert not strtobool(n)

indent = 8 * ' '


@pytest.mark.parametrize(
    "given,wanted",
    [
        # 0x0b, 0x0c, ..., etc are also considered a line break by Python
        ("hello\x0b\nworld\n", f"hello\x0b{indent}\n{indent}world\n{indent}"),
        ("hello\x1eworld", f"hello\x1e{indent}world"),
        ("", ""),
        (
            "I am a\npoor\nlonesome\nheader\n",
            f"I am a\n{indent}poor\n{indent}lonesome\n{indent}header\n{indent}",
        ),
    ],
)
def test_rfc822_escape(self, given, wanted):
    """
    We want to ensure a multi-line header parses correctly.

    For interoperability, the escaped value should also "round-trip" over
    `email.generator.Generator.flatten` and `email.message_from_*`
    (see pypa/setuptools#4033).

    The main issue is that internally `email.policy.EmailPolicy` uses
    `splitlines` which will split on some control chars. If all the new lines
    are not prefixed with spaces, the parser will interrupt reading
    the current header and produce an incomplete value, while
    incorrectly interpreting the rest of the headers as part of the payload.
    """
    res = rfc822_escape(given)

    policy = email.policy.EmailPolicy(
        utf8=True,
        mangle_from_=False,
        max_line_length=0,
    )
    with io.StringIO() as buffer:
        # Embed the escaped value in a minimal message so that a broken
        # escape shows up as a truncated header or a corrupted payload.
        raw = f"header: {res}\nother-header: 42\n\npayload\n"
        orig = email.message_from_string(raw)
        email.generator.Generator(buffer, policy=policy).flatten(orig)
        buffer.seek(0)
        regen = email.message_from_file(buffer)

    for msg in (orig, regen):
        assert msg.get_payload() == "payload\n"
        assert msg["other-header"] == "42"
        # Generator may replace control chars with `\n`
        assert set(msg["header"].splitlines()) == set(res.splitlines())

    assert res == wanted

def test_dont_write_bytecode(self):
Expand Down
12 changes: 9 additions & 3 deletions distutils/util.py
Original file line number Diff line number Diff line change
def rfc822_escape(header):
    """Return a version of the string escaped for inclusion in an
    RFC-822 header, by ensuring there are 8 spaces after each line break.

    Line breaks are detected with ``str.splitlines``, so control characters
    that Python treats as line boundaries (e.g. ``\\x0b`` or ``\\x1e``) are
    followed by the indentation as well, not only ``\\n``.

    :param header: the raw (possibly multi-line) header value.
    :return: *header* with 8 spaces inserted after every line break,
        including a terminal one; an empty string is returned unchanged.
    """
    indent = 8 * " "
    lines = header.splitlines(keepends=True)
    if not lines:
        # Empty input: nothing to escape.
        return ""

    # Emulate the behaviour of `str.split`
    # (the terminal line break in `splitlines` does not result in an extra line):
    # if the last chunk still carries its line break, the indentation for the
    # (empty) line that follows must be appended explicitly.
    last = lines[-1]
    ends_in_newline = last.splitlines()[0] != last
    suffix = indent if ends_in_newline else ""

    return indent.join(lines) + suffix

0 comments on commit fb5c570

Please sign in to comment.