Skip to content

Commit

Permalink
Always open tar files with UTF-8
Browse files Browse the repository at this point in the history
The UTF-8 encoding was assumed in an sdist, but without specifying it
explicitly, extraction may fail on obscure systems where the default
encoding is not UTF-8.

Co-Authored-By: Chris Hunt <chrahunt@gmail.com>
  • Loading branch information
uranusjr and chrahunt committed Apr 24, 2021
1 parent 4b8004a commit dc9efc8
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 1 deletion.
1 change: 1 addition & 0 deletions news/7667.bugfix.rst
@@ -0,0 +1 @@
Fix extraction of files with utf-8 encoded paths from tars.
2 changes: 1 addition & 1 deletion src/pip/_internal/utils/unpacking.py
Expand Up @@ -178,7 +178,7 @@ def untar_file(filename, location):
filename,
)
mode = "r:*"
tar = tarfile.open(filename, mode)
tar = tarfile.open(filename, mode, encoding="utf-8")
try:
leading = has_leading_dir([member.name for member in tar.getmembers()])
for member in tar.getmembers():
Expand Down
19 changes: 19 additions & 0 deletions tests/unit/test_utils_unpacking.py
Expand Up @@ -168,6 +168,25 @@ def test_unpack_tar_success(self):
untar_file(test_tar, self.tempdir)


def test_unpack_tar_unicode(tmpdir):
    """Check that ``untar_file`` extracts a member with a non-ASCII path.

    A tar archive holding a single regular file whose name mixes Latin-1
    and CJK characters is written, unpacked with ``untar_file``, and the
    output directory listing is checked for that file name.

    The assertion looks for the file directly inside the output directory,
    i.e. without the ``dir/`` component used for the member name inside
    the archive.
    """
    # Local import so this test does not rely on a module-level ``io`` import.
    import io

    test_tar = tmpdir / "test.tar"
    payload = b"hello world"

    # Write the archive in PAX format with an explicit UTF-8 encoding so the
    # non-ASCII member name is stored independently of the platform's
    # default encoding.
    with tarfile.open(
        test_tar, "w", format=tarfile.PAX_FORMAT, encoding="utf-8"
    ) as f:
        metadata = tarfile.TarInfo("dir/åäö_日本語.py")
        # addfile() copies ``metadata.size`` bytes from a *binary* file
        # object; the size must be set explicitly or the member is empty.
        metadata.size = len(payload)
        f.addfile(metadata, io.BytesIO(payload))

    output_dir = tmpdir / "output"
    output_dir.mkdir()

    untar_file(test_tar, str(output_dir))

    output_dir_name = str(output_dir)
    contents = os.listdir(output_dir_name)
    assert u"åäö_日本語.py" in contents


@pytest.mark.parametrize('args, expected', [
# Test the second containing the first.
(('parent/sub', 'parent/'), False),
Expand Down

0 comments on commit dc9efc8

Please sign in to comment.