diff --git a/news/7667.bugfix.rst b/news/7667.bugfix.rst
new file mode 100644
index 00000000000..e42e128e97e
--- /dev/null
+++ b/news/7667.bugfix.rst
@@ -0,0 +1 @@
+Fix extraction of files with utf-8 encoded paths from tars.
diff --git a/src/pip/_internal/utils/unpacking.py b/src/pip/_internal/utils/unpacking.py
index 44ac475357d..bffb3cd653e 100644
--- a/src/pip/_internal/utils/unpacking.py
+++ b/src/pip/_internal/utils/unpacking.py
@@ -178,7 +178,7 @@ def untar_file(filename, location):
             filename,
         )
         mode = "r:*"
-    tar = tarfile.open(filename, mode)
+    tar = tarfile.open(filename, mode, encoding="utf-8")
     try:
         leading = has_leading_dir([member.name for member in tar.getmembers()])
         for member in tar.getmembers():
diff --git a/tests/unit/test_utils_unpacking.py b/tests/unit/test_utils_unpacking.py
index aea70efbc07..760b09cf1f8 100644
--- a/tests/unit/test_utils_unpacking.py
+++ b/tests/unit/test_utils_unpacking.py
@@ -168,6 +168,33 @@ def test_unpack_tar_success(self):
         untar_file(test_tar, self.tempdir)
 
 
+def test_unpack_tar_unicode(tmpdir):
+    """Check that untar_file extracts a member with a non-ASCII UTF-8 path."""
+    import io
+
+    test_tar = tmpdir / "test.tar"
+    payload = b"hello world"
+    # Write a PAX-format archive with UTF-8 encoding so that the non-ASCII
+    # member name below is stored in the archive as UTF-8.
+    with tarfile.open(
+        test_tar, "w", format=tarfile.PAX_FORMAT, encoding="utf-8"
+    ) as f:
+        metadata = tarfile.TarInfo("dir/åäö_日本語.py")
+        # addfile() copies exactly ``size`` bytes from a binary file object;
+        # with the default size of 0 the member would silently be empty.
+        metadata.size = len(payload)
+        f.addfile(metadata, io.BytesIO(payload))
+
+    output_dir = tmpdir / "output"
+    output_dir.mkdir()
+
+    untar_file(test_tar, str(output_dir))
+
+    output_dir_name = str(output_dir)
+    contents = os.listdir(output_dir_name)
+    assert u"åäö_日本語.py" in contents
+
+
 @pytest.mark.parametrize('args, expected', [
     # Test the second containing the first.
     (('parent/sub', 'parent/'), False),