Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions tools/please_pex/pex/pex_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
from importlib.abc import MetaPathFinder
from importlib.metadata import Distribution
from importlib.util import spec_from_loader
from zipfile import ZipFile, ZipInfo, is_zipfile
import itertools
import os
import re
import runpy
import sys
import zipfile


try:
Expand Down Expand Up @@ -56,11 +56,11 @@ def getsitepackages(prefixes=[sys.prefix, sys.exec_prefix]):
PEX_STAMP = '__PEX_STAMP__'

# Workaround for https://bugs.python.org/issue15795
class ZipFileWithPermissions(ZipFile):
class ZipFileWithPermissions(zipfile.ZipFile):
""" Custom ZipFile class handling file permissions. """

def _extract_member(self, member, targetpath, pwd):
if not isinstance(member, ZipInfo):
if not isinstance(member, zipfile.ZipInfo):
member = self.getinfo(member)

targetpath = super(ZipFileWithPermissions, self)._extract_member(
Expand All @@ -80,7 +80,7 @@ def __init__(self):
self.suffixes_by_length = sorted(self.suffixes, key=lambda x: -len(x))
# Identify all the possible modules we could handle.
self.modules = {}
if is_zipfile(sys.argv[0]):
if zipfile.is_zipfile(sys.argv[0]):
zf = ZipFileWithPermissions(sys.argv[0])
for name in zf.namelist():
path, _ = self.splitext(name)
Expand Down Expand Up @@ -156,7 +156,10 @@ def read_text(self, filename):
return zf.read(name).decode(encoding="utf-8")

def locate_file(self, path):
raise RuntimeError("This distribution has no real file system")
return zipfile.Path(
self._pex_file,
at=os.path.join(self._prefix, path) if self._prefix else path,
)
Comment on lines +159 to +162
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`importlib.metadata`'s `files` method is slow: it calls `locate_file` for every file listed in the distribution's `RECORD` file, then confirms that each of those paths exists by calling `exists()` on the `zipfile.Path` that is returned. Each of those existence checks triggers a sequential scan of the pex file for that file's local file header, and this is repeated for every file in the distribution.

`//test:distribution_metadata_test`, which calls `files` for pygments, takes 0.9 seconds to run on Python >= 3.12. In earlier Python versions, `importlib.metadata` takes it as given that the files listed in `RECORD` actually exist. I guess the existence check is a cheap operation for real files on a real file system, and they weren't thinking of any use case more exotic than that. I'm not sure there's much we can or should do about this here.


read_text.__doc__ = Distribution.read_text.__doc__

Expand All @@ -174,7 +177,7 @@ def __init__(self, module_dir=MODULE_DIR):

def _find_all_distributions(self, module_dir):
dists = {}
if is_zipfile(sys.argv[0]):
if zipfile.is_zipfile(sys.argv[0]):
zf = ZipFileWithPermissions(sys.argv[0])
for name in zf.namelist():
if name and (m := re.search(
Expand Down