Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: fix race condition on namespace package installation #9159

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
36 changes: 33 additions & 3 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ xattr = { version = "^1.0.0", markers = "sys_platform == 'darwin'" }

[tool.poetry.group.dev.dependencies]
pre-commit = ">=2.10"
pytest-benchmark = "^4.0.0"

[tool.poetry.group.test.dependencies]
coverage = ">=7.2.0"
Expand Down
46 changes: 44 additions & 2 deletions src/poetry/installation/wheel_installer.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from __future__ import annotations

import logging
import os
import platform
import sys
import tempfile

from pathlib import Path
from typing import TYPE_CHECKING
Expand Down Expand Up @@ -65,9 +67,49 @@ def write_to_fs(
return RecordEntry(path, Hash(self.hash_algorithm, hash_), size)


class WheelDestinationCopy(SchemeDictionaryDestination):
"""SchemeDictionaryDestination that copies the files with os.replace to avoid race condition"""

def write_to_fs(
self,
scheme: Scheme,
path: str,
stream: BinaryIO,
is_executable: bool,
) -> RecordEntry:
from installer.records import Hash
from installer.records import RecordEntry
from installer.utils import copyfileobj_with_hashing
from installer.utils import make_file_executable

target_path = Path(self.scheme_dict[scheme]) / path

parent_folder = target_path.parent
if not parent_folder.exists():
# Due to the parallel installation it can happen
# that two threads try to create the directory.
parent_folder.mkdir(parents=True, exist_ok=True)

with tempfile.NamedTemporaryFile(delete=False) as fp:
with open(fp.name, "wb") as f:
hash_, size = copyfileobj_with_hashing(stream, f, self.hash_algorithm)
if target_path.exists():
# Contrary to the base library we don't raise an error here since it can
# break pkgutil-style and pkg_resource-style namespace packages.
# We instead log a warning and ignore it. See issue #9158.
logger.warning(f"{target_path} already exists. Overwritting.")
os.replace(fp.name, target_path)

if is_executable:
make_file_executable(target_path)

return RecordEntry(path, Hash(self.hash_algorithm, hash_), size)


class WheelInstaller:
def __init__(self, env: Env) -> None:
def __init__(self, env: Env, wheel_destination_class=WheelDestination) -> None:
self._env = env
self.wheel_destination_class = wheel_destination_class

script_kind: LauncherKind
if not WINDOWS:
Expand Down Expand Up @@ -99,7 +141,7 @@ def install(self, wheel: Path) -> None:
scheme_dict["headers"] = str(
Path(scheme_dict["include"]) / source.distribution
)
destination = WheelDestination(
destination = self.wheel_destination_class(
scheme_dict,
interpreter=str(self._env.python),
script_kind=self._script_kind,
Expand Down
21 changes: 3 additions & 18 deletions src/poetry/masonry/builders/editable.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
from __future__ import annotations

import csv
import hashlib
import json
import locale
import os

from base64 import urlsafe_b64encode
from pathlib import Path
from typing import TYPE_CHECKING

Expand All @@ -17,6 +15,7 @@
from poetry.utils._compat import WINDOWS
from poetry.utils._compat import decode
from poetry.utils.env import build_environment
from poetry.utils.helpers import get_file_hash_urlsafe
from poetry.utils.helpers import is_dir_writable
from poetry.utils.pip import pip_install

Expand Down Expand Up @@ -261,26 +260,12 @@ def _add_dist_info(self, added_files: list[Path]) -> None:
with record.open("w", encoding="utf-8", newline="") as f:
csv_writer = csv.writer(f)
for path in added_files:
hash = self._get_file_hash(path)
size = path.stat().st_size
csv_writer.writerow((path, f"sha256={hash}", size))
hash_, size = get_file_hash_urlsafe(path)
csv_writer.writerow((path, f"sha256={hash_}", size))

# RECORD itself is recorded with no hash or size
csv_writer.writerow((record, "", ""))

def _get_file_hash(self, filepath: Path) -> str:
hashsum = hashlib.sha256()
with filepath.open("rb") as src:
while True:
buf = src.read(1024 * 8)
if not buf:
break
hashsum.update(buf)

src.seek(0)

return urlsafe_b64encode(hashsum.digest()).decode("ascii").rstrip("=")

def _debug(self, msg: str) -> None:
if self._io.is_debug():
self._io.write_line(msg)
20 changes: 20 additions & 0 deletions src/poetry/utils/helpers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import base64
import hashlib
import io
import logging
Expand All @@ -18,6 +19,7 @@
from pathlib import Path
from typing import TYPE_CHECKING
from typing import Any
from typing import BinaryIO
from typing import overload

import requests
Expand Down Expand Up @@ -332,6 +334,24 @@ def get_file_hash(path: Path, hash_name: str = "sha256") -> str:
return h.hexdigest()


def get_file_hash_urlsafe(
file: Path | BinaryIO, hash_algorithm: str = "sha256"
) -> tuple[str, int]:
hashsum = hashlib.new(hash_algorithm)
with file.open("rb") if isinstance(file, Path) else file as src:
size = 0
while True:
buf = src.read(1024 * 8)
if not buf:
break
hashsum.update(buf)
size += len(buf)

src.seek(0)

return base64.urlsafe_b64encode(hashsum.digest()).decode("ascii").rstrip("="), size


def get_highest_priority_hash_type(
hash_types: set[str], archive_name: str
) -> str | None:
Expand Down
Binary file not shown.
75 changes: 75 additions & 0 deletions tests/installation/test_wheel_installer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import re
import shutil

from pathlib import Path
from typing import TYPE_CHECKING
Expand All @@ -9,6 +10,7 @@

from poetry.core.constraints.version import parse_constraint

from poetry.installation.wheel_installer import WheelDestinationCopy
from poetry.installation.wheel_installer import WheelInstaller
from poetry.utils.env import MockEnv

Expand All @@ -29,6 +31,11 @@ def demo_wheel(fixture_dir: FixtureDirGetter) -> Path:
return fixture_dir("distributions/demo-0.1.0-py2.py3-none-any.whl")


@pytest.fixture(scope="module")
def django_wheel(fixture_dir: FixtureDirGetter) -> Path:
return fixture_dir("distributions/Django-5.0.3-py3-none-any.whl")


@pytest.fixture(scope="module")
def default_installation(tmp_path_factory: TempPathFactory, demo_wheel: Path) -> Path:
env = MockEnv(path=tmp_path_factory.mktemp("default_install"))
Expand Down Expand Up @@ -81,3 +88,71 @@ def test_enable_bytecode_compilation(
assert not list(cache_dir.glob("*.opt-2.pyc"))
else:
assert not cache_dir.exists()


def delete_tmp_dir_func_factory(tmp_path: Path):
def delete_tmp_dir():
shutil.rmtree(tmp_path)
tmp_path.mkdir()

return delete_tmp_dir


@pytest.mark.benchmark(
group="demo_wheel",
)
def test_bench_installer_copy_demo(
env: MockEnv, demo_wheel: Path, benchmark, tmp_path: Path
) -> None:
installer = WheelInstaller(env, wheel_destination_class=WheelDestinationCopy)
benchmark.pedantic(
installer.install,
args=(demo_wheel,),
setup=delete_tmp_dir_func_factory(tmp_path),
rounds=1000,
)


@pytest.mark.benchmark(
group="demo_wheel",
)
def test_bench_installer_demo(
env: MockEnv, demo_wheel: Path, benchmark, tmp_path: Path
) -> None:
installer = WheelInstaller(env)
benchmark.pedantic(
installer.install,
args=(demo_wheel,),
setup=delete_tmp_dir_func_factory(tmp_path),
rounds=1000,
)


@pytest.mark.benchmark(
group="django_wheel",
)
def test_bench_installer_copy_django(
env: MockEnv, django_wheel: Path, benchmark, tmp_path: Path
) -> None:
installer = WheelInstaller(env, wheel_destination_class=WheelDestinationCopy)
benchmark.pedantic(
installer.install,
args=(django_wheel,),
setup=delete_tmp_dir_func_factory(tmp_path),
rounds=30,
)


@pytest.mark.benchmark(
group="django_wheel",
)
def test_bench_installer_django(
env: MockEnv, django_wheel: Path, benchmark, tmp_path: Path
) -> None:
installer = WheelInstaller(env)
benchmark.pedantic(
installer.install,
args=(django_wheel,),
setup=delete_tmp_dir_func_factory(tmp_path),
rounds=30,
)