From 7e1c8dde09d0a472596c310f597237b6527cd4ed Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 11 Oct 2022 09:46:27 +0100 Subject: [PATCH 1/3] Speed up builds with large number of ignored files Poetry checks a lot of files against the list of excluded files. If there are many files ignored by git this can severly slow down builds. Currently, if a directory is ignored by git then all files within that directory get added to set of excluded files to check. --- src/poetry/core/masonry/builders/builder.py | 66 ++++++++++++++++----- src/poetry/core/vcs/git.py | 4 +- tests/masonry/builders/test_builder.py | 36 ++++++++++- 3 files changed, 87 insertions(+), 19 deletions(-) diff --git a/src/poetry/core/masonry/builders/builder.py b/src/poetry/core/masonry/builders/builder.py index 9f569299e..54cbf1b87 100644 --- a/src/poetry/core/masonry/builders/builder.py +++ b/src/poetry/core/masonry/builders/builder.py @@ -8,6 +8,7 @@ from collections import defaultdict from pathlib import Path from typing import TYPE_CHECKING +from typing import Container if TYPE_CHECKING: @@ -41,7 +42,7 @@ def __init__( self._poetry = poetry self._package = poetry.package self._path: Path = poetry.file.parent - self._excluded_files: set[str] | None = None + self._excluded_files: _IgnoredFilesChecker | None = None self._executable = Path(executable or sys.executable) packages = [] @@ -100,7 +101,7 @@ def default_target_dir(self) -> Path: def build(self, target_dir: Path | None) -> Path: raise NotImplementedError() - def find_excluded_files(self, fmt: str | None = None) -> set[str]: + def find_excluded_files(self, fmt: str | None = None) -> Container[str]: if self._excluded_files is None: from poetry.core.vcs import get_vcs @@ -129,27 +130,25 @@ def find_excluded_files(self, fmt: str | None = None) -> set[str]: Path(included).relative_to(self._path).as_posix() ) + # Note: `vcs_ignored_files` might return whole directories to + # ignore, so removing the list of `explicitly_included` isn't enough + # by itself. ignored = (vcs_ignored_files | explicitly_excluded) - explicitly_included for ignored_file in ignored: logger.debug(f"Ignoring: {ignored_file}") - self._excluded_files = ignored + for included_file in explicitly_included: + logger.debug(f"Including: {included_file}") + + self._excluded_files = _IgnoredFilesChecker( + included_files=explicitly_included, + excluded_files=ignored, + ) return self._excluded_files def is_excluded(self, filepath: str | Path) -> bool: - exclude_path = Path(filepath) - - while True: - if exclude_path.as_posix() in self.find_excluded_files(fmt=self.format): - return True - - if len(exclude_path.parts) > 1: - exclude_path = exclude_path.parent - else: - break - - return False + return filepath in self.find_excluded_files(fmt=self.format) def find_files_to_add(self, exclude_build: bool = True) -> set[BuildIncludeFile]: """ @@ -396,3 +395,40 @@ def relative_to_source_root(self) -> Path: return self.path.relative_to(self.source_root) return self.path + + +class _IgnoredFilesChecker: + """A container of files that should be ignored. + + Args: + included_files: A set of files or directories that should not be + ignored, even if they're also in the set of excluded files. + excluded_files: A set of files or directories that should be ignored. + """ + + def __init__(self, included_files: set[str], excluded_files: set[str]) -> None: + self._excluded_files = excluded_files + self._included_files = included_files + + def __contains__(self, filepath: object) -> bool: + if not isinstance(filepath, str | Path): + raise TypeError("value must be a string or path-like object") + + # We handle the fact that the included and excluded files may be + # directories by first checking the actual file and then recursively + # checking each parent directory. + exclude_path = Path(filepath) + + while True: + if exclude_path.as_posix() in self._included_files: + return False + + if exclude_path.as_posix() in self._excluded_files: + return True + + if len(exclude_path.parts) > 1: + exclude_path = exclude_path.parent + else: + break + + return False diff --git a/src/poetry/core/vcs/git.py b/src/poetry/core/vcs/git.py index aeccdf37c..6b7186abb 100644 --- a/src/poetry/core/vcs/git.py +++ b/src/poetry/core/vcs/git.py @@ -324,6 +324,8 @@ def get_current_branch(self, folder: Path | None = None) -> str: return output.strip() def get_ignored_files(self, folder: Path | None = None) -> list[str]: + """Returns the list of files and directories that are ignored by Git.""" + args = [] if folder is None and self._work_dir: folder = self._work_dir @@ -336,7 +338,7 @@ def get_ignored_files(self, folder: Path | None = None) -> list[str]: folder.as_posix(), ] - args += ["ls-files", "--others", "-i", "--exclude-standard"] + args += ["ls-files", "--others", "-i", "--exclude-standard", "--directory"] output = self.run(*args) return output.strip().split("\n") diff --git a/tests/masonry/builders/test_builder.py b/tests/masonry/builders/test_builder.py index a6182b2bf..c1f2c4baa 100644 --- a/tests/masonry/builders/test_builder.py +++ b/tests/masonry/builders/test_builder.py @@ -3,8 +3,10 @@ import sys from email.parser import Parser +from glob import glob from pathlib import Path from typing import TYPE_CHECKING +from typing import Collection import pytest @@ -16,6 +18,18 @@ from pytest_mock import MockerFixture +def get_excluded_files(builder: Builder) -> set[str]: + """Get all excluded files in the project directory""" + + relative_paths = [ + path.relative_to(builder._path).as_posix() + for path in builder._path.rglob("*") + if path.is_file() + ] + + return {path for path in relative_paths if path in builder.find_excluded_files()} + + def test_builder_find_excluded_files(mocker: MockerFixture) -> None: p = mocker.patch("poetry.core.vcs.git.Git.get_ignored_files") p.return_value = [] @@ -24,7 +38,23 @@ def test_builder_find_excluded_files(mocker: MockerFixture) -> None: Factory().create_poetry(Path(__file__).parent / "fixtures" / "complete") ) - assert builder.find_excluded_files() == {"my_package/sub_pkg1/extra_file.xml"} + assert get_excluded_files(builder) == {"my_package/sub_pkg1/extra_file.xml"} + + +def test_builder_find_excluded_files_whole_directory(mocker: MockerFixture) -> None: + p = mocker.patch("poetry.core.vcs.git.Git.get_ignored_files") + p.return_value = [ + "my_package/sub_pkg1", + ] + + builder = Builder( + Factory().create_poetry(Path(__file__).parent / "fixtures" / "complete") + ) + + assert get_excluded_files(builder) == { + "my_package/sub_pkg1/extra_file.xml", + "my_package/sub_pkg1/__init__.py", + } @pytest.mark.xfail( @@ -41,7 +71,7 @@ def test_builder_find_case_sensitive_excluded_files(mocker: MockerFixture) -> No ) ) - assert builder.find_excluded_files() == { + assert get_excluded_files(builder) == { "my_package/FooBar/Bar.py", "my_package/FooBar/lowercasebar.py", "my_package/Foo/SecondBar.py", @@ -68,7 +98,7 @@ def test_builder_find_invalid_case_sensitive_excluded_files( ) ) - assert {"my_package/Bar/foo/bar/Foo.py"} == builder.find_excluded_files() + assert {"my_package/Bar/foo/bar/Foo.py"} == get_excluded_files(builder) def test_get_metadata_content() -> None: From 7cfeb2a5d7a3997df99d1b0f573ae1e45da2c430 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 11 Oct 2022 09:51:46 +0000 Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/masonry/builders/test_builder.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/masonry/builders/test_builder.py b/tests/masonry/builders/test_builder.py index c1f2c4baa..e42a78279 100644 --- a/tests/masonry/builders/test_builder.py +++ b/tests/masonry/builders/test_builder.py @@ -3,10 +3,8 @@ import sys from email.parser import Parser -from glob import glob from pathlib import Path from typing import TYPE_CHECKING -from typing import Collection import pytest From 76a42d3b4c117ea70dc0228d9e9ff629b72f6f2a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 11 Oct 2022 10:54:52 +0100 Subject: [PATCH 3/3] Fix isinstance check --- src/poetry/core/masonry/builders/builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/poetry/core/masonry/builders/builder.py b/src/poetry/core/masonry/builders/builder.py index 54cbf1b87..8da209d52 100644 --- a/src/poetry/core/masonry/builders/builder.py +++ b/src/poetry/core/masonry/builders/builder.py @@ -411,7 +411,7 @@ def __init__(self, included_files: set[str], excluded_files: set[str]) -> None: self._included_files = included_files def __contains__(self, filepath: object) -> bool: - if not isinstance(filepath, str | Path): + if not isinstance(filepath, (str, Path)): raise TypeError("value must be a string or path-like object") # We handle the fact that the included and excluded files may be