From ed042a685783a87ccc0599203a3b7556e7bd8f66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Saugat=20Pachhai=20=28=E0=A4=B8=E0=A5=8C=E0=A4=97=E0=A4=BE?= =?UTF-8?q?=E0=A4=A4=29?= Date: Wed, 6 Aug 2025 20:35:08 +0545 Subject: [PATCH] exp run: fix failure when dependencies are from inside submodules Fixes #7186. Fixes #10823. --- dvc/repo/experiments/executor/base.py | 8 +++-- dvc/repo/scm_context.py | 4 ++- dvc/scm.py | 36 ++++++++++++++++++++-- pyproject.toml | 2 +- tests/func/experiments/test_experiments.py | 15 +++++++++ tests/unit/repo/test_scm_context.py | 6 ++-- 6 files changed, 62 insertions(+), 9 deletions(-) diff --git a/dvc/repo/experiments/executor/base.py b/dvc/repo/experiments/executor/base.py index 6ac5315327..76514e46f7 100644 --- a/dvc/repo/experiments/executor/base.py +++ b/dvc/repo/experiments/executor/base.py @@ -298,7 +298,9 @@ def save( stages = dvc.commit([], recursive=recursive, force=True, relink=False) exp_hash = cls.hash_exp(stages) if include_untracked: - dvc.scm.add(include_untracked, force=True) # type: ignore[call-arg] + from dvc.scm import add_no_submodules + + add_no_submodules(dvc.scm, include_untracked, force=True) # type: ignore[call-arg] with cls.auto_push(dvc): cls.commit( @@ -513,7 +515,9 @@ def reproduce( stages = dvc.reproduce(*args, **kwargs) if paths := cls._get_top_level_paths(dvc): logger.debug("Staging top-level files: %s", paths) - dvc.scm_context.add(paths) + from dvc.scm import add_no_submodules + + add_no_submodules(dvc.scm, paths) exp_hash = cls.hash_exp(stages) if not repro_dry: diff --git a/dvc/repo/scm_context.py b/dvc/repo/scm_context.py index bea823de20..c3979cec49 100644 --- a/dvc/repo/scm_context.py +++ b/dvc/repo/scm_context.py @@ -41,8 +41,10 @@ def _make_git_add_cmd(paths: Union[str, Iterable[str]]) -> str: def add(self, paths: Union[str, Iterable[str]]) -> None: from scmrepo.exceptions import UnsupportedIndexFormat + from dvc.scm import add_no_submodules + try: - return self.scm.add(paths) + add_no_submodules(self.scm, paths) except UnsupportedIndexFormat: link = "https://github.com/iterative/dvc/issues/610" add_cmd = self._make_git_add_cmd([relpath(path) for path in paths]) diff --git a/dvc/scm.py b/dvc/scm.py index 1e6de3e72b..f99209b86d 100644 --- a/dvc/scm.py +++ b/dvc/scm.py @@ -1,17 +1,18 @@ """Manages source control systems (e.g. Git).""" import os -from collections.abc import Iterator, Mapping +from collections.abc import Iterable, Iterator, Mapping from contextlib import contextmanager from functools import partial from typing import TYPE_CHECKING, Literal, Optional, Union, overload from funcy import group_by -from scmrepo.base import Base # noqa: F401 +from scmrepo.base import Base # noqa: TC002 from scmrepo.git import Git from scmrepo.noscm import NoSCM from dvc.exceptions import DvcException +from dvc.log import logger from dvc.progress import Tqdm if TYPE_CHECKING: @@ -19,6 +20,8 @@ from dvc.fs import FileSystem +logger = logger.getChild(__name__) + class SCMError(DvcException): """Base class for source control management errors.""" @@ -283,3 +286,32 @@ def lfs_prefetch(fs: "FileSystem", paths: list[str]): include=[(path if path.startswith("/") else f"/{path}") for path in paths], progress=pbar.update_git, ) + + +def add_no_submodules( + scm: "Base", + paths: Union[str, Iterable[str]], + **kwargs, +) -> None: + """Stage paths to Git, excluding those inside submodules.""" + + if isinstance(paths, str): + paths = [paths] + + submodule_roots = {os.path.join(scm.root_dir, sub) for sub in scm.list_submodules()} + + repo_paths: list[str] = [] + skipped_paths: list[str] = [] + + for p in paths: + abs_path = os.path.abspath(p) + if abs_path in submodule_roots or abs_path.startswith(tuple(submodule_roots)): + skipped_paths.append(p) + else: + repo_paths.append(p) + + if skipped_paths: + msg = "Skipping staging for path(s) inside submodules: %s" + logger.debug(msg, ", ".join(skipped_paths)) + + scm.add(repo_paths, **kwargs) diff --git a/pyproject.toml b/pyproject.toml index 86703d5154..013c89ed2b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,7 +67,7 @@ dependencies = [ "requests>=2.22", "rich>=12", "ruamel.yaml>=0.17.11", - "scmrepo>=3.3.8,<4", + "scmrepo>=3.5.2,<4", "shortuuid>=0.5", "shtab<2,>=1.3.4", "tabulate>=0.8.7", diff --git a/tests/func/experiments/test_experiments.py b/tests/func/experiments/test_experiments.py index 69ab6ba1be..2782319563 100644 --- a/tests/func/experiments/test_experiments.py +++ b/tests/func/experiments/test_experiments.py @@ -793,3 +793,18 @@ def test_custom_commit_message(tmp_dir, scm, dvc, tmp): ) ) assert scm.resolve_commit(exp).message == "custom commit message" + + +@pytest.mark.parametrize("dep", ["submodule", "submodule/file"]) +def test_experiments_run_with_submodule_dependencies(dvc, scm, make_tmp_dir, dep): + external_repo = make_tmp_dir("external_repo", scm=True) + external_repo.scm_gen("file", "content", commit="add file") + + submodules = scm.pygit2.repo.submodules + submodules.add(os.fspath(external_repo), "submodule") + submodules.update(init=True) + scm.add_commit([".gitmodules"], message="add submodule") + + dvc.stage.add(cmd="echo foo", deps=[dep], name="foo") + + assert dvc.experiments.run() diff --git a/tests/unit/repo/test_scm_context.py b/tests/unit/repo/test_scm_context.py index 21978c0a15..d9ff258e35 100644 --- a/tests/unit/repo/test_scm_context.py +++ b/tests/unit/repo/test_scm_context.py @@ -39,7 +39,7 @@ def test_scm_track_changed_files(scm_context): scm_context.track_file("foo") scm_context.track_changed_files() - scm_context.scm.add.assert_called_once_with({"foo"}) + scm_context.scm.add.assert_called_once_with(["foo"]) def test_ignore(scm_context): @@ -73,7 +73,7 @@ def test_scm_context_autostage_changed_files(scm_context): assert not scm_context.files_to_track assert not scm_context.ignored_paths - scm_context.scm.add.assert_called_once_with({"foo"}) + scm_context.scm.add.assert_called_once_with(["foo"]) def test_scm_context_clears_ignores_on_error(scm_context): @@ -141,4 +141,4 @@ def test_method(repo, *args, **kwargs): method = mocker.MagicMock(wraps=test_method) decorator(method, autostage=True)(repo, "arg", kw=1) method.assert_called_once_with(repo, "arg", kw=1) - scm_context.scm.add.assert_called_once_with({"foo"}) + scm_context.scm.add.assert_called_once_with(["foo"])