diff --git a/src/scmrepo/git/backend/dulwich/__init__.py b/src/scmrepo/git/backend/dulwich/__init__.py
index 71def17b..9833b775 100644
--- a/src/scmrepo/git/backend/dulwich/__init__.py
+++ b/src/scmrepo/git/backend/dulwich/__init__.py
@@ -17,6 +17,7 @@
 )
 
 from dulwich.config import ConfigFile, StackedConfig
+from dulwich.walk import ORDER_DATE
 from funcy import cached_property, reraise
 
 from scmrepo.exceptions import AuthError, CloneError, InvalidRemote, RevError, SCMError
@@ -473,7 +474,28 @@ def list_tags(self) -> Iterable[str]:
         return sorted(ref[len(base) :] for ref in self.iter_refs(base))
 
     def list_all_commits(self) -> Iterable[str]:
-        raise NotImplementedError
+        """List every commit reachable from HEAD, branches, remotes and tags.
+
+        Returns the commit ids as hex strings, in ``ORDER_DATE`` walk order.
+        """
+        repo = self.repo
+        starting_points: list[bytes] = []
+
+        # HEAD
+        head_rev = self.get_ref("HEAD")
+        if head_rev:
+            starting_points.append(head_rev.encode("utf-8"))
+
+        # Branches, remotes and tags
+        for ref in repo.refs:
+            if ref.startswith((b"refs/heads/", b"refs/remotes/", b"refs/tags/")):
+                # ``repo.refs[ref]`` yields a SHA (bytes), never a ``Tag`` object;
+                # peel by ref name so annotated tags resolve to their target
+                # and every other ref resolves to the SHA it points at.
+                starting_points.append(repo.get_peeled(ref))
+
+        walker = repo.get_walker(include=starting_points, order=ORDER_DATE)
+        return [e.commit.id.decode() for e in walker]
 
     def get_tree_obj(self, rev: str, **kwargs) -> DulwichObject:
         from dulwich.objectspec import parse_tree
diff --git a/src/scmrepo/git/backend/pygit2/__init__.py b/src/scmrepo/git/backend/pygit2/__init__.py
index c914ece0..2d9266be 100644
--- a/src/scmrepo/git/backend/pygit2/__init__.py
+++ b/src/scmrepo/git/backend/pygit2/__init__.py
@@ -452,7 +452,32 @@ def list_tags(self) -> Iterable[str]:
         return sorted(ref[len(base) :] for ref in self.iter_refs(base))
 
     def list_all_commits(self) -> Iterable[str]:
-        raise NotImplementedError
+        import pygit2
+        from pygit2.enums import SortMode
+
+        # Add HEAD
+        starting_points: list[Union[Oid, str]] = []
+        if not self.repo.head_is_unborn:
+            starting_points.append(self.repo.head.target)
+
+        # Add all branches, remotes, and tags
+        for ref in self.repo.references:
+            if ref.startswith(("refs/heads/", "refs/remotes/")):
+                oid = self.repo.revparse_single(ref).id
+                starting_points.append(oid)
+            elif ref.startswith("refs/tags/"):
+                tag_obj = self.repo.revparse_single(ref)
+                if isinstance(tag_obj, pygit2.Tag):
+                    starting_points.append(tag_obj.target)
+                else:
+                    starting_points.append(tag_obj.id)
+
+        # Walk all commits
+        walker = self.repo.walk(None)
+        for oid in starting_points:
+            walker.push(oid)
+        walker.sort(SortMode.TIME)
+        return [str(commit.id) for commit in walker]
 
     def get_tree_obj(self, rev: str, **kwargs) -> Pygit2Object:
         tree = self.repo[rev].tree
diff --git a/tests/test_git.py b/tests/test_git.py
index 34985e40..63995d28 100644
--- a/tests/test_git.py
+++ b/tests/test_git.py
@@ -1,5 +1,6 @@
 import os
 import shutil
+import time
 from pathlib import Path
 from typing import Any, Optional
 
@@ -422,21 +423,170 @@ def test_iter_remote_refs(
     } == set(git.iter_remote_refs(remote))
 
 
-@pytest.mark.skip_git_backend("dulwich", "pygit2")
+def _gen(scm: Git, s: str, commit_timestamp: Optional[float] = None) -> str:
+    with open(s, mode="w") as f:
+        f.write(s)
+    scm.dulwich.add([s])
+    scm.dulwich.repo.do_commit(
+        message=s.encode("utf-8"), commit_timestamp=commit_timestamp
+    )
+    return scm.get_rev()
+
+
 def test_list_all_commits(tmp_dir: TmpDir, scm: Git, git: Git, matcher: type[Matcher]):
-    def _gen(s):
-        tmp_dir.gen(s, s)
-        scm.add_commit(s, message=s)
-        return scm.get_rev()
+    assert git.list_all_commits() == []
+    # https://github.com/libgit2/libgit2/issues/6336
+    now = time.time()
+
+    rev_a = _gen(scm, "a", commit_timestamp=now - 10)
+    rev_b = _gen(scm, "b", commit_timestamp=now - 8)
+    rev_c = _gen(scm, "c", commit_timestamp=now - 5)
+    rev_d = _gen(scm, "d", commit_timestamp=now - 2)
+
+    assert git.list_all_commits() == [rev_d, rev_c, rev_b, rev_a]
+
+    scm.gitpython.git.reset(rev_b, hard=True)
+    assert git.list_all_commits() == [rev_b, rev_a]
 
-    rev_a = _gen("a")
-    rev_b = _gen("b")
+
+def test_list_all_commits_branch(
+    tmp_dir: TmpDir, scm: Git, git: Git, matcher: type[Matcher]
+):
+    revs = {}
+    now = time.time()
+
+    revs["1"] = _gen(scm, "a", commit_timestamp=now - 10)
+
+    scm.checkout("branch", create_new=True)
+    revs["3"] = _gen(scm, "c", commit_timestamp=now - 9)
+
+    scm.checkout("master")
+    revs["2"] = _gen(scm, "b", commit_timestamp=now - 7)
+
+    scm.checkout("branch")
+    revs["5"] = _gen(scm, "e", commit_timestamp=now - 6)
+
+    scm.checkout("master")
+    revs["4"] = _gen(scm, "d", commit_timestamp=now - 5)
+
+    scm.checkout("branch")
+    revs["6"] = _gen(scm, "f", commit_timestamp=now - 4)
+
+    scm.checkout("master")
+    revs["7"] = _gen(scm, "g", commit_timestamp=now - 3)
+    revs["8"] = scm.merge("branch", msg="merge branch")
+
+    inv_map = {v: k for k, v in revs.items()}
+    assert [inv_map[k] for k in git.list_all_commits()] == [
+        "8",
+        "7",
+        "6",
+        "4",
+        "5",
+        "2",
+        "3",
+        "1",
+    ]
+
+
+def test_list_all_tags(tmp_dir: TmpDir, scm: Git, git: Git, matcher: type[Matcher]):
+    rev_a = _gen(scm, "a")
     scm.tag("tag")
-    rev_c = _gen("c")
+    rev_b = _gen(scm, "b")
+    scm.tag("annotated", annotated=True, message="Annotated Tag")
+    rev_c = _gen(scm, "c")
+    rev_d = _gen(scm, "d")
+    assert git.list_all_commits() == matcher.unordered(rev_d, rev_c, rev_b, rev_a)
+
+    rev_e = _gen(scm, "e")
+    scm.tag(
+        "annotated2",
+        target="refs/tags/annotated",
+        annotated=True,
+        message="Annotated Tag",
+    )
+    assert git.list_all_commits() == matcher.unordered(
+        rev_e, rev_d, rev_c, rev_b, rev_a
+    )
+
+    rev_f = _gen(scm, "f")
+    scm.tag(
+        "annotated3",
+        target="refs/tags/annotated2",
+        annotated=True,
+        message="Annotated Tag 3",
+    )
+    assert git.list_all_commits() == matcher.unordered(
+        rev_f, rev_e, rev_d, rev_c, rev_b, rev_a
+    )
+
+    scm.gitpython.git.reset(rev_a, hard=True)
+    assert git.list_all_commits() == matcher.unordered(rev_b, rev_a)
+
+
+def test_list_all_commits_dangling_annotated_tag(tmp_dir: TmpDir, scm: Git, git: Git):
+    rev_a = _gen(scm, "a")
+    scm.tag("annotated", annotated=True, message="Annotated Tag")
+
+    _gen(scm, "b")
+
+    # Detach HEAD at rev_a, then delete master (which points at commit "b")
+    scm.checkout(rev_a)
+    scm.gitpython.repo.delete_head("master", force=True)
+
+    assert git.list_all_commits() == [rev_a]  # "b" is no longer reachable
+
+
+def test_list_all_commits_orphan(
+    tmp_dir: TmpDir, scm: Git, git: Git, matcher: type[Matcher]
+):
+    rev_a = _gen(scm, "a")
+
+    # Make an orphan branch
+    scm.gitpython.git.checkout("--orphan", "orphan-branch")
+    rev_orphan = _gen(scm, "orphanfile")
+
+    assert rev_orphan != rev_a
+    assert git.list_all_commits() == matcher.unordered(rev_orphan, rev_a)
+
+
+def test_list_all_commits_refs(
+    tmp_dir: TmpDir, scm: Git, git: Git, matcher: type[Matcher]
+):
+    assert git.list_all_commits() == []
+
+    rev_a = _gen(scm, "a")
+
+    assert git.list_all_commits() == [rev_a]
+    rev_b = _gen(scm, "b")
+    scm.set_ref("refs/remotes/origin/feature", rev_b)
+    assert git.list_all_commits() == matcher.unordered(rev_b, rev_a)
+
+    # also add refs/exps/foo/bar
+    rev_c = _gen(scm, "c")
+    scm.set_ref("refs/exps/foo/bar", rev_c)
+    assert git.list_all_commits() == matcher.unordered(rev_c, rev_b, rev_a)
+
+    # Dangling/broken ref ---
+    scm.set_ref("refs/heads/bad-ref", "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef")
+    with pytest.raises(Exception):  # noqa: B017, PT011
+        git.list_all_commits()
+    scm.remove_ref("refs/heads/bad-ref")
+
     scm.gitpython.git.reset(rev_a, hard=True)
-    scm.set_ref("refs/foo/bar", rev_c)
+    assert git.list_all_commits() == matcher.unordered(rev_b, rev_a)
+
+
+def test_list_all_commits_detached_head(
+    tmp_dir: TmpDir, scm: Git, git: Git, matcher: type[Matcher]
+):
+    rev_a = _gen(scm, "a")
+    rev_b = _gen(scm, "b")
+    rev_c = _gen(scm, "c")
+    scm.checkout(rev_b)
 
-    assert git.list_all_commits() == matcher.unordered(rev_a, rev_b)
+    assert scm.pygit2.repo.head_is_detached
+    assert git.list_all_commits() == matcher.unordered(rev_c, rev_b, rev_a)
 
 
 @pytest.mark.skip_git_backend("pygit2")