From bea8551d76ea11f4a7ca1666e9804e4886d7ad1b Mon Sep 17 00:00:00 2001 From: Kyle Meyer Date: Tue, 30 Jul 2019 21:14:20 -0400 Subject: [PATCH] RF: gitrepo: Rewrite get_submodules() to avoid GitPython In addition to keeping with our general direction of moving away from GitPython, this avoids an unresolved issue in GitPython's handling of submodules [0]. As documented by the new test, there is a change in behavior when get_submodules() is called in a repository that doesn't have a commit checked out: it now returns any registered submodules instead of an empty list. This is consistent with the behavior of 'git submodule' and 'datalad subdataset', and there doesn't seem to be an obvious reason not to support it. [0]: https://github.com/datalad/datalad/pull/3508#issuecomment-508462763 --- datalad/support/gitrepo.py | 53 +++++++++++++++++++++++---- datalad/support/tests/test_gitrepo.py | 10 +++++ 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/datalad/support/gitrepo.py b/datalad/support/gitrepo.py index d69c26b346..b10b76fe97 100644 --- a/datalad/support/gitrepo.py +++ b/datalad/support/gitrepo.py @@ -14,6 +14,7 @@ from itertools import chain import logging from collections import OrderedDict +from collections import namedtuple import re import shlex import time @@ -555,6 +556,10 @@ def update(self, op_code, cur_count, max_count=None, message=''): self._pbar.refresh() +# Compatibility kludge. See GitRepo.get_submodules(). +Submodule = namedtuple("Submodule", ["name", "path", "url"]) + + @add_metaclass(Flyweight) class GitRepo(RepoInterface): """Representation of a git repository @@ -2432,15 +2437,47 @@ def get_submodules_(self, paths=None): props.update(modinfo.get(path, {})) yield props - def get_submodules(self, sorted_=True): - """Return a list of git.Submodule instances for all submodules""" - # check whether we have anything in the repo. if not go home early - if not self.repo.head.is_valid(): - return [] - submodules = self.repo.submodules + def get_submodules(self, sorted_=True, paths=None, compat=True): + """Return list of submodules. + + Parameters + ---------- + sorted_ : bool, optional + Sort submodules by path name. + paths : list(pathlib.PurePath), optional + Restrict submodules to those under `paths`. + compat : bool, optional + If true, return a namedtuple that incompletely mimics the + attributes of GitPython's Submodule object in hope of backwards + compatibility with previous callers. Note that this form should be + considered temporary and callers should be updated; this flag will + be removed in a future release. + + Returns + ------- + List of submodule namedtuples if `compat` is true or otherwise a list + of dictionaries as returned by `get_submodules_`. + """ + xs = self.get_submodules_(paths=paths) + if compat: + warnings.warn("The attribute-based return value of get_submodules() " + "exists for compatibility purposes and will be removed " + "in an upcoming release", + DeprecationWarning) + xs = (Submodule(name=p["gitmodule_name"], + path=text_type(p["path"].relative_to(self.pathobj)), + url=p["gitmodule_url"]) + for p in xs) + if sorted_: - submodules = sorted(submodules, key=lambda x: x.path) - return submodules + if compat: + def key(x): + return x.path + else: + def key(x): + return x["path"] + xs = sorted(xs, key=key) + return list(xs) def is_submodule_modified(self, name, options=[]): """Whether a submodule has new commits diff --git a/datalad/support/tests/test_gitrepo.py b/datalad/support/tests/test_gitrepo.py index 9ebeca2d82..0df8bb4e04 100644 --- a/datalad/support/tests/test_gitrepo.py +++ b/datalad/support/tests/test_gitrepo.py @@ -1012,6 +1012,16 @@ def test_GitRepo_get_submodules(): raise SkipTest("TODO") +@with_tempfile +def test_get_submodules_parent_on_unborn_branch(path): + repo = GitRepo(path, create=True) + subrepo = GitRepo(op.join(path, "sub"), create=True) + subrepo.commit(msg="s", options=["--allow-empty"]) + repo.add_submodule(path="sub") + eq_([s.name for s in repo.get_submodules()], + ["sub"]) + + def test_kwargs_to_options(): class Some(object):