Skip to content

Commit

Permalink
RF: Use the exact same code path for adding and updating a submodule
Browse files Browse the repository at this point in the history
For additional improvement TODOs see the diff.

Importantly, this consolidation makes it obsolete to have the result
record be consistent with the one that is yielded by `add()` (which was
used on submodule update). An unfortunate setup, that was criticized
already a long time ago
datalad#3398 (comment)

Minus the issue of not breaking the promise of result invariance
over time, we are no free to use sensible result records.
  • Loading branch information
mih committed Dec 13, 2020
1 parent f8897c2 commit 2e03482
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 14 deletions.
7 changes: 7 additions & 0 deletions datalad/core/distributed/tests/test_push.py
Expand Up @@ -867,6 +867,13 @@ def test_nested_pushclone_cycle_allplatforms(origpath, storepath, clonepath):
orig_super = Dataset(Path(origpath, 'super'))
orig_sub = Dataset(orig_super.pathobj / 'sub')

(orig_super.pathobj / 'file1.txt').write_text('some1')
(orig_sub.pathobj / 'file2.txt').write_text('some1')
with chpwd(orig_super.path):
run(['datalad', 'save', '--recursive'])

assert_repo_status(orig_super.path)

# the "true" branch that sub is on, and the gitsha of the HEAD commit of it
orig_sub_corr_branch = \
orig_sub.repo.get_corresponding_branch() or orig_sub.repo.get_active_branch()
Expand Down
38 changes: 24 additions & 14 deletions datalad/support/gitrepo.py
Expand Up @@ -4111,7 +4111,7 @@ def save_(self, message=None, paths=None, _status=None, **kwargs):
# without a partial commit an AnnexRepo would ignore any submodule
# path in its add helper, hence `git add` them explicitly
to_stage_submodules = {
str(f.relative_to(self.pathobj)): props
f: props
for f, props in status.items()
if props.get('state', None) in ('modified', 'untracked')
and props.get('type', None) == 'dataset'}
Expand All @@ -4121,10 +4121,9 @@ def save_(self, message=None, paths=None, _status=None, **kwargs):
len(to_stage_submodules), self,
to_stage_submodules
if len(to_stage_submodules) < 10 else '')
for r in GitRepo._save_add(
self,
to_stage_submodules,
git_opts=None):
for r in self._save_add_submodules(to_stage_submodules):
if r.get('status', None) == 'ok':
added_submodule = True
yield r

if added_submodule or vanished_subds:
Expand Down Expand Up @@ -4200,7 +4199,7 @@ def _save_add(self, files, git_opts=None):
raise

def _save_add_submodules(self, paths):
"""Add new submodules
"""Add new submodules, or updates records of existing ones
This method does not use `git submodule add`, but aims to be more
efficient by limiting the scope to mere in-place registration of
Expand Down Expand Up @@ -4239,25 +4238,36 @@ def _save_add_submodules(self, paths):
url = './{}'.format(rpath)
subm_id = subm.config.get('datalad.dataset.id', None)
info.append(
dict(path=path, rpath=rpath, commit=subm_commit, id=subm_id,
dict(
# if we have additional information on this path, pass it on.
# if not, treat it as an untracked directory
paths[path] if isinstance(paths, dict)
else dict(type='directory', state='untracked'),
path=path, rpath=rpath, commit=subm_commit, id=subm_id,
url=url))

# bypass any convenience or safe-manipulator for speed reasons
# use case: saving many new subdatasets in a single run
with (self.pathobj / '.gitmodules').open('a') as gmf, \
(self.pathobj / '.git' / 'config').open('a') as gcf:
for i in info:
# we update the subproject commit unconditionally
self.call_git([
'update-index', '--add', '--replace', '--cacheinfo', '160000',
i['commit'], i['rpath']
])
gmprops = dict(path=i['rpath'], url=i['url'])
if i['id']:
gmprops['datalad-id'] = i['id']
write_config_section(
gmf, 'submodule', i['rpath'], gmprops)
write_config_section(
gcf, 'submodule', i['rpath'], dict(active='true', url=i['url']))
# only write the .gitmodules/.config changes when this is not yet
# a subdataset
# TODO: we could update the URL, and branch info at this point,
# even for previously registered subdatasets
if i['type'] != 'dataset':
gmprops = dict(path=i['rpath'], url=i['url'])
if i['id']:
gmprops['datalad-id'] = i['id']
write_config_section(
gmf, 'submodule', i['rpath'], gmprops)
write_config_section(
gcf, 'submodule', i['rpath'], dict(active='true', url=i['url']))

# This mirrors the result structure yielded for
# to_stage_submodules below.
Expand Down

0 comments on commit 2e03482

Please sign in to comment.