Skip to content

Commit

Permalink
Merge pull request #29 from martinal/martinal/topic-merge-conflict-re…
Browse files Browse the repository at this point in the history
…solution

Work on merge and strategies for automatic conflict resolution
  • Loading branch information
Martin Sandve Alnæs committed Mar 15, 2016
2 parents 72fc51f + d60af42 commit 6660a4a
Show file tree
Hide file tree
Showing 10 changed files with 570 additions and 69 deletions.
50 changes: 50 additions & 0 deletions nbdime/diff_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
from .log import NBDiffFormatError


# TODO: Move some of the less official utilities in here to another submodule


class DiffEntry(dict):
def __getattr__(self, name):
return self[name]
Expand Down Expand Up @@ -47,6 +50,9 @@ def make_op(op, *args):
elif op == "patch":
key, diff = args
return DiffEntry(op=op, key=key, diff=diff)
elif op == "keep":
key, = args
return DiffEntry(op=op, key=key)
else:
raise NBDiffFormatError("Invalid op {}.".format(op))

Expand All @@ -61,6 +67,13 @@ class Diff(object):
REMOVERANGE = "removerange"
PATCH = "patch"

# Not yet used in official diffs but possibly in
# internal tools or for future consideration
_KEEP = "keep"
#_MOVE = "move"
#_KEEPRANGE = "keeprange"
#_MOVERANGE = "moverange"


class SequenceDiff(Diff):

Expand Down Expand Up @@ -96,6 +109,12 @@ def remove(self, key, length):
def patch(self, key, diff):
    """Record a PATCH entry for ``key`` that carries the sub-diff ``diff``."""
    entry = make_op(Diff.PATCH, key, diff)
    self.append(entry)

def addrange(self, key, valuelist):
    """Record an ADDRANGE entry for ``key`` that carries ``valuelist``."""
    entry = make_op(Diff.ADDRANGE, key, valuelist)
    self.append(entry)

def removerange(self, key, length):
    """Record a REMOVERANGE entry for ``key`` that carries ``length``."""
    entry = make_op(Diff.REMOVERANGE, key, length)
    self.append(entry)


class MappingDiff(Diff):

Expand Down Expand Up @@ -245,6 +264,32 @@ def to_diffentry_dicts(di): # TODO: Better name, validate_diff? as_diff?
return di


def decompress_sequence_diff(di, n):
    """Expand a sequence diff into one diff entry per item of the base sequence.

    This is for internal use in algorithms where no insertions occur,
    making the mapping

        index -> entry

    possible, with the entry's op in (KEEP, REMOVE, PATCH, REPLACE).

    Args:
        di: iterable of diff entries for a sequence.
        n: number of items in the base sequence.

    Returns:
        A list of length ``n``. Position ``i`` holds the PATCH/REPLACE/REMOVE
        entry whose key is ``i``, a freshly made REMOVE entry if ``i`` is
        covered by a REMOVERANGE entry, or a KEEP entry otherwise.

    Raises:
        ValueError: if ``di`` contains an insertion (ADD/ADDRANGE) or an
            op that is not recognized here.
    """
    # Start by assuming every item is kept, then overwrite the touched ones.
    decompressed = [make_op(Diff._KEEP, i) for i in range(n)]
    for e in di:
        op = e.op
        if op in (Diff.PATCH, Diff.REPLACE, Diff.REMOVE):
            # Single-item ops are stored as-is at their own index.
            decompressed[e.key] = e
        elif op == Diff.REMOVERANGE:
            # Split the range into one REMOVE entry per covered index.
            for i in range(e.length):
                decompressed[e.key + i] = make_op(Diff.REMOVE, e.key + i)
        elif op in (Diff.ADDRANGE, Diff.ADD):
            raise ValueError("Not expecting insertions.")
        else:
            raise ValueError("Unknown op {}.".format(op))
    return decompressed


def as_dict_based_diff(di):
"""Converting to dict-based diff format for dicts for convenience.
Expand All @@ -255,6 +300,11 @@ def as_dict_based_diff(di):
return {e.key: e for e in di}


def revert_as_dict_based_diff(di):
    """Turn a dict-based diff back into a list of entries, ordered by key.

    Reverts as_dict_based_diff.
    """
    ordered_keys = list(di)
    ordered_keys.sort()
    return [di[key] for key in ordered_keys]


def to_json_patch(d, path=""):
"""Convert nbdime diff object into the RFC6902 JSON Patch format.
Expand Down
8 changes: 4 additions & 4 deletions nbdime/diffing/lcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@ def diff_from_lcs(A, B, A_indices, B_indices):
i = A_indices[r]
j = B_indices[r]
if i > x:
di.remove(x, i-x)
di.removerange(x, i-x)
if j > y:
di.add(x, B[y:j])
di.addrange(x, B[y:j])
x = i + 1
y = j + 1
if x < N:
di.remove(x, N-x)
di.removerange(x, N-x)
if y < M:
di.add(x, B[y:M])
di.addrange(x, B[y:M])
return di.diff # XXX
8 changes: 4 additions & 4 deletions nbdime/diffing/seq_difflib.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ def opcodes_to_diff(a, b, opcodes):
# Unlike difflib we don't represent equal stretches explicitly
pass
elif action == "replace":
di.remove(abegin, asize)
di.add(abegin, b[bbegin:bend])
di.removerange(abegin, asize)
di.addrange(abegin, b[bbegin:bend])
elif action == "insert":
di.add(abegin, b[bbegin:bend])
di.addrange(abegin, b[bbegin:bend])
elif action == "delete":
di.remove(abegin, asize)
di.removerange(abegin, asize)
else:
raise RuntimeError("Unknown action {}".format(action))
return di.diff # XXX
Expand Down
4 changes: 2 additions & 2 deletions nbdime/diffing/snakes.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,9 @@ def compute_diff_from_snakes(a, b, snakes, path="", predicates=None, differs=Non
i0, j0, i1, j1 = 0, 0, len(a), len(b)
for i, j, n in snakes + [(i1, j1, 0)]:
if i > i0:
di.remove(i0, i-i0)
di.removerange(i0, i-i0)
if j > j0:
di.add(i0, b[j0:j])
di.addrange(i0, b[j0:j])

for k in range(n):
aval = a[i + k]
Expand Down
240 changes: 240 additions & 0 deletions nbdime/merging/autoresolve.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
# coding: utf-8

# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

from __future__ import unicode_literals

from six import string_types

from ..diff_format import SequenceDiff, MappingDiff, Diff, make_op
from ..diff_format import as_dict_based_diff, revert_as_dict_based_diff, decompress_sequence_diff
from ..patching import patch


def __find_strategy(path, strategies):
    """Look up the strategy for ``path``, falling back to the closest parent.

    Args:
        path: "/"-separated path into the document.
        strategies: mapping from path to strategy name.

    Returns:
        The strategy registered for ``path`` or, if there is none, for the
        nearest enclosing path obtained by stripping trailing "/component"
        parts one at a time. ``None`` if no candidate has an entry.
    """
    # Use closest parent strategy if specific entry is missing
    strategy = strategies.get(path)
    ppath = path
    while strategy is None and ppath:
        i = ppath.rfind("/")
        if i < 0:
            # No separator left, so there is no further parent to try.
            break
        ppath = ppath[:i]
        # Query the shortened parent path, not the original path again.
        strategy = strategies.get(ppath)
    return strategy


# Strategies for handling conflicts. TODO: Implement these and refine further!
#generic_conflict_strategies = ("mergetool", "use-base", "use-local", "use-remote", "fail")
#source_conflict_strategies = generic_conflict_strategies + ("inline",)
#transient_conflict_strategies = generic_conflict_strategies + ("clear",)
#output_conflict_strategies = transient_conflict_strategies + ("use-all",)


# Sentinel object: returned by __patch_item when the diff entry is a REMOVE,
# i.e. the item no longer exists after patching. Compare with `is`.
Deleted = object()

def __patch_item(value, diffentry):
    """Apply a single diff entry to one item and return the outcome.

    Returns the entry's replacement value for REPLACE, the result of
    ``patch(value, diffentry.diff)`` for PATCH, and the ``Deleted``
    sentinel for REMOVE.

    Raises:
        ValueError: for any other op.
    """
    kind = diffentry.op
    if kind == Diff.REPLACE:
        return diffentry.value
    if kind == Diff.PATCH:
        return patch(value, diffentry.diff)
    if kind == Diff.REMOVE:
        return Deleted
    raise ValueError("Invalid item patch op {}".format(kind))

def __make_join_diffentry(value, le, re):
    """Build a diff entry that resolves a conflict by joining both sides.

    Joining e.g. an outputs list means concatenating all items: the base
    ``value``, then the locally patched value, then the remotely patched
    value. A side that removed the item contributes nothing.

    Args:
        value: the base value (expected to support ``+`` concatenation,
            e.g. a list).
        le: local diff entry for this item.
        re: remote diff entry for this item.

    Returns:
        A REPLACE entry for ``le.key`` carrying the joined value.
    """
    lvalue = __patch_item(value, le)
    rvalue = __patch_item(value, re)

    # A deletion on one side adds no items to the joined result.
    if lvalue is Deleted:
        lvalue = []
    if rvalue is Deleted:
        rvalue = []
    newvalue = value + lvalue + rvalue
    return make_op(Diff.REPLACE, le.key, newvalue)

def __make_inline_diffentry(value, le, re):
    """Placeholder for building an "inline" conflict resolution entry.

    Not implemented yet.

    Raises:
        NotImplementedError: always, so an accidental call fails with a
            clear message instead of a NameError from a placeholder name.
    """
    # FIXME implement
    raise NotImplementedError("The 'inline' conflict strategy is not implemented.")



def cleared_value(value):
    """Return the "cleared" counterpart of ``value``, chosen by its type.

    Lists (e.g. outputs) clear to a new empty list, dicts (e.g. metadata)
    to a new empty dict, strings (e.g. source) to the empty string, and
    anything else (atomic values) to ``None``.
    """
    if isinstance(value, list):
        return []
    if isinstance(value, dict):
        return {}
    if isinstance(value, string_types):
        return ""
    return None


def resolve_single_conflict(value, le, re, strategy, path):
    """Resolve one conflict between entries ``le`` and ``re`` using ``strategy``.

    Returns a triple ``(resolution, local, remote)``: the entry to apply as
    the resolution (or ``None``) and the local/remote entries that remain
    unresolved (or ``None``).

    Strategies:
        "fail":       raise RuntimeError, no conflict was expected here.
        "mergetool":  no resolution, both entries stay unresolved.
        "use-base":   drop both entries.
        "use-local":  the local entry is the resolution.
        "use-remote": the remote entry is the resolution.
        "clear":      replace the value with its cleared counterpart.

    The "inline" and "join" strategies are not implemented yet (FIXME).

    Raises:
        RuntimeError: for "fail" and for any unrecognized strategy.
    """
    assert le.key == re.key

    if strategy == "fail":
        raise RuntimeError("Not expecting a conflict at path {}.".format(path))

    if strategy == "mergetool":
        return None, le, re

    if strategy == "use-base":
        return None, None, None

    if strategy == "use-local":
        return le, None, None

    if strategy == "use-remote":
        return re, None, None

    if strategy == "clear":
        emptied = cleared_value(value)
        return make_op(Diff.REPLACE, le.key, emptied), None, None

    raise RuntimeError("Invalid strategy {}.".format(strategy))


def autoresolve_lists(merged, lcd, rcd, strategies, path):
    """Try to resolve conflicts between two sequence diffs of a list.

    A single strategy is looked up for all items, under the path
    ``path + "/*"``. Both diffs are expanded to one entry per item of
    ``merged`` and then walked item by item.

    Returns:
        ``(resolutions, unresolved_local, unresolved_remote)`` as diffs.
    """
    subpath = "/".join((path, "*"))
    strategy = strategies.get(subpath)

    count = len(merged)
    local = decompress_sequence_diff(lcd, count)
    remote = decompress_sequence_diff(rcd, count)

    resolutions = SequenceDiff()
    newlcd = SequenceDiff()
    newrcd = SequenceDiff()
    for index, (value, le, re) in enumerate(zip(merged, local, remote)):
        # Either both sides touch this item or neither does
        assert (le.op == Diff._KEEP) == (re.op == Diff._KEEP)

        if le.op == Diff._KEEP or re.op == Diff._KEEP:
            # Nothing to resolve for untouched items
            continue

        if strategy is not None:
            # A strategy is configured: let it decide what to do
            e, le, re = resolve_single_conflict(value, le, re, strategy, subpath)
            for target, entry in ((resolutions, e), (newlcd, le), (newrcd, re)):
                if entry is not None:
                    target.append(entry)
            continue

        if le.op == Diff.PATCH and re.op == Diff.PATCH:
            # No strategy here, but both sides carry sub-diffs: recurse into the item
            di, ldi, rdi = autoresolve(value, le.diff, re.diff, strategies, subpath)
            if di:
                resolutions.patch(index, di)
            if ldi:
                newlcd.patch(index, ldi)
            if rdi:
                newrcd.patch(index, rdi)
            continue

        # Remaining alternatives when not both sides are PATCH:
        # - INSERT: not happening
        # - REPLACE: technically possible, could be converted to PATCH, but does it happen?
        # - REMOVE: more likely, but resolving the subdocument diff would still
        #   leave a full conflict on the parent here
        # No resolution, so both entries are kept as conflicts
        newlcd.append(le)
        newrcd.append(re)

    return resolutions.diff, newlcd.diff, newrcd.diff


def autoresolve_dicts(merged, lcd, rcd, strategies, path):
    """Try to resolve conflicts between two mapping diffs of a dict.

    For each conflicting key, in sorted order, the strategy registered for
    ``path + "/" + key`` is applied if there is one. Otherwise, if both
    sides are PATCH entries, resolution recurses into the value. Any other
    conflict is passed through unresolved.

    Returns:
        ``(resolutions, unresolved_local, unresolved_remote)`` as diffs.
    """
    # Index both diffs by key for convenient lookup; this step becomes
    # unnecessary if the diff format is changed to always work this way
    local_by_key = as_dict_based_diff(lcd)
    remote_by_key = as_dict_based_diff(rcd)

    # A conflict cannot be one-sided, so both sides must cover the same keys
    assert set(local_by_key) == set(remote_by_key)

    resolutions = MappingDiff()
    newlcd = MappingDiff()
    newrcd = MappingDiff()

    for key in sorted(local_by_key):
        # Look up how conflicts in this part of the document are handled
        subpath = "/".join((path, key))
        strategy = strategies.get(subpath)

        # Fetch the value and the two conflicting entries
        value = merged[key]
        le = local_by_key[key]
        re = remote_by_key[key]
        assert le.key == key
        assert re.key == key

        if strategy is None:
            if le.op == Diff.PATCH and re.op == Diff.PATCH:
                # No strategy here, but both sides carry sub-diffs: recurse into the value
                di, ldi, rdi = autoresolve(value, le.diff, re.diff, strategies, subpath)
                if di:
                    resolutions.patch(key, di)
                if ldi:
                    newlcd.patch(key, ldi)
                if rdi:
                    newrcd.patch(key, rdi)
            else:
                # Remaining alternatives when not both sides are PATCH:
                # - INSERT: not happening
                # - REPLACE: technically possible, could be converted to PATCH, but does it happen?
                # - REMOVE: more likely, but resolving the subdocument diff would
                #   still leave a full conflict on the parent here
                # No resolution, so both entries are kept as conflicts
                newlcd.append(le)
                newrcd.append(re)
            continue

        # A strategy is configured: let it decide what to do
        e, le, re = resolve_single_conflict(value, le, re, strategy, subpath)
        if e is not None:
            resolutions.append(e)
        if le is not None:
            newlcd.append(le)
        if re is not None:
            newrcd.append(re)

    return resolutions.diff, newlcd.diff, newrcd.diff


def autoresolve(merged, local_diff, remote_diff, strategies, path):
    """Resolve conflicts in ``merged`` according to ``strategies``.

    Dispatches on the type of ``merged``: dicts go to ``autoresolve_dicts``
    and lists go to ``autoresolve_lists``.

    Args:
        merged: the merged (sub)document, a dict or a list.
        local_diff: conflicting local diff on ``merged``.
        remote_diff: conflicting remote diff on ``merged``.
        strategies: mapping from document path to strategy name.
        path: "/"-separated path of ``merged`` within the document.

    Returns: resolution_diff, unresolved_local_diff, unresolved_remote_diff

    Raises:
        RuntimeError: if ``merged`` is neither a dict nor a list.
    """
    if isinstance(merged, dict):
        return autoresolve_dicts(merged, local_diff, remote_diff, strategies, path)
    elif isinstance(merged, list):
        return autoresolve_lists(merged, local_diff, remote_diff, strategies, path)
    else:
        # Both placeholders must be filled by format(); passing path outside
        # the format() call would raise IndexError instead of this error.
        raise RuntimeError(
            "Invalid merged type {} at path {}".format(type(merged).__name__, path))

0 comments on commit 6660a4a

Please sign in to comment.