Merge pull request #29 from martinal/martinal/topic-merge-conflict-resolution

Work on merge and strategies for automatic conflict resolution
jupyter · Mar 15, 2016 · 6660a4a · 6660a4a
2 parents 72fc51f + d60af42
commit 6660a4a
Show file tree

Hide file tree

Showing 10 changed files with 570 additions and 69 deletions.
diff --git a/nbdime/diff_format.py b/nbdime/diff_format.py
@@ -12,6 +12,9 @@
 from .log import NBDiffFormatError
 
 
+# TODO: Move some of the less official utilities in here to another submodule
+
+
 class DiffEntry(dict):
     def __getattr__(self, name):
         return self[name]
@@ -47,6 +50,9 @@ def make_op(op, *args):
     elif op == "patch":
         key, diff = args
         return DiffEntry(op=op, key=key, diff=diff)
+    elif op == "keep":
+        key, = args
+        return DiffEntry(op=op, key=key)
     else:
         raise NBDiffFormatError("Invalid op {}.".format(op))
 
@@ -61,6 +67,13 @@ class Diff(object):
     REMOVERANGE = "removerange"
     PATCH = "patch"
 
+    # Not yet used in official diffs but possibly in
+    # internal tools or for future consideration
+    _KEEP = "keep"
+    #_MOVE = "move"
+    #_KEEPRANGE = "keeprange"
+    #_MOVERANGE = "moverange"
+
 
 class SequenceDiff(Diff):
 
@@ -96,6 +109,12 @@ def remove(self, key, length):
     def patch(self, key, diff):
         self.append(make_op(Diff.PATCH, key, diff))
 
+    def addrange(self, key, valuelist):
+        """Append a ``Diff.ADDRANGE`` entry for ``key`` carrying ``valuelist``."""
+        entry = make_op(Diff.ADDRANGE, key, valuelist)
+        self.append(entry)
+
+    def removerange(self, key, length):
+        """Append a ``Diff.REMOVERANGE`` entry for ``key`` spanning ``length``."""
+        entry = make_op(Diff.REMOVERANGE, key, length)
+        self.append(entry)
+
 
 class MappingDiff(Diff):
 
@@ -245,6 +264,32 @@ def to_diffentry_dicts(di):  # TODO: Better name, validate_diff? as_diff?
         return di
 
 
+def decompress_sequence_diff(di, n):
+    """Expand a sequence diff into one diff entry per item of the base sequence.
+
+    This is for internal use in algorithms where no
+    insertions occur, making the mapping
+
+        index -> diff entry
+
+    possible with op in (KEEP, REMOVE, PATCH, REPLACE).
+
+    Parameters:
+        di: iterable of diff entries with ``op`` and ``key`` attributes
+            (and ``length`` for REMOVERANGE entries).
+        n: number of items in the base sequence.
+
+    Returns a list of ``n`` diff entries, where position ``i`` holds the
+    entry affecting base item ``i``, or a KEEP entry if ``di`` leaves
+    that item untouched.
+
+    Raises ValueError if ``di`` contains insertions or unknown ops.
+    """
+    # Start out assuming every base item is kept, then overwrite
+    # the positions that the diff touches
+    decompressed = [make_op(Diff._KEEP, i) for i in range(n)]
+    for e in di:
+        op = e.op
+        if op in (Diff.PATCH, Diff.REPLACE, Diff.REMOVE):
+            # Single-item ops are stored as they are
+            decompressed[e.key] = e
+        elif op == Diff.REMOVERANGE:
+            # Expand the range into one REMOVE entry per removed item
+            for i in range(e.key, e.key + e.length):
+                decompressed[i] = make_op(Diff.REMOVE, i)
+        elif op in (Diff.ADDRANGE, Diff.ADD):
+            raise ValueError("Not expecting insertions.")
+        else:
+            raise ValueError("Unknown op {}.".format(op))
+    return decompressed
+
+
 def as_dict_based_diff(di):
     """Converting to dict-based diff format for dicts for convenience.
 
@@ -255,6 +300,11 @@ def as_dict_based_diff(di):
     return {e.key: e for e in di}
 
 
+def revert_as_dict_based_diff(di):
+    """Turn a dict-based diff back into a list of entries ordered by key."""
+    entries = []
+    for key in sorted(di):
+        entries.append(di[key])
+    return entries
+
+
 def to_json_patch(d, path=""):
     """Convert nbdime diff object into the RFC6902 JSON Patch format.
 

diff --git a/nbdime/diffing/lcs.py b/nbdime/diffing/lcs.py
@@ -23,13 +23,13 @@ def diff_from_lcs(A, B, A_indices, B_indices):
         i = A_indices[r]
         j = B_indices[r]
         if i > x:
-            di.remove(x, i-x)
+            di.removerange(x, i-x)
         if j > y:
-            di.add(x, B[y:j])
+            di.addrange(x, B[y:j])
         x = i + 1
         y = j + 1
     if x < N:
-        di.remove(x, N-x)
+        di.removerange(x, N-x)
     if y < M:
-        di.add(x, B[y:M])
+        di.addrange(x, B[y:M])
     return di.diff  # XXX
diff --git a/nbdime/diffing/seq_difflib.py b/nbdime/diffing/seq_difflib.py
@@ -23,12 +23,12 @@ def opcodes_to_diff(a, b, opcodes):
             # Unlike difflib we don't represent equal stretches explicitly
             pass
         elif action == "replace":
-            di.remove(abegin, asize)
-            di.add(abegin, b[bbegin:bend])
+            di.removerange(abegin, asize)
+            di.addrange(abegin, b[bbegin:bend])
         elif action == "insert":
-            di.add(abegin, b[bbegin:bend])
+            di.addrange(abegin, b[bbegin:bend])
         elif action == "delete":
-            di.remove(abegin, asize)
+            di.removerange(abegin, asize)
         else:
             raise RuntimeError("Unknown action {}".format(action))
     return di.diff  # XXX

diff --git a/nbdime/diffing/snakes.py b/nbdime/diffing/snakes.py
@@ -83,9 +83,9 @@ def compute_diff_from_snakes(a, b, snakes, path="", predicates=None, differs=Non
     i0, j0, i1, j1 = 0, 0, len(a), len(b)
     for i, j, n in snakes + [(i1, j1, 0)]:
         if i > i0:
-            di.remove(i0, i-i0)
+            di.removerange(i0, i-i0)
         if j > j0:
-            di.add(i0, b[j0:j])
+            di.addrange(i0, b[j0:j])
 
         for k in range(n):
             aval = a[i + k]

diff --git a/nbdime/merging/autoresolve.py b/nbdime/merging/autoresolve.py
@@ -0,0 +1,240 @@
+# coding: utf-8
+
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+
+from __future__ import unicode_literals
+
+from six import string_types
+
+from ..diff_format import SequenceDiff, MappingDiff, Diff, make_op
+from ..diff_format import as_dict_based_diff, revert_as_dict_based_diff, decompress_sequence_diff
+from ..patching import patch
+
+
+def __find_strategy(path, strategies):
+    """Look up the conflict resolution strategy applying to ``path``.
+
+    Uses the closest parent strategy if the specific entry is missing:
+    path components are stripped from the end, one "/"-separated
+    component at a time, until a strategy is found.
+
+    Returns the strategy found, or None if neither ``path`` nor any
+    of its parents has an entry in ``strategies``.
+    """
+    strategy = strategies.get(path)
+    ppath = path
+    while strategy is None and ppath:
+        i = ppath.rfind("/")
+        if i < 0:
+            break
+        # Look up the shortened parent path, not the original path again
+        ppath = ppath[:i]
+        strategy = strategies.get(ppath)
+    return strategy
+
+
+# Strategies for handling conflicts  TODO: Implement these and refine further!
+#generic_conflict_strategies = ("mergetool", "use-base", "use-local", "use-remote", "fail")
+#source_conflict_strategies = generic_conflict_strategies + ("inline",)
+#transient_conflict_strategies = generic_conflict_strategies + ("clear",)
+#output_conflict_strategies = transient_conflict_strategies + ("use-all",)
+
+
+# Sentinel object returned by __patch_item when the
+# diff entry applied to an item is a REMOVE
+Deleted = object()
+
+def __patch_item(value, diffentry):
+    """Apply a single-item diff entry to ``value`` and return the result.
+
+    REPLACE yields the entry's replacement value, PATCH the result of
+    patching ``value`` with the entry's diff, and REMOVE the ``Deleted``
+    sentinel. Any other op raises ValueError.
+    """
+    kind = diffentry.op
+    if kind == Diff.REPLACE:
+        return diffentry.value
+    if kind == Diff.PATCH:
+        return patch(value, diffentry.diff)
+    if kind == Diff.REMOVE:
+        return Deleted
+    raise ValueError("Invalid item patch op {}".format(kind))
+
+def __make_join_diffentry(value, le, re):
+    """Unfinished: build a diff entry joining the local and remote changes.
+
+    NOTE(review): this function cannot run as written. ``patch_item`` is
+    neither defined nor imported in this file (the helper defined here is
+    named ``__patch_item``), and ``FIXME`` is an undefined placeholder
+    for the entry that should be returned.
+    """
+    # Joining e.g. an outputs list means concatenating all items
+    lvalue = patch_item(value, le)
+    rvalue = patch_item(value, re)
+
+    # A side that deleted the item contributes nothing to the joined value
+    if lvalue is Deleted:
+        lvalue = []
+    if rvalue is Deleted:
+        rvalue = []
+    newvalue = value + lvalue + rvalue
+    e = FIXME
+    return e
+
+def __make_inline_diffentry(value, le, re):
+    """Unfinished placeholder, presumably for the "inline" conflict strategy.
+
+    NOTE(review): ``FIXME`` is an undefined name, so calling this
+    raises NameError until the function is implemented.
+    """
+    # FIXME implement
+    e = FIXME
+    return e
+
+
+
+def cleared_value(value):
+    """Return the "cleared" counterpart of ``value``.
+
+    Lists clear to an empty list (e.g. an outputs list), dicts to an
+    empty dict (e.g. a metadata dict), strings to an empty string
+    (e.g. a source string), and anything else (atomic values) to None.
+    """
+    if isinstance(value, list):
+        return []
+    if isinstance(value, dict):
+        return {}
+    if isinstance(value, string_types):
+        return ""
+    return None
+
+
+def resolve_single_conflict(value, le, re, strategy, path):
+    """Apply ``strategy`` to the conflicting entries ``le`` and ``re``.
+
+    Both entries must refer to the same key.
+
+    Returns a tuple (e, le, re) where ``e`` is the resolution entry and
+    ``le``/``re`` are the local/remote entries still left in conflict,
+    each being None when absent.
+
+    Raises RuntimeError for the "fail" strategy and for unknown strategies.
+    """
+    assert le.key == re.key
+
+    if strategy == "fail":
+        raise RuntimeError("Not expecting a conflict at path {}.".format(path))
+
+    if strategy == "mergetool":
+        # No resolution here, both conflicting entries are passed on
+        return None, le, re
+
+    if strategy == "use-base":
+        # Drop both changes and emit no resolution
+        return None, None, None
+
+    if strategy == "use-local":
+        return le, None, None
+
+    if strategy == "use-remote":
+        return re, None, None
+
+    if strategy == "clear":
+        # Replace the conflicted value with its cleared counterpart
+        replacement = cleared_value(value)
+        return make_op(Diff.REPLACE, le.key, replacement), None, None
+
+    # FIXME: Implement the "inline" strategy (make_inline_diffentry)
+    # FIXME: Implement the "join" strategy (make_join_diffentry)
+
+    raise RuntimeError("Invalid strategy {}.".format(strategy))
+
+
+def autoresolve_lists(merged, lcd, rcd, strategies, path):
+    """Autoresolve conflicts between two diffs of the list ``merged``.
+
+    All items share a single strategy, looked up in ``strategies``
+    under the wildcard subpath ``path + "/*"``.
+
+    Returns: resolution_diff, unresolved_local_diff, unresolved_remote_diff
+    """
+    subpath = "/".join([path, "*"])
+    strategy = strategies.get(subpath)
+
+    # One entry per item of merged for each side, KEEP where untouched
+    size = len(merged)
+    local_entries = decompress_sequence_diff(lcd, size)
+    remote_entries = decompress_sequence_diff(rcd, size)
+
+    resolved = SequenceDiff()
+    unresolved_local = SequenceDiff()
+    unresolved_remote = SequenceDiff()
+    for index, value in enumerate(merged):
+        # Figure out what each side wants to do with merged[index]
+        le = local_entries[index]
+        re = remote_entries[index]
+
+        # Either both sides touch an item or neither does
+        local_keeps = le.op == Diff._KEEP
+        remote_keeps = re.op == Diff._KEEP
+        assert local_keeps == remote_keeps
+
+        if local_keeps or remote_keeps:
+            # Nothing to resolve for items without conflict
+            continue
+
+        if strategy is not None:
+            # Autoresolve conflicts for this item
+            e, le, re = resolve_single_conflict(value, le, re, strategy, subpath)
+            if e is not None:
+                resolved.append(e)
+            if le is not None:
+                unresolved_local.append(le)
+            if re is not None:
+                unresolved_remote.append(re)
+        elif le.op == Diff.PATCH and re.op == Diff.PATCH:
+            # No strategy for the items, but both sides carry
+            # subdocument diffs that we can recurse into
+            di, ldi, rdi = autoresolve(value, le.diff, re.diff, strategies, subpath)
+            if di:
+                resolved.patch(index, di)
+            if ldi:
+                unresolved_local.patch(index, ldi)
+            if rdi:
+                unresolved_remote.patch(index, rdi)
+        else:
+            # Alternatives if we don't have PATCH, are:
+            #  - INSERT: not happening
+            #  - REPLACE: technically possible, if so we can convert it to PATCH, but does it happen?
+            #  - REMOVE: more likely, but resolving subdocument diff will still leave us with a full conflict on parent here
+            # No resolution, keep conflicts le, re
+            unresolved_local.append(le)
+            unresolved_remote.append(re)
+
+    return resolved.diff, unresolved_local.diff, unresolved_remote.diff
+
+
+def autoresolve_dicts(merged, lcd, rcd, strategies, path):
+    """Autoresolve conflicts between two diffs of the dict ``merged``.
+
+    Each conflicting key gets its strategy looked up in ``strategies``
+    under the subpath ``path + "/" + key``.
+
+    Returns: resolution_diff, unresolved_local_diff, unresolved_remote_diff
+    """
+    # Work on key -> entry mappings for convenience. This step will be
+    # unnecessary if the diff format changes to always work this way
+    local_by_key = as_dict_based_diff(lcd)
+    remote_by_key = as_dict_based_diff(rcd)
+
+    # We can't have a one-sided conflict so keys must match
+    assert set(local_by_key) == set(remote_by_key)
+
+    resolved = MappingDiff()
+    unresolved_local = MappingDiff()
+    unresolved_remote = MappingDiff()
+
+    for key in sorted(local_by_key):
+        # Query how to handle conflicts in this part of the document
+        subpath = "/".join([path, key])
+        strategy = strategies.get(subpath)
+
+        # Get value and conflicts
+        value = merged[key]
+        le = local_by_key[key]
+        re = remote_by_key[key]
+        assert le.key == key
+        assert re.key == key
+
+        if strategy is not None:
+            # Autoresolve conflicts for this key
+            e, le, re = resolve_single_conflict(value, le, re, strategy, subpath)
+            if e is not None:
+                resolved.append(e)
+            if le is not None:
+                unresolved_local.append(le)
+            if re is not None:
+                unresolved_remote.append(re)
+            continue
+
+        if le.op == Diff.PATCH and re.op == Diff.PATCH:
+            # No strategy for this key, but both sides carry
+            # subdocument diffs that we can recurse into
+            di, ldi, rdi = autoresolve(value, le.diff, re.diff, strategies, subpath)
+            if di:
+                resolved.patch(key, di)
+            if ldi:
+                unresolved_local.patch(key, ldi)
+            if rdi:
+                unresolved_remote.patch(key, rdi)
+            continue
+
+        # Alternatives if we don't have PATCH, are:
+        #  - INSERT: not happening
+        #  - REPLACE: technically possible, if so we can convert it to PATCH, but does it happen?
+        #  - REMOVE: more likely, but resolving subdocument diff will still leave us with a full conflict on parent here
+        # No resolution, keep conflicts le, re
+        unresolved_local.append(le)
+        unresolved_remote.append(re)
+
+    return resolved.diff, unresolved_local.diff, unresolved_remote.diff
+
+
+def autoresolve(merged, local_diff, remote_diff, strategies, path):
+    """Autoresolve conflicts between two diffs of the document ``merged``.
+
+    Dispatches on the type of ``merged``: dicts are handled by
+    autoresolve_dicts and lists by autoresolve_lists.
+
+    Returns: resolution_diff, unresolved_local_diff, unresolved_remote_diff
+
+    Raises RuntimeError if ``merged`` is neither a dict nor a list.
+    """
+    if isinstance(merged, dict):
+        return autoresolve_dicts(merged, local_diff, remote_diff, strategies, path)
+    elif isinstance(merged, list):
+        return autoresolve_lists(merged, local_diff, remote_diff, strategies, path)
+    else:
+        # Both the type name and the path go into format(); passing path
+        # outside of it left the second placeholder without an argument
+        raise RuntimeError("Invalid merged type {} at path {}".format(
+            type(merged).__name__, path))