Skip to content

Commit

Permalink
Introduce "enums" for diff entry actions.
Browse files Browse the repository at this point in the history
Also use custom exception for better error capturing.
  • Loading branch information
Martin Sandve Alnæs committed Dec 2, 2015
1 parent a9c66c6 commit 6ac8994
Show file tree
Hide file tree
Showing 12 changed files with 217 additions and 182 deletions.
112 changes: 68 additions & 44 deletions nbdime/dformat.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,90 +6,111 @@
from six import string_types
from six.moves import xrange as range

from .log import error
from .log import error, NBDiffFormatError

# Valid values for the action field in diff entries
PATCH = u"!"
INSERT = u"+"
DELETE = u"-"
REPLACE = u":"
SEQINSERT = u"++"
SEQDELETE = u"--"
SEQREPLACE = u"::"

ACTIONS = [
PATCH,
INSERT,
DELETE,
REPLACE,
SEQINSERT,
SEQDELETE,
SEQREPLACE,
]

sequence_types = string_types + (list,)

def error_invalid_diff_entry(s):
error("Invalid diff entry '{}'.".format(s))

def is_valid_diff(diff, deep=False):
try:
validate_diff(diff, deep=deep)
result = True
except:
except NBDiffFormatError as e:
result = False
return result


def validate_diff(diff, deep=False):
if not isinstance(diff, list):
error("Diff must be a list.")
raise NBDiffFormatError("Diff must be a list.")
for s in diff:
validate_diff_entry(s, deep=deep)



def validate_diff_entry(s, deep=False):
"""Check that s is a well formed diff entry.
The diff entry format is a list
s[0] # action (one of "+" (insert), "-" (delete), ":" (replace), "!" (patch))
s[0] # action (one of PATCH, INSERT, DELETE, REPLACE)
s[1] # key (str for diff of dict, int for diff of sequence (list or str))
s[2] # action specific argument, omitted if action is "-"
Additional experimental sequence actions "++", "--", "::" are also allowed.
For sequences (lists and strings) the actions
SEQINSERT, SEQDELETE, SEQREPLACE are also allowed.
"""
# Sequence types, allowing both list and string in sequence insert ("++")
sequence = string_types + (list,)

# Entry is always a list with 3 items, or 2 in the special case of single item deletion
if not isinstance(s, list):
error("Diff entry '{}' is not a list.".format(s))
raise NBDiffFormatError("Diff entry '{}' is not a list.".format(s))
n = len(s)
if not (n == 3 or (n == 2 and s[0] == "-")):
error("Diff entry '{}' has the wrong size.".format(s))
if not (n == 3 or (n == 2 and s[0] == DELETE)):
raise NBDiffFormatError("Diff entry '{}' has the wrong size.".format(s))

# Check key (list or str uses int key, dict uses str key)
is_sequence = isinstance(s[1], int)
is_mapping = isinstance(s[1], string_types)
if not (is_sequence or is_mapping):
error("Diff entry key '{}' has type '{}', expecting int or unicode/str.".format(s[1], type(s[1])))
raise NBDiffFormatError("Diff entry key '{}' has type '{}', expecting int or unicode/str.".format(s[1], type(s[1])))

# Experimental sequence diff actions ++, --, :: are not valid for mapping diffs
if is_mapping and len(s[0]) > 1:
error("Diff action '{}' only valid in diff of sequence.".format(s[0]))
raise NBDiffFormatError("Diff action '{}' only valid in diff of sequence.".format(s[0]))

if s[0] == "+":
if s[0] == INSERT:
# s[2] is a single value to insert at key
pass
elif s[0] == "-":
elif s[0] == DELETE:
# no s[2] argument
pass
elif s[0] == ":":
elif s[0] == REPLACE:
# s[2] is a single value to replace value at key with
pass
elif s[0] == "!":
elif s[0] == PATCH:
# s[2] is itself a diff, check it recursively if the "deep" argument is true
# (the "deep" argument is here to avoid recursion and potential O(>n) performance pitfalls)
if deep:
validate_diff(s[2], deep=deep)
# Experimental sequence diff actions
elif s[0] == "++":
elif s[0] == SEQINSERT:
# For sequence insert, s[2] is a list of values to insert.
if not isinstance(s[2], sequence):
error("Diff sequence insert expects list of values, not '{}'.".format(s[2]))
elif s[0] == "--":
if not isinstance(s[2], sequence_types):
raise NBDiffFormatError("Diff sequence insert expects list of values, not '{}'.".format(s[2]))
elif s[0] == SEQDELETE:
# s[2] is the number of items to delete from sequence
if not isinstance(s[2], int):
error("Diff sequence delete expects integer number of values, not '{}'.".format(s[2]))
elif s[0] == "::":
raise NBDiffFormatError("Diff sequence delete expects integer number of values, not '{}'.".format(s[2]))
elif s[0] == SEQREPLACE:
# For sequence replace, s[2] is a list of values to
# replace the next len(s[2]) values starting at key.
if not isinstance(s[2], sequence):
error("Diff sequence replace expects list of values, not '{}'.".format(s[2]))
if not isinstance(s[2], sequence_types):
raise NBDiffFormatError("Diff sequence replace expects list of values, not '{}'.".format(s[2]))
else:
# Unknown action
error("Unknown diff action '{}'.".format(s[0]))
raise NBDiffFormatError("Unknown diff action '{}'.".format(s[0]))

# Note that false positives are possible, for example
# we're not checking the values in any way


def decompress_diff(sequence_diff):
"""Split all sequence diff actions ++,--,:: into single-line actions +,-,:.
Expand All @@ -98,44 +119,47 @@ def decompress_diff(sequence_diff):
d = []
for s in sequence_diff:
action = s[0]
if action in ("+", "-", ":", "!"):
if action in (INSERT, DELETE, REPLACE, PATCH):
d.append(s)
elif action == "++":
elif action == SEQINSERT:
for i, v in enumerate(s[2]):
d.append(["+", s[1], v])
elif action == "--":
d.append([INSERT, s[1], v])
elif action == SEQDELETE:
for i in range(s[2]):
d.append(["-", s[1] + i])
elif action == "::":
d.append([DELETE, s[1] + i])
elif action == SEQREPLACE:
for i, v in enumerate(s[2]):
d.append([":", s[1] + i, v])
d.append([REPLACE, s[1] + i, v])
else:
raise RuntimeError("Invalid action '{}'".format(action))
raise NBDiffFormatError("Invalid action '{}'".format(action))
return d


#def compress_diff(diff):
# """Combine contiguous single-line actions +,-,: into sequence diff actions ++,--,:: everywhere."""
# TODO


def count_consumed_symbols(e):
"Count how many symbols are consumed from each sequence by a single sequence diff entry."
action = e[0]
if action == "+":
if action == INSERT:
return 0, 1
elif action == "-":
elif action == DELETE:
return 1, 0
elif action == ":":
elif action == REPLACE:
return 1, 1
elif action == "!":
elif action == PATCH:
return 1, 1
elif action == "++":
elif action == SEQINSERT:
return 0, len(e[2])
elif action == "--":
elif action == SEQDELETE:
return e[2], 0
elif action == "::":
elif action == SEQREPLACE:
return len(e[2]), len(e[2])
else:
error_invalid_diff_entry(e)
raise NBDiffFormatError("Invalid action '{}'".format(action))


def get_equal_ranges(a, b, d):
"Return list of tuples [(i,j,n)] such that a[i:i+n] == b[j:j+n] given a diff d of sequences a and b."
Expand Down
13 changes: 7 additions & 6 deletions nbdime/diffing/deep.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from six.moves import xrange as range
import operator

from ..dformat import PATCH, INSERT, DELETE, REPLACE, SEQINSERT, SEQDELETE, SEQREPLACE
from ..dformat import validate_diff, count_consumed_symbols
from .sequences import diff_strings, diff_sequence

Expand Down Expand Up @@ -41,7 +42,7 @@ def deep_diff_lists(a, b, compare=operator.__eq__, shallow_diff=None):
if not is_atomic(aval):
d = deep_diff(aval, bval, compare)
if d:
pdi.append(["!", acons+i, d])
pdi.append([PATCH, acons+i, d])

# Count consumed items
askip, bskip = count_consumed_symbols(e)
Expand All @@ -62,7 +63,7 @@ def deep_diff_lists(a, b, compare=operator.__eq__, shallow_diff=None):
if not is_atomic(aval):
d = deep_diff(aval, bval, compare)
if d:
pdi.append(["!", acons+i, d])
pdi.append([PATCH, acons+i, d])

# Sanity check
acons += n
Expand Down Expand Up @@ -93,7 +94,7 @@ def deep_diff_dicts(a, b, compare=operator.__eq__):

# Delete keys in a but not in b
for key in sorted(akeys - bkeys):
d.append(["-", key])
d.append([DELETE, key])

# Handle values for keys in both a and b
for key in sorted(akeys & bkeys):
Expand All @@ -104,15 +105,15 @@ def deep_diff_dicts(a, b, compare=operator.__eq__):
dd = deep_diff(avalue, bvalue, compare)
if dd:
# Patch value at key with nonzero diff dd
d.append(["!", key, dd])
d.append([PATCH, key, dd])
else:
if not compare(avalue, bvalue): # TODO: Use != or not compare() here?
# Replace value at key with bvalue
d.append([":", key, bvalue])
d.append([REPLACE, key, bvalue])

# Add keys in b but not in a
for key in sorted(bkeys - akeys):
d.append(["+", key, b[key]])
d.append([INSERT, key, b[key]])

return d

Expand Down
27 changes: 14 additions & 13 deletions nbdime/diffing/linebasedcelldiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import copy
import nbformat

from ..dformat import PATCH, INSERT, DELETE, REPLACE, SEQINSERT, SEQDELETE, SEQREPLACE
from ..dformat import decompress_diff
from ..patching import patch

Expand Down Expand Up @@ -67,26 +68,26 @@ def build_line_to_line_number_mapping(lines_a, lines_b, line_diff):
assert a2b[aln] is None
assert b2a[bln] is None

if action == "+":
if action == INSERT:
# Line s[2] inserted before lines_a[s[1]]
assert s[1] == aln
assert s[2] is lines_b[bln]
b2a[bln] = -1
bln += 1
elif action == "-":
elif action == DELETE:
# Line lines_a[s[1]] deleted
assert s[1] == aln
a2b[aln] = -1
aln += 1
elif action == ":":
elif action == REPLACE:
# Line lines_a[s[1]] replaced with s[2]
assert s[1] == aln
assert s[2] is lines_b[bln]
a2b[aln] = bln
b2a[bln] = aln
aln += 1
bln += 1
elif action == "!":
elif action == PATCH:
# Line lines_a[s[1]] patched with diff s[2] to produce lines_b[bln]
# (I don't think this occurs at the time being)
assert s[1] == aln
Expand Down Expand Up @@ -140,7 +141,7 @@ def build_cell_to_cell_numbers_mapping(a2bc, b2ac,
for ac, nbcs in enumerate(ac2nbcs):
# Delete cell ac if it doesn't map to any cell in b
if nbcs == 0:
d.append(["-", ac])
d.append([DELETE, ac])
continue

# Insert cells from b
Expand Down Expand Up @@ -232,7 +233,7 @@ def diff_cells_linebased(cells_a, cells_b):
line_number_a = s[1]
cell_number_a = origin_cell_numbers_a[line_number_a]
local_line_number_a = line_number_a - cell_offsets_a[cell_number_a]
if act != "-":
if act != DELETE:
line_number_b = s[2]
cell_number_b = origin_cell_numbers_b[line_number_b]
local_line_number_b = line_number_b - cell_offsets_b[cell_number_b]
Expand All @@ -242,19 +243,19 @@ def diff_cells_linebased(cells_a, cells_b):
# TODO: Do something about that? Or use include_equals in diff_lines call?
# Translate line diff action to cell diff action
if act == "-":
if act == DELETE:
# Delete line from cell
# TODO: Figure out when to delete cell itself
t = ["-", [cell_number_a, local_line_number_a]]
elif act == "+":
t = [DELETE, [cell_number_a, local_line_number_a]]
elif act == INSERT:
# TODO: Figure out when to add new cell
# Add line to cell
#value = s[2]
t = ["+", [cell_number_a, local_line_number_a], [cell_number_b, local_line_number_b]]
elif act == "!":
t = [INSERT, [cell_number_a, local_line_number_a], [cell_number_b, local_line_number_b]]
elif act == PATCH:
# FIXME:
value = s[2]
t = ["!", [cell_number_a, local_line_number_a], [cell_number_b, local_line_number_b]]
t = [PATCH, [cell_number_a, local_line_number_a], [cell_number_b, local_line_number_b]]
else:
raise RuntimeError("Invalid diff action {}.".format(act))
Expand All @@ -270,7 +271,7 @@ def diff_cells_linebased(cells_a, cells_b):
# Keep track of the last cell and line we handled in a and b
prev_line_a = line_number_a
prev_cell_a = cell_number_a
if act != "-":
if act != DELETE:
prev_line_b = line_number_b
prev_cell_b = cell_number_b
'''
Expand Down
3 changes: 2 additions & 1 deletion nbdime/diffing/notebooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import operator

from ..dformat import PATCH, INSERT, DELETE, REPLACE, SEQINSERT, SEQDELETE, SEQREPLACE
from ..dformat import decompress_diff

from .comparing import strings_are_similar
Expand Down Expand Up @@ -63,6 +64,6 @@ def diff_notebooks(nba, nbb):
# Then add specialized cells diff
cdiff = diff_cells(acells, bcells)
if cdiff:
nbdiff.append(["!", "cells", cdiff])
nbdiff.append([PATCH, "cells", cdiff])

return nbdiff

0 comments on commit 6ac8994

Please sign in to comment.