diff --git a/.ditz/issue-9465e033d968b2c64f02b71bce62d5a0323a60be.yaml b/.ditz/issue-9465e033d968b2c64f02b71bce62d5a0323a60be.yaml index 4f052d1..57457f2 100644 --- a/.ditz/issue-9465e033d968b2c64f02b71bce62d5a0323a60be.yaml +++ b/.ditz/issue-9465e033d968b2c64f02b71bce62d5a0323a60be.yaml @@ -7,8 +7,8 @@ type: :bugfix component: json_diff release: reporter: Matej Cepl -status: :unstarted -disposition: +status: :closed +disposition: :fixed creation_time: 2011-10-10 21:31:46.437278 Z references: [] @@ -18,3 +18,14 @@ log_events: - Matej Cepl - created - "" +- - 2011-10-24 21:25:12.273479 Z + - Matej Cepl + - closed with disposition fixed + - |- + Fixed as part of larger first working version. + Programmatically fill excuded_attributes tuple, or add (even repeatedly) + -x parameter to the command line. So, + + json_diff.py -x spam old.json new.json + + Ignores all mentions of the horrendous stuff. diff --git a/hinnerup b/hinnerup index 99e4146..5dd7de3 160000 --- a/hinnerup +++ b/hinnerup @@ -1 +1 @@ -Subproject commit 99e4146bc3f49d19b9769f2e1f4e9870514acb94 +Subproject commit 5dd7de36d0b636c25aebf72d97c1ac2e618e8a1f diff --git a/json_diff.js b/json_diff.js deleted file mode 100755 index aa18561..0000000 --- a/json_diff.js +++ /dev/null @@ -1,169 +0,0 @@ -#!/usr/bin/rhino -debug -var jsonBoxA, jsonBoxB; - -// compareTree(objA, objB, "root", results); - -function JSONDiff(fn1, fn2) { - this.obj1 = JSON.parse(readFile(fn1)); - this.obj2 = JSON.parse(readFile(fn2)); -} - -/** - * Compare two objects recursively - * - * - * For example, comparing object A - * - * { "a": 1, - * "b": 2, - * "son" : { - * "name": "Janošek" - * } - * } - * - * and - * - * { "a": 2, - * "c": 3 - * "daughter" : { - * "name": "Maruška" - * } - * } - * - * we get - * - * { - * "insert": [ - * { "c": 3 }, - * { - * "daughter" : { - * "name": "Maruška" - * } - * } - * ], - * "delete": [ - * { "b": 2 }, - * { - * "son" : { - * "name": "Janošek" - * } - * } - * ], - * "update": { "a": 2 } - * ] - * } 
- */ -JSONDiff.prototype.compareTree = function compareTree(a, b, name) { - function typeofReal(value) { - return Array.isArray(value) ? "array" : typeof value; - } - - function isScalar(value) { - var typeStr = typeofReal(value); - return !((typeStr == "array") || (typeStr == "object")); - } - - var equal = false; - var elements = {}; - - for (var key in a) { - if a.hasOwnProperty(key) { - elements[key] = null; - } - } - for (var key in b) { - if b.hasOwnProperty(key) { - elements[key] = null; - } - } - -// print("compareTree: name = " + name); - var typeA = typeofReal(a); - var typeB = typeofReal(b); - - if (typeA !== typeB) { - // There is not much to be done when the objects are not of - // the same type - return { - 'deleted': a, - 'inserted': b - } - } - - // Now we have both objects of the same type, so - // we can evaluate just type of one - // If it is array ... - if (typeA === "array") { - var results = { - 'updated': {} - }; - var maxLen = a.length > b.length ? a.length : b.length; - for (var i = 0; i < maxLen; i++) { - if (isScalar(a[i]) && isScalar(b[i])) { - if (a[i] !== b[i]) { - results['updated'][i] = b[i]; - } - } - } - } - - if (typeA === "object") { - } - -/* -two trees are equal when: -- they have same keys, -- properties of the same keys have same values -==== -if keys are not same, then whole subobject ==> ADDED/DELETED -if property values are not same && value is scalar, ==> UPDATED -if trees are not same, go one level down and compare two siblings - */ - - if (a === undefined) { - this.results['inserted'].push(b); - } - else if (b === undefined) { - this.results['deleted'].push(a); - } - else if (typeA !== typeB || (typeA !== "object" && typeA !== "array" && a !== b)) { - this.results['updated'].push(b); - } - - if (typeA === "object" || typeA === "array" || typeB === "object" || typeB === "array") { - var keys = []; - for (var i in a) { - if (a.hasOwnProperty(i)) { - keys.push(i); - } - } - for (var i in b) { - if (b.hasOwnProperty(i)) { - 
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Script for comparing two objects
"""
import io
import json
import logging
from optparse import OptionParser

logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s', level=logging.INFO)


class Comparator(object):
    """
    Compute a structural diff between two JSON documents.

    A diff is a dict with up to three sections -- ``append``, ``remove``
    and ``update`` -- each mapping an object key (or an array index) to
    the affected value.  Sections without entries are omitted, so two
    identical documents produce ``{}``.
    """

    # Marker returned by _compare_elements when two values are equal.
    # None cannot play this role because JSON null is a valid new value.
    _UNCHANGED = object()

    def __init__(self, fn1=None, fn2=None, excluded_attrs=()):
        """
        Load the documents to be compared.

        fn1, fn2 -- readable file-like objects holding the old and the new
            JSON document.  A falsy value leaves the corresponding
            attribute (obj1 / obj2) unset.
        excluded_attrs -- container of object keys that compare_dicts
            skips, at every nesting level.
        """
        if fn1:
            self.obj1 = json.load(fn1)
        if fn2:
            self.obj2 = json.load(fn2)
        self.excluded_attributes = excluded_attrs
        if fn1 and fn2:
            # Arguments are passed separately (not as one tuple) so that
            # each of the three placeholders receives its own value.
            logging.debug(
                "self.obj1 = %s\nself.obj2 = %s\nself.excluded_attrs = %s",
                self.obj1, self.obj2, self.excluded_attributes)

    @staticmethod
    def _get_keys(obj):
        """
        Return the keys of the mapping ``obj`` as a set.
        """
        return set(obj.keys())

    @staticmethod
    def _is_scalar(value):
        """
        Primitive version, relying on the fact that JSON cannot
        contain any more complicated data structures.
        """
        return not isinstance(value, (list, tuple, dict))

    @staticmethod
    def _new_result():
        """
        Return an empty diff with all three sections present.
        """
        return {u"append": {}, u"remove": {}, u"update": {}}

    @staticmethod
    def _prune(result):
        """
        Return a copy of ``result`` without its empty sections.
        """
        return {key: section for key, section in result.items() if section}

    def _compare_elements(self, old_val, new_val):
        """
        Compare two values found under the same key or index.

        Returns the value to record in the ``update`` section: the new
        value when the type or the scalar value changed, or the nested
        diff for containers.  Returns ``_UNCHANGED`` when there is nothing
        to record.
        """
        # A change of type is reported wholesale; the new value wins.
        if type(old_val) is not type(new_val):
            return new_val
        if self._is_scalar(old_val):
            return self._UNCHANGED if old_val == new_val else new_val
        if isinstance(old_val, dict):
            nested = self.compare_dicts(old_val, new_val)
        else:
            nested = self._compare_arrays(old_val, new_val)
        return nested if nested else self._UNCHANGED

    def _compare_arrays(self, old_arr, new_arr):
        """
        Diff two sequences position by position.

        Indices present in both are compared element-wise; trailing
        elements that exist only in ``new_arr`` go to ``append`` and those
        that exist only in ``old_arr`` go to ``remove``.  Keys of the
        sections are integer indices.
        """
        # Length of the part both sequences share.
        common = min(len(old_arr), len(new_arr))

        result = self._new_result()
        for idx in range(common):
            changed = self._compare_elements(old_arr[idx], new_arr[idx])
            if changed is not self._UNCHANGED:
                result['update'][idx] = changed

        # At most one of these two loops runs: the tail of the longer side.
        for idx in range(common, len(new_arr)):
            result['append'][idx] = new_arr[idx]
        for idx in range(common, len(old_arr)):
            result['remove'][idx] = old_arr[idx]

        return self._prune(result)

    def compare_dicts(self, old_obj=None, new_obj=None):
        """
        Diff two JSON objects (dicts) recursively.

        old_obj, new_obj -- the dicts to compare.  When an argument is
            None the document loaded by the constructor (obj1 / obj2) is
            used, or an empty dict if none was loaded.  An explicitly
            passed empty dict is compared as such.

        Keys listed in ``excluded_attributes`` are ignored.  Returns the
        pruned diff dict described in the class docstring.
        """
        # Test for None rather than falsiness: during recursion a nested
        # empty dict must not be replaced by the root document.
        if old_obj is None:
            old_obj = getattr(self, "obj1", None) or {}
        if new_obj is None:
            new_obj = getattr(self, "obj2", None) or {}

        result = self._new_result()
        for name in self._get_keys(old_obj) | self._get_keys(new_obj):
            # Explicitly excluded arguments
            if name in self.excluded_attributes:
                continue
            if name not in old_obj:
                result['append'][name] = new_obj[name]
            elif name not in new_obj:
                result['remove'][name] = old_obj[name]
            else:
                changed = self._compare_elements(old_obj[name], new_obj[name])
                if changed is not self._UNCHANGED:
                    result['update'][name] = changed

        return self._prune(result)


if __name__ == "__main__":
    usage = "usage: %prog [options] old.json new.json"
    parser = OptionParser(usage=usage)
    parser.add_option("-x", "--exclude",
        action="append", dest="exclude", metavar="ATTR", default=[],
        help="attributes which should be ignored when comparing")
    (options, args) = parser.parse_args()
    logging.debug("options = %s", options)
    logging.debug("args = %s", args)
    if len(args) != 2:
        parser.error("Script requires two positional arguments, names for old and new JSON file.")

    # The constructor reads both documents completely, so the files can be
    # closed as soon as the Comparator exists.
    with io.open(args[0], encoding="utf-8") as old_file:
        with io.open(args[1], encoding="utf-8") as new_file:
            diff = Comparator(old_file, new_file, options.exclude)
    print(json.dumps(diff.compare_dicts(), indent=4, ensure_ascii=False))
class TestXorgAnalyze(unittest.TestCase):
    """
    Unit tests for json_diff.Comparator.

    The in-memory cases use the JSON snippets defined as module constants;
    the file-based cases read fixtures from the ``test/`` directory
    relative to the current working directory.
    """

    @staticmethod
    def _failure_message(label, expected, observed):
        """
        Build the assertion message shared by all test cases.
        """
        return label + "\n\nexpected = %s\n\nobserved = %s" % \
            (str(expected), str(observed))

    def _assert_file_diff(self, old_path, new_path, expected_path, label,
                          excluded=()):
        """
        Diff two JSON files and compare the result with a stored diff.

        All three files are closed again before the assertion runs;
        Comparator reads its inputs completely inside its constructor.
        """
        with open(old_path) as old_file:
            with open(new_path) as new_file:
                diffator = json_diff.Comparator(old_file, new_file, excluded)
        diff = diffator.compare_dicts()
        with open(expected_path) as expected_file:
            expected = json.load(expected_file)
        self.assertEqual(diff, expected,
            self._failure_message(label, expected, diff))

    def test_empty(self):
        # Two empty dicts are falsy, so Comparator loads nothing and the
        # diff of "nothing against nothing" has to be empty.
        diffator = json_diff.Comparator({}, {})
        diff = diffator.compare_dicts()
        self.assertEqual(json.dumps(diff).strip(), "{}",
            self._failure_message("Empty objects diff.", {}, diff))

    def test_simple(self):
        diffator = json_diff.Comparator(StringIO(SIMPLE_OLD), StringIO(SIMPLE_NEW))
        diff = diffator.compare_dicts()
        expected = json.loads(SIMPLE_DIFF)
        self.assertEqual(diff, expected,
            self._failure_message("All-scalar objects diff.", expected, diff))

    def test_realFile(self):
        self._assert_file_diff("test/old.json", "test/new.json",
            "test/diff.json", "Simply nested objects (from file) diff.")

    def test_nested(self):
        diffator = json_diff.Comparator(StringIO(NESTED_OLD), StringIO(NESTED_NEW))
        diff = diffator.compare_dicts()
        expected = json.loads(NESTED_DIFF)
        self.assertEqual(diff, expected,
            self._failure_message("Nested objects diff. ", expected, diff))

    def test_large_with_exclusions(self):
        self._assert_file_diff("test/old-testing-data.json",
            "test/new-testing-data.json", "test/diff-testing-data.json",
            "Large objects with exclusions diff.", ('command', 'time'))


if __name__ == "__main__":
    unittest.main()