From 3c148e2ee7f244fccd246365010055dcc946abe0 Mon Sep 17 00:00:00 2001 From: Stuart Halloway Date: Tue, 12 Oct 2010 13:12:40 -0400 Subject: [PATCH] #448 structural diff Signed-off-by: Stuart Halloway --- build.xml | 1 + src/clj/clojure/data.clj | 108 +++++++++++++++++++++++++++++ test/clojure/test_clojure.clj | 1 + test/clojure/test_clojure/data.clj | 27 ++++++++ 4 files changed, 137 insertions(+) create mode 100644 src/clj/clojure/data.clj create mode 100644 test/clojure/test_clojure/data.clj diff --git a/build.xml b/build.xml index 1fc9cc9429..2cb386501e 100644 --- a/build.xml +++ b/build.xml @@ -119,6 +119,7 @@ + diff --git a/src/clj/clojure/data.clj b/src/clj/clojure/data.clj new file mode 100644 index 0000000000..fc96204997 --- /dev/null +++ b/src/clj/clojure/data.clj @@ -0,0 +1,108 @@ +; Copyright (c) Rich Hickey. All rights reserved. +; The use and distribution terms for this software are covered by the +; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) +; which can be found in the file epl-v10.html at the root of this distribution. +; By using this software in any fashion, you are agreeing to be bound by +; the terms of this license. +; You must not remove this notice, or any other, from this software. + +(ns + ^{:author "Stuart Halloway", + :doc "Non-core data functions."} + clojure.data + (:require [clojure.set :as set])) + +(defn- atom-diff + "Internal helper for diff." + [a b] + (if (= a b) [nil nil a] [a b nil])) + +;; for big things a sparse vector class would be better +(defn- vectorize + "Convert an associative-by-numeric-index collection into + an equivalent vector, with nil for any missing keys" + [m] + (when (seq m) + (reduce + (fn [result [k v]] (assoc result k v)) + (vec (repeat (apply max (keys m)) nil)) + m))) + +(declare diff) + +(defprotocol EqualityPartition + "Implementation detail. Subject to change." + (equality-partition [x] "Implementation detail. Subject to change.")) + +(defprotocol Diff + "Implementation detail. Subject to change." + (diff-similar [a b] "Implementation detail. Subject to change.")) + +(extend nil + Diff + {:diff-similar atom-diff}) + +(extend Object + Diff + {:diff-similar atom-diff} + EqualityPartition + {:equality-partition (fn [x] (if (.. x getClass isArray) :sequential :atom))}) + +(defn- diff-associative + "Diff associative things a and b, comparing only keys in ks." + [a b ks] + (reduce + (fn [diff1 diff2] + (map merge diff1 diff2)) + [nil nil nil] + (map + (fn [k] (map #(when % {k %}) (diff (get a k) (get b k)))) + ks))) + +(extend-protocol EqualityPartition + nil + (equality-partition [x] :atom) + + java.util.Set + (equality-partition [x] :set) + + java.util.List + (equality-partition [x] :sequential) + + java.util.Map + (equality-partition [x] :map)) + +(extend-protocol Diff + java.util.Set + (diff-similar [a b] + [(not-empty (set/difference a b)) + (not-empty (set/difference b a)) + (not-empty (set/intersection a b))]) + + java.util.List + (diff-similar [a b] + (vec (map vectorize (diff-associative + (if (vector? a) a (vec a)) + (if (vector? b) b (vec b)) + (range (max (count a) (count b))))))) + + java.util.Map + (diff-similar [a b] + (diff-associative a b (set/union (keys a) (keys b))))) + +(defn diff + "Recursively compares a and b, returning a tuple of + [things-only-in-a things-only-in-b things-in-both]. + Comparison rules: + + * Maps are subdiffed where keys match and values differ. + * Sets are never subdiffed. + * All sequential things are treated as associative collections + by their indexes, with results returned as vectors. + * Everything else (including strings!) is treated as + an atom and compared for equality." + [a b] + (if (= (equality-partition a) (equality-partition b)) + (diff-similar a b) + (atom-diff a b))) + diff --git a/test/clojure/test_clojure.clj b/test/clojure/test_clojure.clj index fa482d88c8..a82ed0ae47 100644 --- a/test/clojure/test_clojure.clj +++ b/test/clojure/test_clojure.clj @@ -64,6 +64,7 @@ :transients :def :keywords + :data ]) (def test-namespaces diff --git a/test/clojure/test_clojure/data.clj b/test/clojure/test_clojure/data.clj new file mode 100644 index 0000000000..5c96c86db0 --- /dev/null +++ b/test/clojure/test_clojure/data.clj @@ -0,0 +1,27 @@ +; Copyright (c) Rich Hickey. All rights reserved. +; The use and distribution terms for this software are covered by the +; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) +; which can be found in the file epl-v10.html at the root of this distribution. +; By using this software in any fashion, you are agreeing to be bound by +; the terms of this license. +; You must not remove this notice, or any other, from this software. + +(ns clojure.test-clojure.data + (:use clojure.data clojure.test)) + +(deftest diff-test + (are [d x y] (= d (diff x y)) + [nil nil nil] nil nil + [1 2 nil] 1 2 + [nil nil [1 2 3]] [1 2 3] '(1 2 3) + [1 [:a :b] nil] 1 [:a :b] + [{:a 1} :b nil] {:a 1} :b + [:team #{:p1 :p2} nil] :team #{:p1 :p2} + [{0 :a} [:a] nil] {0 :a} [:a] + [nil [nil 2] [1]] [1] [1 2] + [nil nil [1 2]] [1 2] (into-array [1 2]) + [#{:a} #{:b} #{:c :d}] #{:a :c :d} #{:b :c :d} + [nil nil {:a 1}] {:a 1} {:a 1} + [{:a #{2}} {:a #{4}} {:a #{3}}] {:a #{2 3}} {:a #{3 4}} + [{:a {:c [1]}} {:a {:c [0]}} {:a {:c [nil 2] :b 1}}] {:a {:b 1 :c [1 2]}} {:a {:b 1 :c [0 2]}})) +