-
-
Notifications
You must be signed in to change notification settings - Fork 22
/
a_star.cljc
564 lines (510 loc) · 17.6 KB
/
a_star.cljc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
;;
;; Copyright (c) Huahai Yang. All rights reserved.
;; The use and distribution terms for this software are covered by the
;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
;; which can be found in the file LICENSE at the root of this distribution.
;; By using this software in any fashion, you are agreeing to be bound by
;; the terms of this license.
;; You must not remove this notice, or any other, from this software.
;;
(ns editscript.diff.a-star
(:require [editscript.edit :as e]
[editscript.diff.quick :as q]
[editscript.util.pairing :as pa]
[editscript.util.common :as co]
#?(:cljs [goog.math.Long]))
#?(:clj (:import [clojure.lang PersistentVector Keyword]
[java.io Writer]
[java.lang Comparable])))
#?(:clj (set! *warn-on-reflection* true))
#?(:clj (set! *unchecked-math* :warn-on-boxed))
;; indexing
(defprotocol INode
(get-path [this] "Get the path to the node from root")
(get-value [this] "Get the actual data")
(get-children [this] "Get all children node in a map")
(add-child [this node] "Add a child node")
(get-key [this] "Get the key of this node")
(get-parent [this] "Get the parent node")
(get-first [this] "Get the first child node")
(get-last [this] "Get the last child node")
(get-next [this] "Get the next sibling node")
(set-next [this node] "Set the next sibling node")
(set-order [this o] "Set the traversal order of this node")
(^long get-order [this] "Get the order of this node in traversal")
(^long get-size [this] "Get the size of sub-tree, used to estimate cost")
(set-size [this s] "Set the size of sub-tree"))
(deftype Node [^PersistentVector path
value
parent
^:unsynchronized-mutable children
^:unsynchronized-mutable first
^:unsynchronized-mutable last
^:unsynchronized-mutable next
^:unsynchronized-mutable index
^:unsynchronized-mutable ^long order
^:unsynchronized-mutable ^long size]
INode
(get-path [_] path)
(get-key [this] (-> this get-path peek))
(get-value [_] value)
(get-parent [_] parent)
(get-children [_] children)
(get-first [_] first)
(get-last [_] last)
(get-next [_] next)
(set-next [_ n] (set! next n))
(get-order [_] order)
(set-order [this o] (set! order (long o)) this)
(get-size [_] size)
(set-size [this s] (set! size (long s)) this)
(add-child [_ node]
(set! children (assoc children (get-key node) node))
(when last (set-next last node))
(when-not first (set! first node))
(set! last node)
node))
#?(:clj
(defmethod print-method Node
[x ^Writer writer]
(print-method {:value (get-value x)
:order (get-order x)
:children (get-children x)}
writer)))
;; using defn instead of declare, see http://dev.clojure.org/jira/browse/CLJS-1871
(defn ^:declared index* [order path data parent])
(defn- associative-children
"map and vector are associative"
[order path data parent]
(reduce-kv
(fn [_ k v]
(index* order (conj path k) v parent))
nil
data))
(defn- set-children
"set is a map of keys to themselves"
[order path data parent]
(doseq [x data]
(index* order (conj path x) x parent)))
(defn- list-children
"add index as key"
[order path data parent]
(reduce
(fn [i x]
(index* order (conj path i) x parent)
(inc ^long i))
0
data))
(defn- inc-order
[order]
(vswap! order (fn [o] (inc ^long o))))
(defn- index-collection
[type order path data parent]
(let [node (->Node path data parent {} nil nil nil 0 0 1)]
(add-child parent node)
(case type
(:map :vec) (associative-children order path data node)
:set (set-children order path data node)
:lst (list-children order path data node))
(let [^long cs (->> (get-children node) vals (map get-size) (reduce +))]
(doto node
(set-order @order)
(set-size (+ (get-size node) cs))))
(inc-order order)
node))
(defn- index-value
[order path data parent]
(let [node (->Node path data parent nil nil nil nil 0 @order 1)]
(add-child parent node)
(inc-order order)
node))
(defn- index*
[order path data parent]
(let [type (e/get-type data)]
(if (= type :val)
(index-value order path data parent)
(index-collection type order path data parent))))
(defn- index
"Traverse data to build an indexing tree of Nodes,
compute path, sizes of sub-trees, siblings, etc. for each Node.
This takes little time"
[data]
(let [order (volatile! 0)]
(index* order [] data (->Node [] ::dummy nil {} nil nil nil 0 -1 0))))
;; diffing
(defn- coord-hash
[a b]
(co/szudzik (get-order a) (get-order b)))
#?(:clj
(deftype Coord [^Node a
^Node b]
;; Java's native hash is too slow,
;; overriding hashCode significantly speeds things up
Object
(hashCode [_] (coord-hash a b))
(equals [this that]
(= (.hashCode this) (.hashCode that)))
(toString [_]
(str "[" (get-value a) "," (get-value b) "]"))
Comparable
(compareTo [this that]
(- (.hashCode this) (.hashCode that))))
:cljs
(deftype Coord [^Node a
^Node b]
IHash
(-hash [_] (coord-hash a b))
IEquiv
(-equiv [this that]
(= (-hash this) (-hash that)))
IComparable
(-compare [this that]
(- (-hash this) (-hash that)))))
(defn- get-coord
[^Coord coord]
[(.-a coord) (.-b coord)])
(defprotocol IStep
(operator [this] "Operator to try")
(current [this] "Starting pair of nodes")
(neighbor [this] "Destination pair of nodes"))
(deftype Step [^Keyword op
^Coord cur
^Coord nbr]
IStep
(operator [_] op)
(current [_] cur)
(neighbor [_] nbr))
#?(:clj
(defmethod print-method Step
[x ^Writer writer]
(print-method {:op (operator x)
:cur (current x)
:nbr (neighbor x)}
writer)))
(defn- get-step
[step]
((juxt operator current neighbor) step))
(defprotocol IState
(get-came [this] "Get the local succession map")
(set-came [this came] "Set the local succession map")
(get-open [this] "Get the open priority queue")
(set-open [this open] "Set the open priority queue")
(get-g [this] "Get the g cost map")
(set-g [this g] "Set the g cost map"))
(deftype State [^:unsynchronized-mutable came
^:unsynchronized-mutable open
^:unsynchronized-mutable g]
IState
(get-came [_] came)
(set-came [this c] (set! came c) this)
(get-open [_] open)
(set-open [this o] (set! open o) this)
(get-g [_] g)
(set-g [this got] (set! g got) this))
(defn- get-state
[state]
((juxt get-came get-open get-g) state))
(defn- access-g
[g cur]
(get g cur #?(:clj Long/MAX_VALUE
:cljs (goog.math.Long/getMaxValue))))
(defn ^:declared diff* [ra rb came])
(defn- compute-cost
[^Coord cur came g op]
(let [^long gc (access-g g cur)]
(case op
:= gc
;; delete only cost 1, for not including deleted data in script
:- (inc gc)
;; these cost the size of included data, plus 1
(:a :i) (let [sb (get-size (.-b cur))]
(+ gc (inc ^long sb)))
:r (+ gc ^long (diff* (.-a cur) (.-b cur) came)))))
(defn- heuristic
"A simplistic but optimistic estimate of the cost to reach goal when at (x y).
For sequences with positive goal differential (delta), the optimal number of
edits is deletion dependent, equals to 2p+delta, where p is number of deletions.
Optimistically assuming no new deletion will be needed after (x, y), the number
of edits is delta-k, where k=y-x. The same logic applies to negative delta.
For nested structure, multiple deletion may be merged into one.
Also, because addition/replacement requires new value to be present in
editscript, whereas deletion does not, we assign estimate differently. "
^long [type cur end [gx gy]]
(case type
(:map :set) 0
(:vec :lst) (let [[na nb] (get-coord cur)
[ra rb] (get-coord end)
x (if (identical? ra na) gx (get-order na))
y (if (identical? rb nb) gy (get-order nb))
delta (- ^long gy ^long gx)
k (- ^long y ^long x)
cost (- delta k)]
(if (= cost 0)
0
(if (>= delta 0)
(if (> k delta) 1 0)
(if (< k delta) (inc cost) 0))))))
(defn- explore
[type end came goal state step]
(let [[came' open g] (get-state state)
[op cur nbr] (get-step step)
tmp-g (compute-cost cur came g op)]
(if (>= ^long tmp-g ^long (access-g g nbr))
state
(doto state
(set-came (assoc! came' nbr [cur op]))
(set-open (assoc open nbr
(+ ^long tmp-g ^long (heuristic type nbr end goal))))
(set-g (assoc! g nbr tmp-g))))))
(defn- values=?
[va vb]
(or (identical? va vb)
(and (= :val (e/get-type va) (e/get-type vb))
#?(:clj (if va
(.equals ^Object va vb)
(= va vb))
:cljs (= va vb)))))
(defn- next-node
[na ra]
(or (get-next na) ra))
(defn- vec-frontier
[end cur]
(let [[ra rb] (get-coord end)
[na nb] (get-coord cur)
a=b (values=? (get-value na) (get-value nb))
x=gx (identical? na ra)
x<gx (not x=gx)
y<gy (not (identical? nb rb))
na' (next-node na ra)
nb' (next-node nb rb)]
(if (and x<gx y<gy a=b)
[(->Step := cur (->Coord na' nb'))]
(cond-> []
x<gx (conj (->Step :- cur (->Coord na' nb))) ; delete
(and x<gx y<gy) (conj (->Step :r cur (->Coord na' nb'))) ; replace
(and x=gx y<gy) (conj (->Step :a cur (->Coord na nb'))) ; append
(and x<gx y<gy) (conj (->Step :i cur (->Coord na nb'))))))) ; insert
(defn- map-frontier
[^Coord init end cur]
(let [[ra rb] (get-coord end)
[na nb] (get-coord cur)
ka (get-key na)
kb (get-key nb)]
(if (identical? na ra)
;; testing keys of b
[(->Step (if (contains? (get-value ra) kb) := :a)
cur (->Coord ra (next-node nb rb)))]
(let [va (get-value na)
vb (get-value nb)
mb (get-value rb)
na' (next-node na ra)
cb (get-children rb)]
(if (identical? na' ra)
;; transition point from testing keys of a to that of b
(let [startb (->Coord ra (.-b init))
enda (->Coord na (cb ka))]
(if (contains? mb ka)
(if (= ka kb)
[(->Step (if (values=? va vb) := :r) cur startb)]
[(->Step := cur enda)
(->Step :r enda startb)])
[(->Step :- cur startb)]))
;; testing keys of a
[(if (contains? mb ka)
(if (= ka kb)
(->Step (if (values=? va vb) := :r)
cur (->Coord na' (or (cb (get-key na')) nb)))
(->Step := cur (->Coord na (cb ka))))
(->Step :- cur (->Coord na' nb)))])))))
(defn- frontier
[type init end cur]
(case type
(:vec :lst) (vec-frontier end cur)
(:map :set) (map-frontier init end cur)))
(defn- A*
^long [type ra rb came]
(let [end (->Coord ra rb)
init (->Coord (get-first ra) (get-first rb))
goal [(get-order ra) (get-order rb)]]
(loop [state (->State (transient {})
(pa/priority-map init (heuristic type init end goal))
(transient {init 0}))]
(let [[came' open g] (get-state state)]
(if (empty? open)
(throw (ex-info "A* diff fails to find a solution" {:ra ra :rb rb}))
(let [[cur cost] (peek open)]
(if (= cur end)
(do (vswap! came assoc end (persistent! came'))
cost)
(recur (reduce
(partial explore type end came goal)
(set-open state (pop open))
(frontier type init end cur))))))))))
(defn- vec-fn
[node]
(let [v (get-value node)]
(if (= :vec (e/get-type v))
v
(vec v))))
(defn- use-quick
^long [ra rb came]
(loop [[op & ops] (q/vec-edits (vec-fn ra) (vec-fn rb))
na (get-first ra)
nb (get-first rb)
m (transient {})
cost 0]
(if op
(let [na' (next-node na ra)
nb' (next-node nb rb)
cur (->Coord na nb)
sb (get-size nb)]
(if (integer? op)
(recur (if (> ^long op 1) `[~(dec ^long op) ~@ops] ops)
na' nb'
(assoc! m (->Coord na' nb') [cur :=])
(long cost))
(case op
:- (recur ops na' nb
(assoc! m (->Coord na' nb) [cur op])
(inc (long cost)))
:+ (recur ops na nb'
(assoc! m (->Coord na nb')
[cur (if (identical? na ra) :a :i)])
(+ (long cost) 1 (long sb)))
:r (recur ops na' nb'
(assoc! m (->Coord na' nb') [cur op])
(+ (long cost) 1 (long sb))))))
(let [root (->Coord ra rb)]
(vswap! came assoc root (persistent! m))
cost))))
(defn- diff*
^long [ra rb came]
(let [sa ^long (get-size ra)
sb ^long (get-size rb)
va (get-value ra)
vb (get-value rb)
typea (e/get-type va)
update #(vswap! came assoc (->Coord ra rb) {})]
(cond
(identical? va vb)
(do (update)
0)
;; both are leaves, skip or replace
(= 1 sa sb)
(do (update)
(if (values=? va vb)
0
2))
;; one of them is leaf, replace
(or (= 1 sa) (= 1 sb))
(do (update)
(inc ^long sb))
;; non-empty coll with same type, drill down
(= typea (e/get-type vb))
(let [cc+1 #(-> % get-children count inc)]
(if (and (#{:vec :lst} typea)
(or (= sa (cc+1 ra))
(= sb (cc+1 rb))))
;; vec or lst contains leaves only, safe to use quick algo.
(use-quick ra rb came)
;; otherwise run A*
(A* typea ra rb came)))
;; types differ, can only replace
:else
(do (update)
(inc ^long sb)))))
;; generating editscript
(defn- index-key?
[node]
(#{:vec :lst} (-> node get-value e/get-type)))
(defn- adjust-delete-insert
[trie op root path]
(if (= op :=)
path
(loop [newp []
prev []
node root
left path]
(if (seq left)
(let [[k & ks] left
^long d (get-in @trie (conj prev :delta) 0)]
(recur (conj newp (if (index-key? node) (+ ^long k d) k))
(conj prev k)
((get-children node) k)
ks))
(if (index-key? (get-parent node))
(let [seen (conj (if (seq path) (pop path) path) :delta)
^long d (get-in @trie seen 0)]
(vswap! trie assoc-in seen (case op :- (dec d) :i (inc d) d))
newp)
newp)))))
(defn- adjust-append
[trie op na nb path path']
(if (= op :a)
(if (index-key? na)
(conj path' (let [seen (conj path :delta)
^long d (get-in @trie seen 0)]
(vswap! trie assoc-in seen (inc d))
(+ d (-> na get-children count))))
(conj path' (get-key nb)))
path'))
(defn- convert-path
[trie op roota na nb path]
(->> path
(adjust-delete-insert trie op roota)
(adjust-append trie op na nb path)))
(defn- write-script
[steps roota script]
(reduce
(fn [trie [op na nb]]
(let [path (convert-path trie op roota na nb (get-path na))
value (get-value nb)]
(case op
:- (e/delete-data script path)
:r (e/replace-data script path value)
(:a :i) (e/add-data script path value)
nil)
trie))
(volatile! {:delta 0})
steps))
(defn- trace*
[came cur steps]
(if-let [m (came cur)]
(if (seq m)
(loop [v (m cur)]
(if v
(let [[prev op] v
[na nb] (get-coord prev)]
(if (and (came prev) (= op :r))
(trace* came prev steps)
(vswap! steps conj [op na nb]))
(recur (m prev)))
steps))
(let [[ra rb] (get-coord cur)]
(vswap! steps conj [(if (values=? (get-value ra) (get-value rb)) := :r)
ra rb])
steps))
steps))
(defn- trace
([came cur]
@(trace* came cur (volatile! '())))
([came ^Coord cur script]
(-> (trace came cur)
(write-script (.-a cur) script))))
(defn diff
"Create an EditScript that represents the minimal difference between `b` and `a`"
[a b]
(let [script (e/edits->script [])]
(when-not (identical? a b)
(let [roota (index a)
rootb (index b)
came (volatile! {})
_ (diff* roota rootb came)]
;; #?(:clj (let [total (* (get-size roota) (get-size rootb))
;; ^long explored (reduce + (map count (vals @came)))]
;; (printf "cost is %d, explored %d of %d - %.1f%%\n"
;; cost explored total
;; (* 100 (double (/ explored total))))))
(trace @came (->Coord roota rootb) script)
script))
script))