-
Notifications
You must be signed in to change notification settings - Fork 0
/
reduce.clj
373 lines (341 loc) · 15.9 KB
/
reduce.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
(ns instacheck.reduce
(:require [clojure.pprint :refer [pprint]]
[clojure.set :as set]
[instacheck.util :as util]
[instacheck.grammar :as grammar]
[instacheck.weights :as weights]
[instacheck.codegen :as codegen]))
(def memoized-distance-trek
(memoize weights/distance-trek))
;; Weight reducer functions. If parsed-weight is zero ignore (return
;; start-weight). Must eventually return 0.
(defn reducer-zero
"If parsed-weight > 0 then returns 0"
[start-weight parsed-weight]
(if (= parsed-weight 0)
start-weight
0))
(defn reducer-half
"If parsed-weight > 0 then returns start-weight divided in two and
rounded down."
[start-weight parsed-weight]
(if (= parsed-weight 0)
start-weight
(int (/ start-weight 2))))
(defn reducer-div
"If parsed-weight > 0 then returns the next weight in seq-ladder
that is lower than start-weight.Designed to be used as a partial
like this:
(partial reducer-div 2)"
[divisor start-weight parsed-weight]
(if (= parsed-weight 0)
start-weight
(int (/ start-weight divisor))))
(defn reducer-ladder
"If parsed-weight > 0 then returns the next weight in seq-ladder
that is lower than start-weight. Designed to be used as a partial
like this:
(partial reducer-ladder [30 10 3 1])
The values in the ladder will be sorted in descending order and an
implicit zero is added to the end."
[seq-ladder start-weight parsed-weight]
(if (= parsed-weight 0)
start-weight
(let [norm-ladder (-> seq-ladder set (conj 0) sort reverse)]
(or (some #(if (< % start-weight) % nil) norm-ladder) 0))))
(defn reduce-wtrek
"Takes a grammar and wtrek and returns a new reduced wtrek with
weights reduced/propagated according to reduce-mode.
If the optional reduced-subset node set is then only those nodes
will be propagated. If reduced-subset is not specified then all
reducible/weighted nodes will be considered. The former may result
in a wtrek that is not fully reduced but the latter can take a while
for large grammars/wtreks.
The way that weights are reduced/propagated depends on reduce-mode:
:zero
If all siblings are zero, reduce parent edge to zero.
:max-child:
If all siblings of a node have a weight that is less
than parent edge weight then reduce the parent edge weight to
the largest sibling weight.
Algorithm/psuedocode:
- pend <= reduced-subset OR all weighted nodes in the tree
- while pend:
- node <= pop(pend)
- mcw <= get max child weight
- pnodes <= parents(node)
- foreach pnode of pnodes:
- if pnode child weight towards node > mcw
- then:
- push(pend, pnode)
- wtrek[pnode] <= mcw
:reducer:
When all siblings of a node are zero, reduce parent edge weight
by reducer-fn function and distribute the removed weights to
valid (no removed descendant) child edges of node.
Algorithm/psuedocode:
- pend <= reduced-subset OR all weighted nodes in the tree
- while pend:
- node <= pop(pend)
- if all node's children are 0:
- reduce node's parents w/ reducer function, accumulate the
total amount that was reduced
- if all node's parent's direct children are 0, add node's
parent to pend
- for each of node's children with no removed descendants:
- distribute accumulated weight evenly among those
children (rounding up unless all parents are 0 in which
case use 0)
Any zero weights in the :wtrek map represent a node edge that has
been removed. If all edges of a node are 0 then this is represents
a node that has been removed and the removal must be propagated up
the tree to the next weighted node edge. If the propagation of
removed nodes (0 weights) reaches the root/start of the grammar and
cannot propagate further then an exception is thrown because this
represents an invalid weighted grammar: grammar productions could
reach the removed node from the root/start rule (a removed node does
not exist in the sense that epsilon does).
The propagation of node removals continues until there are no more
pending node to remove. A node may have more than one parent which
means the number of nodes being considered during propagation may
increase temporarily but already removed nodes will not be added
again so the process will eventually terminate."
[grammar start-wtrek & [{:keys [reduced-subset reduce-mode reducer-fn]
:or {reduce-mode :zero
reducer-fn reducer-zero}
:as opts}]]
(assert (#{:zero :max-child :reducer} reduce-mode)
(str "Invalid :reduce-mode " reduce-mode))
(when (= :reducer reduce-mode)
(assert reducer-fn ":reducer reduce-mode requires reducer-fn"))
(let [grammar-start [(:start (meta grammar))]
start-pend (set (filter #(grammar/WEIGHTED (last %))
(map pop
(or reduced-subset
(keys (weights/wtrek grammar))))))]
(loop [wtrek start-wtrek
pend start-pend]
(if (not (seq pend))
wtrek
(let [[node & pend-left] pend
kids (grammar/children-of-node grammar node)
kid-weights (vals (select-keys wtrek kids))
;; _ (prn :node node :kids kids :kid-weights kid-weights)
max-kid-w (apply max kid-weights)
;; nparents is weighted child edges of parents leading
;; to node. all-nparents is the same but includes the
;; grammar root path if there is no weighted path
;; between node and the root.
all-nparents (grammar/get-ancestors
grammar node #(or (= grammar-start %)
(and (grammar/WEIGHTED (last (pop %)))
(contains? wtrek %))))
nparents (disj all-nparents grammar-start)
;; big-nparents is reduction candidates. max-kid-w being
;; greater than 0 only applies in the :max-child case.
big-nparents (set (filter #(> (get wtrek %) max-kid-w)
nparents))]
;;(prn :node node :max-kid-w max-kid-w :nparents nparents :big-parent big-nparents)
;; If node is removed and there are no weighted nodes
;; between it and the grammar start/root then it's an
;; invalid state.
(when (and (= 0 max-kid-w)
(contains? all-nparents grammar-start))
(throw (ex-info
(str "Node " node " removed, has root as parent")
{:type :reduce-wtrek
:cause :no-parents
:grammar grammar
:start-wtrek start-wtrek
:opts opts
:start-pend start-pend
:node node
:wtrek wtrek})))
(cond
;; :zero and :reducer reduce-mode only apply when all
;; children are zero. :max-child reduce-mode applies
;; whenever the largest child is less than the parent.
(and (#{:zero :reducer} reduce-mode)
(not= 0 max-kid-w))
(recur wtrek pend-left)
(#{:zero :max-child} reduce-mode)
(let [new-pend (set/union pend-left
(set (map pop big-nparents)))
new-wtrek (reduce (fn [tk p] (assoc tk p max-kid-w))
wtrek
big-nparents)]
(recur new-wtrek new-pend))
:reducer
;; All children of node are zero at this point.
(let [new-wtrek1 (reduce (fn [tk p]
(let [w (get tk p)]
(assoc tk p (reducer-fn w w))))
wtrek
big-nparents)
zerod-parents (set (filter #(and (= 0 (get new-wtrek1 %))
(not= (get wtrek %)
(get new-wtrek1 %)))
big-nparents))
;; we need to recur to check zero'd parents
new-pend (set/union pend-left
(set (map pop zerod-parents)))
;; accumulate the total reduction (might be multiple
;; parents reduced)
acc-weights (reduce
#(+ %1 (- (get wtrek %2) (get new-wtrek1 %2)))
0
big-nparents)
;; only consider kids with no removed descendants
removed? (partial weights/removed-node? grammar new-wtrek1)
valid-kids (filter (fn [k]
(empty? (grammar/get-descendants
grammar k removed?)))
kids)
;;_ (prn :zerod-parents zerod-parents :acc-weights acc-weights)
;; Distribute weight evenly to the valid children
new-wtrek2 (reduce
(fn [tk kid]
(assoc tk kid
(int (Math/ceil
(/ acc-weights
(count valid-kids))))))
new-wtrek1
valid-kids)]
;; (prn :reducer :node node :big-nparents big-nparents :zerod-parents zerod-parents)
(recur new-wtrek2 new-pend))))))))
;; ---------
(defn reduce-wtrek-with-weights
"Takes a grammar, wtrek, a weights-to-reduce map, a reduce-mode
keyword, and a reducer-fn. A path from weights-to-reduce is selected
based on pick-mode. For that path the reducer-fn is called with the
weight for the path from wtrek and the weight for the path from
weights-to-reduce. Based on those two values the reducer-fn should
return a new value to be updated in the wtrek.
pick-mode values:
:weight - randomly pick a node weighted by node weights.
:dist - randomly pick a node weighted by node distances
from the start node
:weight-dist - randomly pick a node weighted by node weights
multiplied by node distances from the start node.
The resulting wtrek will then be passed to the reduce-wtrek function
to propogate the weight reduction according reduce-mode."
[grammar wtrek weights-to-reduce
& [{:keys [reduce-mode reducer-fn pick-mode pick-pred rnd-obj]
:or {reduce-mode :zero
reducer-fn reducer-half
pick-mode :weight-dist
pick-pred identity}
:as opts}]]
(let [big? #(and % (> % 0))
bigs (filter #(and (big? (get weights-to-reduce %))
(big? (get wtrek %)))
(keys weights-to-reduce))
;; _ (prn :bigs)
;; _ (pprint bigs)
distances (memoized-distance-trek grammar)
grouped (group-by #(or (get wtrek %) 0)
bigs)
;; _ (prn :distances distances)
;; _ (prn :grouped grouped)
;; _ (pprint (sort-by key grouped))
weighted-paths (for [[w ps] grouped
p ps
:when (pick-pred p)]
[p (condp = pick-mode
:weight w
:dist (get distances p)
:weight-dist (* w (get distances p)))])
rpath (when (seq weighted-paths)
(util/weighted-rand-nth weighted-paths rnd-obj))]
;; (prn :rpath rpath :wtrek-w (get wtrek rpath) :wtr-w (get weights-to-reduce rpath))
(if rpath
(let [new-wtrek (assoc wtrek rpath (reducer-fn
(get wtrek rpath)
(get weights-to-reduce rpath)))
rsubset #{rpath}]
(reduce-wtrek grammar new-wtrek (assoc opts :reduced-subset rsubset)))
(do
;; (println "******************* no rpath *******************")
wtrek))))
;; ---
(defn prune-node*
"Internal: Used by prune-node* to prune rule bodies/productions
based on :wtrek"
[node wtrek cur-path]
(let [epsilon? #(= :epsilon (:tag %))
tag (:tag node)]
(cond
(and (grammar/CHILD-EDGE (last cur-path))
(grammar/WEIGHTED (last (pop cur-path)))
(contains? #{0 nil} (get wtrek cur-path)))
{:tag :epsilon}
(:parsers node) ;; :alt, :cat
(let [ps (filter
#(not (epsilon? %))
(map-indexed
(fn [idx n]
(prune-node* n wtrek (conj cur-path tag idx)))
(:parsers node)))]
(cond
(= 0 (count ps)) {:tag :epsilon}
(= 1 (count ps)) (first ps)
:else (assoc node :parsers ps)))
(:parser2 node) ;; :ord
(let [p1 (prune-node* (:parser1 node) wtrek (conj cur-path tag 0))
p2 (prune-node* (:parser1 node) wtrek (conj cur-path tag 1))]
(cond (and (epsilon? p1)
(epsilon? p2)) {:tag :epsilon}
(epsilon? p1) p2
(epsilon? p2) p1
:else (merge node {:parser1 p1 :parser2 p2})))
(:parser node) ;; :opt, :start, :plus
(let [n (prune-node* (:parser node) wtrek (conj cur-path tag 0))]
(if (epsilon? n)
n
(assoc node :parser n)))
:else ;; :nt, :string, :regexp, :epsilon
node)))
(defn prune-grammar
"Takes a grammar and returns a pruned grammar based on keys
specified in the options map. Three different prune operations are
performed:
- Removes rules listed in :removed
- Prune rule bodies/productions based on :wtrek
- If :start is specified in the options or is on the meta of the
grammar, then rules are removed that cannot be reached (directly
or indirectly) from the start rule/production.."
[grammar {:keys [wtrek start removed] :as ctx}]
(let [wtrek (or wtrek (weights/wtrek grammar 100))
start (or start (:start (meta grammar)))
;; Remove rules listed in removed
g1 (select-keys
grammar
(set/difference (set (keys grammar)) (set removed)))
;; Prune rule bodies using wtrek
g2 (reduce
(fn [g [r n]] (assoc g r (prune-node* n wtrek [r])))
g1
g1)
;; Remove rules that are never reached from start rule
used (if start
(let [deps (util/tree-deps g2)]
(loop [used #{}
pend #{start}]
(if (seq pend)
(let [new-used (set/union used pend)
pend-deps (apply set/union (vals (select-keys deps pend)))
new-pend (set/difference pend-deps new-used)]
(recur new-used new-pend))
used)))
(set (keys g2)))
g3 (select-keys g2 used)]
g3))
(defn prune-grammar->sorted-ebnf
[grammar {:keys [wtrek cycle-set] :as ctx}]
(let [red-grammar (prune-grammar grammar {:wtrek wtrek})
acyclic-grammar (apply dissoc red-grammar cycle-set)
rule-order (codegen/check-and-order-rules acyclic-grammar)
ordered (concat
(map #(vector % (get acyclic-grammar %)) rule-order)
(select-keys red-grammar cycle-set))]
(grammar/grammar->ebnf (reverse ordered))))