-
Notifications
You must be signed in to change notification settings - Fork 1
/
pegobjects.lisp
338 lines (257 loc) · 11.1 KB
/
pegobjects.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
(in-package #:cl-peg)
;(declaim (optimize (speed 3) (safety 0) (debug 0)))
; ** classes modelling PEG expressions
(defclass parse-element () ((hash-val :type (or fixnum nil)
:initform nil)))
(defclass grouping-element (parse-element) ((expr :initarg :expr :accessor expr)))
(defclass parse-element-with-slot (parse-element) ((feature :initarg :feature
:reader feature)))
(defclass magic-dot (parse-element-with-slot) ())
(defclass eof (parse-element-with-slot) ())
(defclass trivial-match (parse-element-with-slot) ())
(defclass call-rule (parse-element-with-slot) ())
(defclass character-class (parse-element-with-slot) ())
(defclass quoted-char (parse-element-with-slot) ())
(defclass quoted-string (parse-element-with-slot) ())
(defclass lambda-ref (parse-element-with-slot) ())
(defclass match (parse-element-with-slot) ())
; this is the only grouping-element with an extra slot
(defclass named-non-terminal (grouping-element) ((name :initarg :name :accessor name)))
(defclass grammar (grouping-element) ((non-terminal-map :initform nil
:reader non-terminal-map)
(pe-map :initarg :pe-map
:reader pe-map)
(ready-for-matching :initform nil
:reader ready-for-matching)
(parse-table :initform nil
:reader parse-table)
))
(defclass expression-list (grouping-element) ((debugtag :initarg :debugtag)))
(defclass ordered-expr-list (grouping-element) ())
(defclass zero-or-more (grouping-element) ())
(defclass optional (grouping-element) ())
(defclass negated (grouping-element) ())
(defclass followed-by (grouping-element) ())
(defclass at-least-one (grouping-element) ())
(defclass bracketed-rule (grouping-element) ())
(defgeneric prepare-matching-structures (grammar))
(defgeneric clear-match-results (grammar))
; returns a pr struct
(defgeneric parse-and-match (parse-element input-char-list input-offset))
; printing peg objects
(defmethod print-object ((pe parse-element) stream)
(format stream "PE: ~S ~S" (class-of pe) (slot-value pe 'hash-val))
)
(defmethod print-object ((nt named-non-terminal) stream)
(format stream "NT: ~S ~S" (slot-value nt 'name) (slot-value nt 'hash-val))
)
; ** memoization
; methods for comparing objects that we want to memoize
(defgeneric compare (a b))
(defgeneric hash (a))
(defun pe-compare (a b)
(if (not (eq a b))
(cond ((not (eq (class-of a) (class-of b))) nil)
((typep a 'parse-element) (compare a b))
((typep a 'list) (compare a b))
(t (equal a b))
)
t))
(defun pe-hash (a)
(typecase a
(parse-element (progn
(if (null (slot-value a 'hash-val))
(setf (slot-value a 'hash-val) (hash a)))
(slot-value a 'hash-val)))
(t (sxhash a))))
; this is the only way I could find to reset this hash-function
;(remhash 'cl-peg::pehash1 NET.HEXAPODIA.HASHTABLES::*HASH-NICKNAME-MAP*)
;FIXME
(NET.HEXAPODIA.HASHTABLES::register-hash-function 'pehash1 #'pe-hash #'pe-compare)
(defvar *pe-map*)
(setf *pe-map* (NET.HEXAPODIA.HASHTABLES::make-generic-hashtable :test 'pehash1))
(defun memoize (a)
(if (null (NET.HEXAPODIA.HASHTABLES::hashref a *pe-map*))
(setf (NET.HEXAPODIA.HASHTABLES::hashref a *pe-map*) a))
(if (null (NET.HEXAPODIA.HASHTABLES::hashref a *pe-map*))
(break))
(NET.HEXAPODIA.HASHTABLES::hashref a *pe-map*)
)
; this case should never occur, so this functions as an assertion
(defmethod compare ((a parse-element-with-slot) (b parse-element-with-slot))
(if (eq (class-of a) (class-of b))
(break "assertion failed")
nil))
(defmethod compare ((a magic-dot) (b magic-dot))
t)
(defmethod compare ((a eof) (b eof))
t)
(defmethod compare ((a trivial-match) (b trivial-match))
t)
(defmethod compare ((a call-rule) (b call-rule))
(equal (slot-value a 'feature) (slot-value b 'feature)))
(defmethod compare ((a character-class) (b character-class))
(equal (slot-value a 'feature) (slot-value b 'feature)))
(defmethod compare ((a quoted-char) (b quoted-char))
(equal (slot-value a 'feature) (slot-value b 'feature)))
(defmethod compare ((a quoted-string) (b quoted-string))
(equal (slot-value a 'feature) (slot-value b 'feature)))
(defmethod compare ((a match) (b match))
(equal (slot-value a 'feature) (slot-value b 'feature)))
(defmethod hash ((pe parse-element-with-slot))
(logxor (sxhash (class-of pe))
(sxhash (slot-value pe 'feature))))
; grouping elements and the rest
(defmethod hash ((ge grouping-element))
(logxor (sxhash (class-of ge)) (hash (slot-value ge 'expr))))
(defmethod hash ((nt named-non-terminal))
(prog1 (logxor (sxhash (class-of nt)) (sxhash (slot-value nt 'name)) (hash (slot-value nt 'expr)))
; (break "nt ~A" nt)
))
(defmethod hash ((l list))
(if (null l)
(sxhash 'list)
(loop for el in l and
accumulator = (sxhash 'list)
do (setf accumulator (logxor accumulator (hash el)))
finally ( return accumulator))))
(defmethod compare ((a null) (b null)) t)
(defmethod compare ((a t) (b t))
(equal a b))
(defmethod compare ((a list) (b list))
(if (compare (first a) (first b))
(compare (rest a) (rest b))
nil))
(defmethod compare ((a grouping-element) (b grouping-element))
; (format t "~A is not ~A~%" a b)
nil)
(defmethod compare ((a grammar) (b grammar))
(eq a b))
(defmethod compare ((a expression-list) (b expression-list))
(compare (slot-value a 'expr) (slot-value b 'expr)))
(defmethod compare ((a ordered-expr-list) (b ordered-expr-list))
(compare (slot-value a 'expr) (slot-value b 'expr)))
(defmethod compare ((a zero-or-more) (b zero-or-more))
(compare (slot-value a 'expr) (slot-value b 'expr)))
(defmethod compare ((a optional) (b optional))
(compare (slot-value a 'expr) (slot-value b 'expr)))
(defmethod compare ((a negated) (b negated))
(compare (slot-value a 'expr) (slot-value b 'expr)))
(defmethod compare ((a followed-by) (b followed-by))
(compare (slot-value a 'expr) (slot-value b 'expr)))
(defmethod compare ((a at-least-one) (b at-least-one))
(compare (slot-value a 'expr) (slot-value b 'expr)))
(defmethod compare ((a bracketed-rule) (b bracketed-rule))
(compare (slot-value a 'expr) (slot-value b 'expr)))
(defmethod compare ((a named-non-terminal) (b named-non-terminal))
(if (equal (slot-value a 'name) (slot-value b 'name))
(compare (slot-value a 'expr) (slot-value b 'expr))
nil))
(defclass parse-table () ((expressionToRowMap :initarg :expressionToRowMap
:reader expressionToRowMap
:type NET.HEXAPODIA.HASHTABLES::hash-container
)))
(defun make-parse-table ()
(make-instance 'parse-table
:expressionToRowMap (NET.HEXAPODIA.HASHTABLES::make-generic-hashtable :size 50000 :test 'pehash1)
)
)
(defvar *parse-result-table*)
; we set safety to 1 to be able to catch the invalid array reference
; I don't know if this is portable to lisps besides SBCL
; we have a hard-coded initial input length of 1000 characters
; TODO: expand-array should double the array length or increase it in some other way
(defun lookup-parse-result (pe-or-l input-offset) (declare (type (or parse-element list) pe-or-l ) (fixnum input-offset))
(declare (optimize (speed 3) (safety 1) (debug 0)))
(let
((lookup
(NET.HEXAPODIA.HASHTABLES::hashref pe-or-l (expressionToRowMap *parse-result-table*))))
(if (null lookup)
nil
(handler-case
(let ((lookup2 (aref (the simple-vector lookup) input-offset)))
(if (eq lookup2 0)
nil
lookup2))
(type-error () (progn
(setf (NET.HEXAPODIA.HASHTABLES::hashref pe-or-l (expressionToRowMap *parse-result-table*)) (expand-array lookup))
nil)))
))
)
; is there a better way for copying arrays?
(defun expand-array (array) (declare ((or nil simple-vector) array))
(let ((temp (make-array 1000 :element-type t :initial-element nil)))
(declare (simple-vector temp))
(dotimes (i (length array))
(setf (aref temp i) (aref array i))
)
temp)
)
; some speed tests
;; (defvar *test-ht*)
;; (setf *test-ht* (NET.HEXAPODIA.HASHTABLES::make-generic-hashtable :size 50000 :test 'pehash1))
;; (defun constest (i)
;; (declare (optimize (speed 3) (safety 1) (debug 0)))
;; (if (eq (NET.HEXAPODIA.HASHTABLES::hashref '(wombat squirrel mouse) *test-ht*) #\l)
;; (format t "yowzer ~A " (cons 'b '(1 23)))
;; )
;; (if (eq (mod i 5000) 0)
;; (setf (NET.HEXAPODIA.HASHTABLES::hashref '(wombat squirrel mouse) *test-ht*) i))
;; )
;; (defun test-hash ()
;; (memoize (make-instance 'call-rule :feature '|wosina|))
;; (memoize (make-instance 'call-rule :feature '|wosina|))
;; (memoize (make-instance 'call-rule :feature '|wosina|))
;; )
; the initial array length is the max of 1000 or the input-offset + 1
(defun set-parse-result (pe-or-l input-offset pr) (declare (type (or parse-element list) pe-or-l ) (type fixnum input-offset))
(declare (optimize (speed 3) (safety 1) (debug 0)))
(let ((lookup (NET.HEXAPODIA.HASHTABLES::hashref pe-or-l (expressionToRowMap *parse-result-table*))))
(if (null lookup)
(setf (NET.HEXAPODIA.HASHTABLES::hashref pe-or-l (expressionToRowMap *parse-result-table*)) (make-array (max 1000 (+ 1 input-offset)) :element-type t :initial-element nil)))
(let ((lookup2 (NET.HEXAPODIA.HASHTABLES::hashref pe-or-l (expressionToRowMap *parse-result-table*))))
(setf (aref (the simple-vector lookup2) input-offset) pr))
)
)
; set up the non-terminal-map to map the non-terminal name symbol to the named-non-terminal object
(defmethod prepare-matching-structures ((g grammar))
(setf (slot-value g 'non-terminal-map) (make-hash-table))
(loop for nt in (collect-non-terminals g) do
(if (not (eq (gethash (slot-value nt 'name) (non-terminal-map g)) nil))
(progn
(format t "~A" (list (slot-value nt 'name) " installing non-terminal twice!"))
(break "installing non-terminal ~A twice" (slot-value nt 'name))
)
(setf (gethash (slot-value nt 'name) (non-terminal-map g)) nt)))
; initialize the parse-table
(clear-match-results g)
(setf (slot-value g 'ready-for-matching) t)
)
(defmethod clear-match-results ((g grammar))
(setf (slot-value g 'parse-table) (make-parse-table))
)
(defun dump-pe-map (g)
(NET.HEXAPODIA.HASHTABLES::all-hash-keys (pe-map g))
)
(defun workout ()
(let ((a (make-instance 'optional :expr (list (make-instance 'zero-or-more :expr (list (make-instance 'quoted-string :feature "what"))))))
(b (make-instance 'optional :expr (list (make-instance 'zero-or-more :expr (list (make-instance 'quoted-string :feature "gabble")))))))
(print "a ")
(print (hash a))
(print "b ")
(print (hash b))))
(defun workout2 ()
(let ((a (make-instance 'optional :expr (list
(make-instance 'call-rule :feature 'martin)
(make-instance 'zero-or-more :expr (list
(make-instance 'quoted-string :feature "what")
(make-instance 'call-rule :feature 'martin))))))
(b (make-instance 'optional :expr (list
(make-instance 'call-rule :feature 'robert)
(make-instance 'zero-or-more :expr (list
(make-instance 'quoted-string :feature "what")
(make-instance 'call-rule :feature 'robert)))))))
(print "a ")
(print (hash a))
(print "b ")
(print (hash b))))