-
Notifications
You must be signed in to change notification settings - Fork 13
/
reader.lisp
326 lines (294 loc) · 12.5 KB
/
reader.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
(in-package :jsown)
(declaim (optimize (speed 3) (safety 0) (debug 3)))
;;;;;;;;;;;;;;;;;;;
;;;; character-tree
(defun build-character-tree (&rest strings)
"Builds a character tree from a set of strings"
(build-tree (loop for string in strings collect
(loop for char across string collect char))))
(define-compiler-macro build-character-tree (&whole form &rest strings)
(if (loop for string in strings unless (stringp string) return T)
form
`(quote ,(apply #'build-character-tree strings))))
(defun find-first-elts (lists)
(remove-duplicates (loop for list in lists
when (first list)
collect (first list))
:test #'eql))
(defun build-tree (lists)
"Builds a tree from a range of lists and a function to compare its elements by"
(when lists
(loop for first-elt in (find-first-elts lists)
collect (let ((matching-lists (loop for list in lists when (and (first list) (eql first-elt (first list)))
collect (rest list))))
(list first-elt
(loop for list in matching-lists unless list return T) ;; T shows that this is an end-result
(build-tree matching-lists))))))
(defun iterate-tree (tree char)
"Iterates a character-tree with the given character
Returns two values, being the new tree and whether or not this is an end-point."
(declare (type (or cons nil) tree)
(type character char))
(let ((solution (rest (find char tree :key #'first :test #'eql))))
(when solution
(values (second solution) (first solution)))))
;;;;;;;;;;;;;;;;;
;;;; parsing code
(defconstant +space-characters+ '(#\Space #\Newline #\Tab #\Linefeed)
"List of characters which may denote a space in the JSON format (these have not been verified")
(defstruct buffer
"A string-buffer which is used to operate on the strings
The use of a string-buffer allows us to read the data in bulk, and to operate on it, by using simple index manipulations.
Reading the string up front removes the hassle of having a fixed-size maximal input"
(string ""
:type simple-string) ; This contains the content of the buffer
(index 0 :type fixnum) ; This is the current index of the buffer
(mark 0 :type fixnum)) ; This contains a single number to indicate the start of a region. The user must ensure that this does not get overwritten himself
(defun build-buffer (string)
"Makes a new buffer and ensures the string is of the correct type"
(make-buffer :string (if (typep string 'simple-string)
string
(coerce string 'simple-string))))
(declaim (inline next-char next-char/ decr-char current-char peek-behind-char fetch-char subseq-buffer-mark mark-buffer mark-length skip-to skip-to/ skip-until skip-until/ skip-until* skip-spaces subseq-until subseq-until/ subseq-tree))
(defun next-char (buffer)
(declare (type buffer buffer))
"Sets the pointer to the next char in the buffer"
(incf (buffer-index buffer)))
(defun next-char/ (buffer)
(declare (type buffer buffer))
"Sets the pointer to the next char in the buffer, ignores escaped characters (they start with a \\) through"
(incf (buffer-index buffer))
(loop until (char/= (current-char buffer) #\\)
do (incf (buffer-index buffer) 2)))
(defun decr-char (buffer)
(declare (type buffer buffer))
"Sets the pointer to the previous char in the buffer"
(decf (buffer-index buffer)))
(defun current-char (buffer)
(declare (type buffer buffer))
"Returns the current character the buffer is pointing to"
(elt (buffer-string buffer) (buffer-index buffer)))
(defun peek-behind-char (buffer)
(declare (type buffer buffer))
(elt (buffer-string buffer) (1- (buffer-index buffer))))
(defun fetch-char (buffer)
(declare (type buffer buffer))
"Reads a character from the buffer and increases the index"
(next-char buffer)
(peek-behind-char buffer))
(defun subseq-buffer-mark (buffer)
(declare (type buffer buffer))
"Returns the content between index and mark for the current buffer
result: (subseq buffer-string mark index))"
(subseq (buffer-string buffer) (buffer-mark buffer) (buffer-index buffer)))
(defun mark-buffer (buffer)
"Sets the mark of the buffer to the current character"
(setf (buffer-mark buffer) (buffer-index buffer)))
(defun mark-length (buffer)
(declare (type buffer buffer))
"Returns the current amount of characters in the marked piece of the buffer"
(- (buffer-index buffer) (buffer-mark buffer)))
(defun skip-to (buffer last-char)
"Skips characters until <char> has been found. <char> is the last char which is skipped
see: skip-until"
(declare (type buffer buffer)
(type character last-char))
(skip-until buffer last-char)
(next-char buffer))
(defun skip-to/ (buffer last-char)
"What skip-to does, but with the ignoring of \\"
(declare (type buffer buffer)
(type character last-char))
(skip-until/ buffer last-char)
(next-char/ buffer))
(defun skip-until (buffer last-char)
"Skips characters until <char> has been found. <char> is NOT skipped
See: skip-to"
(declare (type buffer buffer)
(type character last-char))
(loop until (eql (current-char buffer) last-char)
do (next-char buffer))
(values))
(defun skip-until/ (buffer last-char)
"What skip-until does, but with \\ escaping"
(declare (type buffer buffer)
(type character last-char))
(decr-char buffer)
(loop do (next-char/ buffer)
until (eql (current-char buffer) last-char)))
(defun skip-until* (buffer &rest chars)
"Skips characters until one of the characters in <chars> has been found. The character which was found is not read from the buffer"
(declare (type buffer buffer))
(loop until (find (current-char buffer) chars :test #'eql)
do (next-char buffer)))
(defun skip-spaces (buffer)
"Skips spaces, tabs and newlines until a non-space character has been found"
(loop while (find (current-char buffer) +space-characters+ :test #'eql)
do (next-char buffer)))
;; (defmacro skip-spaces (buffer)
;; (declare (ignore buffer))
;; nil)
(defun subseq-until (buffer &rest chars)
"Returns a subsequence of stream, reading everything before a character belonging to chars is found. The character which was found is not read from the buffer"
(declare (type buffer buffer))
(mark-buffer buffer)
(loop until (find (current-char buffer) chars :test #'eql)
do (next-char buffer))
(subseq-buffer-mark buffer))
(defun subseq-until/ (buffer last-char)
"Does what subseq-until does, but does escaping too"
(declare (type buffer buffer)
(type character last-char))
(mark-buffer buffer)
(decr-char buffer)
(loop do (next-char/ buffer)
until (eql (current-char buffer) last-char))
(subseq-buffer-mark buffer))
(defun subseq-tree (buffer end-char tree)
"Returns a sequence of the buffer, reading everything that matches with the given tree before end-char is found. end-char is not read from the buffer
Returns nil if no sequence matching the tree could be found. It then stops iterating at the failed position
Skips #\\"
(declare (type buffer buffer)
(type character end-char))
(mark-buffer buffer)
(decr-char buffer)
(let ((accepted-p nil))
(loop
while (progn (next-char/ buffer)
(and tree (char/= (current-char buffer) end-char)))
do (multiple-value-setq (tree accepted-p) (iterate-tree tree (current-char buffer))))
(values accepted-p
(if accepted-p (subseq-buffer-mark buffer) ""))))
(defun read-object (buffer)
"reads a key-value pair into the hash"
(declare (type buffer buffer))
(loop until (progn (skip-spaces buffer)
(eql (fetch-char buffer) #\})) ; we may fetch-char here, as the character is a #\, to be skipped if it is not a #\}
collect (cons (read-key buffer)
(progn (skip-to buffer #\:)
(read-value buffer)))))
(defun read-partial-object (buffer tree)
"Reads an object from the buffer, but only when the key matches a key in the tree"
(declare (type buffer buffer)
(type (or cons nil) tree))
(loop until (progn (skip-spaces buffer)
(eql (fetch-char buffer) #\})) ; we may fetch-char here, as the character is a #\, to be skipped if it is not a #\}
append (multiple-value-bind (found-p key)
(read-partial-key buffer tree)
(progn (skip-to buffer #\:)
(if found-p
(list (cons key (read-value buffer)))
(progn (skip-value buffer) nil))))))
(defun skip-object (buffer)
"Skips an object from the buffer"
(declare (type buffer buffer))
(loop until (progn (skip-spaces buffer)
(eql (fetch-char buffer) #\})) ; we may read-char here, as the character is a , to be skipped if it is not a }
do (skip-key buffer) (skip-value buffer)))
(defun read-partial-key (buffer tree)
"reads a key from the buffer. Returns (values key T) if the key was found as a valid key in the tree, or (values nil nil) if it was not"
(declare (type buffer buffer)
(type (or cons nil) tree))
(skip-to buffer #\")
(multiple-value-bind (accepted-p solution)
(subseq-tree buffer #\" tree)
(declare (type (or nil T) accepted-p)
(type simple-string solution))
(skip-to buffer #\") ;; clean up mess
(values accepted-p solution)))
(defun read-key (buffer)
"reads a key from the key-value list"
(declare (type buffer buffer))
(skip-to buffer #\")
(parse-string buffer))
(defun skip-key (buffer)
"reads a key from the key-value list"
(declare (type buffer buffer))
(skip-to buffer #\")
(skip-string buffer))
(defun read-value (buffer)
"Reads a value from the stream.
This searches for the first meaningful character, and delegates to the right function for that character"
(declare (type buffer buffer))
(skip-spaces buffer)
(case (fetch-char buffer)
(#\" (parse-string buffer))
(#\{ (read-object buffer))
(#\[ (read-array buffer))
(#\t (incf (buffer-index buffer) 3)
T)
(#\f (incf (buffer-index buffer) 4)
nil)
(#\n (incf (buffer-index buffer) 3)
nil)
(T (read-number buffer))))
(defun skip-value (buffer)
"Skips a value from the stream.
This searches for the first meaningful character, and delegates to the right function for skipping that"
(declare (type buffer buffer))
(skip-spaces buffer)
(case (fetch-char buffer)
(#\" (skip-string buffer))
(#\{ (skip-object buffer))
(#\[ (skip-array buffer))
(#\t (incf (buffer-index buffer) 3))
(#\f (incf (buffer-index buffer) 4))
(#\n (incf (buffer-index buffer) 3))
(T (skip-number buffer)))
(values))
(defun skip-string (buffer)
(declare (type buffer buffer))
"Skips the contents of an input string from the buffer. Assumes the first #\" has been read"
(skip-to/ buffer #\"))
(defun parse-string (buffer)
"Reads a JSON string from the stream (assumes the first \" is missing and NO escaped characters are in there"
(declare (type buffer buffer))
(let ((result (subseq-until/ buffer #\")))
(next-char buffer)
result))
(defun skip-array (buffer)
(declare (type buffer buffer))
"Skips the contents of an array from the buffer. Assumes the first #\[ is already read from the buffer"
(decr-char buffer)
(loop
until (progn (skip-spaces buffer)
(next-char buffer)
(eql (peek-behind-char buffer) #\]))
do (skip-value buffer))
)
(defun read-array (buffer)
"Reads a JSON array from the stream (assumes the first [ is missing"
(declare (type buffer buffer))
(decr-char buffer)
(loop
until (progn (skip-spaces buffer)
(next-char buffer)
(eql (peek-behind-char buffer) #\]))
collect (read-value buffer)))
(defun read-number (buffer)
(declare (type buffer buffer))
(decr-char buffer)
(let ((whole-part (parse-integer (subseq-until buffer #\] #\} #\, #\.)))) ;; only these chars can delimit the whole part of a number
(if (eql (current-char buffer) #\.)
(progn
(next-char buffer)
(let ((float-part (parse-integer (subseq-until buffer #\] #\} #\,)))) ;; only these characters are allowed to actually end a number
(+ whole-part (/ float-part (expt 10 (mark-length buffer))))))
whole-part)))
(defun skip-number (buffer)
(declare (type buffer buffer))
(decr-char buffer)
(skip-until* buffer #\] #\} #\,))
(defun parse (string &rest keywords-to-read)
"Reads a json object from the given string, with the given keywords being the keywords which are fetched from the object"
(let ((buffer (build-buffer string)))
(skip-spaces buffer)
(if keywords-to-read
(read-partial-object buffer (apply #'build-character-tree keywords-to-read))
(read-object buffer))))
(define-compiler-macro parse (&whole whole string &rest keywords-to-read) ; this allows the character tree to be precompiled
(if keywords-to-read
`(let ((buffer (build-buffer ,string)))
(skip-spaces buffer)
(read-partial-object buffer (build-character-tree ,@keywords-to-read)))
whole))