/
json.ml
415 lines (389 loc) · 15.5 KB
/
json.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
type number = {
integer: int;
fraction: int;
precision: int;
exponent: int;
}
let make_int n = {
integer = n;
fraction = 0;
precision = 0;
exponent = 0;
}
let make_decimal i f p = {
integer = i;
fraction = f;
precision = p;
exponent = 0;
}
let make_number i f p e = {
integer = i;
fraction = f;
precision = p;
exponent = e;
}
type json =
| Null
| Bool of bool
| Number of number
| String of string
| Array of json list
| Object of dict
and dict = (string, json) Hashtbl.t
let empty_dict () = Hashtbl.create 0
let new_dict key value =
let h = Hashtbl.create 4 in
let () = Hashtbl.add h key value in
h
let new_dict_from_list: (string * json) list -> (string, json) Hashtbl.t = fun ls ->
let h = Hashtbl.create 4 in
let add (key, value) =
Hashtbl.add h key value
in
let () = List.iter add ls in
h
type error =
| EmptyString
| CharMismatch of char
| HexCharExpected
| NullExpected
| BoolExpected
| ExponentRequired
| UnrecognisedEscapeSequence of char
| InvalidStringStart
| InvalidStringEnd
| InvalidArrayElement of error
| InvalidObjectElement of error
| Fatal
| InvalidValue
| OutOfBounds
| Garbage
let sofc chr =
String.make 1 chr
let rec describe = function
| EmptyString -> "Empty string parsed"
| CharMismatch c -> "Expected " ^ sofc(c)
| HexCharExpected -> "Expected hex char"
| NullExpected -> "Expected null"
| BoolExpected -> "Expected bool"
| ExponentRequired -> "Exponent required"
| UnrecognisedEscapeSequence c -> "Unrecognised escape sequence \\" ^ sofc(c)
| InvalidStringStart -> {|String must start with "|}
| InvalidStringEnd -> {|String must end with "|}
| InvalidArrayElement e -> "Failed to parse array element: " ^ (describe e)
| InvalidObjectElement e -> "Failed to parse object element: " ^ (describe e)
| Fatal -> "Should never happen"
| InvalidValue -> "Invalid value"
| OutOfBounds -> "Out of bounds read attempt"
| Garbage -> "garbage found at the end of string"
let is_letter = function
| 'a'..'z' -> true
| 'A'..'Z' -> true
| _ -> false
let is_digit = function
| '0'..'9' -> true
| _ -> false
let is_ws = function
| ' ' -> true
| '\t' -> true
| '\n' -> true
| '\r' -> true
| _ -> false
let hexbytes = Bytes.of_string "0123456789abcdef"
let is_hex ch = Bytes.contains hexbytes (Char.lowercase_ascii ch)
let parse_hex_unsafe ch =
Bytes.index hexbytes (Char.lowercase_ascii ch)
let implode_rev ls =
let size = List.length ls in
let bytes = Bytes.create size in
List.iteri (fun idx ch -> Bytes.set bytes (size - idx - 1) ch) ls;
Bytes.to_string bytes
(** length s returns length of the string s ignoring any trailing whitespaca *)
let length str =
let strlen = String.length str in
let rec proc idx =
if idx >= 0 && is_ws str.[idx] then proc (idx - 1)
else idx + 1
in
if strlen > 0 then proc (strlen - 1)
else 0
let (let=) v f =
match v with
| Error _ as err -> err
| Ok ok -> f ok
(** Parse non empty string into json *)
let parse1 str strlen =
let peek idx = str.[idx]
in
let check idx =
if idx < strlen then Ok ()
else Error (OutOfBounds, idx)
in
let take f idx =
let rec proc last =
if last < strlen && f (peek last) then proc (last + 1)
else last
in
let last = proc idx in
let len = last - idx in
match len with
| 0 -> Error (EmptyString, idx)
| _ -> Ok (String.sub str idx len, last)
in
let ask f idx =
let rec proc acc idx =
if idx < strlen then
match f idx with
| Ok (Some c, idx') -> proc (c :: acc) idx'
| Ok (None, idx') -> Ok (implode_rev acc, idx')
| Error e -> Error e
else Ok (implode_rev acc, idx)
in
let= (data, idx') = proc [] idx in
Ok (data, idx')
in
let skip f idx =
let rec proc last =
if last < strlen && f (peek last) then proc (last + 1)
else last
in
proc idx
in
let skip_ws idx = skip is_ws idx
in
let chr ch idx =
if idx < strlen && peek idx = ch then Ok (ch, idx + 1)
else Error (CharMismatch ch, idx)
in
let nhex acc idx =
if idx < strlen && is_hex (peek idx) then
Ok (16 * acc + parse_hex_unsafe (peek idx), idx + 1)
else Error (HexCharExpected, idx)
in
let hexword idx =
let= (a, idx) = nhex 0 idx in
let= (b, idx) = nhex a idx in
let= (c, idx) = nhex b idx in
let= (d, idx) = nhex c idx in
Ok (d, idx)
in
let parse_null idx =
match take is_letter idx with
| Ok ("null", idx') -> Ok (Null, idx')
| _ -> Error (NullExpected, idx)
in
let parse_bool idx =
match take is_letter idx with
| Ok ("true", idx') -> Ok (Bool true, idx')
| Ok ("false", idx') -> Ok (Bool false, idx')
| _ -> Error (BoolExpected, idx)
in
let parse_number idx =
(* parse sign *)
let sign, idx =
match chr '-' idx with
| Ok (_, idx') -> -1, idx'
| Error _ -> 1, idx
in
(* parse integral part *)
let= (n, idx) = take is_digit idx in
let integer = sign * int_of_string n in
(* parse fraction part *)
let (fraction, precision, idx) =
match chr '.' idx with
| Error (_, idx) -> (0, 0, idx)
| Ok (_, idx) ->
match take is_digit idx with
| Error (_, idx) -> (0, 0, idx)
| Ok (n, idx) -> (int_of_string n, String.length n, idx)
in
(* parse exponent *)
let= (exponent, idx) =
match chr 'e' idx with
| Error (_, idx) -> Ok (0, idx)
| Ok (_, idx) ->
match take is_digit idx with
| Error (_, idx) -> Error (ExponentRequired, idx)
| Ok (n, idx) -> Ok (int_of_string n, idx)
in
Ok (Number (make_number integer fraction precision exponent), idx)
in
let parse_string idx =
let string_char idx =
let= () = check idx in
match peek idx with
| '"' -> Ok (None, idx)
| '\\' ->
(let= () = check (idx + 1) in
match peek (idx + 1) with
| '"' -> Ok (Some '"', idx + 2)
| '\\' -> Ok (Some '\\', idx + 2)
| '/' -> Ok (Some '/', idx + 2)
| 'b' -> Ok (Some '\b', idx + 2)
| 'f' -> Ok (Some '\014', idx + 2) (* '\f' not supported by OCaml? *)
| 'n' -> Ok (Some '\n', idx + 2)
| 'r' -> Ok (Some '\r', idx + 2)
| 't' -> Ok (Some '\t', idx + 2)
| 'u' ->
let= (_hex, idx) = hexword (idx + 2) in
Ok (Some ('?'), idx) (* we don't support unicode, replace with question mark *)
| c -> Error (UnrecognisedEscapeSequence c, idx))
| c -> Ok (Some c, idx + 1)
in
match chr '"' idx with
| Error (_, idx) -> Error (InvalidStringStart, idx)
| Ok (_, idx) ->
let= str, idx = ask string_char idx in
match chr '"' idx with
| Error (_, idx) -> Error (InvalidStringEnd, idx)
| Ok (_, idx) -> Ok (String str, idx)
in
let rec parse_array idx =
let rec parse_next_array_item acc idx =
let idx = skip_ws idx in
match chr ',' idx with
| Error (_, idx) -> Ok (List.rev acc, idx)
| Ok (_, idx) ->
match parse_value idx with
| Error (e, idx) -> Error (InvalidArrayElement e, idx)
| Ok (value, idx) ->
parse_next_array_item (value :: acc) idx
in
let parse_array_items value idx =
parse_next_array_item [value] idx
in
let= (_, idx) = chr '[' idx in
let= (items, idx) =
match parse_value idx with
| Error (_, idx) -> Ok ([], idx)
| Ok (value, idx) -> parse_array_items value idx
in
let idx = skip_ws idx in
let= (_, idx) = chr ']' idx in
Ok (Array items, idx)
and parse_object idx =
let parse_object_item idx =
match parse_string idx with
| Error _ as err -> err
| Ok (String key, idx) -> (
let idx = skip_ws idx in
let= (_, idx) = chr ':' idx in
let idx = skip_ws idx in
match parse_value idx with
| Error (e, idx) -> Error (InvalidObjectElement e, idx)
| Ok (value, idx) -> Ok ((key, value), idx)
)
| Ok _ -> Error (Fatal, idx)
in
let rec parse_next_object_item dict idx =
match chr ',' idx with
| Error (_, idx) -> Ok (dict, idx)
| Ok (_, idx) ->
let idx = skip_ws idx in
let= ((key, value), idx) = parse_object_item idx in
let () = Hashtbl.replace dict key value in
let idx = skip_ws idx in
parse_next_object_item dict idx
in
let parse_object_items key value idx' =
let dict = new_dict key value in
parse_next_object_item dict idx'
in
let parse_object_rest key value idx =
let idx = skip_ws idx in
let= (items, idx) = parse_object_items key value idx in
Ok (items, idx)
in
let= (_, idx) = chr '{' idx in
let idx = skip_ws idx in
let= (pairs, idx) =
if peek idx = '"' then
let= ((key, value), idx) = parse_object_item idx in
parse_object_rest key value idx
else
Ok (empty_dict (), idx)
in
let idx = skip_ws idx in
let= (_, idx) = chr '}' idx in
Ok (Object pairs, idx)
and parse_value idx =
let idx = skip_ws idx in
if idx < strlen then
match peek idx with
| 'n' -> parse_null idx
| 't' | 'f' -> parse_bool idx
| '0'..'9' | '-' -> parse_number idx
| '"' -> parse_string idx
| '[' -> parse_array idx
| '{' -> parse_object idx
| _ -> Error (InvalidValue, idx)
else Error (OutOfBounds, idx)
in
let= (result, idx) = parse_value 0 in
if idx = strlen then Ok result
else Error (Garbage, idx)
(** Parse string into json *)
let parse str =
let strlen = length str in
if strlen > 0 then parse1 str strlen
else Error (EmptyString, 0)
let print_bool chan = function
| false -> output_string chan "false"
| true -> output_string chan "true"
let print_number chan n =
let print_fraction chan fraction =
if fraction > 0 then
Printf.fprintf chan ".%i" fraction
in
let print_exponent chan exponent =
if exponent > 0 then
Printf.fprintf chan "e%i" exponent
in
Printf.fprintf chan "%i%a%a" n.integer print_fraction n.fraction print_exponent n.exponent
let escape = function
| '"' -> "\\\""
| '\\' -> "\\\\"
| '\r' -> "\\r"
| '\n' -> "\\n"
| '\t' -> "\\t"
| '\b' -> "\\b"
| '\014' -> "\\f"
| ch -> sofc ch
let transliterate chan str =
String.iter (fun ch -> output_string chan (escape ch)) str
let rec print_string chan s =
Printf.fprintf chan {|"%a"|} transliterate s
and print_array chan a =
let rec print_array_items chan = function
| [] -> ()
| hd :: tl ->
Printf.fprintf chan ", %a" print hd;
print_array_items chan tl
in
match a with
| [] -> output_string chan "[]"
| [item] -> Printf.fprintf chan "[%a]" print item
| hd :: tl -> Printf.fprintf chan "[%a%a]" print hd print_array_items tl
and print_object chan o =
let print_object_item sep chan (key, value) =
Printf.fprintf chan "%s%a: %a" sep print_string key print value
in
let rec print_object_items chan seq =
match seq () with
| Seq.Nil -> ()
| Seq.Cons (item, next) ->
print_object_item ", " chan item;
print_object_items chan next
in
match Hashtbl.to_seq o () with
| Seq.Nil -> output_string chan "{}"
| Seq.Cons (item, next) ->
Printf.fprintf chan "{%a%a}" (print_object_item "") item print_object_items next
(** Print string representation to given channall chan *)
and print chan = function
| Null -> output_string chan "null"
| Bool b -> print_bool chan b
| Number n -> print_number chan n
| String s -> print_string chan s
| Array a -> print_array chan a
| Object o -> print_object chan o