Skip to content

Commit fb71985

Browse files
committed
Merge: Json Ad-Hoc string parser
Simple ad-hoc JSON parser that works in a similar way to `DOMXmlParser` or `SExpParser`. On the simple example, the runtime is now around 1.5 seconds. Once #1885 is merged, however, the runtime is ~0.65 seconds, which is nice. Depends on #1884. Pull-Request: #1886 Reviewed-by: Jean Privat <jean@pryen.org> Reviewed-by: Alexis Laferrière <alexis.laf@xymus.net>
2 parents 6725d2d + 002ff6e commit fb71985

38 files changed

+814
-401
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
{
2-
"message": "Wrong format for `mois` (expected AAAA-MM got 2015\/01)"
2+
"message": "Wrong format for `mois` (expected AAAA-MM got 2015/01)"
33
}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
{
2-
"message": "Wrong format for `date` (expected AAAA-MM-DD got 2015\/01\/11)"
2+
"message": "Wrong format for `date` (expected AAAA-MM-DD got 2015/01/11)"
33
}

lib/core/text/native.nit

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,21 @@ redef class Byte
3131
end
3232
end
3333

34+
redef class Int
35+
# Returns the code point encoded by a UTF-16 surrogate pair packed in `self` (high surrogate in bits 16-31, low surrogate in bits 0-15)
36+
#
37+
# assert 0xD83DDE02.from_utf16_surr == 0x1F602
38+
fun from_utf16_surr: Int do
39+
var hi = (self & 0xFFFF0000) >> 16
40+
var lo = self & 0xFFFF
41+
var cp = 0
42+
cp += (hi - 0xD800) << 10
43+
cp += lo - 0xDC00
44+
cp += 0x10000
45+
return cp
46+
end
47+
end
48+
3449
# Native strings are simple C char *
3550
extern class NativeString `{ char* `}
3651
# Creates a new NativeString with a capacity of `length`

lib/json/static.nit

Lines changed: 83 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ interface Jsonable
4747
# avoid cyclic references between `append_json` and `to_json` when none are
4848
# implemented.
4949
protected fun to_json_by_append: String do
50-
var buffer = new RopeBuffer
50+
var buffer = new FlatBuffer
5151
append_json(buffer)
5252
return buffer.to_s
5353
end
@@ -89,6 +89,22 @@ end
8989
redef class Text
9090
super Jsonable
9191

92+
# Removes JSON-escaping if necessary in a JSON string
93+
#
94+
# assert "\\\"string\\uD83D\\uDE02\\\"".unescape_json == "\"string😂\""
95+
fun unescape_json: Text do
96+
if not json_need_escape then return self
97+
return self.json_to_nit_string
98+
end
99+
100+
# Does `self` need unescaping when converted from JSON to Nit?
101+
#
102+
# i.e. is there at least one `\` character in it ?
103+
#
104+
# assert not "string".json_need_escape
105+
# assert "\\\"string\\\"".json_need_escape
106+
protected fun json_need_escape: Bool do return has('\\')
107+
92108
redef fun append_json(buffer) do
93109
buffer.add '\"'
94110
for i in [0 .. self.length[ do
@@ -97,19 +113,13 @@ redef class Text
97113
buffer.append "\\\\"
98114
else if char == '\"' then
99115
buffer.append "\\\""
100-
else if char == '\/' then
101-
buffer.append "\\/"
102116
else if char < ' ' then
103117
if char == '\n' then
104118
buffer.append "\\n"
105119
else if char == '\r' then
106120
buffer.append "\\r"
107121
else if char == '\t' then
108122
buffer.append "\\t"
109-
else if char == 0x0C.code_point then
110-
buffer.append "\\f"
111-
else if char == 0x08.code_point then
112-
buffer.append "\\b"
113123
else
114124
buffer.append char.escape_to_utf16
115125
end
@@ -120,13 +130,66 @@ redef class Text
120130
buffer.add '\"'
121131
end
122132

133+
# Unescapes `self` from a JSON-escaped string to a Nit string
134+
#
135+
# assert "\\\"string\\\"".json_to_nit_string == "\"string\""
136+
# assert "\\nEscape\\t\\n".json_to_nit_string == "\nEscape\t\n"
137+
# assert "\\u0041zu\\uD800\\uDFD3".json_to_nit_string == "Azu𐏓"
138+
protected fun json_to_nit_string: String do
139+
var res = new FlatBuffer.with_capacity(bytelen)
140+
var i = 0
141+
while i < self.length do
142+
var char = self[i]
143+
if char == '\\' then
144+
i += 1
145+
char = self[i]
146+
if char == 'b' then
147+
char = 0x08.code_point
148+
else if char == 'f' then
149+
char = 0x0C.code_point
150+
else if char == 'n' then
151+
char = '\n'
152+
else if char == 'r' then
153+
char = '\r'
154+
else if char == 't' then
155+
char = '\t'
156+
else if char == 'u' then
157+
var code = substring(i + 1, 4)
158+
var hx = code.to_hex
159+
if hx >= 0xD800 and hx <= 0xDFFF then
160+
var lostr = substring(i + 7, 4)
161+
if lostr.length < 4 then
162+
hx = 0xFFFD
163+
else
164+
hx <<= 16
165+
hx += lostr.to_hex
166+
hx = hx.from_utf16_surr
167+
end
168+
i += 6
169+
end
170+
i += 4
171+
char = hx.code_point
172+
end
173+
# `"`, `/` or `\` => Keep `char` as-is.
174+
end
175+
res.add char
176+
i += 1
177+
end
178+
return res.to_s
179+
end
180+
181+
123182
# Encode `self` in JSON.
124183
#
125184
# ~~~
126185
# assert "\t\"http://example.com\"\r\n\0\\".to_json ==
127-
# "\"\\t\\\"http:\\/\\/example.com\\\"\\r\\n\\u0000\\\\\""
186+
# "\"\\t\\\"http://example.com\\\"\\r\\n\\u0000\\\\\""
128187
# ~~~
129-
redef fun to_json do return to_json_by_append
188+
redef fun to_json do
189+
var b = new FlatBuffer.with_capacity(bytelen)
190+
append_json(b)
191+
return b.to_s
192+
end
130193

131194
# Parse `self` as JSON.
132195
#
@@ -173,6 +236,16 @@ redef class Text
173236
end
174237
end
175238

239+
redef class FlatText
240+
redef fun json_need_escape do
241+
var its = items
242+
for i in [first_byte .. last_byte] do
243+
if its[i] == 0x5Cu8 then return true
244+
end
245+
return false
246+
end
247+
end
248+
176249
redef class Buffer
177250

178251
# Append the JSON representation of `jsonable` to `self`.
@@ -424,51 +497,7 @@ end
424497

425498
redef class Nstring
426499
# The represented string.
427-
private fun to_nit_string: String do
428-
var res = new Buffer
429-
var i = 1
430-
while i < text.length - 1 do
431-
var char = text[i]
432-
if char == '\\' then
433-
i += 1
434-
char = text[i]
435-
if char == 'b' then
436-
char = 0x08.code_point
437-
else if char == 'f' then
438-
char = 0x0C.code_point
439-
else if char == 'n' then
440-
char = '\n'
441-
else if char == 'r' then
442-
char = '\r'
443-
else if char == 't' then
444-
char = '\t'
445-
else if char == 'u' then
446-
var escape = new Buffer
447-
escape.append "\\u"
448-
var code = text.substring(i + 1, 4)
449-
escape.append code
450-
var hx = code.to_hex
451-
if hx >= 0xD800 and hx <= 0xDFFF then
452-
var lostr = text.substring(i + 7, 4)
453-
if lostr.length < 4 then
454-
escape.clear
455-
escape.append "\\uFFFD"
456-
else
457-
escape.append "\\u"
458-
escape.append lostr
459-
end
460-
i += 6
461-
end
462-
i += 4
463-
char = escape.from_utf16_escape
464-
end
465-
# `"`, `/` or `\` => Keep `char` as-is.
466-
end
467-
res.add char
468-
i += 1
469-
end
470-
return res.to_s
471-
end
500+
private fun to_nit_string: String do return text.substring(1, text.length - 2).unescape_json.to_s
472501
end
473502

474503
redef class Nvalue_object

0 commit comments

Comments
 (0)