@@ -18,54 +18,6 @@ static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
18
18
i_aref , i_send , i_respond_to_p , i_match , i_keys , i_depth ,
19
19
i_buffer_initial_length , i_dup , i_script_safe , i_escape_slash , i_strict ;
20
20
21
- /*
22
- * Copyright 2001-2004 Unicode, Inc.
23
- *
24
- * Disclaimer
25
- *
26
- * This source code is provided as is by Unicode, Inc. No claims are
27
- * made as to fitness for any particular purpose. No warranties of any
28
- * kind are expressed or implied. The recipient agrees to determine
29
- * applicability of information provided. If this file has been
30
- * purchased on magnetic or optical media from Unicode, Inc., the
31
- * sole remedy for any claim will be exchange of defective media
32
- * within 90 days of receipt.
33
- *
34
- * Limitations on Rights to Redistribute This Code
35
- *
36
- * Unicode, Inc. hereby grants the right to freely use the information
37
- * supplied in this file in the creation of products supporting the
38
- * Unicode Standard, and to make copies of this file in any form
39
- * for internal or external distribution as long as this notice
40
- * remains attached.
41
- */
42
-
43
- /*
44
- * Index into the table below with the first byte of a UTF-8 sequence to
45
- * get the number of trailing bytes that are supposed to follow it.
46
- * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
47
- * left as-is for anyone who may want to do such conversion, which was
48
- * allowed in earlier algorithms.
49
- */
50
- static const char trailingBytesForUTF8 [256 ] = {
51
- 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,
52
- 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,
53
- 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,
54
- 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,
55
- 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,
56
- 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,
57
- 1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 , 1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,
58
- 2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 , 3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,4 ,4 ,4 ,4 ,5 ,5 ,5 ,5
59
- };
60
-
61
- /*
62
- * Magic values subtracted from a buffer value during UTF8 conversion.
63
- * This table contains as many values as there might be trailing bytes
64
- * in a UTF-8 sequence.
65
- */
66
- static const UTF32 offsetsFromUTF8 [6 ] = { 0x00000000UL , 0x00003080UL , 0x000E2080UL ,
67
- 0x03C82080UL , 0xFA082080UL , 0x82082080UL };
68
-
69
21
/* Escapes the UTF16 character and stores the result in the buffer buf. */
70
22
static void unicode_escape (char * buf , UTF16 character )
71
23
{
@@ -94,98 +46,6 @@ static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char scrip
94
46
const UTF8 * sourceEnd = source + RSTRING_LEN (string );
95
47
char buf [6 ] = { '\\' , 'u' };
96
48
97
- int ascii_only = rb_enc_str_asciionly_p (string );
98
-
99
- if (!ascii_only ) {
100
- if (RB_ENCODING_GET_INLINED (string ) != rb_utf8_encindex () || RB_ENC_CODERANGE (string ) != RUBY_ENC_CODERANGE_VALID ) {
101
- rb_raise (rb_path2class ("JSON::GeneratorError" ),
102
- "source sequence is illegal/malformed utf-8" );
103
- }
104
- }
105
-
106
- while (source < sourceEnd ) {
107
- UTF32 ch = 0 ;
108
- unsigned short extraBytesToRead = trailingBytesForUTF8 [* source ];
109
- /*
110
- * The cases all fall through. See "Note A" below.
111
- */
112
- switch (extraBytesToRead ) {
113
- case 5 : ch += * source ++ ; ch <<= 6 ; /* remember, illegal UTF-8 */
114
- case 4 : ch += * source ++ ; ch <<= 6 ; /* remember, illegal UTF-8 */
115
- case 3 : ch += * source ++ ; ch <<= 6 ;
116
- case 2 : ch += * source ++ ; ch <<= 6 ;
117
- case 1 : ch += * source ++ ; ch <<= 6 ;
118
- case 0 : ch += * source ++ ;
119
- }
120
- ch -= offsetsFromUTF8 [extraBytesToRead ];
121
-
122
- if (ch <= UNI_MAX_BMP ) { /* Target is a character <= 0xFFFF */
123
- /* UTF-16 surrogate values are illegal in UTF-32 */
124
- if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END ) {
125
- #if UNI_STRICT_CONVERSION
126
- source -= (extraBytesToRead + 1 ); /* return to the illegal value itself */
127
- rb_raise (rb_path2class ("JSON::GeneratorError" ),
128
- "source sequence is illegal/malformed utf-8" );
129
- #else
130
- unicode_escape_to_buffer (buffer , buf , UNI_REPLACEMENT_CHAR );
131
- #endif
132
- } else {
133
- /* normal case */
134
- if (ch >= 0x20 && ch <= 0x7f ) {
135
- switch (ch ) {
136
- case '\\' :
137
- fbuffer_append (buffer , "\\\\" , 2 );
138
- break ;
139
- case '"' :
140
- fbuffer_append (buffer , "\\\"" , 2 );
141
- break ;
142
- case '/' :
143
- if (script_safe ) {
144
- fbuffer_append (buffer , "\\/" , 2 );
145
- break ;
146
- }
147
- default :
148
- fbuffer_append_char (buffer , (char )ch );
149
- break ;
150
- }
151
- } else {
152
- switch (ch ) {
153
- case '\n' :
154
- fbuffer_append (buffer , "\\n" , 2 );
155
- break ;
156
- case '\r' :
157
- fbuffer_append (buffer , "\\r" , 2 );
158
- break ;
159
- case '\t' :
160
- fbuffer_append (buffer , "\\t" , 2 );
161
- break ;
162
- case '\f' :
163
- fbuffer_append (buffer , "\\f" , 2 );
164
- break ;
165
- case '\b' :
166
- fbuffer_append (buffer , "\\b" , 2 );
167
- break ;
168
- default :
169
- unicode_escape_to_buffer (buffer , buf , (UTF16 ) ch );
170
- break ;
171
- }
172
- }
173
- }
174
- } else if (ch > UNI_MAX_UTF16 ) {
175
- #if UNI_STRICT_CONVERSION
176
- source -= (extraBytesToRead + 1 ); /* return to the start */
177
- rb_raise (rb_path2class ("JSON::GeneratorError" ),
178
- "source sequence is illegal/malformed utf8" );
179
- #else
180
- unicode_escape_to_buffer (buffer , buf , UNI_REPLACEMENT_CHAR );
181
- #endif
182
- } else {
183
- /* target is a character in range 0xFFFF - 0x10FFFF. */
184
- ch -= halfBase ;
185
- unicode_escape_to_buffer (buffer , buf , (UTF16 )((ch >> halfShift ) + UNI_SUR_HIGH_START ));
186
- unicode_escape_to_buffer (buffer , buf , (UTF16 )((ch & halfMask ) + UNI_SUR_LOW_START ));
187
- }
188
- }
189
49
RB_GC_GUARD (string );
190
50
}
191
51
@@ -202,98 +62,6 @@ static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char script_safe
202
62
unsigned char c ;
203
63
char buf [6 ] = { '\\' , 'u' };
204
64
int ascii_only = rb_enc_str_asciionly_p (string );
205
-
206
- if (!ascii_only ) {
207
- if (RB_ENCODING_GET_INLINED (string ) != rb_utf8_encindex () || RB_ENC_CODERANGE (string ) != RUBY_ENC_CODERANGE_VALID ) {
208
- rb_raise (rb_path2class ("JSON::GeneratorError" ),
209
- "source sequence is illegal/malformed utf-8" );
210
- }
211
- }
212
-
213
- for (start = 0 , end = 0 ; end < len ;) {
214
- p = ptr + end ;
215
- c = (unsigned char ) * p ;
216
- if (c < 0x20 ) {
217
- switch (c ) {
218
- case '\n' :
219
- escape = "\\n" ;
220
- escape_len = 2 ;
221
- break ;
222
- case '\r' :
223
- escape = "\\r" ;
224
- escape_len = 2 ;
225
- break ;
226
- case '\t' :
227
- escape = "\\t" ;
228
- escape_len = 2 ;
229
- break ;
230
- case '\f' :
231
- escape = "\\f" ;
232
- escape_len = 2 ;
233
- break ;
234
- case '\b' :
235
- escape = "\\b" ;
236
- escape_len = 2 ;
237
- break ;
238
- default :
239
- unicode_escape (buf , (UTF16 ) * p );
240
- escape = buf ;
241
- escape_len = 6 ;
242
- break ;
243
- }
244
- } else {
245
- switch (c ) {
246
- case '\\' :
247
- escape = "\\\\" ;
248
- escape_len = 2 ;
249
- break ;
250
- case '"' :
251
- escape = "\\\"" ;
252
- escape_len = 2 ;
253
- break ;
254
- case '/' :
255
- if (script_safe ) {
256
- escape = "\\/" ;
257
- escape_len = 2 ;
258
- break ;
259
- }
260
- default :
261
- {
262
- unsigned short clen = 1 ;
263
- if (!ascii_only ) {
264
- clen += trailingBytesForUTF8 [c ];
265
- if (end + clen > len ) {
266
- rb_raise (rb_path2class ("JSON::GeneratorError" ),
267
- "partial character in source, but hit end" );
268
- }
269
-
270
- if (script_safe && c == 0xE2 ) {
271
- unsigned char c2 = (unsigned char ) * (p + 1 );
272
- unsigned char c3 = (unsigned char ) * (p + 2 );
273
- if (c2 == 0x80 && (c3 == 0xA8 || c3 == 0xA9 )) {
274
- fbuffer_append (buffer , ptr + start , end - start );
275
- start = end = (end + clen );
276
- if (c3 == 0xA8 ) {
277
- fbuffer_append (buffer , "\\u2028" , 6 );
278
- } else {
279
- fbuffer_append (buffer , "\\u2029" , 6 );
280
- }
281
- continue ;
282
- }
283
- }
284
- }
285
- end += clen ;
286
- }
287
- continue ;
288
- break ;
289
- }
290
- }
291
- fbuffer_append (buffer , ptr + start , end - start );
292
- fbuffer_append (buffer , escape , escape_len );
293
- start = ++ end ;
294
- escape = NULL ;
295
- }
296
- fbuffer_append (buffer , ptr + start , end - start );
297
65
}
298
66
299
67
static char * fstrndup (const char * ptr , unsigned long len ) {
0 commit comments