@@ -25,96 +25,77 @@ static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend;
25
25
* Everything else (should be UTF-8) is just passed through and
26
26
* appended to the result.
27
27
*/
28
- static void convert_UTF8_to_JSON (FBuffer * out_buffer , VALUE in_string , bool out_script_safe )
28
+ static void convert_UTF8_to_JSON (FBuffer * out_buffer , VALUE str , const bool escape_table [ 256 ] , bool out_script_safe )
29
29
{
30
30
const char * hexdig = "0123456789abcdef" ;
31
31
char scratch [12 ] = { '\\' , 'u' , 0 , 0 , 0 , 0 , '\\' , 'u' };
32
32
33
- const char * in_utf8_str = RSTRING_PTR (in_string );
34
- unsigned long in_utf8_len = RSTRING_LEN (in_string );
33
+ const char * ptr = RSTRING_PTR (str );
34
+ unsigned long len = RSTRING_LEN (str );
35
35
36
36
unsigned long beg = 0 , pos ;
37
37
38
- for (pos = 0 ; pos < in_utf8_len ;) {
39
- uint32_t ch ;
40
- short ch_len ;
41
- bool should_escape ;
42
-
43
- /* UTF-8 decoding */
44
- short i ;
45
- if ((in_utf8_str [pos ] & 0x80 ) == 0x00 ) { ch_len = 1 ; ch = in_utf8_str [pos ]; } /* leading 1 bit is 0b0 */
46
- else if ((in_utf8_str [pos ] & 0xE0 ) == 0xC0 ) { ch_len = 2 ; ch = in_utf8_str [pos ] & 0x1F ; } /* leading 3 bits are 0b110 */
47
- else if ((in_utf8_str [pos ] & 0xF0 ) == 0xE0 ) { ch_len = 3 ; ch = in_utf8_str [pos ] & 0x0F ; } /* leading 4 bits are 0b1110 */
48
- else if ((in_utf8_str [pos ] & 0xF8 ) == 0xF0 ) { ch_len = 4 ; ch = in_utf8_str [pos ] & 0x07 ; } /* leading 5 bits are 0b11110 */
49
- else {
50
- rb_raise (rb_path2class ("JSON::GeneratorError" ), "source sequence is illegal/malformed utf-8" );
51
- }
52
-
53
- for (i = 1 ; i < ch_len ; i ++ ) {
54
- ch = (ch <<6 ) | (in_utf8_str [pos + i ] & 0x3F );
55
- }
56
-
57
- /* JSON policy */
58
- should_escape =
59
- (ch < 0x20 ) ||
60
- (ch == '"' ) ||
61
- (ch == '\\' ) ||
62
- (out_script_safe && (ch == '/' )) ||
63
- (out_script_safe && (ch == 0x2028 )) ||
64
- (out_script_safe && (ch == 0x2029 ));
65
-
38
+ for (pos = 0 ; pos < len ;) {
39
+ unsigned char ch = ptr [pos ];
66
40
/* JSON encoding */
67
- if (should_escape ) {
68
- if (pos > beg ) {
69
- fbuffer_append (out_buffer , & in_utf8_str [beg ], pos - beg );
70
- }
71
-
72
- beg = pos + ch_len ;
41
+ if (escape_table [ch ]) {
42
+ #define FLUSH_POS (bytes ) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos;
73
43
switch (ch ) {
74
- case '"' : fbuffer_append (out_buffer , "\\\"" , 2 ); break ;
75
- case '\\' : fbuffer_append (out_buffer , "\\\\" , 2 ); break ;
76
- case '/' : fbuffer_append (out_buffer , "\\/" , 2 ); break ;
77
- case '\b' : fbuffer_append (out_buffer , "\\b" , 2 ); break ;
78
- case '\f' : fbuffer_append (out_buffer , "\\f" , 2 ); break ;
79
- case '\n' : fbuffer_append (out_buffer , "\\n" , 2 ); break ;
80
- case '\r' : fbuffer_append (out_buffer , "\\r" , 2 ); break ;
81
- case '\t' : fbuffer_append (out_buffer , "\\t" , 2 ); break ;
82
- default :
83
- if (ch <= 0xFFFF ) {
44
+ case '"' : FLUSH_POS (1 ); fbuffer_append (out_buffer , "\\\"" , 2 ); break ;
45
+ case '\\' : FLUSH_POS (1 ); fbuffer_append (out_buffer , "\\\\" , 2 ); break ;
46
+ case '/' : FLUSH_POS (1 ); fbuffer_append (out_buffer , "\\/" , 2 ); break ;
47
+ case '\b' : FLUSH_POS (1 ); fbuffer_append (out_buffer , "\\b" , 2 ); break ;
48
+ case '\f' : FLUSH_POS (1 ); fbuffer_append (out_buffer , "\\f" , 2 ); break ;
49
+ case '\n' : FLUSH_POS (1 ); fbuffer_append (out_buffer , "\\n" , 2 ); break ;
50
+ case '\r' : FLUSH_POS (1 ); fbuffer_append (out_buffer , "\\r" , 2 ); break ;
51
+ case '\t' : FLUSH_POS (1 ); fbuffer_append (out_buffer , "\\t" , 2 ); break ;
52
+ default : {
53
+ if ((ch & 0x80 ) == 0x00 ) { /* leading 1 bit is 0b0 */
54
+ FLUSH_POS (1 );
84
55
scratch [2 ] = hexdig [ch >> 12 ];
85
56
scratch [3 ] = hexdig [(ch >> 8 ) & 0xf ];
86
57
scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
87
58
scratch [5 ] = hexdig [ch & 0xf ];
88
59
fbuffer_append (out_buffer , scratch , 6 );
60
+ } else if ((ch & 0xE0 ) == 0xC0 ) { /* leading 3 bits are 0b110 */
61
+ pos += 2 ;
62
+ } else if ((ch & 0xF0 ) == 0xE0 ) { /* leading 4 bits are 0b1110 */
63
+ unsigned char b2 = ptr [pos + 1 ];
64
+ unsigned char b3 = ptr [pos + 2 ];
65
+ if (out_script_safe && (b2 == 0x80 )) {
66
+ if (b3 == 0xA8 ) {
67
+ FLUSH_POS (3 );
68
+ fprintf (stderr , "escape: \\u2028 pos = %ld\n" , pos );
69
+ fbuffer_append (out_buffer , "\\u2028" , 6 );
70
+ } else if (b3 == 0xA9 ) {
71
+ FLUSH_POS (3 );
72
+ fprintf (stderr , "escape: \\u2029 pos = %ld\n" , pos );
73
+ fbuffer_append (out_buffer , "\\u2029" , 6 );
74
+ } else {
75
+ pos += 3 ;
76
+ }
77
+ } else {
78
+ pos += 3 ;
79
+ }
80
+ } else if ((ch & 0xF8 ) == 0xF0 ) { /* leading 5 bits are 0b11110 */
81
+ pos += 4 ;
89
82
} else {
90
- uint16_t hi , lo ;
91
- ch -= 0x10000 ;
92
- hi = 0xD800 + (uint16_t )(ch >> 10 );
93
- lo = 0xDC00 + (uint16_t )(ch & 0x3FF );
94
-
95
- scratch [2 ] = hexdig [hi >> 12 ];
96
- scratch [3 ] = hexdig [(hi >> 8 ) & 0xf ];
97
- scratch [4 ] = hexdig [(hi >> 4 ) & 0xf ];
98
- scratch [5 ] = hexdig [hi & 0xf ];
99
-
100
- scratch [8 ] = hexdig [lo >> 12 ];
101
- scratch [9 ] = hexdig [(lo >> 8 ) & 0xf ];
102
- scratch [10 ] = hexdig [(lo >> 4 ) & 0xf ];
103
- scratch [11 ] = hexdig [lo & 0xf ];
104
-
105
- fbuffer_append (out_buffer , scratch , 12 );
83
+ // This should be unreachable
84
+ rb_raise (rb_path2class ("JSON::GeneratorError" ), "source sequence is illegal/malformed utf-8" );
106
85
}
86
+ }
107
87
}
88
+ } else {
89
+ pos ++ ;
108
90
}
109
-
110
- pos += ch_len ;
111
91
}
92
+ #undef FLUSH_POS
112
93
113
- if (beg < in_utf8_len ) {
114
- fbuffer_append (out_buffer , & in_utf8_str [beg ], in_utf8_len - beg );
94
+ if (beg < len ) {
95
+ fbuffer_append (out_buffer , & ptr [beg ], len - beg );
115
96
}
116
97
117
- RB_GC_GUARD (in_string );
98
+ RB_GC_GUARD (str );
118
99
}
119
100
120
101
static const bool escape_table [256 ] = {
@@ -736,7 +717,7 @@ static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_S
736
717
if (RB_UNLIKELY (state -> ascii_only )) {
737
718
convert_UTF8_to_ASCII_only_JSON (buffer , obj , state -> script_safe );
738
719
} else {
739
- convert_UTF8_to_JSON (buffer , obj , state -> script_safe );
720
+ convert_UTF8_to_JSON (buffer , obj , state -> script_safe ? script_safe_escape_table : escape_table , state -> script_safe );
740
721
}
741
722
break ;
742
723
default :
0 commit comments