Skip to content

Commit 0819553

Browse files
LukeShu authored and byroot committed
Delete code that is based on CVTUTF
I did this based on manual inspection, comparing the code to my re-created history of CVTUTF at https://git.lukeshu.com/2git/cvtutf/ (created by the scripts at https://git.lukeshu.com/2git/cvtutf-make/)
1 parent 98ca195 commit 0819553

File tree

3 files changed

+0
-268
lines changed

3 files changed

+0
-268
lines changed

ext/json/ext/generator/generator.c

Lines changed: 0 additions & 232 deletions
Original file line numberDiff line numberDiff line change
@@ -18,54 +18,6 @@ static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
1818
i_aref, i_send, i_respond_to_p, i_match, i_keys, i_depth,
1919
i_buffer_initial_length, i_dup, i_script_safe, i_escape_slash, i_strict;
2020

21-
/*
22-
* Copyright 2001-2004 Unicode, Inc.
23-
*
24-
* Disclaimer
25-
*
26-
* This source code is provided as is by Unicode, Inc. No claims are
27-
* made as to fitness for any particular purpose. No warranties of any
28-
* kind are expressed or implied. The recipient agrees to determine
29-
* applicability of information provided. If this file has been
30-
* purchased on magnetic or optical media from Unicode, Inc., the
31-
* sole remedy for any claim will be exchange of defective media
32-
* within 90 days of receipt.
33-
*
34-
* Limitations on Rights to Redistribute This Code
35-
*
36-
* Unicode, Inc. hereby grants the right to freely use the information
37-
* supplied in this file in the creation of products supporting the
38-
* Unicode Standard, and to make copies of this file in any form
39-
* for internal or external distribution as long as this notice
40-
* remains attached.
41-
*/
42-
43-
/*
44-
* Index into the table below with the first byte of a UTF-8 sequence to
45-
* get the number of trailing bytes that are supposed to follow it.
46-
* Note that *legal* UTF-8 sequences can't have 4 or 5 trailing bytes. The table is
47-
* left as-is for anyone who may want to do such conversion, which was
48-
* allowed in earlier algorithms.
49-
*/
50-
static const char trailingBytesForUTF8[256] = {
51-
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
52-
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
53-
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
54-
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
55-
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
56-
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
57-
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
58-
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
59-
};
60-
61-
/*
62-
* Magic values subtracted from a buffer value during UTF8 conversion.
63-
* This table contains as many values as there might be trailing bytes
64-
* in a UTF-8 sequence.
65-
*/
66-
static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
67-
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
68-
6921
/* Escapes the UTF16 character and stores the result in the buffer buf. */
7022
static void unicode_escape(char *buf, UTF16 character)
7123
{
@@ -94,98 +46,6 @@ static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char scrip
9446
const UTF8 *sourceEnd = source + RSTRING_LEN(string);
9547
char buf[6] = { '\\', 'u' };
9648

97-
int ascii_only = rb_enc_str_asciionly_p(string);
98-
99-
if (!ascii_only) {
100-
if (RB_ENCODING_GET_INLINED(string) != rb_utf8_encindex() || RB_ENC_CODERANGE(string) != RUBY_ENC_CODERANGE_VALID) {
101-
rb_raise(rb_path2class("JSON::GeneratorError"),
102-
"source sequence is illegal/malformed utf-8");
103-
}
104-
}
105-
106-
while (source < sourceEnd) {
107-
UTF32 ch = 0;
108-
unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
109-
/*
110-
* The cases all fall through. See "Note A" below.
111-
*/
112-
switch (extraBytesToRead) {
113-
case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
114-
case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
115-
case 3: ch += *source++; ch <<= 6;
116-
case 2: ch += *source++; ch <<= 6;
117-
case 1: ch += *source++; ch <<= 6;
118-
case 0: ch += *source++;
119-
}
120-
ch -= offsetsFromUTF8[extraBytesToRead];
121-
122-
if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
123-
/* UTF-16 surrogate values are illegal in UTF-32 */
124-
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
125-
#if UNI_STRICT_CONVERSION
126-
source -= (extraBytesToRead+1); /* return to the illegal value itself */
127-
rb_raise(rb_path2class("JSON::GeneratorError"),
128-
"source sequence is illegal/malformed utf-8");
129-
#else
130-
unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
131-
#endif
132-
} else {
133-
/* normal case */
134-
if (ch >= 0x20 && ch <= 0x7f) {
135-
switch (ch) {
136-
case '\\':
137-
fbuffer_append(buffer, "\\\\", 2);
138-
break;
139-
case '"':
140-
fbuffer_append(buffer, "\\\"", 2);
141-
break;
142-
case '/':
143-
if(script_safe) {
144-
fbuffer_append(buffer, "\\/", 2);
145-
break;
146-
}
147-
default:
148-
fbuffer_append_char(buffer, (char)ch);
149-
break;
150-
}
151-
} else {
152-
switch (ch) {
153-
case '\n':
154-
fbuffer_append(buffer, "\\n", 2);
155-
break;
156-
case '\r':
157-
fbuffer_append(buffer, "\\r", 2);
158-
break;
159-
case '\t':
160-
fbuffer_append(buffer, "\\t", 2);
161-
break;
162-
case '\f':
163-
fbuffer_append(buffer, "\\f", 2);
164-
break;
165-
case '\b':
166-
fbuffer_append(buffer, "\\b", 2);
167-
break;
168-
default:
169-
unicode_escape_to_buffer(buffer, buf, (UTF16) ch);
170-
break;
171-
}
172-
}
173-
}
174-
} else if (ch > UNI_MAX_UTF16) {
175-
#if UNI_STRICT_CONVERSION
176-
source -= (extraBytesToRead+1); /* return to the start */
177-
rb_raise(rb_path2class("JSON::GeneratorError"),
178-
"source sequence is illegal/malformed utf8");
179-
#else
180-
unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
181-
#endif
182-
} else {
183-
/* target is a character in range 0xFFFF - 0x10FFFF. */
184-
ch -= halfBase;
185-
unicode_escape_to_buffer(buffer, buf, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START));
186-
unicode_escape_to_buffer(buffer, buf, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START));
187-
}
188-
}
18949
RB_GC_GUARD(string);
19050
}
19151

@@ -202,98 +62,6 @@ static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char script_safe
20262
unsigned char c;
20363
char buf[6] = { '\\', 'u' };
20464
int ascii_only = rb_enc_str_asciionly_p(string);
205-
206-
if (!ascii_only) {
207-
if (RB_ENCODING_GET_INLINED(string) != rb_utf8_encindex() || RB_ENC_CODERANGE(string) != RUBY_ENC_CODERANGE_VALID) {
208-
rb_raise(rb_path2class("JSON::GeneratorError"),
209-
"source sequence is illegal/malformed utf-8");
210-
}
211-
}
212-
213-
for (start = 0, end = 0; end < len;) {
214-
p = ptr + end;
215-
c = (unsigned char) *p;
216-
if (c < 0x20) {
217-
switch (c) {
218-
case '\n':
219-
escape = "\\n";
220-
escape_len = 2;
221-
break;
222-
case '\r':
223-
escape = "\\r";
224-
escape_len = 2;
225-
break;
226-
case '\t':
227-
escape = "\\t";
228-
escape_len = 2;
229-
break;
230-
case '\f':
231-
escape = "\\f";
232-
escape_len = 2;
233-
break;
234-
case '\b':
235-
escape = "\\b";
236-
escape_len = 2;
237-
break;
238-
default:
239-
unicode_escape(buf, (UTF16) *p);
240-
escape = buf;
241-
escape_len = 6;
242-
break;
243-
}
244-
} else {
245-
switch (c) {
246-
case '\\':
247-
escape = "\\\\";
248-
escape_len = 2;
249-
break;
250-
case '"':
251-
escape = "\\\"";
252-
escape_len = 2;
253-
break;
254-
case '/':
255-
if(script_safe) {
256-
escape = "\\/";
257-
escape_len = 2;
258-
break;
259-
}
260-
default:
261-
{
262-
unsigned short clen = 1;
263-
if (!ascii_only) {
264-
clen += trailingBytesForUTF8[c];
265-
if (end + clen > len) {
266-
rb_raise(rb_path2class("JSON::GeneratorError"),
267-
"partial character in source, but hit end");
268-
}
269-
270-
if (script_safe && c == 0xE2) {
271-
unsigned char c2 = (unsigned char) *(p+1);
272-
unsigned char c3 = (unsigned char) *(p+2);
273-
if (c2 == 0x80 && (c3 == 0xA8 || c3 == 0xA9)) {
274-
fbuffer_append(buffer, ptr + start, end - start);
275-
start = end = (end + clen);
276-
if (c3 == 0xA8) {
277-
fbuffer_append(buffer, "\\u2028", 6);
278-
} else {
279-
fbuffer_append(buffer, "\\u2029", 6);
280-
}
281-
continue;
282-
}
283-
}
284-
}
285-
end += clen;
286-
}
287-
continue;
288-
break;
289-
}
290-
}
291-
fbuffer_append(buffer, ptr + start, end - start);
292-
fbuffer_append(buffer, escape, escape_len);
293-
start = ++end;
294-
escape = NULL;
295-
}
296-
fbuffer_append(buffer, ptr + start, end - start);
29765
}
29866

29967
static char *fstrndup(const char *ptr, unsigned long len) {

ext/json/ext/generator/generator.h

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -22,30 +22,6 @@
2222

2323
#define option_given_p(opts, key) RTEST(rb_funcall(opts, i_key_p, 1, key))
2424

25-
/* unicode definitions */
26-
27-
#define UNI_STRICT_CONVERSION 1
28-
29-
typedef unsigned long UTF32; /* at least 32 bits */
30-
typedef unsigned short UTF16; /* at least 16 bits */
31-
typedef unsigned char UTF8; /* typically 8 bits */
32-
33-
#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
34-
#define UNI_MAX_BMP (UTF32)0x0000FFFF
35-
#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
36-
#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
37-
#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
38-
39-
#define UNI_SUR_HIGH_START (UTF32)0xD800
40-
#define UNI_SUR_HIGH_END (UTF32)0xDBFF
41-
#define UNI_SUR_LOW_START (UTF32)0xDC00
42-
#define UNI_SUR_LOW_END (UTF32)0xDFFF
43-
44-
static const int halfShift = 10; /* used for shifting by 10 bits */
45-
46-
static const UTF32 halfBase = 0x0010000UL;
47-
static const UTF32 halfMask = 0x3FFUL;
48-
4925
static void unicode_escape(char *buf, UTF16 character);
5026
static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character);
5127
static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char script_safe);

ext/json/ext/parser/parser.h

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,6 @@
1919

2020
#define option_given_p(opts, key) RTEST(rb_funcall(opts, i_key_p, 1, key))
2121

22-
/* unicode */
23-
24-
typedef unsigned long UTF32; /* at least 32 bits */
25-
typedef unsigned short UTF16; /* at least 16 bits */
26-
typedef unsigned char UTF8; /* typically 8 bits */
27-
28-
#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
29-
#define UNI_SUR_HIGH_START (UTF32)0xD800
30-
#define UNI_SUR_HIGH_END (UTF32)0xDBFF
31-
#define UNI_SUR_LOW_START (UTF32)0xDC00
32-
#define UNI_SUR_LOW_END (UTF32)0xDFFF
33-
3422
typedef struct JSON_ParserStruct {
3523
VALUE Vsource;
3624
char *source;

0 commit comments

Comments
 (0)