Skip to content

Commit

Permalink
Escaping non-printable UTF-8 characters
Browse files Browse the repository at this point in the history
  • Loading branch information
GaoYang Cao authored and likema committed Apr 6, 2022
1 parent d6b1a3b commit af60fb1
Showing 1 changed file with 53 additions and 46 deletions.
99 changes: 53 additions & 46 deletions src/yajl_encode.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,64 +64,71 @@ yajl_string_encode(const yajl_print_t print,
case '\b': escaped = "\\b"; escapedLen = 2; break;
case '\t': escaped = "\\t"; escapedLen = 2; break;
default:
switch (c & 0xF8) { /* 11111 000 */
case 0: break;
case 0xC0: /* 110 00 000 */
case 0xC8: /* 110 01 000 */
case 0xD0: /* 110 10 000 */
case 0xD8: /* 110 11 000 */
escaped = hexBuf;
utf16 = (c & 0x1F) << 6;
if ((c = str[++end]) & 0x80) {
utf16 |= c & 0x3F;
} else {
utf16 = str[--end];
}
Utf16ToHex (utf16, hexBuf + 2);
break;
case 0xE0: /* 1110 0 000 */
case 0xE8: /* 1110 1 000 */
escaped = hexBuf;
utf16 = (c & 0x0F) << 12;
if ((c = str[++end]) & 0x80) {
utf16 |= (c & 0x3F) << 6;

if (c < 32) {
escaped = hexBuf;
Utf16ToHex((unsigned short)(c & 0x7F), hexBuf + 2);
} else {
switch (c & 0xF8) { /* 11111 000 */
case 0: break;
case 0xC0: /* 110 00 000 */
case 0xC8: /* 110 01 000 */
case 0xD0: /* 110 10 000 */
case 0xD8: /* 110 11 000 */
escaped = hexBuf;
utf16 = (c & 0x1F) << 6;
if ((c = str[++end]) & 0x80) {
utf16 |= c & 0x3F;
Utf16ToHex (utf16, hexBuf + 2);
break;
} else {
// Fall through for a two-byte UTF-8 sequence whose second byte is not a continuation byte.
utf16 = str[--end];
}
}

Utf16ToHex (str[mark], hexBuf + 2);
end = mark;
break;
case 0xF0: /* 11110 000 */
escaped = hexBuf;
utf16 = (0xD8 << 8) | ((c & 0x07) << 8);
if ((c = str[++end]) & 0x80) {
utf16 |= c << 2;

Utf16ToHex (utf16, hexBuf + 2);
break;
case 0xE0: /* 1110 0 000 */
case 0xE8: /* 1110 1 000 */
escaped = hexBuf;
utf16 = (c & 0x0F) << 12;
if ((c = str[++end]) & 0x80) {
utf16 |= (c & 0x30) >> 4;
utf16 -= 1 << 6;
Utf16ToHex (utf16, hexBuf + 2);

utf16 |= (0xD9 << 8) | (c & 0x0F) << 6;
utf16 |= (c & 0x3F) << 6;

if ((c = str[++end]) & 0x80) {
utf16 |= c & 0x3F;
Utf16ToHex (utf16, hexBuf + 8);
escapedLen = 12;
Utf16ToHex (utf16, hexBuf + 2);
break;
}
}
}

Utf16ToHex (str[mark], hexBuf + 2);
end = mark;
break;
Utf16ToHex (str[mark], hexBuf + 2);
end = mark;
break;
case 0xF0: /* 11110 000 */
escaped = hexBuf;
utf16 = (0xD8 << 8) | ((c & 0x07) << 8);
if ((c = str[++end]) & 0x80) {
utf16 |= c << 2;

if ((c = str[++end]) & 0x80) {
utf16 |= (c & 0x30) >> 4;
utf16 -= 1 << 6;
Utf16ToHex (utf16, hexBuf + 2);

utf16 |= (0xD9 << 8) | (c & 0x0F) << 6;

if ((c = str[++end]) & 0x80) {
utf16 |= c & 0x3F;
Utf16ToHex (utf16, hexBuf + 8);
escapedLen = 12;
break;
}
}
}

Utf16ToHex (str[mark], hexBuf + 2);
end = mark;
break;
}
}

break;
}
if (escaped != NULL) {
Expand Down

0 comments on commit af60fb1

Please sign in to comment.