Permalink
Browse files

MFH Fix bug #46944 - UTF-8 characters outside the BMP aren't encoded correctly.
  • Loading branch information...
1 parent 8d62f3d commit 0bdbc4e356d81fd2fd7e1f2391e24e76c2f3b8f6 Scott MacVicar committed Jan 2, 2009
Showing with 34 additions and 2 deletions.
  1. +32 −0 ext/json/tests/bug46944.phpt
  2. +1 −1 ext/json/utf8_decode.c
  3. +1 −1 ext/json/utf8_to_utf16.c
@@ -0,0 +1,32 @@
+--TEST--
+Bug #46944 (json_encode() doesn't handle 4 byte utf8 correctly)
+--SKIPIF--
+<?php if (!extension_loaded('json')) print 'skip'; ?>
+--FILE--
+<?php
+// Build 4-byte UTF-8 sequences (lead byte 0xF0..0xF4); chr() turns each computed value into a raw byte.
+for ($i = 1; $i <= 16; $i++) {
+ echo json_encode(b"aa" . chr(0xf0|($i >> 2)) . chr(0x8f|($i & 3) << 4) . "\xbf\xbdzz") . "\n";
+}
+// Each encoded line is expected to contain a UTF-16 surrogate pair ending in \udffd.

+echo "Done\n";
+?>
+--EXPECT--
+"aa\ud83f\udffdzz"
+"aa\ud87f\udffdzz"
+"aa\ud8bf\udffdzz"
+"aa\ud8ff\udffdzz"
+"aa\ud93f\udffdzz"
+"aa\ud97f\udffdzz"
+"aa\ud9bf\udffdzz"
+"aa\ud9ff\udffdzz"
+"aa\uda3f\udffdzz"
+"aa\uda7f\udffdzz"
+"aa\udabf\udffdzz"
+"aa\udaff\udffdzz"
+"aa\udb3f\udffdzz"
+"aa\udb7f\udffdzz"
+"aa\udbbf\udffdzz"
+"aa\udbff\udffdzz"
+Done
View
@@ -165,7 +165,7 @@ utf8_decode_next(json_utf8_decode *utf8)
/*
Three continuation (65536 to 1114111)
*/
- if ((c & 0xF1) == 0xF0) {
+ if ((c & 0xF8) == 0xF0) {
int c1 = cont(utf8);
int c2 = cont(utf8);
int c3 = cont(utf8);
View
@@ -46,7 +46,7 @@ utf8_to_utf16(unsigned short w[], char p[], int length)
w[the_index] = (unsigned short)c;
the_index += 1;
} else {
- c &= 0xFFFF;
+ c -= 0x10000;
w[the_index] = (unsigned short)(0xD800 | (c >> 10));
the_index += 1;
w[the_index] = (unsigned short)(0xDC00 | (c & 0x3FF));

0 comments on commit 0bdbc4e

Please sign in to comment.