Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

[GH #845] Fix data_json \unnnn encoding

A \unnnn escape needs to be encoded as a utf16 character, not utf8, because
the two encodings differ at the byte level. E.g. \u00a2 is 0xc2a2 in utf8.

Add utf16_chr vtable method for the Parrot_utf16_encoding.
Add unicode tests to data_json.
  • Loading branch information...
commit f50380b17eba4c85080e108d02f3e7421c84676b 1 parent 988986b
@rurban rurban authored
View
3  compilers/data_json/data_json/pge2pir.tg
@@ -183,7 +183,10 @@ transform pir (string) {
$S1 = substr $S0, 2, 4
$P1 = $S1
$I0 = $P1.'to_int'(16)
+ # encode to utf16 not utf8. GH #845
$S0 = chr $I0
+ $I0 = find_encoding 'utf16'
+ $S0 = trans_encoding $S0, $I0
goto char
escape:
$P0 = get_root_global ['parrot'; 'data_json'], '$escapes'
View
4 src/ops/string.ops
@@ -59,6 +59,10 @@ inline op ord(out INT, in STR, in INT) {
The character specified by codepoint integer $2 is returned in string $1.
+For characters > 0xff a utf8 encoded string is returned,
+for characters between 0x7f and 0xff a latin1 encoded string is returned,
+and for characters below 0x7f an ascii encoded string is returned.
+
=cut
inline op chr(out STR, in INT) {
View
41 src/string/encoding/utf16.c
@@ -1,5 +1,5 @@
/*
-Copyright (C) 2001-2010, Parrot Foundation.
+Copyright (C) 2001-2012, Parrot Foundation.
=head1 NAME
@@ -80,6 +80,11 @@ static UINTVAL utf16_ord(PARROT_INTERP,
__attribute__nonnull__(1)
__attribute__nonnull__(2);
+PARROT_CANNOT_RETURN_NULL
+static STRING *
+utf16_chr(PARROT_INTERP, UINTVAL codepoint)
+ __attribute__nonnull__(1);
+
static INTVAL utf16_partial_scan(PARROT_INTERP,
ARGIN(const char *buf),
ARGMOD(Parrot_String_Bounds *bounds))
@@ -136,6 +141,9 @@ static STRING * utf16_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
#define ASSERT_ARGS_utf16_ord __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
+/* Only interp is checked: codepoint is a UINTVAL, not a pointer, and 0 is a valid codepoint. */
+#define ASSERT_ARGS_utf16_chr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp))
#define ASSERT_ARGS_utf16_partial_scan __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(buf) \
@@ -462,6 +470,35 @@ utf16_ord(PARROT_INTERP, ARGIN(const STRING *src), INTVAL idx)
return utf16_decode(interp, start);
}
+/*
+
+=item C<STRING * utf16_chr(PARROT_INTERP, UINTVAL codepoint)>
+
+Returns a new string in the utf16 encoding that holds the single codepoint
+C<codepoint>.
+
+The string is created with a 4-byte buffer, the codepoint is written into it
+through a string iterator, and C<bufused> is then set to the iterator's byte
+position after that write. C<strlen> is always 1.
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+static STRING *
+utf16_chr(PARROT_INTERP, UINTVAL codepoint)
+{
+ ASSERT_ARGS(utf16_chr)
+ String_iter iter;
+ STRING * const dest = Parrot_str_new_init(interp, NULL, 4,
+ Parrot_utf16_encoding_ptr, 0); /* no initial contents (NULL), 4-byte buffer */
+
+ dest->bufused = 4; /* provisional: claim the whole buffer; corrected after the write */
+ dest->strlen = 1; /* exactly one codepoint */
+
+ STRING_ITER_INIT(interp, &iter);
+ STRING_iter_set_and_advance(interp, dest, &iter, codepoint); /* presumably encodes codepoint at the iterator position and advances it -- confirm in the iterator code */
+ dest->bufused = iter.bytepos; /* trim to the iterator's byte position after the write */
+
+ return dest;
+}
+
/*
@@ -590,7 +627,7 @@ static STR_VTABLE Parrot_utf16_encoding = {
4, /* Max bytes per codepoint */
utf16_to_encoding,
- unicode_chr,
+ utf16_chr,
encoding_equal,
encoding_compare,
View
17 t/compilers/data_json/from_parrot.t
@@ -1,5 +1,5 @@
#!./parrot
-# Copyright (C) 2001-2010, Parrot Foundation.
+# Copyright (C) 2001-2012, Parrot Foundation.
=head1 NAME
@@ -13,11 +13,13 @@ t/compilers/data_json/from_parrot.t - test parrot to JSON conversion.
Tests Parrot->JSON conversions.
+Note: This uses the old JSON.pbc not the new data_json compiler.
+
=cut
.sub main :main
.include 'test_more.pir'
- plan(39)
+ plan(40)
load_bytecode 'JSON.pbc'
test_create_json_of_an_empty_string()
@@ -356,7 +358,7 @@ OUTPUT
.end
-# no. 24..27
+# no. 24..28
.sub test_create_json_of_string_pmcs
.local pmc s
@@ -373,10 +375,13 @@ OUTPUT
is($S0, '"12345\"67890"', 'Create JSON of String PMCs')
$S0 = _json( s, 1 )
is($S0, "\"12345\\\"67890\"\n", 'Create JSON of String PMCs')
+ s = utf16:"\x{0}\u203e Pl\x{e4}ne"
+ $S0 = _json( s, 0 )
+ is($S0, '"\x{0}\u203e Pl\x{e4}ne"', 'Create JSON of String PMCs')
.end
-# no. 28..31
+# no. 29..32
.sub test_create_json_of_integer_pmcs
.local pmc i
@@ -396,7 +401,7 @@ OUTPUT
.end
-# no. 32..35
+# no. 33..36
.sub test_create_json_of_boolean_pmcs
.local pmc b
@@ -416,7 +421,7 @@ OUTPUT
.end
-# no. 36..39
+# no. 37..40
.sub test_create_json_of_null_and_undef
.local pmc n
null n
View
21 t/compilers/data_json/to_parrot.t
@@ -1,12 +1,12 @@
#!perl
-# Copyright (C) 2001-2008, Parrot Foundation.
+# Copyright (C) 2001-2012, Parrot Foundation.
use strict;
use warnings;
use lib qw( t . lib ../lib ../../lib );
use Test::More;
-use Parrot::Test tests => 60;
+use Parrot::Test tests => 61;
=head1 NAME
@@ -20,6 +20,8 @@ t/compilers/data_json/to_parrot.t - test JSON to parrot conversions
Tests JSON->Parrot conversions.
+Note: This uses the new data_json compiler.
+
=cut
json_dump_is( <<'JSON', <<'OUT', 'empty string' );
@@ -677,6 +679,21 @@ JSON
]
OUT
+json_dump_is( <<'JSON', <<'OUT', 'unicode chars' );
+["\u0000","\u00e4","\u007f","\u0080","\u0100","\u203e","Pl\u00e4ne"]
+JSON
+"JSON" => ResizablePMCArray (size:7) [
+ "\x{0}",
+ "\x{e4}",
+ "\x{7f}",
+ "\x{80}",
+ "\u0100",
+ "\u203e",
+ "Pl\x{e4}ne"
+]
+OUT
+
+
# GH #570 Need many more tests, exercising all aspects of http://www.json.org/
sub json_dump_is {
Please sign in to comment.
Something went wrong with that request. Please try again.