Skip to content
Browse files

Merge branch 'gh837_concat'

Fixes [GH #837], an imcc optimizer problem: SREGs and constants did
not hold a STRING*, so no reliable encoding information was available.
  • Loading branch information...
2 parents e923aa3 + 9c81593 commit 81f3b68f6201364a982f4b917b2ddbf56534c011 @rurban rurban committed Sep 21, 2012
Showing with 131 additions and 10 deletions.
  1. +3 −0 ChangeLog
  2. +9 −7 compilers/imcc/optimizer.c
  3. +47 −0 examples/benchmarks/stress_strings1.pir
  4. +48 −0 examples/benchmarks/stress_stringsu.pir
  5. +1 −1 src/string/api.c
  6. +23 −2 t/op/stringu.t
View
3 ChangeLog
@@ -1,5 +1,8 @@
2012-10-16 release 4.9.0
- Core
+ + Keep encoding information in the imcc optimizer. This fixes the
+ concat op and probably many more cases involving non-ascii encoded
+ constant strings. [GH #837]
- Documentation
- Tests
- Community
View
16 compilers/imcc/optimizer.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2002-2010, Parrot Foundation.
+ * Copyright (C) 2002-2012, Parrot Foundation.
*/
/*
@@ -1070,13 +1070,15 @@ IMCC_subst_constants(ARGMOD(imc_info_t *imcc), ARGMOD(IMC_Unit *unit),
case 'S':
{
char * const cstr = Parrot_str_to_cstring(imcc->interp, REG_STR(imcc->interp, 0));
-
- r[1] = mk_const(imcc, cstr, r[0]->set);
-
- snprintf(b, sizeof (b), "%p", REG_STR(imcc->interp, 0));
+ const STR_VTABLE* encoding = REG_STR(imcc->interp, 0)->encoding;
+ if (encoding == Parrot_ascii_encoding_ptr) {
+ r[1] = mk_const(imcc, cstr, r[0]->set);
+ }
+ else {
+ snprintf(b, sizeof (b), "%s:\"%s\"", encoding->name, cstr);
+ r[1] = mk_const(imcc, b, 'U');
+ }
Parrot_str_free_cstring(cstr);
-
- break;
}
default:
break;
View
47 examples/benchmarks/stress_strings1.pir
@@ -0,0 +1,47 @@
+# Copyright (C) 2010-2012, Parrot Foundation.
+
+=head1 NAME
+
+examples/benchmarks/stress_strings1.pir - comparison with stress_stringsu.pir
+
+=head1 SYNOPSIS
+
+ % time ./parrot examples/benchmarks/stress_strings1.pir
+ % time ./parrot examples/benchmarks/stress_stringsu.pir
+
+=head1 DESCRIPTION
+
+Create non-encoded strings, running through the imcc optimizer.
+Some of the strings are long-lived, most of them are short lived.
+
+=cut
+
+.sub 'main' :main # benchmark entry point: builds "c" . i for each i and keeps every 10th result
+ .local pmc rsa # array of long-lived strings.
+ .local pmc args # argument array handed to sprintf
+ .local int i # loop counter
+
+ rsa = new ['ResizableStringArray']
+ args = new ['ResizablePMCArray']
+ i = 0
+ push args, i # create slot 0, which is overwritten on every iteration
+ loop:
+ $S0 = "c" # string constant without an encoding prefix
+ args[0] = i # store the current counter for sprintf
+ sprintf $S1, "%d", args # $S1 = decimal text of i
+ $S2 = concat $S0, $S1 # $S2 = "c" followed by the number
+ $I0 = i % 10 # every 10th string is long-lived
+ if $I0 goto inc_i # non-zero remainder: do not keep this string
+ push rsa, $S2 # remainder 0: keep the string in rsa
+ inc_i:
+ inc i
+ if i < 10000000 goto loop # repeat until i reaches 10000000
+
+.end
+
+
+# Local Variables:
+# mode: pir
+# fill-column: 100
+# End:
+# vim: expandtab shiftwidth=4 ft=pir:
View
48 examples/benchmarks/stress_stringsu.pir
@@ -0,0 +1,48 @@
+# Copyright (C) 2010-2012, Parrot Foundation.
+
+=head1 NAME
+
+examples/benchmarks/stress_stringsu.pir - GC unicode strings stress-testing
+
+=head1 SYNOPSIS
+
+ % time ./parrot examples/benchmarks/stress_stringsu.pir
+
+=head1 DESCRIPTION
+
+Create encoded strings, running through the imcc optimizer, which reencodes the strings.
+Some of the strings are long-lived, most of them are short lived.
+
+Main purpose - test encoding issues and imcc performance. [GH #837]
+
+=cut
+
+.sub 'main' :main # benchmark entry point: builds utf8 "\x{a2}" . i for each i and keeps every 10th result
+ .local pmc rsa # array of long-lived strings.
+ .local pmc args # argument array handed to sprintf
+ .local int i # loop counter
+
+ rsa = new ['ResizableStringArray']
+ args = new ['ResizablePMCArray']
+ i = 0
+ push args, i # create slot 0, which is overwritten on every iteration
+ loop:
+ $S0 = utf8:"\x{a2}" # string constant carrying an explicit utf8 encoding prefix
+ args[0] = i # store the current counter for sprintf
+ sprintf $S1, "%d", args # $S1 = decimal text of i
+ $S2 = concat $S0, $S1 # $S2 = the utf8 constant followed by the number
+ $I0 = i % 10 # every 10th string is long-lived
+ if $I0 goto inc_i # non-zero remainder: do not keep this string
+ push rsa, $S2 # remainder 0: keep the string in rsa
+ inc_i:
+ inc i
+ if i < 10000000 goto loop # repeat until i reaches 10000000
+
+.end
+
+
+# Local Variables:
+# mode: pir
+# fill-column: 100
+# End:
+# vim: expandtab shiftwidth=4 ft=pir:
View
2 src/string/api.c
@@ -700,7 +700,7 @@ STRING *encodingname)>
Given a buffer and an encoding, creates and returns a new string. If buffer is
NULL the result is a null string. Otherwise, the buffer should be a zero
terminated c-style string and its content must be valid for the encoding
-specified. If encoding is null, assume plaftorm encoding.
+specified. If encoding is null, assume platform encoding.
=cut
View
25 t/op/stringu.t
@@ -1,11 +1,11 @@
#!perl
-# Copyright (C) 2001-2009, Parrot Foundation.
+# Copyright (C) 2001-2012, Parrot Foundation.
use strict;
use warnings;
use lib qw( . lib ../lib ../../lib );
use Test::More;
-use Parrot::Test tests => 47;
+use Parrot::Test tests => 48;
use Parrot::Config;
=head1 NAME
@@ -22,6 +22,27 @@ Tests Parrot unicode string system.
=cut
+pir_output_is(<<'CODE',<<'OUTPUT', 'non-ascii immc optimizer GH#837');
+.sub main
+ $S1 = utf8:"\x{a2}"
+ say $S1
+
+ concat $S1, unicode:"\x{a2}", unicode:"\x{a2}"
+ say $S1
+
+ concat $S2, unicode:"\x{a2}", "c"
+ say $S2
+
+ concat $S3, unicode:"\x{62}", unicode:"\x{62}"
+ say $S3
+.end
+CODE
+¢
+¢¢
+¢c
+bb
+OUTPUT
+
pir_output_is( <<'CODE', <<OUTPUT, "angstrom" );
.sub main :main
$P0 = getinterp

0 comments on commit 81f3b68

Please sign in to comment.
Something went wrong with that request. Please try again.