From 486331d1a845c50d92836271b0682ff739058a0f Mon Sep 17 00:00:00 2001
From: Whiteknight
Date: Mon, 9 Apr 2012 19:34:33 -0400
Subject: [PATCH] Implement StringBuilder.push_integer vtable, so we can assemble strings by pushing raw codepoints instead of needing to first create a 1-character string (chr_s_i) and then pushing that string to the StringBuilder

---
 src/pmc/stringbuilder.pmc  | 75 +++++++++++++++++++++++++++++---------
 src/string/encoding/utf8.c |  4 +-
 2 files changed, 60 insertions(+), 19 deletions(-)

diff --git a/src/pmc/stringbuilder.pmc b/src/pmc/stringbuilder.pmc
index 3878c5f55b..dbf927cabc 100644
--- a/src/pmc/stringbuilder.pmc
+++ b/src/pmc/stringbuilder.pmc
@@ -27,7 +27,15 @@ PARROT_WARN_UNUSED_RESULT
 PARROT_CONST_FUNCTION
 static size_t calculate_capacity(PARROT_INTERP, size_t needed);
 
+static void convert_encoding(PARROT_INTERP,
+    STR_VTABLE * dest_encoding,
+    STRING * buffer,
+    size_t size_to_add)
+        __attribute__nonnull__(1);
+
 #define ASSERT_ARGS_calculate_capacity __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
+#define ASSERT_ARGS_convert_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp))
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
 /* HEADERIZER END: static */
 
@@ -183,24 +191,8 @@ Append string to current buffer.
                 if (s->encoding != Parrot_utf8_encoding_ptr)
                     s = Parrot_utf8_encoding_ptr->to_encoding(interp, s);
 
-                if (buffer->encoding != Parrot_utf8_encoding_ptr) {
-                    /* Create new temporary string */
-                    STRING * new_buffer;
-
-                    new_buffer = Parrot_utf8_encoding_ptr->to_encoding(interp, buffer);
-                    total_size = new_buffer->bufused + s->bufused;
-
-                    if (total_size > buffer->_buflen) {
-                        /* Reallocate */
-                        total_size = calculate_capacity(INTERP, total_size);
-                        Parrot_gc_reallocate_string_storage(INTERP, buffer, total_size);
-                    }
-                    buffer->bufused = new_buffer->bufused;
-                    buffer->encoding = new_buffer->encoding;
-
-                    memcpy(buffer->strstart, new_buffer->strstart,
-                            new_buffer->bufused);
-                }
+                if (buffer->encoding != Parrot_utf8_encoding_ptr)
+                    convert_encoding(INTERP, Parrot_utf8_encoding_ptr, buffer, s->bufused);
             }
         }
 
@@ -228,6 +220,32 @@ Append string to current buffer.
         PARROT_ASSERT(buffer->bufused <= Buffer_buflen(buffer));
     }
 
+    VTABLE void push_integer(INTVAL value) {
+        STRING * s = PARROT_STRINGBUILDER(SELF)->buffer;
+        String_iter iter;
+        size_t total_size;
+
+        if (s->encoding != Parrot_utf8_encoding_ptr && value > 0x7F) {
+            if (s->strlen == 0)
+                s->encoding = Parrot_utf8_encoding_ptr;
+            else
+                convert_encoding(INTERP, Parrot_utf8_encoding_ptr, s, sizeof (INTVAL));
+        }
+
+        total_size = s->bufused + sizeof (INTVAL);
+        if (total_size > s->_buflen) {
+            total_size = calculate_capacity(INTERP, total_size);
+            Parrot_gc_reallocate_string_storage(INTERP, s, total_size);
+        }
+
+        STRING_ITER_INIT(INTERP, &iter);
+        iter.charpos = s->strlen;
+        iter.bytepos = s->bufused;
+        STRING_iter_set_and_advance(INTERP, s, &iter, value);
+        s->strlen = iter.charpos;
+        s->bufused = iter.bytepos;
+    }
+
 /*
 
 =item C
@@ -484,6 +502,27 @@ calculate_capacity(SHIM_INTERP, size_t needed)
     return needed;
 }
 
+static void
+convert_encoding(PARROT_INTERP, STR_VTABLE * dest_encoding, STRING * buffer, size_t size_to_add)
+{
+    STRING * new_buffer;
+    size_t total_size;
+
+    new_buffer = dest_encoding->to_encoding(interp, buffer);
+    total_size = new_buffer->bufused + size_to_add;
+
+    if (total_size > buffer->_buflen) {
+        /* Reallocate */
+        total_size = calculate_capacity(interp, total_size);
+        Parrot_gc_reallocate_string_storage(interp, buffer, total_size);
+    }
+    buffer->bufused = new_buffer->bufused;
+    buffer->encoding = new_buffer->encoding;
+
+    memcpy(buffer->strstart, new_buffer->strstart,
+            new_buffer->bufused);
+}
+
 /*
  * Local variables:
  *   c-file-style: "parrot"
diff --git a/src/string/encoding/utf8.c b/src/string/encoding/utf8.c
index 2e52a2cfd4..9ae7e30f12 100644
--- a/src/string/encoding/utf8.c
+++ b/src/string/encoding/utf8.c
@@ -577,7 +577,9 @@ utf8_iter_set_and_advance(PARROT_INTERP,
 
     i->charpos += 1;
     i->bytepos += end - ptr;
-    PARROT_ASSERT(i->bytepos <= str->bufused);
+    /* Commenting this out allows us to iterate and assemble strings in empty
+       (but pre-allocated) buffers. */
+    /* PARROT_ASSERT(i->bytepos <= str->bufused); */
 }
 
 