Skip to content

Commit

Permalink
rb_str_buf_append: fastpath to str_buf_cat
Browse files Browse the repository at this point in the history
If the LHS is ASCII compatible and the RHS is 7BIT
we can directly concat without being concerned about
anything else.

Benchmark:
```
compare-ruby: ruby 3.2.0dev (2022-07-12T15:01:11Z master 71aec68) [arm64-darwin21]
built-ruby: ruby 3.2.0dev (2022-07-13T10:13:53Z faster-buffer-conc.. a04c104) [arm64-darwin21]
warming up...

|                      |compare-ruby|built-ruby|
|:---------------------|-----------:|---------:|
|binary_append_utf8    |    385.315k|  573.663k|
|                      |           -|     1.49x|
|binary_append_binary  |    446.579k|  574.898k|
|                      |           -|     1.29x|
|utf8_append_utf8      |    430.936k|  573.394k|
|                      |           -|     1.33x|
```

Note that in the benchmark, the RHS always has a precomputed
coderange, so the benchmark never enters the slow path of having to
scan the RHS. However, it's extremely likely that we'll end
up scanning it anyway in rb_enc_cr_str_buf_cat.
  • Loading branch information
byroot committed Jul 19, 2022
1 parent ee1d2b2 commit 0ae8dbb
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 5 deletions.
25 changes: 23 additions & 2 deletions benchmark/string_concat.yml
@@ -1,8 +1,29 @@
prelude: |
CHUNK = "a" * 64
BCHUNK = "a".b * 64
benchmark:
string_concat: |
buffer = String.new(capacity: 4096)
binary_concat_utf8: |
buffer = String.new(capacity: 4096, encoding: Encoding::BINARY)
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
binary_concat_binary: |
buffer = String.new(capacity: 4096, encoding: Encoding::BINARY)
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
utf8_concat_utf8: |
buffer = String.new(capacity: 4096, encoding: Encoding::UTF_8)
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
Expand Down
22 changes: 19 additions & 3 deletions string.c
Expand Up @@ -3303,12 +3303,28 @@ rb_str_buf_cat_ascii(VALUE str, const char *ptr)
}
}

// Reports whether `str` carries one of the three encodings singled out for
// the fast path: US-ASCII, UTF-8 or ASCII-8BIT (binary). Nearly every
// string uses one of these, so only they are checked here.
//
// Returns true when the inlined encoding index of `str` is one of those
// three, false for any other index.
static inline bool
str_enc_fastpath(VALUE str)
{
    switch (ENCODING_GET_INLINED(str)) {
      case ENCINDEX_US_ASCII:
      case ENCINDEX_UTF_8:
      case ENCINDEX_ASCII_8BIT:
        // One of the common encodings: fall out of the switch and accept.
        break;
      default:
        // Any other encoding index is rejected.
        return false;
    }
    return true;
}

VALUE
rb_str_buf_append(VALUE str, VALUE str2)
{
int str2_cr;

str2_cr = ENC_CODERANGE(str2);
int str2_cr = rb_enc_str_coderange(str2);
if (str2_cr == ENC_CODERANGE_7BIT && str_enc_fastpath(str)) {
str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2));
return str;
}

rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
ENCODING_GET(str2), str2_cr, &str2_cr);
Expand Down

0 comments on commit 0ae8dbb

Please sign in to comment.