Skip to content

Commit

Permalink
Fix rb_interned_str_* functions to not assume static strings
Browse files Browse the repository at this point in the history
Fixes [Feature #13381]

When passed a `fake_str`, `register_fstring` would create new strings
with `str_new_static`. That's not what was expected, and answer
almost no use cases.
  • Loading branch information
byroot committed Nov 18, 2020
1 parent dc3a65b commit 350cd2e
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 11 deletions.
14 changes: 14 additions & 0 deletions ext/-test-/string/rb_interned_str.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#include "ruby.h"

static VALUE
bug_rb_interned_str_dup(VALUE self, VALUE str)
{
rb_check_type(str, T_STRING);
return rb_interned_str(RSTRING_PTR(str), RSTRING_LEN(str));
}

void
Init_string_rb_interned_str(VALUE klass)
{
rb_define_singleton_method(klass, "rb_interned_str_dup", bug_rb_interned_str_dup, 1);
}
69 changes: 58 additions & 11 deletions string.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ static VALUE str_new_shared(VALUE klass, VALUE str);
static VALUE str_new_frozen(VALUE klass, VALUE orig);
static VALUE str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding);
static VALUE str_new_static(VALUE klass, const char *ptr, long len, int encindex);
static VALUE str_new(VALUE klass, const char *ptr, long len);
static void str_make_independent_expand(VALUE str, long len, long expand, const int termlen);
static inline void str_modifiable(VALUE str);
static VALUE rb_str_downcase(int argc, VALUE *argv, VALUE str);
Expand Down Expand Up @@ -271,7 +272,7 @@ mustnot_wchar(VALUE str)

static int fstring_cmp(VALUE a, VALUE b);

static VALUE register_fstring(VALUE str);
static VALUE register_fstring(VALUE str, bool copy);

const struct st_hash_type rb_fstring_hash_type = {
fstring_cmp,
Expand Down Expand Up @@ -324,6 +325,50 @@ fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existi
}
}

static int
fstr_copy_update_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existing)
{
VALUE *fstr = (VALUE *)arg;
VALUE str = (VALUE)*key;

if (existing) {
/* because of lazy sweep, str may be unmarked already and swept
* at next time */

if (rb_objspace_garbage_object_p(str)) {
*fstr = Qundef;
return ST_DELETE;
}

*fstr = str;
return ST_STOP;
}
else {
if (FL_TEST_RAW(str, STR_FAKESTR)) {
VALUE new_str = str_new(rb_cString, RSTRING(str)->as.heap.ptr, RSTRING(str)->as.heap.len);
rb_enc_copy(new_str, str);
str = new_str;
OBJ_FREEZE_RAW(str);
}
else {
if (!OBJ_FROZEN(str))
str = str_new_frozen(rb_cString, str);
if (STR_SHARED_P(str)) { /* str should not be shared */
/* shared substring */
str_make_independent(str);
assert(OBJ_FROZEN(str));
}
if (!BARE_STRING_P(str)) {
str = str_new_frozen(rb_cString, str);
}
}
RBASIC(str)->flags |= RSTRING_FSTR;

*key = *value = *fstr = str;
return ST_CONTINUE;
}
}

RUBY_FUNC_EXPORTED
VALUE
rb_fstring(VALUE str)
Expand Down Expand Up @@ -351,7 +396,7 @@ rb_fstring(VALUE str)
if (!OBJ_FROZEN(str))
rb_str_resize(str, RSTRING_LEN(str));

fstr = register_fstring(str);
fstr = register_fstring(str, FALSE);

if (!bare) {
str_replace_shared_without_enc(str, fstr);
Expand All @@ -362,18 +407,18 @@ rb_fstring(VALUE str)
}

static VALUE
register_fstring(VALUE str)
register_fstring(VALUE str, bool copy)
{
VALUE ret;

RB_VM_LOCK_ENTER();
{
st_table *frozen_strings = rb_vm_fstring_table();

st_update_callback_func *update_callback = copy ? fstr_copy_update_callback : fstr_update_callback;
do {
ret = str;
st_update(frozen_strings, (st_data_t)str,
fstr_update_callback, (st_data_t)&ret);
st_update(frozen_strings, (st_data_t)str, update_callback, (st_data_t)&ret);
} while (ret == Qundef);
}
RB_VM_LOCK_LEAVE();
Expand Down Expand Up @@ -418,14 +463,14 @@ MJIT_FUNC_EXPORTED VALUE
rb_fstring_new(const char *ptr, long len)
{
struct RString fake_str;
return register_fstring(setup_fake_str(&fake_str, ptr, len, ENCINDEX_US_ASCII));
return register_fstring(setup_fake_str(&fake_str, ptr, len, ENCINDEX_US_ASCII), FALSE);
}

VALUE
rb_fstring_enc_new(const char *ptr, long len, rb_encoding *enc)
{
struct RString fake_str;
return register_fstring(rb_setup_fake_str(&fake_str, ptr, len, enc));
return register_fstring(rb_setup_fake_str(&fake_str, ptr, len, enc), FALSE);
}

VALUE
Expand Down Expand Up @@ -11411,25 +11456,27 @@ rb_str_to_interned_str(VALUE str)
VALUE
rb_interned_str(const char *ptr, long len)
{
return rb_fstring_new(ptr, len);
struct RString fake_str;
return register_fstring(setup_fake_str(&fake_str, ptr, len, ENCINDEX_US_ASCII), TRUE);
}

VALUE
rb_interned_str_cstr(const char *ptr)
{
return rb_fstring_cstr(ptr);
return rb_interned_str(ptr, strlen(ptr));
}

VALUE
rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
{
return rb_fstring_enc_new(ptr, len, enc);
struct RString fake_str;
return register_fstring(rb_setup_fake_str(&fake_str, ptr, len, enc), TRUE);
}

VALUE
rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
{
return rb_fstring_enc_new(ptr, strlen(ptr), enc);
return rb_enc_interned_str(ptr, strlen(ptr), enc);
}

/*
Expand Down
12 changes: 12 additions & 0 deletions test/-ext-/string/test_interned_str.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
require 'test/unit'
require '-test-/string'

class Test_RbInternedStr < Test::Unit::TestCase
def test_interned_str
src = "a" * 20
interned_str = Bug::String.rb_interned_str_dup(src)
src.clear
src << "b" * 20
assert_equal "a" * 20, interned_str
end
end

0 comments on commit 350cd2e

Please sign in to comment.