Skip to content

Commit

Permalink
Unify length field for embedded and heap strings (#7908)
Browse files Browse the repository at this point in the history
* Unify length field for embedded and heap strings

The length field is of the same type and position in RString for both
embedded and heap allocated strings, so we can unify it.

* Remove RSTRING_EMBED_LEN
  • Loading branch information
peterzhu2118 committed Jun 6, 2023
1 parent fae2f80 commit 7577c10
Show file tree
Hide file tree
Showing 9 changed files with 69 additions and 119 deletions.
5 changes: 2 additions & 3 deletions ext/-test-/string/cstr.c
Expand Up @@ -61,13 +61,12 @@ bug_str_unterminated_substring(VALUE str, VALUE vbeg, VALUE vlen)
if (RSTRING_LEN(str) < beg) rb_raise(rb_eIndexError, "beg: %ld", beg);
if (RSTRING_LEN(str) < beg + len) rb_raise(rb_eIndexError, "end: %ld", beg + len);
str = rb_str_new_shared(str);
RSTRING(str)->len = len;
if (STR_EMBED_P(str)) {
RSTRING(str)->as.embed.len = (short)len;
memmove(RSTRING(str)->as.embed.ary, RSTRING(str)->as.embed.ary + beg, len);
}
else {
RSTRING(str)->as.heap.ptr += beg;
RSTRING(str)->as.heap.len = len;
}
return str;
}
Expand Down Expand Up @@ -114,7 +113,7 @@ bug_str_s_cstr_noembed(VALUE self, VALUE str)
RBASIC(str2)->flags &= ~(STR_SHARED | FL_USER5 | FL_USER6);
RSTRING(str2)->as.heap.aux.capa = capacity;
RSTRING(str2)->as.heap.ptr = buf;
RSTRING(str2)->as.heap.len = RSTRING_LEN(str);
RSTRING(str2)->len = RSTRING_LEN(str);
TERM_FILL(RSTRING_END(str2), TERM_LEN(str));
return str2;
}
Expand Down
56 changes: 13 additions & 43 deletions include/ruby/internal/core/rstring.h
Expand Up @@ -43,7 +43,6 @@
/** @cond INTERNAL_MACRO */
#define RSTRING_NOEMBED RSTRING_NOEMBED
#define RSTRING_FSTR RSTRING_FSTR
#define RSTRING_EMBED_LEN RSTRING_EMBED_LEN
#define RSTRING_LEN RSTRING_LEN
#define RSTRING_LENINT RSTRING_LENINT
#define RSTRING_PTR RSTRING_PTR
Expand Down Expand Up @@ -199,6 +198,13 @@ struct RString {
/** Basic part, including flags and class. */
struct RBasic basic;

/**
* Length of the string, not including terminating NUL character.
*
* @note This is in bytes.
*/
long len;

/** String's specific fields. */
union {

Expand All @@ -207,14 +213,6 @@ struct RString {
* pattern.
*/
struct {

/**
* Length of the string, not including terminating NUL character.
*
* @note This is in bytes.
*/
long len;

/**
* Pointer to the contents of the string. In the old days each
* string had dedicated memory regions. That is no longer true
Expand Down Expand Up @@ -245,7 +243,6 @@ struct RString {

/** Embedded contents. */
struct {
long len;
/* This is a length 1 array because:
* 1. GCC has a bug that does not optimize C flexible array members
* (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102452)
Expand Down Expand Up @@ -364,24 +361,12 @@ RBIMPL_ATTR_ARTIFICIAL()
*
* @param[in] str String in question.
* @return Its length, in bytes.
* @pre `str` must be an instance of ::RString, and must has its
* ::RSTRING_NOEMBED flag off.
*
* @internal
*
* This was a macro before. It was inevitable to be public, since macros are
* global constructs. But should it be forever? Now that it is a function,
* @shyouhei thinks it could just be eliminated, hidden into implementation
* details.
* @pre `str` must be an instance of ::RString.
*/
static inline long
RSTRING_EMBED_LEN(VALUE str)
RSTRING_LEN(VALUE str)
{
RBIMPL_ASSERT_TYPE(str, RUBY_T_STRING);
RBIMPL_ASSERT_OR_ASSUME(! RB_FL_ANY_RAW(str, RSTRING_NOEMBED));

long f = RSTRING(str)->as.embed.len;
return f;
return RSTRING(str)->len;
}

RBIMPL_WARNING_PUSH()
Expand Down Expand Up @@ -411,29 +396,14 @@ rbimpl_rstring_getmem(VALUE str)
else {
/* Expecting compilers to optimize this on-stack struct away. */
struct RString retval;
retval.as.heap.len = RSTRING_EMBED_LEN(str);
retval.len = RSTRING_LEN(str);
retval.as.heap.ptr = RSTRING(str)->as.embed.ary;
return retval;
}
}

RBIMPL_WARNING_POP()

RBIMPL_ATTR_PURE_UNLESS_DEBUG()
RBIMPL_ATTR_ARTIFICIAL()
/**
* Queries the length of the string.
*
* @param[in] str String in question.
* @return Its length, in bytes.
* @pre `str` must be an instance of ::RString.
*/
static inline long
RSTRING_LEN(VALUE str)
{
return rbimpl_rstring_getmem(str).as.heap.len;
}

RBIMPL_ATTR_ARTIFICIAL()
/**
* Queries the contents pointer of the string.
Expand Down Expand Up @@ -482,7 +452,7 @@ RSTRING_END(VALUE str)
rb_debug_rstring_null_ptr("RSTRING_END");
}

return &buf.as.heap.ptr[buf.as.heap.len];
return &buf.as.heap.ptr[buf.len];
}

RBIMPL_ATTR_ARTIFICIAL()
Expand Down Expand Up @@ -516,7 +486,7 @@ RSTRING_LENINT(VALUE str)
__extension__ ({ \
struct RString rbimpl_str = rbimpl_rstring_getmem(str); \
(ptrvar) = rbimpl_str.as.heap.ptr; \
(lenvar) = rbimpl_str.as.heap.len; \
(lenvar) = rbimpl_str.len; \
})
#else
# define RSTRING_GETMEM(str, ptrvar, lenvar) \
Expand Down
5 changes: 1 addition & 4 deletions lib/ruby_vm/rjit/insn_compiler.rb
Expand Up @@ -2994,15 +2994,12 @@ def jit_rb_int_aref(jit, ctx, asm, argc, _known_recv_class)
# @param ctx [RubyVM::RJIT::Context]
# @param asm [RubyVM::RJIT::Assembler]
def jit_rb_str_empty_p(jit, ctx, asm, argc, known_recv_class)
# Assume same offset to len embedded or not so we can use one code path to read the length
#assert_equal(C.RString.offsetof(:as, :heap, :len), C.RString.offsetof(:as, :embed, :len))

recv_opnd = ctx.stack_pop(1)
out_opnd = ctx.stack_push(Type::UnknownImm)

asm.comment('get string length')
asm.mov(:rax, recv_opnd)
str_len_opnd = [:rax, C.RString.offsetof(:as, :heap, :len)]
str_len_opnd = [:rax, C.RString.offsetof(:len)]

asm.cmp(str_len_opnd, 0)
asm.mov(:rax, Qfalse)
Expand Down
3 changes: 1 addition & 2 deletions rjit_c.rb
Expand Up @@ -857,11 +857,11 @@ def C.RString
@RString ||= CType::Struct.new(
"RString", Primitive.cexpr!("SIZEOF(struct RString)"),
basic: [self.RBasic, Primitive.cexpr!("OFFSETOF((*((struct RString *)NULL)), basic)")],
len: [CType::Immediate.parse("long"), Primitive.cexpr!("OFFSETOF((*((struct RString *)NULL)), len)")],
as: [CType::Union.new(
"", Primitive.cexpr!("SIZEOF(((struct RString *)NULL)->as)"),
heap: CType::Struct.new(
"", Primitive.cexpr!("SIZEOF(((struct RString *)NULL)->as.heap)"),
len: [CType::Immediate.parse("long"), Primitive.cexpr!("OFFSETOF(((struct RString *)NULL)->as.heap, len)")],
ptr: [CType::Pointer.new { CType::Immediate.parse("char") }, Primitive.cexpr!("OFFSETOF(((struct RString *)NULL)->as.heap, ptr)")],
aux: [CType::Union.new(
"", Primitive.cexpr!("SIZEOF(((struct RString *)NULL)->as.heap.aux)"),
Expand All @@ -871,7 +871,6 @@ def C.RString
),
embed: CType::Struct.new(
"", Primitive.cexpr!("SIZEOF(((struct RString *)NULL)->as.embed)"),
len: [CType::Immediate.parse("long"), Primitive.cexpr!("OFFSETOF(((struct RString *)NULL)->as.embed, len)")],
ary: [CType::Pointer.new { CType::Immediate.parse("char") }, Primitive.cexpr!("OFFSETOF(((struct RString *)NULL)->as.embed, ary)")],
),
), Primitive.cexpr!("OFFSETOF((*((struct RString *)NULL)), as)")],
Expand Down

0 comments on commit 7577c10

Please sign in to comment.