Skip to content

Commit

Permalink
[prism] Handle string and xstring encodings
Browse files Browse the repository at this point in the history
  • Loading branch information
kddnewton committed Dec 6, 2023
1 parent 82f18ba commit 153c09f
Showing 1 changed file with 32 additions and 9 deletions.
41 changes: 32 additions & 9 deletions prism_compile.c
Expand Up @@ -182,14 +182,34 @@ parse_imaginary(pm_imaginary_node_t *node)
/**
 * Create a Ruby string from the bytes of the given prism string, tagged with
 * the parser's encoding. The encoding is resolved by name through
 * rb_enc_find_index followed by rb_enc_from_index.
 *
 * NOTE(review): `enc` is declared twice below, once via
 * `parser->encoding.name` and once via `parser->encoding->name`. This looks
 * like the before/after pair of a diff hunk shown without +/- markers;
 * presumably only the `->name` form is current -- confirm against the real
 * file before relying on this text as compilable source.
 */
static inline VALUE
parse_string(pm_string_t *string, const pm_parser_t *parser)
{
rb_encoding *enc = rb_enc_from_index(rb_enc_find_index(parser->encoding.name));
rb_encoding *enc = rb_enc_from_index(rb_enc_find_index(parser->encoding->name));
return rb_enc_str_new((const char *) pm_string_source(string), pm_string_length(string), enc);
}

/**
 * Build a Ruby string from a prism string whose encoding may differ from the
 * parser's encoding (bytes or escape sequences with the top bit set can force
 * a different one). The flags on the owning node decide the encoding:
 *
 *   - PM_ENCODING_FLAGS_FORCED_BINARY_ENCODING selects ASCII-8BIT and is
 *     checked first, so it wins if both flags are set;
 *   - PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING selects UTF-8;
 *   - otherwise the parser's encoding is looked up by name.
 *
 * @param node   The node that owns the string; only its flags are read.
 * @param string The prism string providing the bytes and the length.
 * @param parser The parser whose encoding is the fallback.
 * @return A new Ruby string with the selected encoding.
 */
static inline VALUE
parse_string_encoded(const pm_node_t *node, const pm_string_t *string, const pm_parser_t *parser)
{
    rb_encoding *selected =
        (node->flags & PM_ENCODING_FLAGS_FORCED_BINARY_ENCODING) ? rb_ascii8bit_encoding() :
        (node->flags & PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING) ? rb_utf8_encoding() :
        rb_enc_from_index(rb_enc_find_index(parser->encoding->name));

    const char *bytes = (const char *) pm_string_source(string);

    return rb_enc_str_new(bytes, pm_string_length(string), selected);
}

/**
 * Intern the bytes in the range [start, end) as an ID, using the parser's
 * encoding. The encoding is resolved by name through rb_enc_find_index
 * followed by rb_enc_from_index, and the length passed to rb_intern3 is
 * `end - start`.
 *
 * NOTE(review): `enc` is declared twice below, once via
 * `parser->encoding.name` and once via `parser->encoding->name`. This looks
 * like the before/after pair of a diff hunk shown without +/- markers;
 * presumably only the `->name` form is current -- confirm against the real
 * file before relying on this text as compilable source.
 */
static inline ID
parse_symbol(const uint8_t *start, const uint8_t *end, pm_parser_t *parser)
{
rb_encoding *enc = rb_enc_from_index(rb_enc_find_index(parser->encoding.name));
rb_encoding *enc = rb_enc_from_index(rb_enc_find_index(parser->encoding->name));
return rb_intern3((const char *) start, end - start, enc);
}

Expand Down Expand Up @@ -278,7 +298,7 @@ pm_reg_enc(const pm_regular_expression_node_t *node, const pm_parser_t *parser)
return rb_utf8_encoding();
}

return rb_enc_from_index(rb_enc_find_index(parser->encoding.name));
return rb_enc_from_index(rb_enc_find_index(parser->encoding->name));
}

/**
Expand Down Expand Up @@ -362,7 +382,7 @@ pm_static_literal_value(const pm_node_t *node, pm_scope_node_t *scope_node, pm_p
return pm_new_regex(cast, parser);
}
case PM_SOURCE_ENCODING_NODE: {
rb_encoding *encoding = rb_find_encoding(rb_str_new_cstr(scope_node->parser->encoding.name));
rb_encoding *encoding = rb_find_encoding(rb_str_new_cstr(scope_node->parser->encoding->name));
if (!encoding) rb_bug("Encoding not found!");
return rb_enc_from_encoding(encoding);
}
Expand Down Expand Up @@ -4431,8 +4451,9 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
}
case PM_STRING_NODE: {
if (!popped) {
pm_string_node_t *string_node = (pm_string_node_t *) node;
ADD_INSN1(ret, &dummy_line_node, putstring, parse_string(&string_node->unescaped, parser));
pm_string_node_t *cast = (pm_string_node_t *) node;
VALUE value = parse_string_encoded(node, &cast->unescaped, parser);
ADD_INSN1(ret, &dummy_line_node, putstring, value);
}
return;
}
Expand Down Expand Up @@ -4553,9 +4574,11 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
return;
}
case PM_X_STRING_NODE: {
pm_x_string_node_t *xstring_node = (pm_x_string_node_t *) node;
pm_x_string_node_t *cast = (pm_x_string_node_t *) node;
VALUE value = parse_string_encoded(node, &cast->unescaped, parser);

PM_PUTSELF;
ADD_INSN1(ret, &dummy_line_node, putobject, parse_string(&xstring_node->unescaped, parser));
ADD_INSN1(ret, &dummy_line_node, putobject, value);
ADD_SEND_WITH_FLAG(ret, &dummy_line_node, idBackquote, INT2NUM(1), INT2FIX(VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE));

PM_POP_IF_POPPED;
Expand Down Expand Up @@ -4599,7 +4622,7 @@ rb_translate_prism(pm_parser_t *parser, rb_iseq_t *iseq, pm_scope_node_t *scope_
RUBY_ASSERT(ISEQ_COMPILE_DATA(iseq));

ID *constants = calloc(parser->constant_pool.size, sizeof(ID));
rb_encoding *encoding = rb_enc_find(parser->encoding.name);
rb_encoding *encoding = rb_enc_find(parser->encoding->name);
for (uint32_t index = 0; index < parser->constant_pool.size; index++) {
pm_constant_t *constant = &parser->constant_pool.constants[index];
constants[index] = rb_intern3((const char *) constant->start, constant->length, encoding);
Expand Down

0 comments on commit 153c09f

Please sign in to comment.