Skip to content

Commit

Permalink
Merge pull request #2787 from ruby/implicit-locals
Browse files Browse the repository at this point in the history
Add error for invalid implicit local writes
  • Loading branch information
kddnewton committed May 7, 2024
2 parents f1f9afd + 31f0201 commit f00ea7d
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 42 deletions.
1 change: 1 addition & 0 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ errors:
- PATTERN_EXPRESSION_AFTER_REST
- PATTERN_HASH_KEY
- PATTERN_HASH_KEY_DUPLICATE
- PATTERN_HASH_KEY_INTERPOLATED
- PATTERN_HASH_KEY_LABEL
- PATTERN_HASH_KEY_LOCALS
- PATTERN_IDENT_AFTER_HROCKET
Expand Down
81 changes: 40 additions & 41 deletions src/prism.c
Original file line number Diff line number Diff line change
Expand Up @@ -1758,7 +1758,7 @@ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
* it's important that it be as fast as possible.
*/
static inline size_t
char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
char_is_identifier(const pm_parser_t *parser, const uint8_t *b) {
if (parser->encoding_changed) {
size_t width;
if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
Expand Down Expand Up @@ -16125,21 +16125,52 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
}

/**
* Check that the slice of the source given by the bounds parameters constitutes
* a valid local variable name.
*/
static bool
pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
ptrdiff_t length = end - start;
if (length == 0) return false;

// First ensure that it starts with a valid identifier starting character.
size_t width = char_is_identifier_start(parser, start);
if (width == 0) return false;

// Next, ensure that it's not an uppercase character.
if (parser->encoding_changed) {
if (parser->encoding->isupper_char(start, length)) return false;
} else {
if (pm_encoding_utf_8_isupper_char(start, length)) return false;
}

// Next, iterate through all of the bytes of the string to ensure that they
// are all valid identifier characters.
const uint8_t *cursor = start + width;
while ((cursor < end) && (width = char_is_identifier(parser, cursor))) cursor += width;
return cursor == end;
}

/**
* Create an implicit node for the value of a hash pattern that has omitted the
* value. This will use an implicit local variable target.
*/
static pm_node_t *
parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);

pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
int depth = -1;
if (value_loc->end[-1] == '!' || value_loc->end[-1] == '?') {
pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
} else {

if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
depth = pm_parser_local_depth_constant_id(parser, constant_id);
} else {
pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);

if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
}
}

if (depth == -1) {
Expand Down Expand Up @@ -16209,7 +16240,8 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
// If we get anything else, then this is an error. For this we'll
// create a missing node for the value and create an assoc node for
// the first node in the list.
pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_LABEL);
pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
pm_parser_err_node(parser, first_node, diag_id);

pm_token_t operator = not_provided(parser);
pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
Expand Down Expand Up @@ -19762,39 +19794,6 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
}
}

/**
* Returns true if the name of the capture group is a valid local variable that
* can be written to.
*/
static bool
parse_regular_expression_named_capture(pm_parser_t *parser, const uint8_t *source, size_t length) {
if (length == 0) {
return false;
}

// First ensure that it starts with a valid identifier starting character.
size_t width = char_is_identifier_start(parser, source);
if (!width) {
return false;
}

// Next, ensure that it's not an uppercase character.
if (parser->encoding_changed) {
if (parser->encoding->isupper_char(source, (ptrdiff_t) length)) return false;
} else {
if (pm_encoding_utf_8_isupper_char(source, (ptrdiff_t) length)) return false;
}

// Next, iterate through all of the bytes of the string to ensure that they
// are all valid identifier characters.
const uint8_t *cursor = source + width;
while (cursor < source + length && (width = char_is_identifier(parser, cursor))) {
cursor += width;
}

return cursor == source + length;
}

/**
* Potentially change a =~ with a regular expression with named captures into a
* match write node.
Expand All @@ -19821,7 +19820,7 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *

// If the name of the capture group isn't a valid identifier, we do
// not add it to the local table.
if (!parse_regular_expression_named_capture(parser, source, length)) continue;
if (!pm_slice_is_valid_local(parser, source, source + length)) continue;

if (content->type == PM_STRING_SHARED) {
// If the unescaped string is a slice of the source, then we can
Expand Down
3 changes: 2 additions & 1 deletion templates/src/diagnostic.c.erb
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,8 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
[PM_ERR_PATTERN_EXPRESSION_AFTER_REST] = { "unexpected pattern expression after the `**` expression", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_PATTERN_HASH_KEY] = { "expected a key in the hash pattern", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_PATTERN_HASH_KEY_DUPLICATE] = { "duplicated key name", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_PATTERN_HASH_KEY_LABEL] = { "expected a label as the key in the hash pattern", PM_ERROR_LEVEL_SYNTAX }, // TODO // THIS // AND // ABOVE // IS WEIRD
[PM_ERR_PATTERN_HASH_KEY_INTERPOLATED] = { "symbol literal with interpolation is not allowed", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_PATTERN_HASH_KEY_LABEL] = { "expected a label as the key in the hash pattern", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_PATTERN_HASH_KEY_LOCALS] = { "key must be valid as local variables", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_PATTERN_IDENT_AFTER_HROCKET] = { "expected an identifier after the `=>` operator", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_PATTERN_LABEL_AFTER_COMMA] = { "expected a label after the `,` in the hash pattern", PM_ERROR_LEVEL_SYNTAX },
Expand Down

0 comments on commit f00ea7d

Please sign in to comment.