diff --git a/prism/config.yml b/prism/config.yml index bc321cf0691807..b6d329ea7e17d3 100644 --- a/prism/config.yml +++ b/prism/config.yml @@ -247,6 +247,7 @@ warnings: - KEYWORD_EOL - LITERAL_IN_CONDITION_DEFAULT - LITERAL_IN_CONDITION_VERBOSE + - UNEXPECTED_CARRIAGE_RETURN tokens: - name: EOF value: 1 diff --git a/prism/prism.c b/prism/prism.c index 38e55106a8357b..84f48d36460b8c 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -9291,6 +9291,7 @@ parser_lex(pm_parser_t *parser) { if (match_eol_offset(parser, 1)) { chomping = false; } else { + pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN); parser->current.end++; space_seen = true; } @@ -10348,16 +10349,43 @@ parser_lex(pm_parser_t *parser) { // other options. We'll skip past it and return the next // token after adding an appropriate error message. if (!width) { - pm_diagnostic_id_t diag_id; if (*parser->current.start >= 0x80) { - diag_id = PM_ERR_INVALID_MULTIBYTE_CHARACTER; - } else if (char_is_ascii_printable(*parser->current.start) || (*parser->current.start == '\\')) { - diag_id = PM_ERR_INVALID_PRINTABLE_CHARACTER; + PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start); + } else if (*parser->current.start == '\\') { + switch (peek_at(parser, parser->current.start + 1)) { + case ' ': + parser->current.end++; + PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space"); + break; + case '\f': + parser->current.end++; + PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed"); + break; + case '\t': + parser->current.end++; + PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab"); + break; + case '\v': + parser->current.end++; + PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab"); + break; + case '\r': + if (peek_at(parser, parser->current.start + 2) != '\n') { + parser->current.end++; + PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return"); + break; + } + /* fallthrough */ + default: + PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash"); + break; + } + } else if (char_is_ascii_printable(*parser->current.start)) { + PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start); } else { - diag_id = PM_ERR_INVALID_CHARACTER; + PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start); } - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, *parser->current.start); goto lex_next_token; } diff --git a/prism/templates/src/diagnostic.c.erb b/prism/templates/src/diagnostic.c.erb index be07432577d17a..f031dbbda38466 100644 --- a/prism/templates/src/diagnostic.c.erb +++ b/prism/templates/src/diagnostic.c.erb @@ -326,7 +326,8 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = { [PM_WARN_INVALID_CHARACTER] = { "invalid character syntax; use %s%s%s", PM_WARNING_LEVEL_DEFAULT }, [PM_WARN_KEYWORD_EOL] = { "`%.*s` at the end of line without an expression", PM_WARNING_LEVEL_VERBOSE }, [PM_WARN_LITERAL_IN_CONDITION_DEFAULT] = { "%sliteral in %s", PM_WARNING_LEVEL_DEFAULT }, - [PM_WARN_LITERAL_IN_CONDITION_VERBOSE] = { "%sliteral in %s", PM_WARNING_LEVEL_VERBOSE } + [PM_WARN_LITERAL_IN_CONDITION_VERBOSE] = { "%sliteral in %s", PM_WARNING_LEVEL_VERBOSE }, + [PM_WARN_UNEXPECTED_CARRIAGE_RETURN] = { "encountered \\r in middle of line, treated as a mere space", PM_WARNING_LEVEL_DEFAULT } }; /**