Skip to content

Commit

Permalink
add diagnosis messages for chars and byte literal errors
Browse files Browse the repository at this point in the history
  • Loading branch information
pvalletbo committed Oct 11, 2023
1 parent 8a23314 commit 1fe6ac8
Show file tree
Hide file tree
Showing 9 changed files with 337 additions and 12 deletions.
55 changes: 55 additions & 0 deletions crates/parser/src/lexed_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@
//! include info about comments and whitespace.

use rustc_dependencies::lexer as rustc_lexer;

use std::ops;

use rustc_lexer::unescape::{Mode, EscapeError};

use crate::{
SyntaxKind::{self, *},
T,
Expand Down Expand Up @@ -254,13 +257,28 @@ impl<'a> Converter<'a> {
rustc_lexer::LiteralKind::Char { terminated } => {
if !terminated {
err = "Missing trailing `'` symbol to terminate the character literal";
} else {
let text = &self.res.text[self.offset + 1..][..len - 1];
let i = text.rfind('\'').unwrap();
let text = &text[..i];
if let Err(e) = rustc_lexer::unescape::unescape_char(text) {
err = error_to_diagnostic_message(e, Mode::Char);
}
}
CHAR
}
rustc_lexer::LiteralKind::Byte { terminated } => {
if !terminated {
err = "Missing trailing `'` symbol to terminate the byte literal";
} else {
let text = &self.res.text[self.offset + 2..][..len - 2];
let i = text.rfind('\'').unwrap();
let text = &text[..i];
if let Err(e) = rustc_lexer::unescape::unescape_char(text) {
err = error_to_diagnostic_message(e, Mode::Byte);
}
}

BYTE
}
rustc_lexer::LiteralKind::Str { terminated } => {
Expand Down Expand Up @@ -305,3 +323,40 @@ impl<'a> Converter<'a> {
self.push(syntax_kind, len, err);
}
}

/// Translates an [`EscapeError`] into the static message text reported for a
/// malformed literal.
///
/// `mode` only affects the wording of a few variants:
/// - `InvalidEscape` is worded as a byte escape for `Mode::Byte` and
///   `Mode::ByteStr`, and as a character escape otherwise;
/// - `EscapeOnlyChar` is worded as a byte constant for `Mode::Byte` only;
/// - `NonAsciiCharInByte` distinguishes `Mode::Byte`, `Mode::ByteStr`, and
///   every other mode (worded as a raw byte string).
///
/// Several variants map to the empty string.
// NOTE(review): presumably callers treat an empty message as "nothing to
// report" — confirm against the code that consumes `err`.
fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {
    match error {
        // Variants for which no message text is produced.
        EscapeError::LoneSlash
        | EscapeError::BareCarriageReturn
        | EscapeError::BareCarriageReturnInRawString
        | EscapeError::UnskippedWhitespaceWarning
        | EscapeError::MultipleSkippedLinesWarning => "",

        // Wrong number of characters between the quotes.
        EscapeError::ZeroChars => "empty character literal",
        EscapeError::MoreThanOneChar => "character literal may only contain one codepoint",

        // Mode-dependent wording.
        EscapeError::InvalidEscape => match mode {
            Mode::Byte | Mode::ByteStr => "unknown byte escape",
            _ => "unknown character escape",
        },
        EscapeError::EscapeOnlyChar => match mode {
            Mode::Byte => "byte constant must be escaped",
            _ => "character constant must be escaped",
        },
        EscapeError::NonAsciiCharInByte => match mode {
            Mode::Byte => "non-ASCII character in byte literal",
            Mode::ByteStr => "non-ASCII character in byte string literal",
            _ => "non-ASCII character in raw byte string literal",
        },

        // `\x..` escapes.
        EscapeError::TooShortHexEscape => "numeric character escape is too short",
        EscapeError::InvalidCharInHexEscape => "invalid character in numeric character escape",
        EscapeError::OutOfRangeHexEscape => "out of range hex escape",

        // `\u{..}` escapes.
        EscapeError::NoBraceInUnicodeEscape => "incorrect unicode escape sequence",
        EscapeError::InvalidCharInUnicodeEscape => "invalid character in unicode escape",
        EscapeError::EmptyUnicodeEscape => "empty unicode escape",
        EscapeError::UnclosedUnicodeEscape => "unterminated unicode escape",
        EscapeError::LeadingUnderscoreUnicodeEscape => "invalid start of unicode escape",
        EscapeError::OverlongUnicodeEscape => "overlong unicode escape",
        EscapeError::LoneSurrogateUnicodeEscape | EscapeError::OutOfRangeUnicodeEscape => {
            "invalid unicode character escape"
        }
        EscapeError::UnicodeEscapeInByte => "unicode escape in byte string",
    }
}
92 changes: 92 additions & 0 deletions crates/parser/test_data/lexer/err/byte_char_literals.rast
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
BYTE "b''" error: empty character literal
WHITESPACE "\n"
BYTE "b'\\'" error: Missing trailing `'` symbol to terminate the byte literal
WHITESPACE "\n"
BYTE "b'\n'" error: byte constant must be escaped
WHITESPACE "\n"
BYTE "b'spam'" error: character literal may only contain one codepoint
WHITESPACE "\n"
BYTE "b'\\x0ff'" error: character literal may only contain one codepoint
WHITESPACE "\n"
BYTE "b'\\\"a'" error: character literal may only contain one codepoint
WHITESPACE "\n"
BYTE "b'\\na'" error: character literal may only contain one codepoint
WHITESPACE "\n"
BYTE "b'\\ra'" error: character literal may only contain one codepoint
WHITESPACE "\n"
BYTE "b'\\ta'" error: character literal may only contain one codepoint
WHITESPACE "\n"
BYTE "b'\\\\a'" error: character literal may only contain one codepoint
WHITESPACE "\n"
BYTE "b'\\'a'" error: character literal may only contain one codepoint
WHITESPACE "\n"
BYTE "b'\\0a'" error: character literal may only contain one codepoint
WHITESPACE "\n"
BYTE "b'\\u{0}x'" error: character literal may only contain one codepoint
WHITESPACE "\n"
BYTE "b'\\u{1F63b}}'" error: character literal may only contain one codepoint
WHITESPACE "\n"
BYTE "b'\\v'" error: unknown byte escape
WHITESPACE "\n"
BYTE "b'\\💩'" error: unknown byte escape
WHITESPACE "\n"
BYTE "b'\\●'" error: unknown byte escape
WHITESPACE "\n"
BYTE "b'\\\\\\r'" error: character literal may only contain one codepoint
WHITESPACE "\n"
BYTE "b'\\x'" error: numeric character escape is too short
WHITESPACE "\n"
BYTE "b'\\x0'" error: numeric character escape is too short
WHITESPACE "\n"
BYTE "b'\\xf'" error: numeric character escape is too short
WHITESPACE "\n"
BYTE "b'\\xa'" error: numeric character escape is too short
WHITESPACE "\n"
BYTE "b'\\xx'" error: invalid character in numeric character escape
WHITESPACE "\n"
BYTE "b'\\xы'" error: invalid character in numeric character escape
WHITESPACE "\n"
BYTE "b'\\x🦀'" error: invalid character in numeric character escape
WHITESPACE "\n"
BYTE "b'\\xtt'" error: invalid character in numeric character escape
WHITESPACE "\n"
BYTE "b'\\xff'" error: out of range hex escape
WHITESPACE "\n"
BYTE "b'\\xFF'" error: out of range hex escape
WHITESPACE "\n"
BYTE "b'\\x80'" error: out of range hex escape
WHITESPACE "\n"
BYTE "b'\\u'" error: incorrect unicode escape sequence
WHITESPACE "\n"
BYTE "b'\\u[0123]'" error: incorrect unicode escape sequence
WHITESPACE "\n"
BYTE "b'\\u{0x}'" error: invalid character in unicode escape
WHITESPACE "\n"
BYTE "b'\\u{'" error: unterminated unicode escape
WHITESPACE "\n"
BYTE "b'\\u{0000'" error: unterminated unicode escape
WHITESPACE "\n"
BYTE "b'\\u{}'" error: empty unicode escape
WHITESPACE "\n"
BYTE "b'\\u{_0000}'" error: invalid start of unicode escape
WHITESPACE "\n"
BYTE "b'\\u{0000000}'" error: overlong unicode escape
WHITESPACE "\n"
BYTE "b'\\u{FFFFFF}'" error: invalid unicode character escape
WHITESPACE "\n"
BYTE "b'\\u{ffffff}'" error: invalid unicode character escape
WHITESPACE "\n"
BYTE "b'\\u{ffffff}'" error: invalid unicode character escape
WHITESPACE "\n"
BYTE "b'\\u{DC00}'" error: invalid unicode character escape
WHITESPACE "\n"
BYTE "b'\\u{DDDD}'" error: invalid unicode character escape
WHITESPACE "\n"
BYTE "b'\\u{DFFF}'" error: invalid unicode character escape
WHITESPACE "\n"
BYTE "b'\\u{D800}'" error: invalid unicode character escape
WHITESPACE "\n"
BYTE "b'\\u{DAAA}'" error: invalid unicode character escape
WHITESPACE "\n"
BYTE "b'\\u{DBFF}'" error: invalid unicode character escape
WHITESPACE "\n"
47 changes: 47 additions & 0 deletions crates/parser/test_data/lexer/err/byte_char_literals.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
b''
b'\'
b'
'
b'spam'
b'\x0ff'
b'\"a'
b'\na'
b'\ra'
b'\ta'
b'\\a'
b'\'a'
b'\0a'
b'\u{0}x'
b'\u{1F63b}}'
b'\v'
b'\💩'
b'\●'
b'\\\r'
b'\x'
b'\x0'
b'\xf'
b'\xa'
b'\xx'
b'\xы'
b'\x🦀'
b'\xtt'
b'\xff'
b'\xFF'
b'\x80'
b'\u'
b'\u[0123]'
b'\u{0x}'
b'\u{'
b'\u{0000'
b'\u{}'
b'\u{_0000}'
b'\u{0000000}'
b'\u{FFFFFF}'
b'\u{ffffff}'
b'\u{ffffff}'
b'\u{DC00}'
b'\u{DDDD}'
b'\u{DFFF}'
b'\u{D800}'
b'\u{DAAA}'
b'\u{DBFF}'
92 changes: 92 additions & 0 deletions crates/parser/test_data/lexer/err/char_literals.rast
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
CHAR "'hello'" error: character literal may only contain one codepoint
WHITESPACE "\n"
CHAR "''" error: empty character literal
WHITESPACE "\n"
CHAR "'\n'" error: character constant must be escaped
WHITESPACE "\n"
CHAR "'spam'" error: character literal may only contain one codepoint
WHITESPACE "\n"
CHAR "'\\x0ff'" error: character literal may only contain one codepoint
WHITESPACE "\n"
CHAR "'\\\"a'" error: character literal may only contain one codepoint
WHITESPACE "\n"
CHAR "'\\na'" error: character literal may only contain one codepoint
WHITESPACE "\n"
CHAR "'\\ra'" error: character literal may only contain one codepoint
WHITESPACE "\n"
CHAR "'\\ta'" error: character literal may only contain one codepoint
WHITESPACE "\n"
CHAR "'\\\\a'" error: character literal may only contain one codepoint
WHITESPACE "\n"
CHAR "'\\'a'" error: character literal may only contain one codepoint
WHITESPACE "\n"
CHAR "'\\0a'" error: character literal may only contain one codepoint
WHITESPACE "\n"
CHAR "'\\u{0}x'" error: character literal may only contain one codepoint
WHITESPACE "\n"
CHAR "'\\u{1F63b}}'" error: character literal may only contain one codepoint
WHITESPACE "\n"
CHAR "'\\v'" error: unknown character escape
WHITESPACE "\n"
CHAR "'\\💩'" error: unknown character escape
WHITESPACE "\n"
CHAR "'\\●'" error: unknown character escape
WHITESPACE "\n"
CHAR "'\\\\\\r'" error: character literal may only contain one codepoint
WHITESPACE "\n"
CHAR "'\\x'" error: numeric character escape is too short
WHITESPACE "\n"
CHAR "'\\x0'" error: numeric character escape is too short
WHITESPACE "\n"
CHAR "'\\xf'" error: numeric character escape is too short
WHITESPACE "\n"
CHAR "'\\xa'" error: numeric character escape is too short
WHITESPACE "\n"
CHAR "'\\xx'" error: invalid character in numeric character escape
WHITESPACE "\n"
CHAR "'\\xы'" error: invalid character in numeric character escape
WHITESPACE "\n"
CHAR "'\\x🦀'" error: invalid character in numeric character escape
WHITESPACE "\n"
CHAR "'\\xtt'" error: invalid character in numeric character escape
WHITESPACE "\n"
CHAR "'\\xff'" error: out of range hex escape
WHITESPACE "\n"
CHAR "'\\xFF'" error: out of range hex escape
WHITESPACE "\n"
CHAR "'\\x80'" error: out of range hex escape
WHITESPACE "\n"
CHAR "'\\u'" error: incorrect unicode escape sequence
WHITESPACE "\n"
CHAR "'\\u[0123]'" error: incorrect unicode escape sequence
WHITESPACE "\n"
CHAR "'\\u{0x}'" error: invalid character in unicode escape
WHITESPACE "\n"
CHAR "'\\u{'" error: unterminated unicode escape
WHITESPACE "\n"
CHAR "'\\u{0000'" error: unterminated unicode escape
WHITESPACE "\n"
CHAR "'\\u{}'" error: empty unicode escape
WHITESPACE "\n"
CHAR "'\\u{_0000}'" error: invalid start of unicode escape
WHITESPACE "\n"
CHAR "'\\u{0000000}'" error: overlong unicode escape
WHITESPACE "\n"
CHAR "'\\u{FFFFFF}'" error: invalid unicode character escape
WHITESPACE "\n"
CHAR "'\\u{ffffff}'" error: invalid unicode character escape
WHITESPACE "\n"
CHAR "'\\u{ffffff}'" error: invalid unicode character escape
WHITESPACE "\n"
CHAR "'\\u{DC00}'" error: invalid unicode character escape
WHITESPACE "\n"
CHAR "'\\u{DDDD}'" error: invalid unicode character escape
WHITESPACE "\n"
CHAR "'\\u{DFFF}'" error: invalid unicode character escape
WHITESPACE "\n"
CHAR "'\\u{D800}'" error: invalid unicode character escape
WHITESPACE "\n"
CHAR "'\\u{DAAA}'" error: invalid unicode character escape
WHITESPACE "\n"
CHAR "'\\u{DBFF}'" error: invalid unicode character escape
WHITESPACE "\n"
47 changes: 47 additions & 0 deletions crates/parser/test_data/lexer/err/char_literals.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
'hello'
''
'
'
'spam'
'\x0ff'
'\"a'
'\na'
'\ra'
'\ta'
'\\a'
'\'a'
'\0a'
'\u{0}x'
'\u{1F63b}}'
'\v'
'\💩'
'\●'
'\\\r'
'\x'
'\x0'
'\xf'
'\xa'
'\xx'
'\xы'
'\x🦀'
'\xtt'
'\xff'
'\xFF'
'\x80'
'\u'
'\u[0123]'
'\u{0x}'
'\u{'
'\u{0000'
'\u{}'
'\u{_0000}'
'\u{0000000}'
'\u{FFFFFF}'
'\u{ffffff}'
'\u{ffffff}'
'\u{DC00}'
'\u{DDDD}'
'\u{DFFF}'
'\u{D800}'
'\u{DAAA}'
'\u{DBFF}'
6 changes: 0 additions & 6 deletions crates/parser/test_data/lexer/ok/byte_strings.rast
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
BYTE "b''"
WHITESPACE " "
BYTE "b'x'"
WHITESPACE " "
BYTE_STRING "b\"foo\""
WHITESPACE " "
BYTE_STRING "br\"\""
WHITESPACE "\n"
BYTE "b''suf"
WHITESPACE " "
BYTE_STRING "b\"\"ix"
WHITESPACE " "
BYTE_STRING "br\"\"br"
Expand All @@ -17,6 +13,4 @@ WHITESPACE " "
BYTE "b'\\\\'"
WHITESPACE " "
BYTE "b'\\''"
WHITESPACE " "
BYTE "b'hello'"
WHITESPACE "\n"
6 changes: 3 additions & 3 deletions crates/parser/test_data/lexer/ok/byte_strings.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
b'' b'x' b"foo" br""
b''suf b""ix br""br
b'\n' b'\\' b'\'' b'hello'
b'x' b"foo" br""
b""ix br""br
b'\n' b'\\' b'\''

0 comments on commit 1fe6ac8

Please sign in to comment.