Skip to content

Commit

Permalink
Auto merge of #15746 - pvalletbo:string-literals-diagnose, r=Veykril
Browse files Browse the repository at this point in the history
String literals diagnose

Continues the work from #15744 by adding diagnostic errors for the Str, ByteStr, and CStr literal kinds.

Also replaces `unescape_char` with `unescape_byte` so that the correct method is used for Byte literals.
  • Loading branch information
bors committed Oct 16, 2023
2 parents 2910dbf + 6845c80 commit d6afb4f
Show file tree
Hide file tree
Showing 9 changed files with 177 additions and 21 deletions.
41 changes: 40 additions & 1 deletion crates/parser/src/lexed_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ impl<'a> Converter<'a> {
let text = &self.res.text[self.offset + 2..][..len - 2];
let i = text.rfind('\'').unwrap();
let text = &text[..i];
if let Err(e) = rustc_lexer::unescape::unescape_char(text) {
if let Err(e) = rustc_lexer::unescape::unescape_byte(text) {
err = error_to_diagnostic_message(e, Mode::Byte);
}
}
Expand All @@ -284,18 +284,33 @@ impl<'a> Converter<'a> {
rustc_lexer::LiteralKind::Str { terminated } => {
if !terminated {
err = "Missing trailing `\"` symbol to terminate the string literal";
} else {
let text = &self.res.text[self.offset + 1..][..len - 1];
let i = text.rfind('"').unwrap();
let text = &text[..i];
err = unescape_string_error_message(text, Mode::Str);
}
STRING
}
rustc_lexer::LiteralKind::ByteStr { terminated } => {
if !terminated {
err = "Missing trailing `\"` symbol to terminate the byte string literal";
} else {
let text = &self.res.text[self.offset + 2..][..len - 2];
let i = text.rfind('"').unwrap();
let text = &text[..i];
err = unescape_string_error_message(text, Mode::ByteStr);
}
BYTE_STRING
}
rustc_lexer::LiteralKind::CStr { terminated } => {
if !terminated {
err = "Missing trailing `\"` symbol to terminate the string literal";
} else {
let text = &self.res.text[self.offset + 2..][..len - 2];
let i = text.rfind('"').unwrap();
let text = &text[..i];
err = unescape_string_error_message(text, Mode::CStr);
}
C_STRING
}
Expand Down Expand Up @@ -360,3 +375,27 @@ fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {
EscapeError::MultipleSkippedLinesWarning => "",
}
}

/// Validates the escape sequences of a string-like literal body and returns a
/// diagnostic message for it.
///
/// `text` is the literal's contents with the prefix and surrounding quotes
/// already stripped by the caller. `mode` selects the unescape routine:
/// `Mode::CStr` goes through `unescape_c_string`, while `Mode::Str` and
/// `Mode::ByteStr` go through `unescape_literal`. Any other mode is not
/// handled here and yields an empty message.
///
/// Returns `""` when no escape error was reported. When several errors are
/// reported, the message of the last one that has a non-empty message is
/// returned.
fn unescape_string_error_message(text: &str, mode: Mode) -> &'static str {
    let mut error_message = "";
    match mode {
        Mode::CStr => {
            rustc_lexer::unescape::unescape_c_string(text, mode, &mut |_, res| {
                if let Err(e) = res {
                    let message = error_to_diagnostic_message(e, mode);
                    // Some `EscapeError` variants (e.g. `MultipleSkippedLinesWarning`)
                    // map to an empty message; they must not erase a real error
                    // that was recorded earlier in the same literal.
                    if !message.is_empty() {
                        error_message = message;
                    }
                }
            });
        }
        Mode::ByteStr | Mode::Str => {
            rustc_lexer::unescape::unescape_literal(text, mode, &mut |_, res| {
                if let Err(e) = res {
                    let message = error_to_diagnostic_message(e, mode);
                    // Same rule as above: an empty (warning-only) message never
                    // overwrites an already recorded diagnostic.
                    if !message.is_empty() {
                        error_message = message;
                    }
                }
            });
        }
        _ => {
            // Other Modes are not supported yet or do not apply
        }
    }
    error_message
}
28 changes: 11 additions & 17 deletions crates/parser/test_data/lexer/err/byte_char_literals.rast
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ BYTE "b'\\'a'" error: character literal may only contain one codepoint
WHITESPACE "\n"
BYTE "b'\\0a'" error: character literal may only contain one codepoint
WHITESPACE "\n"
BYTE "b'\\u{0}x'" error: character literal may only contain one codepoint
BYTE "b'\\u{0}x'" error: unicode escape in byte string
WHITESPACE "\n"
BYTE "b'\\u{1F63b}}'" error: character literal may only contain one codepoint
BYTE "b'\\u{1F63b}}'" error: unicode escape in byte string
WHITESPACE "\n"
BYTE "b'\\v'" error: unknown byte escape
WHITESPACE "\n"
Expand All @@ -50,12 +50,6 @@ BYTE "b'\\x🦀'" error: invalid character in numeric character escape
WHITESPACE "\n"
BYTE "b'\\xtt'" error: invalid character in numeric character escape
WHITESPACE "\n"
BYTE "b'\\xff'" error: out of range hex escape
WHITESPACE "\n"
BYTE "b'\\xFF'" error: out of range hex escape
WHITESPACE "\n"
BYTE "b'\\x80'" error: out of range hex escape
WHITESPACE "\n"
BYTE "b'\\u'" error: incorrect unicode escape sequence
WHITESPACE "\n"
BYTE "b'\\u[0123]'" error: incorrect unicode escape sequence
Expand All @@ -72,21 +66,21 @@ BYTE "b'\\u{_0000}'" error: invalid start of unicode escape
WHITESPACE "\n"
BYTE "b'\\u{0000000}'" error: overlong unicode escape
WHITESPACE "\n"
BYTE "b'\\u{FFFFFF}'" error: invalid unicode character escape
BYTE "b'\\u{FFFFFF}'" error: unicode escape in byte string
WHITESPACE "\n"
BYTE "b'\\u{ffffff}'" error: invalid unicode character escape
BYTE "b'\\u{ffffff}'" error: unicode escape in byte string
WHITESPACE "\n"
BYTE "b'\\u{ffffff}'" error: invalid unicode character escape
BYTE "b'\\u{ffffff}'" error: unicode escape in byte string
WHITESPACE "\n"
BYTE "b'\\u{DC00}'" error: invalid unicode character escape
BYTE "b'\\u{DC00}'" error: unicode escape in byte string
WHITESPACE "\n"
BYTE "b'\\u{DDDD}'" error: invalid unicode character escape
BYTE "b'\\u{DDDD}'" error: unicode escape in byte string
WHITESPACE "\n"
BYTE "b'\\u{DFFF}'" error: invalid unicode character escape
BYTE "b'\\u{DFFF}'" error: unicode escape in byte string
WHITESPACE "\n"
BYTE "b'\\u{D800}'" error: invalid unicode character escape
BYTE "b'\\u{D800}'" error: unicode escape in byte string
WHITESPACE "\n"
BYTE "b'\\u{DAAA}'" error: invalid unicode character escape
BYTE "b'\\u{DAAA}'" error: unicode escape in byte string
WHITESPACE "\n"
BYTE "b'\\u{DBFF}'" error: invalid unicode character escape
BYTE "b'\\u{DBFF}'" error: unicode escape in byte string
WHITESPACE "\n"
3 changes: 0 additions & 3 deletions crates/parser/test_data/lexer/err/byte_char_literals.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,6 @@ b'\xx'
b'\xы'
b'\x🦀'
b'\xtt'
b'\xff'
b'\xFF'
b'\x80'
b'\u'
b'\u[0123]'
b'\u{0x}'
Expand Down
28 changes: 28 additions & 0 deletions crates/parser/test_data/lexer/err/byte_strings.rast
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
BYTE_STRING "b\"\\💩\"" error: unknown byte escape
WHITESPACE "\n"
BYTE_STRING "b\"\\●\"" error: unknown byte escape
WHITESPACE "\n"
BYTE_STRING "b\"\\u{_0000}\"" error: invalid start of unicode escape
WHITESPACE "\n"
BYTE_STRING "b\"\\u{0000000}\"" error: overlong unicode escape
WHITESPACE "\n"
BYTE_STRING "b\"\\u{FFFFFF}\"" error: unicode escape in byte string
WHITESPACE "\n"
BYTE_STRING "b\"\\u{ffffff}\"" error: unicode escape in byte string
WHITESPACE "\n"
BYTE_STRING "b\"\\u{ffffff}\"" error: unicode escape in byte string
WHITESPACE "\n"
BYTE_STRING "b\"\\u{DC00}\"" error: unicode escape in byte string
WHITESPACE "\n"
BYTE_STRING "b\"\\u{DDDD}\"" error: unicode escape in byte string
WHITESPACE "\n"
BYTE_STRING "b\"\\u{DFFF}\"" error: unicode escape in byte string
WHITESPACE "\n"
BYTE_STRING "b\"\\u{D800}\"" error: unicode escape in byte string
WHITESPACE "\n"
BYTE_STRING "b\"\\u{DAAA}\"" error: unicode escape in byte string
WHITESPACE "\n"
BYTE_STRING "b\"\\u{DBFF}\"" error: unicode escape in byte string
WHITESPACE "\n"
BYTE_STRING "b\"\\xы\"" error: invalid character in numeric character escape
WHITESPACE "\n"
14 changes: 14 additions & 0 deletions crates/parser/test_data/lexer/err/byte_strings.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
b"\💩"
b"\●"
b"\u{_0000}"
b"\u{0000000}"
b"\u{FFFFFF}"
b"\u{ffffff}"
b"\u{ffffff}"
b"\u{DC00}"
b"\u{DDDD}"
b"\u{DFFF}"
b"\u{D800}"
b"\u{DAAA}"
b"\u{DBFF}"
b"\xы"
28 changes: 28 additions & 0 deletions crates/parser/test_data/lexer/err/c_strings.rast
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
C_STRING "c\"\\💩\"" error: unknown character escape
WHITESPACE "\n"
C_STRING "c\"\\●\"" error: unknown character escape
WHITESPACE "\n"
C_STRING "c\"\\u{_0000}\"" error: invalid start of unicode escape
WHITESPACE "\n"
C_STRING "c\"\\u{0000000}\"" error: overlong unicode escape
WHITESPACE "\n"
C_STRING "c\"\\u{FFFFFF}\"" error: invalid unicode character escape
WHITESPACE "\n"
C_STRING "c\"\\u{ffffff}\"" error: invalid unicode character escape
WHITESPACE "\n"
C_STRING "c\"\\u{ffffff}\"" error: invalid unicode character escape
WHITESPACE "\n"
C_STRING "c\"\\u{DC00}\"" error: invalid unicode character escape
WHITESPACE "\n"
C_STRING "c\"\\u{DDDD}\"" error: invalid unicode character escape
WHITESPACE "\n"
C_STRING "c\"\\u{DFFF}\"" error: invalid unicode character escape
WHITESPACE "\n"
C_STRING "c\"\\u{D800}\"" error: invalid unicode character escape
WHITESPACE "\n"
C_STRING "c\"\\u{DAAA}\"" error: invalid unicode character escape
WHITESPACE "\n"
C_STRING "c\"\\u{DBFF}\"" error: invalid unicode character escape
WHITESPACE "\n"
C_STRING "c\"\\xы\"" error: invalid character in numeric character escape
WHITESPACE "\n"
14 changes: 14 additions & 0 deletions crates/parser/test_data/lexer/err/c_strings.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
c"\💩"
c"\●"
c"\u{_0000}"
c"\u{0000000}"
c"\u{FFFFFF}"
c"\u{ffffff}"
c"\u{ffffff}"
c"\u{DC00}"
c"\u{DDDD}"
c"\u{DFFF}"
c"\u{D800}"
c"\u{DAAA}"
c"\u{DBFF}"
c"\xы"
28 changes: 28 additions & 0 deletions crates/parser/test_data/lexer/err/strings.rast
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
STRING "\"\\💩\"" error: unknown character escape
WHITESPACE "\n"
STRING "\"\\●\"" error: unknown character escape
WHITESPACE "\n"
STRING "\"\\u{_0000}\"" error: invalid start of unicode escape
WHITESPACE "\n"
STRING "\"\\u{0000000}\"" error: overlong unicode escape
WHITESPACE "\n"
STRING "\"\\u{FFFFFF}\"" error: invalid unicode character escape
WHITESPACE "\n"
STRING "\"\\u{ffffff}\"" error: invalid unicode character escape
WHITESPACE "\n"
STRING "\"\\u{ffffff}\"" error: invalid unicode character escape
WHITESPACE "\n"
STRING "\"\\u{DC00}\"" error: invalid unicode character escape
WHITESPACE "\n"
STRING "\"\\u{DDDD}\"" error: invalid unicode character escape
WHITESPACE "\n"
STRING "\"\\u{DFFF}\"" error: invalid unicode character escape
WHITESPACE "\n"
STRING "\"\\u{D800}\"" error: invalid unicode character escape
WHITESPACE "\n"
STRING "\"\\u{DAAA}\"" error: invalid unicode character escape
WHITESPACE "\n"
STRING "\"\\u{DBFF}\"" error: invalid unicode character escape
WHITESPACE "\n"
STRING "\"\\xы\"" error: invalid character in numeric character escape
WHITESPACE "\n"
14 changes: 14 additions & 0 deletions crates/parser/test_data/lexer/err/strings.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"\💩"
"\●"
"\u{_0000}"
"\u{0000000}"
"\u{FFFFFF}"
"\u{ffffff}"
"\u{ffffff}"
"\u{DC00}"
"\u{DDDD}"
"\u{DFFF}"
"\u{D800}"
"\u{DAAA}"
"\u{DBFF}"
"\xы"

0 comments on commit d6afb4f

Please sign in to comment.