Skip to content

Commit

Permalink
Reserve guarded string literal syntax (RFC 3593)
Browse files Browse the repository at this point in the history
The syntax change applies to all editions, because the
particular syntax `#"foo"#` is unlikely to exist in the wild.
  • Loading branch information
pitaj committed May 2, 2024
1 parent f92d49b commit b64c7f4
Show file tree
Hide file tree
Showing 11 changed files with 345 additions and 9 deletions.
1 change: 1 addition & 0 deletions compiler/rustc_lexer/src/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use std::str::Chars;
///
/// Next characters can be peeked via `first` method,
/// and position can be shifted forward via `bump` method.
#[derive(Clone)]
pub struct Cursor<'a> {
len_remaining: usize,
/// Iterator over chars. Slightly faster than a &str.
Expand Down
92 changes: 84 additions & 8 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ pub mod unescape;
#[cfg(test)]
mod tests;

use std::num::NonZeroU8;

pub use crate::cursor::Cursor;

use self::LiteralKind::*;
Expand Down Expand Up @@ -179,24 +181,27 @@ pub enum DocStyle {
/// `rustc_ast::ast::LitKind`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum LiteralKind {
/// "12_u8", "0o100", "0b120i99", "1f32".
/// `12_u8`, `0o100`, `0b120i99`, `1f32`.
Int { base: Base, empty_int: bool },
/// "12.34f32", "1e3", but not "1f32".
/// `12.34f32`, `1e3`, but not `1f32`.
Float { base: Base, empty_exponent: bool },
/// "'a'", "'\\'", "'''", "';"
/// `'a'`, `'\\'`, `'''`, `';`
Char { terminated: bool },
/// "b'a'", "b'\\'", "b'''", "b';"
/// `b'a'`, `b'\\'`, `b'''`, `b';`
Byte { terminated: bool },
/// ""abc"", ""abc"
/// `"abc"`, `"abc`
Str { terminated: bool },
/// "b"abc"", "b"abc"
/// `b"abc"`, `b"abc`
ByteStr { terminated: bool },
/// `c"abc"`, `c"abc`
CStr { terminated: bool },
/// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
/// `#"abc"#`, `#"a`, `##"a"#`. `None` indicates no closing quote.
/// Allows fewer hashes to close the string to support older editions.
GuardedStr { n_start_hashes: Option<NonZeroU8>, n_end_hashes: u8 },
/// `r"abc"`, `r#"abc"#`, `r####"ab"###"c"####`, `r#"a`. `None` indicates
/// an invalid literal.
RawStr { n_hashes: Option<u8> },
/// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
/// `br"abc"`, `br#"abc"#`, `br####"ab"###"c"####`, `br#"a`. `None`
/// indicates an invalid literal.
RawByteStr { n_hashes: Option<u8> },
/// `cr"abc"`, `cr#"abc"#`, `cr#"a`. `None` indicates an invalid literal.
Expand Down Expand Up @@ -365,6 +370,49 @@ impl Cursor<'_> {
_ => self.ident_or_unknown_prefix(),
},

// Guarded string literal (reserved syntax).
'#' if matches!(self.first(), '"' | '#') => {
// Create a backup to restore later if this
// turns out to not be a guarded literal.
let backup = self.clone();

let mut n_start_hashes: u32 = 1; // Already captured one `#`.
while self.first() == '#' {
n_start_hashes += 1;
self.bump();
}

if self.first() == '"' {
self.bump();

let res = self.guarded_double_quoted_string(n_start_hashes);
let suffix_start = self.pos_within_token();

if let (Ok(n_end_hashes), Ok(n)) = (res, u8::try_from(n_start_hashes)) {
self.eat_literal_suffix();

Literal {
kind: GuardedStr {
n_start_hashes: NonZeroU8::new(n),
// Always succeeds because `n_end_hashes <= n`
n_end_hashes: n_end_hashes.try_into().unwrap(),
},
suffix_start,
}
} else {
Literal {
kind: GuardedStr { n_start_hashes: None, n_end_hashes: 0 },
suffix_start,
}
}
} else {
// Not a guarded string, so restore old state.
*self = backup;
// Return a pound token.
Pound
}
}

// Byte literal, byte string literal, raw byte string literal or identifier.
'b' => self.c_or_byte_string(
|terminated| ByteStr { terminated },
Expand Down Expand Up @@ -758,6 +806,34 @@ impl Cursor<'_> {
false
}

/// Eats the remainder of a guarded string literal: the string body through its
/// closing `"`, followed by at most `n_start_hashes` trailing `#` characters.
///
/// Expects the opening `"` to have just been consumed. On success returns the
/// number of closing hashes that were eaten, which may be smaller than
/// `n_start_hashes`; the caller is responsible for handling that mismatch.
/// Returns `RawStrError::NoTerminator` when `double_quoted_string` reports
/// failure (presumably an unterminated string body).
fn guarded_double_quoted_string(&mut self, n_start_hashes: u32) -> Result<u32, RawStrError> {
    debug_assert!(self.prev() == '"');

    // The body is lexed exactly like an ordinary string literal, so that it
    // can be recovered as one for older editions later on.
    let body_ok = self.double_quoted_string();
    if !body_ok {
        let unterminated = RawStrError::NoTerminator {
            expected: n_start_hashes,
            found: 0,
            possible_terminator_offset: None,
        };
        return Err(unterminated);
    }

    // Eat closing `#`s, but never more than were opened: surplus hashes are
    // left in the input, so `###"abcde"####` yields a guarded string that
    // closes with three hashes, followed by a separate `#` token.
    let mut closing_hashes: u32 = 0;
    while closing_hashes < n_start_hashes && self.first() == '#' {
        self.bump();
        closing_hashes += 1;
    }

    // `closing_hashes < n_start_hashes` is not treated as an error here.
    Ok(closing_hashes)
}

/// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
fn raw_double_quoted_string(&mut self, prefix_len: u32) -> Result<u8, RawStrError> {
// Wrap the actual function to handle the error with too many hashes.
Expand Down
4 changes: 4 additions & 0 deletions compiler/rustc_parse/messages.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,10 @@ parse_require_colon_after_labeled_expression = labeled expression must be follow
.label = the label
.suggestion = add `:` after the label
parse_reserved_guarded_string = invalid string literal
.note = unprefixed guarded string literals are reserved for future use
.suggestion_whitespace = consider inserting whitespace here
parse_return_types_use_thin_arrow = return types are denoted using `->`
.suggestion = use `->` instead
Expand Down
18 changes: 18 additions & 0 deletions compiler/rustc_parse/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2009,6 +2009,24 @@ pub enum UnknownPrefixSugg {
},
}

/// Diagnostic for a guarded string literal (`#"..."#`-style syntax), which is
/// reserved. Rendered from the `parse_reserved_guarded_string` message together
/// with its `.note`.
#[derive(Diagnostic)]
#[diag(parse_reserved_guarded_string)]
#[note]
pub struct ReservedGuardedString {
// Primary span the diagnostic is reported at.
#[primary_span]
pub span: Span,
// Optional suggestion to insert whitespace; omitted when `None`.
#[subdiagnostic]
pub sugg: Option<GuardedStringSugg>,
}
/// Suggestion attached to `ReservedGuardedString`: insert a single space (`code = " "`)
/// at the wrapped span. Shown in verbose style and marked maybe-incorrect, so it is
/// not meant to be applied blindly.
#[derive(Subdiagnostic)]
#[suggestion(
parse_suggestion_whitespace,
code = " ",
applicability = "maybe-incorrect",
style = "verbose"
)]
pub struct GuardedStringSugg(#[primary_span] pub Span);

#[derive(Diagnostic)]
#[diag(parse_too_many_hashes)]
pub struct TooManyHashes {
Expand Down
24 changes: 24 additions & 0 deletions compiler/rustc_parse/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,30 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
self.report_raw_str_error(start, 1);
}
}
// RFC 3593 reserved this syntax for future use.
rustc_lexer::LiteralKind::GuardedStr { n_start_hashes, n_end_hashes } => {
let span = self.mk_sp(start, self.pos);

if let Some(n_start_hashes) = n_start_hashes {
let n = u32::from(n_start_hashes.get());
let e = u32::from(n_end_hashes);
let expn_data = span.ctxt().outer_expn_data();

let space_pos = start + BytePos(n);
let space_span = self.mk_sp(space_pos, space_pos);

let sugg = if expn_data.is_root() {
Some(errors::GuardedStringSugg(space_span))
} else {
None
};

self.dcx().emit_err(errors::ReservedGuardedString { span, sugg });
self.cook_unicode(token::Str, Mode::Str, start, end, 1 + n, 1 + e) // ##" "##
} else {
self.dcx().emit_fatal(errors::ReservedGuardedString { span, sugg: None });
}
}
rustc_lexer::LiteralKind::RawByteStr { n_hashes } => {
if let Some(n_hashes) = n_hashes {
let n = u32::from(n_hashes);
Expand Down
3 changes: 2 additions & 1 deletion src/librustdoc/html/highlight.rs
Original file line number Diff line number Diff line change
Expand Up @@ -850,7 +850,8 @@ impl<'src> Classifier<'src> {
| LiteralKind::RawStr { .. }
| LiteralKind::RawByteStr { .. }
| LiteralKind::CStr { .. }
| LiteralKind::RawCStr { .. } => Class::String,
| LiteralKind::RawCStr { .. }
| LiteralKind::GuardedStr { .. } => Class::String,
// Number literals.
LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
},
Expand Down
4 changes: 4 additions & 0 deletions src/tools/rust-analyzer/crates/parser/src/lexed_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,10 @@ impl<'a> Converter<'a> {
}
C_STRING
}
rustc_lexer::LiteralKind::GuardedStr { .. } => {
err = "Invalid string literal";
STRING
}
};

let err = if err.is_empty() { None } else { Some(err) };
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ impl server::FreeFunctions for RaSpanServer {
3 + n_hashes.unwrap_or_default() as usize,
1 + n_hashes.unwrap_or_default() as usize,
),
LiteralKind::GuardedStr { .. } => return Err(()),
};

let (lit, suffix) = s.split_at(suffix_start as usize);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ impl server::FreeFunctions for TokenIdServer {
3 + n_hashes.unwrap_or_default() as usize,
1 + n_hashes.unwrap_or_default() as usize,
),
LiteralKind::GuardedStr { .. } => return Err(()),
};

let (lit, suffix) = s.split_at(suffix_start as usize);
Expand Down
60 changes: 60 additions & 0 deletions tests/ui/lexer/reserved-guarded-strings.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
//@ compile-flags: -Zunstable-options
//@ edition:2024

macro_rules! demo1 {
    ($t1:tt) => { println!("one tokens") };
}

macro_rules! demo2 {
    ($t1:tt $t2:tt) => { println!("two tokens") };
}

macro_rules! demo3 {
    ($t1:tt $t2:tt $t3:tt) => { println!("three tokens") };
}

macro_rules! demo4 {
    ($t1:tt $t2:tt $t3:tt $t4:tt) => { println!("four tokens") };
}

macro_rules! demo5 {
    ($t1:tt $t2:tt $t3:tt $t4:tt $t5:tt) => { println!("five tokens") };
}

macro_rules! demo6 {
    ($t1:tt $t2:tt $t3:tt $t4:tt $t5:tt $t6:tt) => { println!("six tokens") };
}

macro_rules! demo7 {
    ($t1:tt $t2:tt $t3:tt $t4:tt $t5:tt $t6:tt $t7:tt) => { println!("seven tokens") };
}

fn main() { // `demoN!` only matches exactly N token trees, so each call below pins a token count.
demo1!(""); // a plain string literal is a single token
demo2!(# ""); // whitespace after `#` keeps it a separate token from the string
demo3!(# ""#); // a trailing `#` is its own token too
demo2!(# "foo");
demo3!(## "foo"); // hashes not directly followed by `"` stay individual `#` tokens
demo3!(# "foo"#);
demo4!(### "foo");
demo4!(## "foo"#);
demo7!(### "foo"###); // three `#`, the string, three `#`

demo2!("foo"#); // a string followed by hashes: the string, then `#` tokens
demo4!("foo"###);

demo2!(blah"xx"); //~ ERROR prefix `blah` is unknown
demo2!(blah#"xx"#);
//~^ ERROR prefix `blah` is unknown
//~| ERROR invalid string literal

demo1!(#""); //~ ERROR invalid string literal
demo1!(#""#); //~ ERROR invalid string literal
demo1!(####""); //~ ERROR invalid string literal
demo1!(#"foo"); //~ ERROR invalid string literal
demo1!(###"foo"); //~ ERROR invalid string literal
demo1!(#"foo"#); //~ ERROR invalid string literal
demo1!(###"foo"#); //~ ERROR invalid string literal
demo1!(###"foo"##); //~ ERROR invalid string literal
demo1!(###"foo"###); //~ ERROR invalid string literal
}

0 comments on commit b64c7f4

Please sign in to comment.