diff --git a/compiler/rustc_builtin_macros/src/format.rs b/compiler/rustc_builtin_macros/src/format.rs index e93a23394c03f..db2ef7fba4b8e 100644 --- a/compiler/rustc_builtin_macros/src/format.rs +++ b/compiler/rustc_builtin_macros/src/format.rs @@ -36,6 +36,21 @@ enum PositionUsedAs { } use PositionUsedAs::*; +struct MacroInput { + fmtstr: P<Expr>, + args: FormatArguments, + /// Whether the first argument was a string literal or a result from eager macro expansion. + /// If it's not a string literal, we disallow implicit argument capturing. + /// + /// This does not correspond to whether we can treat spans to the literal normally, as the whole + /// invocation might be the result of another macro expansion, in which case this flag may still be true. + /// + /// See [RFC 2795] for more information. + /// + /// [RFC 2795]: https://rust-lang.github.io/rfcs/2795-format-args-implicit-identifiers.html#macro-hygiene + is_direct_literal: bool, +} + /// Parses the arguments from the given list of tokens, returning the diagnostic /// if there's a parse error so we can continue parsing other format! /// expressions. @@ -45,11 +60,7 @@ use PositionUsedAs::*; /// ```text /// Ok((fmtstr, parsed arguments)) /// ``` -fn parse_args<'a>( - ecx: &mut ExtCtxt<'a>, - sp: Span, - tts: TokenStream, -) -> PResult<'a, (P<Expr>, FormatArguments)> { +fn parse_args<'a>(ecx: &mut ExtCtxt<'a>, sp: Span, tts: TokenStream) -> PResult<'a, MacroInput> { let mut args = FormatArguments::new(); let mut p = ecx.new_parser_from_tts(tts); @@ -59,25 +70,21 @@ fn parse_args<'a>( } let first_token = &p.token; - let fmtstr = match first_token.kind { - token::TokenKind::Literal(token::Lit { - kind: token::LitKind::Str | token::LitKind::StrRaw(_), - .. - }) => { - // If the first token is a string literal, then a format expression - // is constructed from it. 
- // - // This allows us to properly handle cases when the first comma - // after the format string is mistakenly replaced with any operator, - // which cause the expression parser to eat too much tokens. - p.parse_literal_maybe_minus()? - } - _ => { - // Otherwise, we fall back to the expression parser. - p.parse_expr()? - } + + let fmtstr = if let token::Literal(lit) = first_token.kind && matches!(lit.kind, token::Str | token::StrRaw(_)) { + // This allows us to properly handle cases when the first comma + // after the format string is mistakenly replaced with any operator, + // which cause the expression parser to eat too much tokens. + p.parse_literal_maybe_minus()? + } else { + // Otherwise, we fall back to the expression parser. + p.parse_expr()? }; + // Only allow implicit captures to be used when the argument is a direct literal + // instead of a macro expanding to one. + let is_direct_literal = matches!(fmtstr.kind, ExprKind::Lit(_)); + let mut first = true; while p.token != token::Eof { @@ -147,17 +154,19 @@ fn parse_args<'a>( } } } - Ok((fmtstr, args)) + Ok(MacroInput { fmtstr, args, is_direct_literal }) } -pub fn make_format_args( +fn make_format_args( ecx: &mut ExtCtxt<'_>, - efmt: P, - mut args: FormatArguments, + input: MacroInput, append_newline: bool, ) -> Result { let msg = "format argument must be a string literal"; - let unexpanded_fmt_span = efmt.span; + let unexpanded_fmt_span = input.fmtstr.span; + + let MacroInput { fmtstr: efmt, mut args, is_direct_literal } = input; + let (fmt_str, fmt_style, fmt_span) = match expr_to_spanned_string(ecx, efmt, msg) { Ok(mut fmt) if append_newline => { fmt.0 = Symbol::intern(&format!("{}\n", fmt.0)); @@ -208,11 +217,11 @@ pub fn make_format_args( } } - let is_literal = parser.is_literal; + let is_source_literal = parser.is_source_literal; if !parser.errors.is_empty() { let err = parser.errors.remove(0); - let sp = if is_literal { + let sp = if is_source_literal { 
fmt_span.from_inner(InnerSpan::new(err.span.start, err.span.end)) } else { // The format string could be another macro invocation, e.g.: @@ -230,7 +239,7 @@ pub fn make_format_args( if let Some(note) = err.note { e.note(&note); } - if let Some((label, span)) = err.secondary_label && is_literal { + if let Some((label, span)) = err.secondary_label && is_source_literal { e.span_label(fmt_span.from_inner(InnerSpan::new(span.start, span.end)), label); } if err.should_be_replaced_with_positional_argument { @@ -256,7 +265,7 @@ pub fn make_format_args( } let to_span = |inner_span: rustc_parse_format::InnerSpan| { - is_literal.then(|| { + is_source_literal.then(|| { fmt_span.from_inner(InnerSpan { start: inner_span.start, end: inner_span.end }) }) }; @@ -304,7 +313,7 @@ pub fn make_format_args( // Name not found in `args`, so we add it as an implicitly captured argument. let span = span.unwrap_or(fmt_span); let ident = Ident::new(name, span); - let expr = if is_literal { + let expr = if is_direct_literal { ecx.expr_ident(span, ident) } else { // For the moment capturing variables from format strings expanded from macros is @@ -814,7 +823,7 @@ fn report_invalid_references( // for `println!("{7:7$}", 1);` indexes.sort(); indexes.dedup(); - let span: MultiSpan = if !parser.is_literal || parser.arg_places.is_empty() { + let span: MultiSpan = if !parser.is_source_literal || parser.arg_places.is_empty() { MultiSpan::from_span(fmt_span) } else { MultiSpan::from_spans(invalid_refs.iter().filter_map(|&(_, span, _, _)| span).collect()) @@ -855,8 +864,8 @@ fn expand_format_args_impl<'cx>( ) -> Box<dyn base::MacResult + 'cx> { sp = ecx.with_def_site_ctxt(sp); match parse_args(ecx, sp, tts) { - Ok((efmt, args)) => { - if let Ok(format_args) = make_format_args(ecx, efmt, args, nl) { + Ok(input) => { + if let Ok(format_args) = make_format_args(ecx, input, nl) { MacEager::expr(ecx.expr(sp, ExprKind::FormatArgs(P(format_args)))) } else { MacEager::expr(DummyResult::raw_expr(sp, true)) diff --git 
a/compiler/rustc_parse_format/src/lib.rs b/compiler/rustc_parse_format/src/lib.rs index 8a3cedfee7952..1c5410c5658c6 100644 --- a/compiler/rustc_parse_format/src/lib.rs +++ b/compiler/rustc_parse_format/src/lib.rs @@ -14,6 +14,7 @@ // We want to be able to build this crate with a stable compiler, so no // `#![feature]` attributes should be added. +use rustc_lexer::unescape; pub use Alignment::*; pub use Count::*; pub use Piece::*; @@ -234,8 +235,10 @@ pub struct Parser<'a> { last_opening_brace: Option, /// Whether the source string is comes from `println!` as opposed to `format!` or `print!` append_newline: bool, - /// Whether this formatting string is a literal or it comes from a macro. - pub is_literal: bool, + /// Whether this formatting string was written directly in the source. This controls whether we + /// can use spans to refer into it and give better error messages. + /// N.B: This does _not_ control whether implicit argument captures can be used. + pub is_source_literal: bool, /// Start position of the current line. cur_line_start: usize, /// Start and end byte offset of every line of the format string. 
Excludes @@ -262,7 +265,7 @@ impl<'a> Iterator for Parser<'a> { } else { let arg = self.argument(lbrace_end); if let Some(rbrace_pos) = self.must_consume('}') { - if self.is_literal { + if self.is_source_literal { let lbrace_byte_pos = self.to_span_index(pos); let rbrace_byte_pos = self.to_span_index(rbrace_pos); @@ -302,7 +305,7 @@ impl<'a> Iterator for Parser<'a> { _ => Some(String(self.string(pos))), } } else { - if self.is_literal { + if self.is_source_literal { let span = self.span(self.cur_line_start, self.input.len()); if self.line_spans.last() != Some(&span) { self.line_spans.push(span); @@ -322,8 +325,8 @@ impl<'a> Parser<'a> { append_newline: bool, mode: ParseMode, ) -> Parser<'a> { - let input_string_kind = find_width_map_from_snippet(snippet, style); - let (width_map, is_literal) = match input_string_kind { + let input_string_kind = find_width_map_from_snippet(s, snippet, style); + let (width_map, is_source_literal) = match input_string_kind { InputStringKind::Literal { width_mappings } => (width_mappings, true), InputStringKind::NotALiteral => (Vec::new(), false), }; @@ -339,7 +342,7 @@ impl<'a> Parser<'a> { width_map, last_opening_brace: None, append_newline, - is_literal, + is_source_literal, cur_line_start: 0, line_spans: vec![], } @@ -532,13 +535,13 @@ impl<'a> Parser<'a> { '{' | '}' => { return &self.input[start..pos]; } - '\n' if self.is_literal => { + '\n' if self.is_source_literal => { self.line_spans.push(self.span(self.cur_line_start, pos)); self.cur_line_start = pos + 1; self.cur.next(); } _ => { - if self.is_literal && pos == self.cur_line_start && c.is_whitespace() { + if self.is_source_literal && pos == self.cur_line_start && c.is_whitespace() { self.cur_line_start = pos + c.len_utf8(); } self.cur.next(); @@ -890,6 +893,7 @@ impl<'a> Parser<'a> { /// written code (code snippet) and the `InternedString` that gets processed in the `Parser` /// in order to properly synthesise the intra-string `Span`s for error diagnostics. 
fn find_width_map_from_snippet( + input: &str, snippet: Option<string::String>, str_style: Option<usize>, ) -> InputStringKind { @@ -902,8 +906,27 @@ fn find_width_map_from_snippet( return InputStringKind::Literal { width_mappings: Vec::new() }; } + // Strip quotes. let snippet = &snippet[1..snippet.len() - 1]; + // Macros like `println` add a newline at the end. That technically doesn't make them "literals" anymore, but it's fine + // since we will never need to point our spans there, so we lie about it here by ignoring it. + // Since there might actually be newlines in the source code, we need to normalize away all trailing newlines. + // If we only trimmed it off the input, `format!("\n")` would cause a mismatch as here they actually match up. + // Alternatively, we could just count the trailing newlines and only trim one from the input if they don't match up. + let input_no_nl = input.trim_end_matches('\n'); + let Some(unescaped) = unescape_string(snippet) else { + return InputStringKind::NotALiteral; + }; + + let unescaped_no_nl = unescaped.trim_end_matches('\n'); + + if unescaped_no_nl != input_no_nl { + // The source string that we're pointing at isn't our input, so spans pointing at it will be incorrect. + // This can for example happen with proc macros that respan generated literals. 
+ return InputStringKind::NotALiteral; + } + let mut s = snippet.char_indices(); let mut width_mappings = vec![]; while let Some((pos, c)) = s.next() { @@ -986,6 +1009,19 @@ fn find_width_map_from_snippet( InputStringKind::Literal { width_mappings } } +fn unescape_string(string: &str) -> Option<string::String> { + let mut buf = string::String::new(); + let mut ok = true; + unescape::unescape_literal(string, unescape::Mode::Str, &mut |_, unescaped_char| { + match unescaped_char { + Ok(c) => buf.push(c), + Err(_) => ok = false, + } + }); + + ok.then_some(buf) +} + // Assert a reasonable size for `Piece` #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] rustc_data_structures::static_assert_size!(Piece<'_>, 16); diff --git a/tests/ui/fmt/auxiliary/format-string-proc-macro.rs b/tests/ui/fmt/auxiliary/format-string-proc-macro.rs index 1b7ef93f41d57..0c39ade721fac 100644 --- a/tests/ui/fmt/auxiliary/format-string-proc-macro.rs +++ b/tests/ui/fmt/auxiliary/format-string-proc-macro.rs @@ -28,25 +28,41 @@ pub fn err_with_input_span(input: TokenStream) -> TokenStream { TokenStream::from(TokenTree::Literal(lit)) } +fn build_format(args: impl Into<TokenStream>) -> TokenStream { + TokenStream::from_iter([ + TokenTree::from(Ident::new("format", Span::call_site())), + TokenTree::from(Punct::new('!', Spacing::Alone)), + TokenTree::from(Group::new(Delimiter::Parenthesis, args.into())), + ]) +} #[proc_macro] pub fn respan_to_invalid_format_literal(input: TokenStream) -> TokenStream { let mut s = Literal::string("{"); s.set_span(input.into_iter().next().unwrap().span()); - TokenStream::from_iter([ - TokenTree::from(Ident::new("format", Span::call_site())), - TokenTree::from(Punct::new('!', Spacing::Alone)), - TokenTree::from(Group::new(Delimiter::Parenthesis, TokenTree::from(s).into())), - ]) + + build_format(TokenTree::from(s)) } #[proc_macro] pub fn capture_a_with_prepended_space_preserve_span(input: TokenStream) -> TokenStream { let mut s = Literal::string(" {a}"); 
s.set_span(input.into_iter().next().unwrap().span()); - TokenStream::from_iter([ - TokenTree::from(Ident::new("format", Span::call_site())), - TokenTree::from(Punct::new('!', Spacing::Alone)), - TokenTree::from(Group::new(Delimiter::Parenthesis, TokenTree::from(s).into())), - ]) + + build_format(TokenTree::from(s)) +} + +#[proc_macro] +pub fn format_args_captures(_: TokenStream) -> TokenStream { + r#"{ let x = 5; format!("{x}") }"#.parse().unwrap() +} + +#[proc_macro] +pub fn bad_format_args_captures(_: TokenStream) -> TokenStream { + r#"{ let x = 5; format!(concat!("{x}")) }"#.parse().unwrap() +} + +#[proc_macro] +pub fn identity_pm(input: TokenStream) -> TokenStream { + input } diff --git a/tests/ui/fmt/format-args-capture-first-literal-is-macro.rs b/tests/ui/fmt/format-args-capture-first-literal-is-macro.rs new file mode 100644 index 0000000000000..bf5c0dcb54d39 --- /dev/null +++ b/tests/ui/fmt/format-args-capture-first-literal-is-macro.rs @@ -0,0 +1,21 @@ +// aux-build:format-string-proc-macro.rs + +#[macro_use] +extern crate format_string_proc_macro; + +macro_rules! identity_mbe { + ($tt:tt) => { + $tt + //~^ ERROR there is no argument named `a` + }; +} + +fn main() { + let a = 0; + + format!(identity_pm!("{a}")); + //~^ ERROR there is no argument named `a` + format!(identity_mbe!("{a}")); + format!(concat!("{a}")); + //~^ ERROR there is no argument named `a` +} diff --git a/tests/ui/fmt/format-args-capture-first-literal-is-macro.stderr b/tests/ui/fmt/format-args-capture-first-literal-is-macro.stderr new file mode 100644 index 0000000000000..4cf3afad7b8f4 --- /dev/null +++ b/tests/ui/fmt/format-args-capture-first-literal-is-macro.stderr @@ -0,0 +1,30 @@ +error: there is no argument named `a` + --> $DIR/format-args-capture-first-literal-is-macro.rs:16:26 + | +LL | format!(identity_pm!("{a}")); + | ^^^^^ + | + = note: did you intend to capture a variable `a` from the surrounding scope? 
+ = note: to avoid ambiguity, `format_args!` cannot capture variables when the format string is expanded from a macro + +error: there is no argument named `a` + --> $DIR/format-args-capture-first-literal-is-macro.rs:8:9 + | +LL | $tt + | ^^^ + | + = note: did you intend to capture a variable `a` from the surrounding scope? + = note: to avoid ambiguity, `format_args!` cannot capture variables when the format string is expanded from a macro + +error: there is no argument named `a` + --> $DIR/format-args-capture-first-literal-is-macro.rs:19:13 + | +LL | format!(concat!("{a}")); + | ^^^^^^^^^^^^^^ + | + = note: did you intend to capture a variable `a` from the surrounding scope? + = note: to avoid ambiguity, `format_args!` cannot capture variables when the format string is expanded from a macro + = note: this error originates in the macro `concat` (in Nightly builds, run with -Z macro-backtrace for more info) + +error: aborting due to 3 previous errors + diff --git a/tests/ui/fmt/format-args-capture-from-pm-first-arg-macro.rs b/tests/ui/fmt/format-args-capture-from-pm-first-arg-macro.rs new file mode 100644 index 0000000000000..f67edf5e16721 --- /dev/null +++ b/tests/ui/fmt/format-args-capture-from-pm-first-arg-macro.rs @@ -0,0 +1,8 @@ +// aux-build:format-string-proc-macro.rs + +extern crate format_string_proc_macro; + +fn main() { + format_string_proc_macro::bad_format_args_captures!(); + //~^ ERROR there is no argument named `x` +} diff --git a/tests/ui/fmt/format-args-capture-from-pm-first-arg-macro.stderr b/tests/ui/fmt/format-args-capture-from-pm-first-arg-macro.stderr new file mode 100644 index 0000000000000..bb6a14d88b3d6 --- /dev/null +++ b/tests/ui/fmt/format-args-capture-from-pm-first-arg-macro.stderr @@ -0,0 +1,12 @@ +error: there is no argument named `x` + --> $DIR/format-args-capture-from-pm-first-arg-macro.rs:6:5 + | +LL | format_string_proc_macro::bad_format_args_captures!(); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | + = note: did 
you intend to capture a variable `x` from the surrounding scope? + = note: to avoid ambiguity, `format_args!` cannot capture variables when the format string is expanded from a macro + = note: this error originates in the macro `concat` (in Nightly builds, run with -Z macro-backtrace for more info) + +error: aborting due to previous error + diff --git a/tests/ui/fmt/format-args-capture-issue-106408.rs b/tests/ui/fmt/format-args-capture-issue-106408.rs new file mode 100644 index 0000000000000..0fd195416ee5b --- /dev/null +++ b/tests/ui/fmt/format-args-capture-issue-106408.rs @@ -0,0 +1,10 @@ +// check-pass +// aux-build:format-string-proc-macro.rs + +extern crate format_string_proc_macro; + +fn main() { + // While literal macros like `format_args!(concat!())` are not supposed to work with implicit + // captures, it should work if the whole invocation comes from a macro expansion (#106408). + format_string_proc_macro::format_args_captures!(); +} diff --git a/tests/ui/fmt/format-args-capture-macro-hygiene-pass.rs b/tests/ui/fmt/format-args-capture-macro-hygiene-pass.rs new file mode 100644 index 0000000000000..7553fcc4e01cb --- /dev/null +++ b/tests/ui/fmt/format-args-capture-macro-hygiene-pass.rs @@ -0,0 +1,16 @@ +// run-pass + +macro_rules! 
format_mbe { + ($tt:tt) => { + { + #[allow(unused_variables)] + let a = 123; + format!($tt) + } + }; +} + +fn main() { + let a = 0; + assert_eq!(format_mbe!("{a}"), "0"); +} diff --git a/tests/ui/fmt/respanned-literal-issue-106191.rs b/tests/ui/fmt/respanned-literal-issue-106191.rs index 5a18983a3fa70..44642a10fc076 100644 --- a/tests/ui/fmt/respanned-literal-issue-106191.rs +++ b/tests/ui/fmt/respanned-literal-issue-106191.rs @@ -1,15 +1,10 @@ // aux-build:format-string-proc-macro.rs -// check-fail -// known-bug: #106191 -// unset-rustc-env:RUST_BACKTRACE -// had to be reverted -// error-pattern:unexpectedly panicked -// failure-status:101 -// dont-check-compiler-stderr extern crate format_string_proc_macro; fn main() { format_string_proc_macro::respan_to_invalid_format_literal!("¡"); + //~^ ERROR invalid format string: expected `'}'` but string was terminated format_args!(r#concat!("¡ {")); + //~^ ERROR invalid format string: expected `'}'` but string was terminated } diff --git a/tests/ui/fmt/respanned-literal-issue-106191.stderr b/tests/ui/fmt/respanned-literal-issue-106191.stderr index 16717f42253d6..73a3af65a3849 100644 --- a/tests/ui/fmt/respanned-literal-issue-106191.stderr +++ b/tests/ui/fmt/respanned-literal-issue-106191.stderr @@ -1,2 +1,19 @@ - query stack during panic: -end of query stack +error: invalid format string: expected `'}'` but string was terminated + --> $DIR/respanned-literal-issue-106191.rs:6:65 + | +LL | format_string_proc_macro::respan_to_invalid_format_literal!("¡"); + | ^^^ expected `'}'` in format string + | + = note: if you intended to print `{`, you can escape it using `{{` + +error: invalid format string: expected `'}'` but string was terminated + --> $DIR/respanned-literal-issue-106191.rs:8:18 + | +LL | format_args!(r#concat!("¡ {")); + | ^^^^^^^^^^^^^^^^^^^^^^^ expected `'}'` in format string + | + = note: if you intended to print `{`, you can escape it using `{{` + = note: this error originates in the macro `concat` (in Nightly 
builds, run with -Z macro-backtrace for more info) + +error: aborting due to 2 previous errors +