Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

macro_rules: Less hacky heuristic for using tt metavariable spans #119204

Merged
merged 1 commit into from
Jan 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
23 changes: 1 addition & 22 deletions compiler/rustc_ast/src/tokenstream.rs
Expand Up @@ -26,7 +26,7 @@ use rustc_span::{sym, Span, Symbol, DUMMY_SP};
use smallvec::{smallvec, SmallVec};

use std::borrow::Cow;
use std::{cmp, fmt, iter, mem};
use std::{cmp, fmt, iter};

/// When the main Rust parser encounters a syntax-extension invocation, it
/// parses the arguments to the invocation as a token tree. This is a very
Expand Down Expand Up @@ -81,14 +81,6 @@ impl TokenTree {
}
}

/// Modify the `TokenTree`'s span in-place.
pub fn set_span(&mut self, span: Span) {
match self {
TokenTree::Token(token, _) => token.span = span,
TokenTree::Delimited(dspan, ..) => *dspan = DelimSpan::from_single(span),
}
}

/// Create a `TokenTree::Token` with alone spacing.
pub fn token_alone(kind: TokenKind, span: Span) -> TokenTree {
TokenTree::Token(Token::new(kind, span), Spacing::Alone)
Expand Down Expand Up @@ -461,19 +453,6 @@ impl TokenStream {
t1.next().is_none() && t2.next().is_none()
}

/// Applies the supplied function to each `TokenTree` and its index in `self`, returning a new `TokenStream`
///
/// It is equivalent to `TokenStream::new(self.trees().cloned().enumerate().map(|(i, tt)| f(i, tt)).collect())`.
pub fn map_enumerated_owned(
mut self,
mut f: impl FnMut(usize, TokenTree) -> TokenTree,
) -> TokenStream {
let owned = Lrc::make_mut(&mut self.0); // clone if necessary
// rely on vec's in-place optimizations to avoid another allocation
*owned = mem::take(owned).into_iter().enumerate().map(|(i, tree)| f(i, tree)).collect();
self
}

/// Create a token stream containing a single token with alone spacing. The
/// spacing used for the final token in a constructed stream doesn't matter
/// because it's never used. In practice we arbitrarily use
Expand Down
35 changes: 2 additions & 33 deletions compiler/rustc_expand/src/mbe/macro_rules.rs
Expand Up @@ -10,7 +10,7 @@ use crate::mbe::transcribe::transcribe;

use rustc_ast as ast;
use rustc_ast::token::{self, Delimiter, NonterminalKind, Token, TokenKind, TokenKind::*};
use rustc_ast::tokenstream::{DelimSpan, TokenStream, TokenTree};
use rustc_ast::tokenstream::{DelimSpan, TokenStream};
use rustc_ast::{NodeId, DUMMY_NODE_ID};
use rustc_ast_pretty::pprust;
use rustc_attr::{self as attr, TransparencyError};
Expand Down Expand Up @@ -213,45 +213,14 @@ fn expand_macro<'cx>(
let arm_span = rhses[i].span();

// rhs has holes ( `$id` and `$(...)` that need filled)
let mut tts = match transcribe(cx, &named_matches, rhs, rhs_span, transparency) {
let tts = match transcribe(cx, &named_matches, rhs, rhs_span, transparency) {
Ok(tts) => tts,
Err(mut err) => {
err.emit();
return DummyResult::any(arm_span);
}
};

// Replace all the tokens for the corresponding positions in the macro, to maintain
// proper positions in error reporting, while maintaining the macro_backtrace.
if tts.len() == rhs.tts.len() {
tts = tts.map_enumerated_owned(|i, mut tt| {
let rhs_tt = &rhs.tts[i];
let ctxt = tt.span().ctxt();
match (&mut tt, rhs_tt) {
// preserve the delim spans if able
(
TokenTree::Delimited(target_sp, ..),
mbe::TokenTree::Delimited(source_sp, ..),
) => {
target_sp.open = source_sp.open.with_ctxt(ctxt);
target_sp.close = source_sp.close.with_ctxt(ctxt);
}
(
TokenTree::Delimited(target_sp, ..),
mbe::TokenTree::MetaVar(source_sp, ..),
) => {
target_sp.open = source_sp.with_ctxt(ctxt);
target_sp.close = source_sp.with_ctxt(ctxt).shrink_to_hi();
}
_ => {
let sp = rhs_tt.span().with_ctxt(ctxt);
tt.set_span(sp);
}
}
tt
});
}

if cx.trace_macros() {
let msg = format!("to `{}`", pprust::tts_to_string(&tts));
trace_macros_note(&mut cx.expansions, sp, msg);
Expand Down
64 changes: 61 additions & 3 deletions compiler/rustc_expand/src/mbe/transcribe.rs
Expand Up @@ -4,7 +4,7 @@ use crate::errors::{
NoSyntaxVarsExprRepeat, VarStillRepeating,
};
use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq, MatchedTokenTree, NamedMatch};
use crate::mbe::{self, MetaVarExpr};
use crate::mbe::{self, KleeneOp, MetaVarExpr};
use rustc_ast::mut_visit::{self, MutVisitor};
use rustc_ast::token::{self, Delimiter, Token, TokenKind};
use rustc_ast::tokenstream::{DelimSpacing, DelimSpan, Spacing, TokenStream, TokenTree};
Expand Down Expand Up @@ -42,6 +42,7 @@ enum Frame<'a> {
tts: &'a [mbe::TokenTree],
idx: usize,
sep: Option<Token>,
kleene_op: KleeneOp,
},
}

Expand Down Expand Up @@ -207,7 +208,7 @@ pub(super) fn transcribe<'a>(

// Is the repetition empty?
if len == 0 {
if seq.kleene.op == mbe::KleeneOp::OneOrMore {
if seq.kleene.op == KleeneOp::OneOrMore {
// FIXME: this really ought to be caught at macro definition
// time... It happens when the Kleene operator in the matcher and
// the body for the same meta-variable do not match.
Expand All @@ -227,6 +228,7 @@ pub(super) fn transcribe<'a>(
idx: 0,
sep: seq.separator.clone(),
tts: &delimited.tts,
kleene_op: seq.kleene.op,
});
}
}
Expand All @@ -243,7 +245,7 @@ pub(super) fn transcribe<'a>(
MatchedTokenTree(tt) => {
// `tt`s are emitted into the output stream directly as "raw tokens",
// without wrapping them into groups.
result.push(tt.clone());
result.push(maybe_use_metavar_location(cx, &stack, sp, tt));
}
MatchedNonterminal(nt) => {
// Other variables are emitted into the output stream as groups with
Expand Down Expand Up @@ -308,6 +310,62 @@ pub(super) fn transcribe<'a>(
}
}

/// Usually metavariables `$var` produce interpolated tokens, which have an additional place for
/// keeping both the original span and the metavariable span. For `tt` metavariables that's not the
/// case however, and there's no place for keeping a second span. So we try to give the single
/// produced span a location that would be most useful in practice (the hygiene part of the span
/// must not be changed).
///
/// Different locations are useful for different purposes:
/// - The original location is useful when we need to report a diagnostic for the original token in
/// isolation, without combining it with any surrounding tokens. This case occurs, but it is not
/// very common in practice.
/// - The metavariable location is useful when we need to somehow combine the token span with spans
/// of its surrounding tokens. This is the most common way to use token spans.
///
/// So this function replaces the original location with the metavariable location in all cases
/// except these two:
/// - The metavariable is an element of undelimited sequence `$($tt)*`.
/// These are typically used for passing larger amounts of code, and tokens in that code usually
/// combine with each other and not with tokens outside of the sequence.
/// - The metavariable span comes from a different crate, then we prefer the more local span.
///
/// FIXME: Find a way to keep both original and metavariable spans for all tokens without
/// regressing compilation time too much. Several experiments for adding such spans were made in
/// the past (PR #95580, #118517, #118671) and all showed some regressions.
fn maybe_use_metavar_location(
cx: &ExtCtxt<'_>,
stack: &[Frame<'_>],
metavar_span: Span,
orig_tt: &TokenTree,
) -> TokenTree {
let undelimited_seq = matches!(
stack.last(),
Some(Frame::Sequence {
tts: [_],
sep: None,
kleene_op: KleeneOp::ZeroOrMore | KleeneOp::OneOrMore,
..
})
);
if undelimited_seq || cx.source_map().is_imported(metavar_span) {
return orig_tt.clone();
}

match orig_tt {
TokenTree::Token(Token { kind, span }, spacing) => {
let span = metavar_span.with_ctxt(span.ctxt());
TokenTree::Token(Token { kind: kind.clone(), span }, *spacing)
}
TokenTree::Delimited(dspan, dspacing, delimiter, tts) => {
let open = metavar_span.shrink_to_lo().with_ctxt(dspan.open.ctxt());
let close = metavar_span.shrink_to_hi().with_ctxt(dspan.close.ctxt());
let dspan = DelimSpan::from_pair(open, close);
TokenTree::Delimited(dspan, *dspacing, *delimiter, tts.clone())
}
}
}

/// Lookup the meta-var named `ident` and return the matched token tree from the invocation using
/// the set of matches `interpolations`.
///
Expand Down
8 changes: 4 additions & 4 deletions tests/coverage/macro_name_span.cov-map
@@ -1,15 +1,15 @@
Function name: macro_name_span::affected_function
Raw bytes (9): 0x[01, 01, 00, 01, 01, 06, 1b, 00, 20]
Raw bytes (9): 0x[01, 01, 00, 01, 01, 16, 1c, 02, 06]
Number of files: 1
- file 0 => global file 1
Number of expressions: 0
Number of file 0 mappings: 1
- Code(Counter(0)) at (prev + 6, 27) to (start + 0, 32)
- Code(Counter(0)) at (prev + 22, 28) to (start + 2, 6)

Function name: macro_name_span::main
Raw bytes (9): 0x[01, 02, 00, 01, 01, 0b, 01, 02, 02]
Raw bytes (9): 0x[01, 01, 00, 01, 01, 0b, 01, 02, 02]
Number of files: 1
- file 0 => global file 2
- file 0 => global file 1
Number of expressions: 0
Number of file 0 mappings: 1
- Code(Counter(0)) at (prev + 11, 1) to (start + 2, 2)
Expand Down
19 changes: 3 additions & 16 deletions tests/coverage/macro_name_span.coverage
@@ -1,16 +1,3 @@
$DIR/auxiliary/macro_name_span_helper.rs:
LL| |// edition: 2021
LL| |
LL| |#[macro_export]
LL| |macro_rules! macro_that_defines_a_function {
LL| | (fn $name:ident () $body:tt) => {
LL| 1| fn $name () -> () $body
LL| | }
LL| |}
LL| |
LL| |// Non-executable comment.

$DIR/macro_name_span.rs:
LL| |// edition: 2021
LL| |
LL| |// Regression test for <https://github.com/rust-lang/rust/issues/117788>.
Expand All @@ -32,8 +19,8 @@ $DIR/macro_name_span.rs:
LL| |}
LL| |
LL| |macro_name_span_helper::macro_that_defines_a_function! {
LL| | fn affected_function() {
LL| | macro_with_an_unreasonably_and_egregiously_long_name!();
LL| | }
LL| 1| fn affected_function() {
LL| 1| macro_with_an_unreasonably_and_egregiously_long_name!();
LL| 1| }
LL| |}

4 changes: 2 additions & 2 deletions tests/ui/macros/issue-118786.stderr
Expand Up @@ -6,8 +6,8 @@ LL | macro_rules! $macro_name {
|
help: change the delimiters to curly braces
|
LL | macro_rules! {} {
| ~ +
LL | macro_rules! {$macro_name} {
| + +
help: add a semicolon
|
LL | macro_rules! $macro_name; {
Expand Down
@@ -1,10 +1,10 @@
error: missing condition for `if` expression
--> $DIR/issue-68091-unicode-ident-after-if.rs:3:14
--> $DIR/issue-68091-unicode-ident-after-if.rs:3:13
|
LL | $($c)ö* {}
| ^ - if this block is the condition of the `if` expression, then it must be followed by another block
| |
| expected condition here
| ^ - if this block is the condition of the `if` expression, then it must be followed by another block
| |
| expected condition here

error: aborting due to 1 previous error

@@ -1,8 +1,8 @@
error: macro expansion ends with an incomplete expression: expected expression
--> $DIR/issue-68092-unicode-ident-after-incomplete-expr.rs:3:14
--> $DIR/issue-68092-unicode-ident-after-incomplete-expr.rs:3:13
|
LL | $($c)ö*
| ^ expected expression
| ^ expected expression

error: aborting due to 1 previous error

2 changes: 1 addition & 1 deletion tests/ui/proc-macro/capture-macro-rules-invoke.stdout
Expand Up @@ -271,7 +271,7 @@ PRINT-BANG INPUT (DEBUG): TokenStream [
span: $DIR/capture-macro-rules-invoke.rs:47:19: 47:20 (#0),
},
],
span: $DIR/capture-macro-rules-invoke.rs:47:13: 47:22 (#0),
span: $DIR/capture-macro-rules-invoke.rs:15:60: 15:63 (#0),
},
Punct {
ch: ',',
Expand Down
4 changes: 2 additions & 2 deletions tests/ui/proc-macro/expand-expr.rs
Expand Up @@ -37,7 +37,7 @@ expand_expr_is!("hello", stringify!(hello));
expand_expr_is!("10 + 20", stringify!(10 + 20));

macro_rules! echo_tts {
($($t:tt)*) => { $($t)* }; //~ ERROR: expected expression, found `$`
($($t:tt)*) => { $($t)* };
}

macro_rules! echo_lit {
Expand Down Expand Up @@ -109,7 +109,7 @@ expand_expr_fail!("string"; hello); //~ ERROR: expected one of `.`, `?`, or an o

// Invalid expressions produce errors in addition to returning `Err(())`.
expand_expr_fail!($); //~ ERROR: expected expression, found `$`
expand_expr_fail!(echo_tts!($));
expand_expr_fail!(echo_tts!($)); //~ ERROR: expected expression, found `$`
expand_expr_fail!(echo_pm!($)); //~ ERROR: expected expression, found `$`

// We get errors reported and recover during macro expansion if the macro
Expand Down
6 changes: 3 additions & 3 deletions tests/ui/proc-macro/expand-expr.stderr
Expand Up @@ -11,10 +11,10 @@ LL | expand_expr_fail!($);
| ^ expected expression

error: expected expression, found `$`
--> $DIR/expand-expr.rs:40:23
--> $DIR/expand-expr.rs:112:29
|
LL | ($($t:tt)*) => { $($t)* };
| ^^^^ expected expression
LL | expand_expr_fail!(echo_tts!($));
| ^ expected expression

error: expected expression, found `$`
--> $DIR/expand-expr.rs:113:28
Expand Down