Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Overhaul MacArgs #96546

Merged
merged 6 commits into from
May 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 58 additions & 6 deletions compiler/rustc_ast/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ pub use GenericArgs::*;
pub use UnsafeSource::*;

use crate::ptr::P;
use crate::token::{self, CommentKind, Delimiter, Token};
use crate::token::{self, CommentKind, Delimiter, Token, TokenKind};
use crate::tokenstream::{DelimSpan, LazyTokenStream, TokenStream, TokenTree};

use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
Expand Down Expand Up @@ -1526,7 +1526,7 @@ impl MacCall {
}

/// Arguments passed to an attribute or a function-like macro.
#[derive(Clone, Encodable, Decodable, Debug, HashStable_Generic)]
#[derive(Clone, Encodable, Decodable, Debug)]
pub enum MacArgs {
/// No arguments - `#[attr]`.
Empty,
Expand All @@ -1536,11 +1536,20 @@ pub enum MacArgs {
Eq(
/// Span of the `=` token.
Span,
/// "value" as a nonterminal token.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • c

Token,
/// The "value".
MacArgsEq,
),
}

// The RHS of a `MacArgs::Eq` starts out as an expression. Once macro expansion
// is completed, all cases end up either as a literal, which is the form used
// after lowering to HIR, or as an error.
#[derive(Clone, Encodable, Decodable, Debug)]
pub enum MacArgsEq {
Ast(P<Expr>),
Hir(Lit),
petrochenkov marked this conversation as resolved.
Show resolved Hide resolved
}

impl MacArgs {
pub fn delim(&self) -> Option<Delimiter> {
match self {
Expand All @@ -1553,7 +1562,10 @@ impl MacArgs {
match self {
MacArgs::Empty => None,
MacArgs::Delimited(dspan, ..) => Some(dspan.entire()),
MacArgs::Eq(eq_span, token) => Some(eq_span.to(token.span)),
MacArgs::Eq(eq_span, MacArgsEq::Ast(expr)) => Some(eq_span.to(expr.span)),
MacArgs::Eq(_, MacArgsEq::Hir(lit)) => {
unreachable!("in literal form when getting span: {:?}", lit);
}
}
}

Expand All @@ -1563,7 +1575,23 @@ impl MacArgs {
match self {
MacArgs::Empty => TokenStream::default(),
MacArgs::Delimited(.., tokens) => tokens.clone(),
MacArgs::Eq(.., token) => TokenTree::Token(token.clone()).into(),
MacArgs::Eq(_, MacArgsEq::Ast(expr)) => {
// Currently only literals are allowed here. If more complex expression kinds are
// allowed in the future, then `nt_to_tokenstream` should be used to extract the
// token stream. This will require some cleverness, perhaps with a function
// pointer, because `nt_to_tokenstream` is not directly usable from this crate.
// It will also require changing the `parse_expr` call in `parse_mac_args_common`
// to `parse_expr_force_collect`.
if let ExprKind::Lit(lit) = &expr.kind {
let token = Token::new(TokenKind::Literal(lit.token), lit.span);
TokenTree::Token(token).into()
} else {
unreachable!("couldn't extract literal when getting inner tokens: {:?}", expr)
}
}
MacArgs::Eq(_, MacArgsEq::Hir(lit)) => {
unreachable!("in literal form when getting inner tokens: {:?}", lit)
}
}
}

Expand All @@ -1574,6 +1602,30 @@ impl MacArgs {
}
}

impl<CTX> HashStable<CTX> for MacArgs
where
CTX: crate::HashStableContext,
{
fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) {
mem::discriminant(self).hash_stable(ctx, hasher);
match self {
MacArgs::Empty => {}
MacArgs::Delimited(dspan, delim, tokens) => {
dspan.hash_stable(ctx, hasher);
delim.hash_stable(ctx, hasher);
tokens.hash_stable(ctx, hasher);
}
MacArgs::Eq(_eq_span, MacArgsEq::Ast(expr)) => {
unreachable!("hash_stable {:?}", expr);
}
MacArgs::Eq(eq_span, MacArgsEq::Hir(lit)) => {
eq_span.hash_stable(ctx, hasher);
lit.hash_stable(ctx, hasher);
}
}
}
}

#[derive(Copy, Clone, PartialEq, Eq, Encodable, Decodable, Debug, HashStable_Generic)]
pub enum MacDelimiter {
Parenthesis,
Expand Down
23 changes: 19 additions & 4 deletions compiler/rustc_ast/src/attr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,16 @@
use crate::ast;
use crate::ast::{AttrId, AttrItem, AttrKind, AttrStyle, Attribute};
use crate::ast::{Lit, LitKind};
use crate::ast::{MacArgs, MacDelimiter, MetaItem, MetaItemKind, NestedMetaItem};
use crate::ast::{MacArgs, MacArgsEq, MacDelimiter, MetaItem, MetaItemKind, NestedMetaItem};
use crate::ast::{Path, PathSegment};
use crate::ptr::P;
use crate::token::{self, CommentKind, Delimiter, Token};
use crate::tokenstream::{AttrAnnotatedTokenStream, AttrAnnotatedTokenTree};
use crate::tokenstream::{DelimSpan, Spacing, TokenTree, TreeAndSpacing};
use crate::tokenstream::{LazyTokenStream, TokenStream};
use crate::util::comments;

use rustc_data_structures::thin_vec::ThinVec;
use rustc_index::bit_set::GrowableBitSet;
use rustc_span::source_map::BytePos;
use rustc_span::symbol::{sym, Ident, Symbol};
Expand Down Expand Up @@ -475,7 +477,16 @@ impl MetaItemKind {
pub fn mac_args(&self, span: Span) -> MacArgs {
match self {
MetaItemKind::Word => MacArgs::Empty,
MetaItemKind::NameValue(lit) => MacArgs::Eq(span, lit.to_token()),
MetaItemKind::NameValue(lit) => {
let expr = P(ast::Expr {
id: ast::DUMMY_NODE_ID,
kind: ast::ExprKind::Lit(lit.clone()),
span: lit.span,
attrs: ThinVec::new(),
tokens: None,
});
MacArgs::Eq(span, MacArgsEq::Ast(expr))
}
MetaItemKind::List(list) => {
let mut tts = Vec::new();
for (i, item) in list.iter().enumerate() {
Expand Down Expand Up @@ -552,12 +563,16 @@ impl MetaItemKind {

fn from_mac_args(args: &MacArgs) -> Option<MetaItemKind> {
match args {
MacArgs::Empty => Some(MetaItemKind::Word),
MacArgs::Delimited(_, MacDelimiter::Parenthesis, tokens) => {
MetaItemKind::list_from_tokens(tokens.clone())
}
MacArgs::Delimited(..) => None,
MacArgs::Eq(_, token) => Lit::from_token(token).ok().map(MetaItemKind::NameValue),
MacArgs::Empty => Some(MetaItemKind::Word),
MacArgs::Eq(_, MacArgsEq::Ast(expr)) => match &expr.kind {
ast::ExprKind::Lit(lit) => Some(MetaItemKind::NameValue(lit.clone())),
_ => None,
},
MacArgs::Eq(_, MacArgsEq::Hir(lit)) => Some(MetaItemKind::NameValue(lit.clone())),
}
}

Expand Down
23 changes: 7 additions & 16 deletions compiler/rustc_ast/src/mut_visit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -370,21 +370,12 @@ pub fn visit_mac_args<T: MutVisitor>(args: &mut MacArgs, vis: &mut T) {
visit_delim_span(dspan, vis);
visit_tts(tokens, vis);
}
MacArgs::Eq(eq_span, token) => {
MacArgs::Eq(eq_span, MacArgsEq::Ast(expr)) => {
vis.visit_span(eq_span);
if T::VISIT_TOKENS {
visit_token(token, vis);
} else {
// The value in `#[key = VALUE]` must be visited as an expression for backward
// compatibility, so that macros can be expanded in that position.
match &mut token.kind {
token::Interpolated(nt) => match Lrc::make_mut(nt) {
token::NtExpr(expr) => vis.visit_expr(expr),
t => panic!("unexpected token in key-value attribute: {:?}", t),
},
t => panic!("unexpected token in key-value attribute: {:?}", t),
}
}
vis.visit_expr(expr);
}
MacArgs::Eq(_, MacArgsEq::Hir(lit)) => {
unreachable!("in literal form when visiting mac args eq: {:?}", lit)
}
}
}
Expand Down Expand Up @@ -738,7 +729,7 @@ pub fn visit_token<T: MutVisitor>(t: &mut Token, vis: &mut T) {
}
token::Interpolated(nt) => {
let mut nt = Lrc::make_mut(nt);
visit_interpolated(&mut nt, vis);
visit_nonterminal(&mut nt, vis);
}
_ => {}
}
Expand Down Expand Up @@ -769,7 +760,7 @@ pub fn visit_token<T: MutVisitor>(t: &mut Token, vis: &mut T) {
// contain multiple items, but decided against it when I looked at
// `parse_item_or_view_item` and tried to figure out what I would do with
// multiple items there....
pub fn visit_interpolated<T: MutVisitor>(nt: &mut token::Nonterminal, vis: &mut T) {
pub fn visit_nonterminal<T: MutVisitor>(nt: &mut token::Nonterminal, vis: &mut T) {
match nt {
token::NtItem(item) => visit_clobber(item, |item| {
// This is probably okay, because the only visitors likely to
Expand Down
9 changes: 9 additions & 0 deletions compiler/rustc_ast/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,15 @@ pub enum TokenKind {
/// treat regular and interpolated lifetime identifiers in the same way.
Lifetime(Symbol),

/// An embedded AST node, as produced by a macro. This only exists for
/// historical reasons. We'd like to get rid of it, for multiple reasons.
/// - It's conceptually very strange. Saying a token can contain an AST
/// node is like saying, in natural language, that a word can contain a
/// sentence.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

proc_macro-style tokens contain "sentences" (delimited groups) in them all the time, but for flattened rustc_parse-style tokens it's unusual, I guess.
When parser librarification was still in plans, one of the alternatives was to switch rustc_parse to the proc_macro-style token model.

The main reason to get rid of Interpolated tokens from my point of view is that we want to define everything happening during macro expansion in terms of token streams, so we have to treat the AST pieces as token streams at least in some cases, moreover the token stream representation is the source of truth for them.
So these AST pieces are supposed to be a parser caching mechanism at most, which is highly non-obvious when they are represented like this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're talking about token trees, rather than tokens, right?

The main reason to get rid of Interpolated tokens from my point of view ...

That's a good reason as well :)

/// - It requires special handling in a bunch of places in the parser.
/// - It prevents `Token` from implementing `Copy`.
/// It adds complexity and likely slows things down. Please don't add new
/// occurrences of this token kind!
Interpolated(Lrc<Nonterminal>),

/// A doc comment token.
Expand Down
14 changes: 4 additions & 10 deletions compiler/rustc_ast/src/visit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
//! those that are created by the expansion of a macro.

use crate::ast::*;
use crate::token;

use rustc_span::symbol::{Ident, Symbol};
use rustc_span::Span;
Expand Down Expand Up @@ -937,14 +936,9 @@ pub fn walk_mac_args<'a, V: Visitor<'a>>(visitor: &mut V, args: &'a MacArgs) {
match args {
MacArgs::Empty => {}
MacArgs::Delimited(_dspan, _delim, _tokens) => {}
// The value in `#[key = VALUE]` must be visited as an expression for backward
// compatibility, so that macros can be expanded in that position.
MacArgs::Eq(_eq_span, token) => match &token.kind {
token::Interpolated(nt) => match &**nt {
token::NtExpr(expr) => visitor.visit_expr(expr),
t => panic!("unexpected token in key-value attribute: {:?}", t),
},
t => panic!("unexpected token in key-value attribute: {:?}", t),
},
MacArgs::Eq(_eq_span, MacArgsEq::Ast(expr)) => visitor.visit_expr(expr),
MacArgs::Eq(_, MacArgsEq::Hir(lit)) => {
unreachable!("in literal form when walking mac args eq: {:?}", lit)
}
}
}
50 changes: 18 additions & 32 deletions compiler/rustc_ast_lowering/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,7 @@
#![recursion_limit = "256"]
#![allow(rustc::potential_query_instability)]

use rustc_ast::token::{Delimiter, Token};
use rustc_ast::tokenstream::{CanSynthesizeMissingTokens, TokenStream, TokenTree};
use rustc_ast::tokenstream::{CanSynthesizeMissingTokens, TokenStream};
use rustc_ast::visit;
use rustc_ast::{self as ast, *};
use rustc_ast_pretty::pprust;
Expand Down Expand Up @@ -874,37 +873,24 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> {
)
}
// This is an inert key-value attribute - it will never be visible to macros
// after it gets lowered to HIR. Therefore, we can synthesize tokens with fake
// spans to handle nonterminals in `#[doc]` (e.g. `#[doc = $e]`).
MacArgs::Eq(eq_span, ref token) => {
// In valid code the value is always representable as a single literal token.
fn unwrap_single_token(sess: &Session, tokens: TokenStream, span: Span) -> Token {
if tokens.len() != 1 {
sess.diagnostic()
.delay_span_bug(span, "multiple tokens in key-value attribute's value");
}
match tokens.into_trees().next() {
Some(TokenTree::Token(token)) => token,
Some(TokenTree::Delimited(_, delim, tokens)) => {
if delim != Delimiter::Invisible {
sess.diagnostic().delay_span_bug(
span,
"unexpected delimiter in key-value attribute's value",
);
}
unwrap_single_token(sess, tokens, span)
}
None => Token::dummy(),
// after it gets lowered to HIR. Therefore, we can extract literals to handle
// nonterminals in `#[doc]` (e.g. `#[doc = $e]`).
MacArgs::Eq(eq_span, MacArgsEq::Ast(ref expr)) => {
// In valid code the value always ends up as a single literal. Otherwise, a dummy
// literal suffices because the error is handled elsewhere.
let lit = if let ExprKind::Lit(lit) = &expr.kind {
lit.clone()
} else {
Lit {
token: token::Lit::new(token::LitKind::Err, kw::Empty, None),
kind: LitKind::Err(kw::Empty),
span: DUMMY_SP,
}
}

let tokens = FlattenNonterminals {
parse_sess: &self.sess.parse_sess,
synthesize_tokens: CanSynthesizeMissingTokens::Yes,
nt_to_tokenstream: self.nt_to_tokenstream,
}
.process_token(token.clone());
MacArgs::Eq(eq_span, unwrap_single_token(self.sess, tokens, token.span))
};
MacArgs::Eq(eq_span, MacArgsEq::Hir(lit))
}
MacArgs::Eq(_, MacArgsEq::Hir(ref lit)) => {
unreachable!("in literal form when lowering mac args eq: {:?}", lit)
}
}
}
Expand Down
28 changes: 20 additions & 8 deletions compiler/rustc_ast_pretty/src/pprust/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use rustc_ast::util::comments::{gather_comments, Comment, CommentStyle};
use rustc_ast::util::parser;
use rustc_ast::{self as ast, BlockCheckMode, PatKind, RangeEnd, RangeSyntax};
use rustc_ast::{attr, Term};
use rustc_ast::{GenericArg, MacArgs};
use rustc_ast::{GenericArg, MacArgs, MacArgsEq};
use rustc_ast::{GenericBound, SelfKind, TraitBoundModifier};
use rustc_ast::{InlineAsmOperand, InlineAsmRegOrRegClass};
use rustc_ast::{InlineAsmOptions, InlineAsmTemplatePiece};
Expand Down Expand Up @@ -469,14 +469,22 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
true,
span,
),
MacArgs::Empty | MacArgs::Eq(..) => {
MacArgs::Empty => {
self.print_path(&item.path, false, 0);
if let MacArgs::Eq(_, token) = &item.args {
self.space();
self.word_space("=");
let token_str = self.token_to_string_ext(token, true);
self.word(token_str);
}
}
MacArgs::Eq(_, MacArgsEq::Ast(expr)) => {
self.print_path(&item.path, false, 0);
self.space();
self.word_space("=");
let token_str = self.expr_to_string(expr);
self.word(token_str);
petrochenkov marked this conversation as resolved.
Show resolved Hide resolved
}
MacArgs::Eq(_, MacArgsEq::Hir(lit)) => {
self.print_path(&item.path, false, 0);
self.space();
self.word_space("=");
let token_str = self.literal_to_string(lit);
self.word(token_str);
}
}
self.end();
Expand Down Expand Up @@ -817,6 +825,10 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
Self::to_string(|s| s.print_expr(e))
}

fn literal_to_string(&self, lit: &ast::Lit) -> String {
Self::to_string(|s| s.print_literal(lit))
}

fn tt_to_string(&self, tt: &TokenTree) -> String {
Self::to_string(|s| s.print_tt(tt, false))
}
Expand Down
13 changes: 3 additions & 10 deletions compiler/rustc_parse/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,10 @@ use rustc_ast::tokenstream::{TokenStream, TokenTree};
use rustc_ast::AttrId;
use rustc_ast::DUMMY_NODE_ID;
use rustc_ast::{self as ast, AnonConst, AstLike, AttrStyle, AttrVec, Const, CrateSugar, Extern};
use rustc_ast::{Async, Expr, ExprKind, MacArgs, MacDelimiter, Mutability, StrLit, Unsafe};
use rustc_ast::{Visibility, VisibilityKind};
use rustc_ast::{Async, Expr, ExprKind, MacArgs, MacArgsEq, MacDelimiter, Mutability, StrLit};
use rustc_ast::{Unsafe, Visibility, VisibilityKind};
use rustc_ast_pretty::pprust;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sync::Lrc;
use rustc_errors::PResult;
use rustc_errors::{
struct_span_err, Applicability, DiagnosticBuilder, ErrorGuaranteed, FatalError, MultiSpan,
Expand Down Expand Up @@ -1157,13 +1156,7 @@ impl<'a> Parser<'a> {
} else if !delimited_only {
if self.eat(&token::Eq) {
let eq_span = self.prev_token.span;

// Collect tokens because they are used during lowering to HIR.
let expr = self.parse_expr_force_collect()?;
let span = expr.span;

let token_kind = token::Interpolated(Lrc::new(token::NtExpr(expr)));
MacArgs::Eq(eq_span, Token::new(token_kind, span))
MacArgs::Eq(eq_span, MacArgsEq::Ast(self.parse_expr_force_collect()?))
} else {
MacArgs::Empty
}
Expand Down
Loading