Hasten macro parsing #68848

Closed

48 changes: 14 additions & 34 deletions src/librustc_expand/mbe/macro_parser.rs
@@ -78,20 +78,19 @@ use crate::mbe::{self, TokenTree};

use rustc_ast_pretty::pprust;
use rustc_parse::parser::{FollowedByType, Parser, PathStyle};
use rustc_parse::Directory;
use rustc_session::parse::ParseSess;
use rustc_span::symbol::{kw, sym, Symbol};
use syntax::ast::{Ident, Name};
use syntax::ptr::P;
use syntax::token::{self, DocComment, Nonterminal, Token};
use syntax::tokenstream::TokenStream;

use rustc_errors::{FatalError, PResult};
use rustc_span::Span;
use smallvec::{smallvec, SmallVec};

use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sync::Lrc;
use std::borrow::Cow;
use std::collections::hash_map::Entry::{Occupied, Vacant};
use std::mem;
use std::ops::{Deref, DerefMut};
@@ -613,28 +612,9 @@ fn inner_parse_loop<'root, 'tt>(
Success(())
}

/// Use the given sequence of token trees (`ms`) as a matcher. Match the given token stream `tts`
/// against it and return the match.
///
/// # Parameters
///
/// - `sess`: The session into which errors are emitted
/// - `tts`: The tokenstream we are matching against the pattern `ms`
/// - `ms`: A sequence of token trees representing a pattern against which we are matching
/// - `directory`: Information about the file locations (needed for the black-box parser)
/// - `recurse_into_modules`: Whether or not to recurse into modules (needed for the black-box
/// parser)
pub(super) fn parse(
sess: &ParseSess,
tts: TokenStream,
ms: &[TokenTree],
directory: Option<Directory<'_>>,
recurse_into_modules: bool,
) -> NamedParseResult {
// Create a parser that can be used for the "black box" parts.
let mut parser =
Parser::new(sess, tts, directory, recurse_into_modules, true, rustc_parse::MACRO_ARGUMENTS);

/// Use the given sequence of token trees (`ms`) as a matcher. Match the token
/// stream from the given `parser` against it and return the match.
pub(super) fn parse_tt(parser: &mut Cow<'_, Parser<'_>>, ms: &[TokenTree]) -> NamedParseResult {
// A queue of possible matcher positions. We initialize it with the matcher position in which
// the "dot" is before the first token of the first token tree in `ms`. `inner_parse_loop` then
// processes all of these possible matcher positions and produces possible next positions into
@@ -659,7 +639,7 @@ pub(super) fn parse(
// parsing from the black-box parser done. The result is that `next_items` will contain a
// bunch of possible next matcher positions in `next_items`.
match inner_parse_loop(
sess,
parser.sess,
&mut cur_items,
&mut next_items,
&mut eof_items,
@@ -684,7 +664,7 @@
if eof_items.len() == 1 {
let matches =
eof_items[0].matches.iter_mut().map(|dv| Lrc::make_mut(dv).pop().unwrap());
return nameize(sess, ms, matches);
return nameize(parser.sess, ms, matches);
} else if eof_items.len() > 1 {
return Error(
parser.token.span,
@@ -709,9 +689,14 @@
// unnecessary implicit clone later in Rc::make_mut.
drop(eof_items);

// If there are no possible next positions AND we aren't waiting for the black-box parser,
// then there is a syntax error.
if bb_items.is_empty() && next_items.is_empty() {
return Failure(parser.token.clone(), "no rules expected this token in macro call");
}
// Another possibility is that we need to call out to parse some rust nonterminal
// (black-box) parser. However, if there is not EXACTLY ONE of these, something is wrong.
if (!bb_items.is_empty() && !next_items.is_empty()) || bb_items.len() > 1 {
else if (!bb_items.is_empty() && !next_items.is_empty()) || bb_items.len() > 1 {
let nts = bb_items
.iter()
.map(|item| match item.top_elts.get_tt(item.idx) {
@@ -733,16 +718,11 @@
),
);
}
// If there are no possible next positions AND we aren't waiting for the black-box parser,
// then there is a syntax error.
else if bb_items.is_empty() && next_items.is_empty() {
return Failure(parser.token.take(), "no rules expected this token in macro call");
}
// Dump all possible `next_items` into `cur_items` for the next iteration.
else if !next_items.is_empty() {
// Now process the next token
cur_items.extend(next_items.drain(..));
parser.bump();
parser.to_mut().bump();
}
// Finally, we have the case where we need to call the black-box parser to get some
// nonterminal.
@@ -754,7 +734,7 @@
let match_cur = item.match_cur;
item.push_match(
match_cur,
MatchedNonterminal(Lrc::new(parse_nt(&mut parser, span, ident.name))),
MatchedNonterminal(Lrc::new(parse_nt(parser.to_mut(), span, ident.name))),
);
item.idx += 1;
item.match_cur += 1;
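The macro_parser.rs change above turns `parse`, which built a fresh black-box `Parser` on every call, into `parse_tt`, which borrows one through `&mut Cow<'_, Parser<'_>>` and only clones it at the point of mutation (`parser.to_mut().bump()`). A minimal sketch of that clone-on-write behavior, using a toy `Counter` in place of the real `Parser` (the names here are illustrative only, not rustc's API):

```rust
use std::borrow::Cow;

/// Stand-in for an expensive-to-clone parser (illustrative only).
#[derive(Clone)]
struct Counter {
    pos: usize,
}

/// Analogous in shape to `parse_tt`: reads never clone the borrowed base,
/// and `to_mut` clones it exactly once, at the first mutation.
fn advance_if(counter: &mut Cow<'_, Counter>, should_advance: bool) -> usize {
    if should_advance {
        counter.to_mut().pos += 1; // the lazy clone happens here
    }
    counter.pos // plain reads go through Deref and never clone
}

fn main() {
    let base = Counter { pos: 0 };

    let mut untouched = Cow::Borrowed(&base);
    advance_if(&mut untouched, false);
    assert!(matches!(untouched, Cow::Borrowed(_))); // no clone was made

    let mut bumped = Cow::Borrowed(&base);
    advance_if(&mut bumped, true);
    assert!(matches!(bumped, Cow::Owned(_))); // cloned lazily, on mutation
    assert_eq!(base.pos, 0); // the shared base value is untouched
}
```

The same property is what `parse_tt` relies on: callers that never bump the parser never pay for a copy of it.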
68 changes: 48 additions & 20 deletions src/librustc_expand/mbe/macro_rules.rs
@@ -1,11 +1,11 @@
use crate::base::{DummyResult, ExtCtxt, MacResult, TTMacroExpander};
use crate::base::{DummyResult, ExpansionData, ExtCtxt, MacResult, TTMacroExpander};
use crate::base::{SyntaxExtension, SyntaxExtensionKind};
use crate::expand::{ensure_complete_parse, parse_ast_fragment, AstFragment, AstFragmentKind};
use crate::mbe;
use crate::mbe::macro_check;
use crate::mbe::macro_parser::parse;
use crate::mbe::macro_parser::parse_tt;
use crate::mbe::macro_parser::{Error, Failure, Success};
use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq, NamedParseResult};
use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq};
use crate::mbe::transcribe::transcribe;

use rustc_ast_pretty::pprust;
@@ -166,9 +166,9 @@ impl TTMacroExpander for MacroRulesMacroExpander {
}
}

fn trace_macros_note(cx: &mut ExtCtxt<'_>, sp: Span, message: String) {
fn trace_macros_note(cx_expansions: &mut FxHashMap<Span, Vec<String>>, sp: Span, message: String) {
let sp = sp.macro_backtrace().last().map(|trace| trace.call_site).unwrap_or(sp);
cx.expansions.entry(sp).or_default().push(message);
cx_expansions.entry(sp).or_default().push(message);
}

/// Given `lhses` and `rhses`, this is the new macro we create
@@ -184,11 +184,33 @@ fn generic_extension<'cx>(
) -> Box<dyn MacResult + 'cx> {
if cx.trace_macros() {
let msg = format!("expanding `{}! {{ {} }}`", name, pprust::tts_to_string(arg.clone()));
trace_macros_note(cx, sp, msg);
trace_macros_note(&mut cx.expansions, sp, msg);
}

// Which arm's failure should we report? (the one furthest along)
let mut best_failure: Option<(Token, &str)> = None;

// We create a base parser that can be used for the "black box" parts.
// Every iteration needs a fresh copy of that parser. However, the parser
// is not mutated on many of the iterations, particularly when dealing with
// macros like this:
//
// macro_rules! foo {
// ("a") => (A);
// ("b") => (B);
// ("c") => (C);
// // ... etc. (maybe hundreds more)
// }
//
// as seen in the `html5ever` benchmark. We use a `Cow` so that the base
// parser is only cloned when necessary (upon mutation). Furthermore, we
// reinitialize the `Cow` with the base parser at the start of every
// iteration, so that any mutated parsers are not reused. This is all quite
// hacky, but speeds up the `html5ever` benchmark significantly. (Issue
// 68836 suggests a more comprehensive but more complex change to deal with
// this situation.)
let parser = parser_from_cx(&cx.current_expansion, &cx.parse_sess, arg.clone());

for (i, lhs) in lhses.iter().enumerate() {
// try each arm's matchers
let lhs_tt = match *lhs {
@@ -200,13 +222,14 @@ fn generic_extension<'cx>(
// This is used so that if a matcher is not `Success(..)`ful,
// then the spans which became gated when parsing the unsuccessful matcher
// are not recorded. On the first `Success(..)`ful matcher, the spans are merged.
let mut gated_spans_snaphot = mem::take(&mut *cx.parse_sess.gated_spans.spans.borrow_mut());
let mut gated_spans_snapshot =
mem::take(&mut *cx.parse_sess.gated_spans.spans.borrow_mut());

match parse_tt(cx, lhs_tt, arg.clone()) {
match parse_tt(&mut Cow::Borrowed(&parser), lhs_tt) {
Success(named_matches) => {
// The matcher was `Success(..)`ful.
// Merge the gated spans from parsing the matcher with the pre-existing ones.
cx.parse_sess.gated_spans.merge(gated_spans_snaphot);
cx.parse_sess.gated_spans.merge(gated_spans_snapshot);

let rhs = match rhses[i] {
// ignore delimiters
@@ -232,11 +255,11 @@ fn generic_extension<'cx>(

if cx.trace_macros() {
let msg = format!("to `{}`", pprust::tts_to_string(tts.clone()));
trace_macros_note(cx, sp, msg);
trace_macros_note(&mut cx.expansions, sp, msg);
}

let directory = Directory {
path: Cow::from(cx.current_expansion.module.directory.as_path()),
path: cx.current_expansion.module.directory.clone(),
ownership: cx.current_expansion.directory_ownership,
};
let mut p = Parser::new(cx.parse_sess(), tts, Some(directory), true, false, None);
@@ -267,8 +290,9 @@ fn generic_extension<'cx>(

// The matcher was not `Success(..)`ful.
// Restore to the state before snapshotting and maybe try again.
mem::swap(&mut gated_spans_snaphot, &mut cx.parse_sess.gated_spans.spans.borrow_mut());
mem::swap(&mut gated_spans_snapshot, &mut cx.parse_sess.gated_spans.spans.borrow_mut());
}
drop(parser);

let (token, label) = best_failure.expect("ran no matchers");
let span = token.span.substitute_dummy(sp);
@@ -286,7 +310,8 @@ fn generic_extension<'cx>(
mbe::TokenTree::Delimited(_, ref delim) => &delim.tts[..],
_ => continue,
};
match parse_tt(cx, lhs_tt, arg.clone()) {
let parser = parser_from_cx(&cx.current_expansion, &cx.parse_sess, arg.clone());
match parse_tt(&mut Cow::Borrowed(&parser), lhs_tt) {
Success(_) => {
if comma_span.is_dummy() {
err.note("you might be missing a comma");
Expand Down Expand Up @@ -368,7 +393,8 @@ pub fn compile_declarative_macro(
),
];

let argument_map = match parse(sess, body, &argument_gram, None, true) {
let parser = Parser::new(sess, body, None, true, true, rustc_parse::MACRO_ARGUMENTS);
let argument_map = match parse_tt(&mut Cow::Borrowed(&parser), &argument_gram) {
Success(m) => m,
Failure(token, msg) => {
let s = parse_failure_msg(&token);
@@ -1184,14 +1210,16 @@ fn quoted_tt_to_string(tt: &mbe::TokenTree) -> String {
}
}

/// Use this token tree as a matcher to parse given tts.
fn parse_tt(cx: &ExtCtxt<'_>, mtch: &[mbe::TokenTree], tts: TokenStream) -> NamedParseResult {
// `None` is because we're not interpolating
fn parser_from_cx<'cx>(
current_expansion: &'cx ExpansionData,
sess: &'cx ParseSess,
tts: TokenStream,
) -> Parser<'cx> {
let directory = Directory {
path: Cow::from(cx.current_expansion.module.directory.as_path()),
ownership: cx.current_expansion.directory_ownership,
path: current_expansion.module.directory.clone(),
ownership: current_expansion.directory_ownership,
};
parse(cx.parse_sess(), tts, mtch, Some(directory), true)
Parser::new(sess, tts, Some(directory), true, true, rustc_parse::MACRO_ARGUMENTS)
}

/// Generates an appropriate parsing failure message. For EOF, this is "unexpected end...". For
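Taken together, the macro_rules.rs changes build one base `Parser` per macro invocation (`parser_from_cx`) and hand every matcher arm a fresh `Cow::Borrowed` of it, so the clone inside `to_mut` is only paid by arms that actually consume tokens. A rough, self-contained sketch of that per-arm pattern, with toy types standing in for the real parser and matchers (nothing here is rustc's actual API):

```rust
use std::borrow::Cow;

#[derive(Clone)]
struct ToyParser {
    tokens: Vec<&'static str>,
    pos: usize,
}

impl ToyParser {
    fn bump(&mut self) {
        self.pos += 1;
    }
}

/// Stand-in for trying one macro arm: succeed only if the next token matches.
fn try_arm(parser: &mut Cow<'_, ToyParser>, expected: &'static str) -> bool {
    // Peeking is a read; it does not clone the base parser.
    if parser.tokens.get(parser.pos).copied() == Some(expected) {
        // Only a matching arm mutates, and only then is the base cloned.
        parser.to_mut().bump();
        true
    } else {
        false
    }
}

fn main() {
    let base = ToyParser { tokens: vec!["c"], pos: 0 };
    let arms = ["a", "b", "c"];

    for arm in arms {
        // Reinitialize the Cow every iteration so a parser mutated by one
        // arm is never accidentally reused by the next one.
        let mut parser = Cow::Borrowed(&base);
        if try_arm(&mut parser, arm) {
            println!("matched arm {arm:?}");
            break;
        }
        // Arms "a" and "b" fall through here without ever cloning `base`.
    }
}
```

As the comment in the diff says, this is deliberately a narrow fix; issue 68836 tracks a more comprehensive rework of the same problem.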
9 changes: 4 additions & 5 deletions src/librustc_parse/lib.rs
@@ -12,8 +12,7 @@ use syntax::ast;
use syntax::token::{self, Nonterminal};
use syntax::tokenstream::{self, TokenStream, TokenTree};

use std::borrow::Cow;
use std::path::Path;
use std::path::{Path, PathBuf};
use std::str;

use log::info;
@@ -29,8 +28,8 @@ pub mod validate_attr;
pub mod config;

#[derive(Clone)]
pub struct Directory<'a> {
pub path: Cow<'a, Path>,
pub struct Directory {
pub path: PathBuf,
pub ownership: DirectoryOwnership,
}

@@ -274,7 +273,7 @@ pub fn stream_to_parser<'a>(
pub fn stream_to_parser_with_base_dir<'a>(
sess: &'a ParseSess,
stream: TokenStream,
base_dir: Directory<'a>,
base_dir: Directory,
) -> Parser<'a> {
Parser::new(sess, stream, Some(base_dir), true, false, None)
}
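The librustc_parse change above replaces `Directory`'s `Cow<'a, Path>` with an owned `PathBuf`, removing the struct's lifetime parameter; the parser/mod.rs and parser/module.rs diffs below are the mechanical fallout, and the owned field is what lets `parser_from_cx` clone the expansion's directory into a parser that is kept around across matcher arms. A small before/after sketch of why owning the field erases the lifetime (illustrative types only, not the real definitions):

```rust
use std::borrow::Cow;
use std::path::{Path, PathBuf};

// Before: a borrowed path forces a lifetime parameter onto the struct and
// onto everything that embeds it (here, the parser).
struct BorrowedDir<'a> {
    path: Cow<'a, Path>,
}

// After: owning the path costs a copy up front, but the type has no
// lifetime, so it can be stored in a longer-lived parser and cloned freely.
#[derive(Clone)]
struct OwnedDir {
    path: PathBuf,
}

fn main() {
    let expansion_dir = PathBuf::from("src/librustc_expand");

    {
        // This value cannot outlive `expansion_dir`, and neither could any
        // parser that embedded it.
        let borrowed = BorrowedDir { path: Cow::from(expansion_dir.as_path()) };
        println!("borrowed: {}", borrowed.path.display());
    }

    let owned = OwnedDir { path: expansion_dir.clone() };
    drop(expansion_dir); // fine: `owned` keeps its own copy of the path
    println!("owned: {}", owned.path.display());
}
```

The trade-off is an extra `PathBuf` clone per parser, presumably cheaper than rebuilding the whole parser for every arm.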
9 changes: 4 additions & 5 deletions src/librustc_parse/parser/mod.rs
@@ -29,7 +29,6 @@ use syntax::token::{self, DelimToken, Token, TokenKind};
use syntax::tokenstream::{self, DelimSpan, TokenStream, TokenTree, TreeAndJoint};
use syntax::util::comments::{doc_comment_style, strip_doc_comment_decoration};

use std::borrow::Cow;
use std::path::PathBuf;
use std::{cmp, mem, slice};

Expand Down Expand Up @@ -114,7 +113,7 @@ pub struct Parser<'a> {
prev_token_kind: PrevTokenKind,
restrictions: Restrictions,
/// Used to determine the path to externally loaded source files.
pub(super) directory: Directory<'a>,
pub(super) directory: Directory,
/// `true` to parse sub-modules in other files.
// Public for rustfmt usage.
pub recurse_into_file_modules: bool,
@@ -376,7 +375,7 @@ impl<'a> Parser<'a> {
pub fn new(
sess: &'a ParseSess,
tokens: TokenStream,
directory: Option<Directory<'a>>,
directory: Option<Directory>,
recurse_into_file_modules: bool,
desugar_doc_comments: bool,
subparser_name: Option<&'static str>,
@@ -390,7 +389,7 @@ impl<'a> Parser<'a> {
restrictions: Restrictions::empty(),
recurse_into_file_modules,
directory: Directory {
path: Cow::from(PathBuf::new()),
path: PathBuf::new(),
ownership: DirectoryOwnership::Owned { relative: None },
},
root_module_name: None,
@@ -418,7 +417,7 @@ impl<'a> Parser<'a> {
&sess.source_map().lookup_char_pos(parser.token.span.lo()).file.unmapped_path
{
if let Some(directory_path) = path.parent() {
parser.directory.path = Cow::from(directory_path.to_path_buf());
parser.directory.path = directory_path.to_path_buf();
}
}
}
6 changes: 3 additions & 3 deletions src/librustc_parse/parser/module.rs
@@ -285,7 +285,7 @@ impl<'a> Parser<'a> {

fn push_directory(&mut self, id: Ident, attrs: &[Attribute]) {
if let Some(path) = attr::first_attr_value_str_by_name(attrs, sym::path) {
self.directory.path.to_mut().push(&*path.as_str());
self.directory.path.push(&*path.as_str());
self.directory.ownership = DirectoryOwnership::Owned { relative: None };
} else {
// We have to push on the current module name in the case of relative
@@ -297,10 +297,10 @@ impl<'a> Parser<'a> {
if let DirectoryOwnership::Owned { relative } = &mut self.directory.ownership {
if let Some(ident) = relative.take() {
// remove the relative offset
self.directory.path.to_mut().push(&*ident.as_str());
self.directory.path.push(&*ident.as_str());
}
}
self.directory.path.to_mut().push(&*id.as_str());
self.directory.path.push(&*id.as_str());
}
}
}