Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify MatcherPos some more #95509

Merged
merged 5 commits into from
Apr 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions compiler/rustc_expand/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#![feature(associated_type_bounds)]
#![feature(associated_type_defaults)]
#![feature(box_patterns)]
#![feature(box_syntax)]
#![feature(crate_visibility_modifier)]
#![feature(decl_macro)]
Expand Down
214 changes: 119 additions & 95 deletions compiler/rustc_expand/src/mbe/macro_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ crate use ParseResult::*;

use crate::mbe::{self, SequenceRepetition, TokenTree};

use rustc_ast::token::{self, DocComment, Nonterminal, Token};
use rustc_ast::token::{self, DocComment, Nonterminal, Token, TokenKind};
use rustc_parse::parser::{NtOrTt, Parser};
use rustc_session::parse::ParseSess;
use rustc_span::symbol::MacroRulesNormalizedIdent;
Expand All @@ -87,17 +87,6 @@ use rustc_data_structures::sync::Lrc;
use rustc_span::symbol::Ident;
use std::borrow::Cow;
use std::collections::hash_map::Entry::{Occupied, Vacant};
use std::mem;

/// This is used by `parse_tt_inner` to keep track of delimited submatchers that we have
/// descended into.
#[derive(Clone)]
struct MatcherPosFrame<'tt> {
/// The "parent" matcher that we have descended from.
tts: &'tt [TokenTree],
/// The position of the "dot" in `tt` at the time we descended.
idx: usize,
}

// One element is enough to cover 95-99% of vectors for most benchmarks. Also,
// vectors longer than one frequently have many elements, not just two or
Expand All @@ -108,6 +97,33 @@ type NamedMatchVec = SmallVec<[NamedMatch; 1]>;
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
rustc_data_structures::static_assert_size!(NamedMatchVec, 48);

#[derive(Clone)]
enum MatcherKind<'tt> {
TopLevel,
Delimited(Box<DelimitedSubmatcher<'tt>>),
Sequence(Box<SequenceSubmatcher<'tt>>),
}

#[derive(Clone)]
struct DelimitedSubmatcher<'tt> {
parent: Parent<'tt>,
}

#[derive(Clone)]
struct SequenceSubmatcher<'tt> {
parent: Parent<'tt>,
seq: &'tt SequenceRepetition,
}

/// Data used to ascend from a submatcher back to its parent matcher. A subset of the fields from
/// `MathcherPos`.
#[derive(Clone)]
struct Parent<'tt> {
tts: &'tt [TokenTree],
idx: usize,
kind: MatcherKind<'tt>,
}

/// A single matcher position, which could be within the top-level matcher, a submatcher, a
/// subsubmatcher, etc. For example:
/// ```text
Expand All @@ -116,13 +132,14 @@ rustc_data_structures::static_assert_size!(NamedMatchVec, 48);
/// <--------------> first submatcher; three tts, zero metavars
/// <--------------------------> top-level matcher; two tts, one metavar
/// ```
#[derive(Clone)]
struct MatcherPos<'tt> {
/// The tokens that make up the current matcher. When we are within a `Sequence` or `Delimited`
/// submatcher, this is just the contents of that submatcher.
tts: &'tt [TokenTree],

/// The "dot" position within the current submatcher, i.e. the index into `tts`.
/// The "dot" position within the current submatcher, i.e. the index into `tts`. Can go one or
/// two positions past the final elements in `tts` when dealing with sequences, see
/// `parse_tt_inner` for details.
idx: usize,

/// This vector ends up with one element per metavar in the *top-level* matcher, even when this
Expand All @@ -134,25 +151,18 @@ struct MatcherPos<'tt> {
/// The number of sequences this mp is within.
seq_depth: usize,

/// The position in `matches` of the first metavar in this (sub)matcher. Zero if there are
/// no metavars.
match_lo: usize,

/// The position in `matches` of the next metavar to be matched against the source token
/// stream. Should not be used if there are no metavars.
match_cur: usize,

/// This field is only used if we are matching a sequence.
sequence: Option<MatcherPosSequence<'tt>>,

/// When we are within a `Delimited` submatcher (or subsubmatcher), this tracks the parent
/// matcher(s). The bottom of the stack is the top-level matcher.
stack: SmallVec<[MatcherPosFrame<'tt>; 1]>,
/// What kind of matcher we are in. For submatchers, this contains enough information to
/// reconstitute a `MatcherPos` within the parent once we ascend out of the submatcher.
kind: MatcherKind<'tt>,
}

// This type is used a lot. Make sure it doesn't unintentionally get bigger.
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
rustc_data_structures::static_assert_size!(MatcherPos<'_>, 104);
rustc_data_structures::static_assert_size!(MatcherPos<'_>, 64);

impl<'tt> MatcherPos<'tt> {
fn top_level(matcher: &'tt [TokenTree], empty_matches: Lrc<NamedMatchVec>) -> Self {
Expand All @@ -161,31 +171,50 @@ impl<'tt> MatcherPos<'tt> {
idx: 0,
matches: empty_matches,
seq_depth: 0,
match_lo: 0,
match_cur: 0,
stack: smallvec![],
sequence: None,
kind: MatcherKind::TopLevel,
}
}

fn empty_sequence(
parent_mp: &MatcherPos<'tt>,
seq: &'tt SequenceRepetition,
empty_matches: Lrc<NamedMatchVec>,
) -> Self {
let mut mp = MatcherPos {
tts: parent_mp.tts,
idx: parent_mp.idx + 1,
matches: parent_mp.matches.clone(), // a cheap clone
seq_depth: parent_mp.seq_depth,
match_cur: parent_mp.match_cur + seq.num_captures,
kind: parent_mp.kind.clone(), // an expensive clone
};
for idx in parent_mp.match_cur..parent_mp.match_cur + seq.num_captures {
mp.push_match(idx, MatchedSeq(empty_matches.clone()));
}
mp
}

fn sequence(
parent: Box<MatcherPos<'tt>>,
parent_mp: Box<MatcherPos<'tt>>,
nnethercote marked this conversation as resolved.
Show resolved Hide resolved
seq: &'tt SequenceRepetition,
empty_matches: Lrc<NamedMatchVec>,
) -> Self {
let seq_kind = box SequenceSubmatcher {
parent: Parent { tts: parent_mp.tts, idx: parent_mp.idx, kind: parent_mp.kind },
seq,
};
let mut mp = MatcherPos {
tts: &seq.tts,
idx: 0,
matches: parent.matches.clone(),
seq_depth: parent.seq_depth,
match_lo: parent.match_cur,
match_cur: parent.match_cur,
sequence: Some(MatcherPosSequence { parent, seq }),
stack: smallvec![],
matches: parent_mp.matches,
seq_depth: parent_mp.seq_depth,
match_cur: parent_mp.match_cur,
kind: MatcherKind::Sequence(seq_kind),
};
// Start with an empty vec for each metavar within the sequence. Note that `mp.seq_depth`
// must have the parent's depth at this point for these `push_match` calls to work.
for idx in mp.match_lo..mp.match_lo + seq.num_captures {
for idx in mp.match_cur..mp.match_cur + seq.num_captures {
mp.push_match(idx, MatchedSeq(empty_matches.clone()));
}
mp.seq_depth += 1;
Expand Down Expand Up @@ -226,16 +255,6 @@ impl<'tt> MatcherPos<'tt> {
}
}

#[derive(Clone)]
struct MatcherPosSequence<'tt> {
/// The parent matcher position. Effectively gives a linked list of matches all the way to the
/// top-level matcher.
parent: Box<MatcherPos<'tt>>,

/// The sequence itself.
seq: &'tt SequenceRepetition,
}

enum EofMatcherPositions<'tt> {
None,
One(Box<MatcherPos<'tt>>),
Expand Down Expand Up @@ -454,18 +473,6 @@ impl<'tt> TtParser<'tt> {
let mut eof_mps = EofMatcherPositions::None;

while let Some(mut mp) = self.cur_mps.pop() {
// Backtrack out of delimited submatcher when necessary. When backtracking out again,
// we need to advance the "dot" past the delimiters in the parent matcher(s).
while mp.idx >= mp.tts.len() {
match mp.stack.pop() {
Some(MatcherPosFrame { tts, idx }) => {
mp.tts = tts;
mp.idx = idx + 1;
}
None => break,
}
}

// Get the current position of the "dot" (`idx`) in `mp` and the number of token
// trees in the matcher (`len`).
let idx = mp.idx;
Expand All @@ -479,13 +486,11 @@ impl<'tt> TtParser<'tt> {
let op = seq.kleene.op;
if op == mbe::KleeneOp::ZeroOrMore || op == mbe::KleeneOp::ZeroOrOne {
// Allow for the possibility of zero matches of this sequence.
let mut new_mp = mp.clone();
new_mp.match_cur += seq.num_captures;
new_mp.idx += 1;
for idx in mp.match_cur..mp.match_cur + seq.num_captures {
new_mp.push_match(idx, MatchedSeq(self.empty_matches.clone()));
}
self.cur_mps.push(new_mp);
self.cur_mps.push(box MatcherPos::empty_sequence(
&*mp,
&seq,
self.empty_matches.clone(),
));
}

// Allow for the possibility of one or more matches of this sequence.
Expand Down Expand Up @@ -515,16 +520,17 @@ impl<'tt> TtParser<'tt> {
}

TokenTree::Delimited(_, delimited) => {
// To descend into a delimited submatcher, we push the current matcher onto
// a stack and push a new mp containing the submatcher onto `cur_mps`.
//
// At the beginning of the loop, if we reach the end of the delimited
// submatcher, we pop the stack to backtrack out of the descent. Note that
// we use `all_tts` to include the open and close delimiter tokens.
let tts = mem::replace(&mut mp.tts, &delimited.all_tts);
let idx = mp.idx;
mp.stack.push(MatcherPosFrame { tts, idx });
// To descend into a delimited submatcher, we update `mp` appropriately,
// including enough information to re-ascend afterwards, and push it onto
// `cur_mps`. Later, when we reach the closing delimiter, we will recover
// the parent matcher position to ascend. Note that we use `all_tts` to
// include the open and close delimiter tokens.
let kind = MatcherKind::Delimited(box DelimitedSubmatcher {
parent: Parent { tts: mp.tts, idx: mp.idx, kind: mp.kind },
});
mp.tts = &delimited.all_tts;
mp.idx = 0;
mp.kind = kind;
self.cur_mps.push(mp);
}

Expand All @@ -542,6 +548,18 @@ impl<'tt> TtParser<'tt> {
mp.idx += 1;
self.cur_mps.push(mp);
} else if token_name_eq(&t, token) {
if let TokenKind::CloseDelim(_) = token.kind {
// Ascend out of the delimited submatcher.
debug_assert_eq!(idx, len - 1);
match mp.kind {
MatcherKind::Delimited(submatcher) => {
mp.tts = submatcher.parent.tts;
mp.idx = submatcher.parent.idx;
mp.kind = submatcher.parent.kind;
}
_ => unreachable!(),
}
}
mp.idx += 1;
self.next_mps.push(mp);
}
Expand All @@ -550,38 +568,44 @@ impl<'tt> TtParser<'tt> {
// These cannot appear in a matcher.
TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
}
} else if let Some(sequence) = &mp.sequence {
} else if let MatcherKind::Sequence(box SequenceSubmatcher { parent, seq }) = &mp.kind {
// We are past the end of a sequence.
debug_assert!(idx <= len + 1);
// - If it has no separator, we must be only one past the end.
// - If it has a separator, we may be one past the end, in which case we must
// look for a separator. Or we may be two past the end, in which case we have
// already dealt with the separator.
debug_assert!(idx == len || idx == len + 1 && seq.separator.is_some());

if idx == len {
// Add all matches from the sequence to `parent`, and move the "dot" past the
// sequence in `parent`. This allows for the case where the sequence matching
// is finished.
let mut new_mp = sequence.parent.clone();
new_mp.matches = mp.matches.clone();
new_mp.match_cur = mp.match_lo + sequence.seq.num_captures;
new_mp.idx += 1;
// Sequence matching may have finished: move the "dot" past the sequence in
// `parent`. This applies whether a separator is used or not. If sequence
// matching hasn't finished, this `new_mp` will fail quietly when it is
// processed next time around the loop.
let new_mp = box MatcherPos {
tts: parent.tts,
idx: parent.idx + 1,
matches: mp.matches.clone(), // a cheap clone
seq_depth: mp.seq_depth - 1,
match_cur: mp.match_cur,
kind: parent.kind.clone(), // an expensive clone
};
self.cur_mps.push(new_mp);
}

if idx == len && sequence.seq.separator.is_some() {
if sequence
.seq
.separator
.as_ref()
.map_or(false, |sep| token_name_eq(token, sep))
{
if seq.separator.is_some() && idx == len {
// Look for the separator.
if seq.separator.as_ref().map_or(false, |sep| token_name_eq(token, sep)) {
// The matcher has a separator, and it matches the current token. We can
// advance past the separator token.
mp.idx += 1;
self.next_mps.push(mp);
}
} else if sequence.seq.kleene.op != mbe::KleeneOp::ZeroOrOne {
// We don't need a separator. Move the "dot" back to the beginning of the
// matcher and try to match again UNLESS we are only allowed to have _one_
// repetition.
mp.match_cur = mp.match_lo;
} else if seq.kleene.op != mbe::KleeneOp::ZeroOrOne {
// We don't need to look for a separator: either this sequence doesn't have
// one, or it does and we've already handled it. Also, we are allowed to have
// more than one repetition. Move the "dot" back to the beginning of the
// matcher and try to match again.
mp.match_cur -= seq.num_captures;
mp.idx = 0;
self.cur_mps.push(mp);
}
Expand Down