Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Auto merge of #37290 - petrochenkov:pnp, r=jseyfried
syntax: Tweak path parsing logic

Associated paths starting with `<<` are parsed in patterns.

Paths like `self::foo::bar` are interpreted as paths and not as `self` arguments in methods (cc @matklad).
Now, I believe, *all* paths are consistently parsed greedily in case of ambiguity.
Detection of `&'a mut self::` requires a fairly large (but still fixed) lookahead, so I had to increase the size of the parser's lookahead buffer.
Curiously, if `lookahead_distance >= lookahead_buffer_size` was used previously, the parser hung forever. I fixed this as well; now it ICEs instead.

r? @jseyfried
  • Loading branch information
bors committed Oct 21, 2016
2 parents da5b646 + fea630e commit 5509ae3
Show file tree
Hide file tree
Showing 11 changed files with 134 additions and 74 deletions.
6 changes: 6 additions & 0 deletions src/libsyntax/parse/lexer/mod.rs
Expand Up @@ -74,6 +74,12 @@ pub struct TokenAndSpan {
pub sp: Span,
}

impl Default for TokenAndSpan {
    /// Builds a placeholder entry: an `Underscore` token paired with the
    /// dummy span.
    fn default() -> Self {
        let tok = token::Underscore;
        let sp = syntax_pos::DUMMY_SP;
        TokenAndSpan { tok: tok, sp: sp }
    }
}

pub struct StringReader<'a> {
pub span_diagnostic: &'a Handler,
/// The absolute offset within the codemap of the next character to read
Expand Down
111 changes: 58 additions & 53 deletions src/libsyntax/parse/parser.rs
Expand Up @@ -245,6 +245,22 @@ enum PrevTokenKind {
Other,
}

/// Simple circular buffer used for keeping the next few tokens.
///
/// `start` and `end` are indices into `buffer` that wrap around modulo
/// `LOOKAHEAD_BUFFER_CAPACITY`. The parser treats `start == end` as
/// "nothing buffered".
#[derive(Default)]
struct LookaheadBuffer {
// Backing storage; unused slots hold the default placeholder value.
buffer: [TokenAndSpan; LOOKAHEAD_BUFFER_CAPACITY],
// Index of the oldest buffered token (the next one to be consumed).
start: usize,
// Index of the slot the next token read ahead will be written to.
end: usize,
}

/// Number of slots in the circular lookahead buffer.
///
/// Since `start == end` denotes an empty buffer, at most
/// `LOOKAHEAD_BUFFER_CAPACITY - 1` tokens can be buffered at once.
const LOOKAHEAD_BUFFER_CAPACITY: usize = 8;

impl LookaheadBuffer {
    /// Returns how many tokens are currently buffered, i.e. the distance
    /// from `start` to `end` measured around the ring.
    fn len(&self) -> usize {
        let capacity = LOOKAHEAD_BUFFER_CAPACITY;
        // The capacity is added before subtracting so that a wrapped-around
        // `end` (numerically smaller than `start`) does not underflow.
        let wrapped_distance = capacity + self.end - self.start;
        wrapped_distance % capacity
    }
}

/* ident is handled by common.rs */

pub struct Parser<'a> {
Expand All @@ -258,9 +274,7 @@ pub struct Parser<'a> {
pub cfg: CrateConfig,
/// the previous token kind
prev_token_kind: PrevTokenKind,
pub buffer: [TokenAndSpan; 4],
pub buffer_start: isize,
pub buffer_end: isize,
lookahead_buffer: LookaheadBuffer,
pub tokens_consumed: usize,
pub restrictions: Restrictions,
pub quote_depth: usize, // not (yet) related to the quasiquoter
Expand Down Expand Up @@ -356,10 +370,6 @@ impl<'a> Parser<'a> {
_ => PathBuf::from(sess.codemap().span_to_filename(span)),
};
directory.pop();
let placeholder = TokenAndSpan {
tok: token::Underscore,
sp: span,
};

Parser {
reader: rdr,
Expand All @@ -369,14 +379,7 @@ impl<'a> Parser<'a> {
span: span,
prev_span: span,
prev_token_kind: PrevTokenKind::Other,
buffer: [
placeholder.clone(),
placeholder.clone(),
placeholder.clone(),
placeholder.clone(),
],
buffer_start: 0,
buffer_end: 0,
lookahead_buffer: Default::default(),
tokens_consumed: 0,
restrictions: Restrictions::empty(),
quote_depth: 0,
Expand Down Expand Up @@ -937,19 +940,13 @@ impl<'a> Parser<'a> {
_ => PrevTokenKind::Other,
};

let next = if self.buffer_start == self.buffer_end {
let next = if self.lookahead_buffer.start == self.lookahead_buffer.end {
self.reader.real_token()
} else {
// Avoid token copies with `replace`.
let buffer_start = self.buffer_start as usize;
let next_index = (buffer_start + 1) & 3;
self.buffer_start = next_index as isize;

let placeholder = TokenAndSpan {
tok: token::Underscore,
sp: self.span,
};
mem::replace(&mut self.buffer[buffer_start], placeholder)
let old_start = self.lookahead_buffer.start;
self.lookahead_buffer.start = (old_start + 1) % LOOKAHEAD_BUFFER_CAPACITY;
mem::replace(&mut self.lookahead_buffer.buffer[old_start], Default::default())
};
self.span = next.sp;
self.token = next.tok;
Expand Down Expand Up @@ -982,21 +979,22 @@ impl<'a> Parser<'a> {
self.expected_tokens.clear();
}

/// Returns the number of tokens currently held in the four-slot token
/// buffer, accounting for wrap-around of the start/end indices.
pub fn buffer_length(&mut self) -> isize {
    let (head, tail) = (self.buffer_start, self.buffer_end);
    if head > tail {
        // The occupied region wraps past the last slot.
        (4 - head) + tail
    } else {
        tail - head
    }
}
pub fn look_ahead<R, F>(&mut self, distance: usize, f: F) -> R where
pub fn look_ahead<R, F>(&mut self, dist: usize, f: F) -> R where
F: FnOnce(&token::Token) -> R,
{
let dist = distance as isize;
while self.buffer_length() < dist {
self.buffer[self.buffer_end as usize] = self.reader.real_token();
self.buffer_end = (self.buffer_end + 1) & 3;
if dist == 0 {
f(&self.token)
} else if dist < LOOKAHEAD_BUFFER_CAPACITY {
while self.lookahead_buffer.len() < dist {
self.lookahead_buffer.buffer[self.lookahead_buffer.end] = self.reader.real_token();
self.lookahead_buffer.end =
(self.lookahead_buffer.end + 1) % LOOKAHEAD_BUFFER_CAPACITY;
}
let index = (self.lookahead_buffer.start + dist - 1) % LOOKAHEAD_BUFFER_CAPACITY;
f(&self.lookahead_buffer.buffer[index].tok)
} else {
self.bug("lookahead distance is too large");
}
f(&self.buffer[((self.buffer_start + dist - 1) & 3) as usize].tok)
}
pub fn fatal(&self, m: &str) -> DiagnosticBuilder<'a> {
self.sess.span_diagnostic.struct_span_fatal(self.span, m)
Expand Down Expand Up @@ -1118,7 +1116,6 @@ impl<'a> Parser<'a> {
Ok(ast::TyKind::ImplTrait(bounds))
}


/// Parses a path using `PathStyle::Type` and wraps it in `TyKind::Path`,
/// leaving the first field as `None`. Any parse error is propagated.
pub fn parse_ty_path(&mut self) -> PResult<'a, TyKind> {
    let path = self.parse_path(PathStyle::Type)?;
    Ok(TyKind::Path(None, path))
}
Expand Down Expand Up @@ -3623,7 +3620,7 @@ impl<'a> Parser<'a> {
// Parse box pat
let subpat = self.parse_pat()?;
pat = PatKind::Box(subpat);
} else if self.token.is_ident() && self.token.is_path_start() &&
} else if self.token.is_ident() && !self.token.is_any_keyword() &&
self.look_ahead(1, |t| match *t {
token::OpenDelim(token::Paren) | token::OpenDelim(token::Brace) |
token::DotDotDot | token::ModSep | token::Not => false,
Expand Down Expand Up @@ -3874,6 +3871,11 @@ impl<'a> Parser<'a> {
})
}

/// Returns `true` when the current token is the `union` keyword and the
/// token right after it is an identifier that is not itself a keyword.
fn is_union_item(&mut self) -> bool {
    // Only look ahead once the current token is known to be `union`.
    if !self.token.is_keyword(keywords::Union) {
        return false;
    }
    self.look_ahead(1, |next| next.is_ident() && !next.is_any_keyword())
}

fn parse_stmt_without_recovery(&mut self,
macro_legacy_warnings: bool)
-> PResult<'a, Option<Stmt>> {
Expand All @@ -3888,10 +3890,10 @@ impl<'a> Parser<'a> {
node: StmtKind::Local(self.parse_local(attrs.into())?),
span: mk_sp(lo, self.prev_span.hi),
}
} else if self.token.is_path_start() && self.token != token::Lt && {
!self.check_keyword(keywords::Union) ||
self.look_ahead(1, |t| *t == token::Not || *t == token::ModSep)
} {
// Starts like a simple path, but not a union item.
} else if self.token.is_path_start() &&
!self.token.is_qpath_start() &&
!self.is_union_item() {
let pth = self.parse_path(PathStyle::Expr)?;

if !self.eat(&token::Not) {
Expand Down Expand Up @@ -4602,6 +4604,10 @@ impl<'a> Parser<'a> {
token::Ident(ident) => { this.bump(); codemap::respan(this.prev_span, ident) }
_ => unreachable!()
};
let isolated_self = |this: &mut Self, n| {
this.look_ahead(n, |t| t.is_keyword(keywords::SelfValue)) &&
this.look_ahead(n + 1, |t| t != &token::ModSep)
};

// Parse optional self parameter of a method.
// Only a limited set of initial token sequences is considered self parameters, anything
Expand All @@ -4614,22 +4620,22 @@ impl<'a> Parser<'a> {
// &'lt self
// &'lt mut self
// &not_self
if self.look_ahead(1, |t| t.is_keyword(keywords::SelfValue)) {
if isolated_self(self, 1) {
self.bump();
(SelfKind::Region(None, Mutability::Immutable), expect_ident(self))
} else if self.look_ahead(1, |t| t.is_keyword(keywords::Mut)) &&
self.look_ahead(2, |t| t.is_keyword(keywords::SelfValue)) {
isolated_self(self, 2) {
self.bump();
self.bump();
(SelfKind::Region(None, Mutability::Mutable), expect_ident(self))
} else if self.look_ahead(1, |t| t.is_lifetime()) &&
self.look_ahead(2, |t| t.is_keyword(keywords::SelfValue)) {
isolated_self(self, 2) {
self.bump();
let lt = self.parse_lifetime()?;
(SelfKind::Region(Some(lt), Mutability::Immutable), expect_ident(self))
} else if self.look_ahead(1, |t| t.is_lifetime()) &&
self.look_ahead(2, |t| t.is_keyword(keywords::Mut)) &&
self.look_ahead(3, |t| t.is_keyword(keywords::SelfValue)) {
isolated_self(self, 3) {
self.bump();
let lt = self.parse_lifetime()?;
self.bump();
Expand All @@ -4644,12 +4650,12 @@ impl<'a> Parser<'a> {
// *mut self
// *not_self
// Emit special error for `self` cases.
if self.look_ahead(1, |t| t.is_keyword(keywords::SelfValue)) {
if isolated_self(self, 1) {
self.bump();
self.span_err(self.span, "cannot pass `self` by raw pointer");
(SelfKind::Value(Mutability::Immutable), expect_ident(self))
} else if self.look_ahead(1, |t| t.is_mutability()) &&
self.look_ahead(2, |t| t.is_keyword(keywords::SelfValue)) {
isolated_self(self, 2) {
self.bump();
self.bump();
self.span_err(self.span, "cannot pass `self` by raw pointer");
Expand All @@ -4659,7 +4665,7 @@ impl<'a> Parser<'a> {
}
}
token::Ident(..) => {
if self.token.is_keyword(keywords::SelfValue) {
if isolated_self(self, 0) {
// self
// self: TYPE
let eself_ident = expect_ident(self);
Expand All @@ -4670,7 +4676,7 @@ impl<'a> Parser<'a> {
(SelfKind::Value(Mutability::Immutable), eself_ident)
}
} else if self.token.is_keyword(keywords::Mut) &&
self.look_ahead(1, |t| t.is_keyword(keywords::SelfValue)) {
isolated_self(self, 1) {
// mut self
// mut self: TYPE
self.bump();
Expand Down Expand Up @@ -5961,8 +5967,7 @@ impl<'a> Parser<'a> {
maybe_append(attrs, extra_attrs));
return Ok(Some(item));
}
if self.check_keyword(keywords::Union) &&
self.look_ahead(1, |t| t.is_ident() && !t.is_any_keyword()) {
if self.is_union_item() {
// UNION ITEM
self.bump();
let (ident, item_, extra_attrs) = self.parse_item_union()?;
Expand Down
11 changes: 7 additions & 4 deletions src/libsyntax/parse/token.rs
Expand Up @@ -159,10 +159,8 @@ impl Token {
/// Returns `true` if the token can appear at the start of an expression.
pub fn can_begin_expr(&self) -> bool {
match *self {
OpenDelim(_) => true,
OpenDelim(..) => true,
Ident(..) => true,
Underscore => true,
Tilde => true,
Literal(..) => true,
Not => true,
BinOp(Minus) => true,
Expand All @@ -172,6 +170,7 @@ impl Token {
OrOr => true, // in lambda syntax
AndAnd => true, // double borrow
DotDot | DotDotDot => true, // range notation
Lt | BinOp(Shl) => true, // associated path
ModSep => true,
Interpolated(NtExpr(..)) => true,
Interpolated(NtIdent(..)) => true,
Expand Down Expand Up @@ -236,8 +235,12 @@ impl Token {
self.is_keyword(keywords::Const)
}

pub fn is_qpath_start(&self) -> bool {
self == &Lt || self == &BinOp(Shl)
}

pub fn is_path_start(&self) -> bool {
self == &ModSep || self == &Lt || self.is_path() ||
self == &ModSep || self.is_qpath_start() || self.is_path() ||
self.is_path_segment_keyword() || self.is_ident() && !self.is_any_keyword()
}

Expand Down
20 changes: 20 additions & 0 deletions src/test/compile-fail/associated-path-shl.rs
@@ -0,0 +1,20 @@
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// Check that associated paths starting with `<<` are successfully parsed.

fn main() {
// Each statement places a `<<`-prefixed associated path in a different
// position: type annotation, initializer expression, pattern, upper bound of
// a range pattern, and expression statement. The expected errors are about
// the undefined type `A` (plus the range-pattern type restriction), not
// about a failure to parse the path.
let _: <<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope
let _ = <<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope
let <<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope
let 0 ... <<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope
//~^ ERROR only char and numeric types are allowed in range patterns
<<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope
}
Expand Up @@ -8,8 +8,6 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// compile-flags: -Z parse-only

fn main() {
let Self = "foo"; //~ error: expected identifier, found keyword `Self`
let Self = "foo"; //~ ERROR unresolved unit struct/variant or constant `Self`
}
Expand Up @@ -8,8 +8,6 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// compile-flags: -Z parse-only

fn main() {
let super = "foo"; //~ error: expected identifier, found keyword `super`
let super = "foo"; //~ ERROR unresolved unit struct/variant or constant `super`
}
Expand Up @@ -8,8 +8,6 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// compile-flags: -Z parse-only

fn main() {
let super: isize; //~ ERROR expected identifier, found keyword `super`
let super: isize; //~ ERROR unresolved unit struct/variant or constant `super`
}
23 changes: 23 additions & 0 deletions src/test/compile-fail/self-vs-path-ambiguity.rs
@@ -0,0 +1,23 @@
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// Check that `self::foo` is parsed as a general pattern and not a self argument.

struct S;

impl S {
// In `f`, `g` and `h` the first parameter starts with `self::`, so it is a
// path pattern (`self::S`) with an explicit type, not a `self` receiver;
// no errors are expected for them. In `i`, the lifetime in front of the
// path cannot be part of a pattern, so the errors noted below are expected.
fn f(self::S: S) {}
fn g(&self::S: &S) {}
fn h(&mut self::S: &mut S) {}
fn i(&'a self::S: &S) {} //~ ERROR unexpected lifetime `'a` in pattern
//~^ ERROR expected one of `)` or `mut`, found `'a`
}

fn main() {}
12 changes: 11 additions & 1 deletion src/test/compile-fail/self_type_keyword-2.rs
Expand Up @@ -10,4 +10,14 @@

use self::Self as Foo; //~ ERROR unresolved import `self::Self`

pub fn main() {}
pub fn main() {
// `Self` is used in pattern position three ways: a `let` binding, a match
// arm, and a struct-pattern field. Each one is expected to be reported as an
// unresolved unit struct/variant or constant.
let Self = 5;
//~^ ERROR unresolved unit struct/variant or constant `Self`

match 15 {
Self => (),
//~^ ERROR unresolved unit struct/variant or constant `Self`
Foo { x: Self } => (),
//~^ ERROR unresolved unit struct/variant or constant `Self`
}
}

0 comments on commit 5509ae3

Please sign in to comment.