diff --git a/src/librustc_save_analysis/span_utils.rs b/src/librustc_save_analysis/span_utils.rs
index 8905f475647ba..fb9919d777db1 100644
--- a/src/librustc_save_analysis/span_utils.rs
+++ b/src/librustc_save_analysis/span_utils.rs
@@ -53,7 +53,7 @@ impl<'a> SpanUtils<'a> {
     pub fn sub_span_of_token(&self, span: Span, tok: TokenKind) -> Option<Span> {
         let mut toks = self.retokenise_span(span);
         loop {
-            let next = toks.real_token();
+            let next = toks.next_token();
             if next == token::Eof {
                 return None;
             }
diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs
index 99ca8c43cfbe2..8132074d6e0e7 100644
--- a/src/librustdoc/html/highlight.rs
+++ b/src/librustdoc/html/highlight.rs
@@ -38,17 +38,17 @@ pub fn render_with_highlighting(
         FileName::Custom(String::from("rustdoc-highlighting")),
         src.to_owned(),
     );
-    let highlight_result =
-        lexer::StringReader::new_or_buffered_errs(&sess, fm, None).and_then(|lexer| {
-            let mut classifier = Classifier::new(lexer, sess.source_map());
-
-            let mut highlighted_source = vec![];
-            if classifier.write_source(&mut highlighted_source).is_err() {
-                Err(classifier.lexer.buffer_fatal_errors())
-            } else {
-                Ok(String::from_utf8_lossy(&highlighted_source).into_owned())
-            }
-        });
+    let highlight_result = {
+        let lexer = lexer::StringReader::new(&sess, fm, None);
+        let mut classifier = Classifier::new(lexer, sess.source_map());
+
+        let mut highlighted_source = vec![];
+        if classifier.write_source(&mut highlighted_source).is_err() {
+            Err(classifier.lexer.buffer_fatal_errors())
+        } else {
+            Ok(String::from_utf8_lossy(&highlighted_source).into_owned())
+        }
+    };
 
     match highlight_result {
         Ok(highlighted_source) => {
@@ -79,6 +79,7 @@ pub fn render_with_highlighting(
 /// each span of text in sequence.
 struct Classifier<'a> {
     lexer: lexer::StringReader<'a>,
+    peek_token: Option<Token>,
     source_map: &'a SourceMap,
 
     // State of the classifier.
@@ -178,6 +179,7 @@ impl<'a> Classifier<'a> {
     fn new(lexer: lexer::StringReader<'a>, source_map: &'a SourceMap) -> Classifier<'a> {
         Classifier {
             lexer,
+            peek_token: None,
             source_map,
             in_attribute: false,
             in_macro: false,
@@ -187,10 +189,19 @@ impl<'a> Classifier<'a> {
 
     /// Gets the next token out of the lexer.
     fn try_next_token(&mut self) -> Result<Token, HighlightError> {
-        match self.lexer.try_next_token() {
-            Ok(token) => Ok(token),
-            Err(_) => Err(HighlightError::LexError),
+        if let Some(token) = self.peek_token.take() {
+            return Ok(token);
+        }
+        self.lexer.try_next_token().map_err(|()| HighlightError::LexError)
+    }
+
+    fn peek(&mut self) -> Result<&Token, HighlightError> {
+        if self.peek_token.is_none() {
+            self.peek_token = Some(
+                self.lexer.try_next_token().map_err(|()| HighlightError::LexError)?
+            );
         }
+        Ok(self.peek_token.as_ref().unwrap())
     }
 
     /// Exhausts the `lexer` writing the output into `out`.
@@ -234,7 +245,7 @@ impl<'a> Classifier<'a> {
             // reference or dereference operator or a reference or pointer type, instead of the
             // bit-and or multiplication operator.
             token::BinOp(token::And) | token::BinOp(token::Star)
-                if self.lexer.peek() != &token::Whitespace => Class::RefKeyWord,
+                if self.peek()? != &token::Whitespace => Class::RefKeyWord,
 
             // Consider this as part of a macro invocation if there was a
             // leading identifier.
@@ -257,7 +268,7 @@ impl<'a> Classifier<'a> {
             token::Question => Class::QuestionMark,
 
             token::Dollar => {
-                if self.lexer.peek().is_ident() {
+                if self.peek()?.is_ident() {
                     self.in_macro_nonterminal = true;
                     Class::MacroNonTerminal
                 } else {
@@ -280,9 +291,9 @@ impl<'a> Classifier<'a> {
                 // as an attribute.
 
                 // Case 1: #![inner_attribute]
-                if self.lexer.peek() == &token::Not {
+                if self.peek()? == &token::Not {
                     self.try_next_token()?; // NOTE: consumes `!` token!
-                    if self.lexer.peek() == &token::OpenDelim(token::Bracket) {
+                    if self.peek()? == &token::OpenDelim(token::Bracket) {
                         self.in_attribute = true;
                         out.enter_span(Class::Attribute)?;
                     }
@@ -292,7 +303,7 @@ impl<'a> Classifier<'a> {
                 }
 
                 // Case 2: #[outer_attribute]
-                if self.lexer.peek() == &token::OpenDelim(token::Bracket) {
+                if self.peek()? == &token::OpenDelim(token::Bracket) {
                     self.in_attribute = true;
                     out.enter_span(Class::Attribute)?;
                 }
@@ -341,7 +352,7 @@ impl<'a> Classifier<'a> {
                 if self.in_macro_nonterminal {
                     self.in_macro_nonterminal = false;
                     Class::MacroNonTerminal
-                } else if self.lexer.peek() == &token::Not {
+                } else if self.peek()? == &token::Not {
                     self.in_macro = true;
                     Class::Macro
                 } else {
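The `Classifier` changes above swap the lexer-provided `peek` for a single token of lookahead buffered locally in `peek_token: Option<Token>`: `peek` fills the slot lazily, and `try_next_token` drains it before asking the lexer for more. A minimal, self-contained sketch of that pattern — `Tok` and the iterator-backed lexer are toy stand-ins, not the rustc types:

    #[derive(Debug, Clone, PartialEq)]
    enum Tok {
        Ident(String),
        Not,
        Eof,
    }

    struct Classifier<I: Iterator<Item = Tok>> {
        lexer: I,
        // At most one token of lookahead, like `peek_token` above.
        peek_token: Option<Tok>,
    }

    impl<I: Iterator<Item = Tok>> Classifier<I> {
        // Drain the lookahead slot first; only then pull from the lexer.
        fn next_token(&mut self) -> Tok {
            self.peek_token
                .take()
                .unwrap_or_else(|| self.lexer.next().unwrap_or(Tok::Eof))
        }

        // Lazily fill the slot and hand out a reference; the next
        // `next_token` call returns this same token.
        fn peek(&mut self) -> &Tok {
            if self.peek_token.is_none() {
                self.peek_token = Some(self.lexer.next().unwrap_or(Tok::Eof));
            }
            self.peek_token.as_ref().unwrap()
        }
    }

    fn main() {
        let toks = vec![Tok::Ident("vec".into()), Tok::Not];
        let mut c = Classifier { lexer: toks.into_iter(), peek_token: None };

        // An identifier followed by `!` is how the classifier spots a macro.
        assert_eq!(c.peek(), &Tok::Ident("vec".into())); // does not consume
        assert_eq!(c.next_token(), Tok::Ident("vec".into()));
        assert_eq!(c.peek(), &Tok::Not);
        assert_eq!(c.next_token(), Tok::Not);
        assert_eq!(c.next_token(), Tok::Eof);
    }

Because the slot is always drained before the underlying lexer is touched, interleaved `peek` and `next_token` calls can never skip or duplicate a token.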
diff --git a/src/librustdoc/passes/check_code_block_syntax.rs b/src/librustdoc/passes/check_code_block_syntax.rs
index f6ab1290da37c..0488153e7cb73 100644
--- a/src/librustdoc/passes/check_code_block_syntax.rs
+++ b/src/librustdoc/passes/check_code_block_syntax.rs
@@ -32,7 +32,8 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> {
             dox[code_block.code].to_owned(),
         );
 
-        let errors = Lexer::new_or_buffered_errs(&sess, source_file, None).and_then(|mut lexer| {
+        let errors = {
+            let mut lexer = Lexer::new(&sess, source_file, None);
             while let Ok(token::Token { kind, .. }) = lexer.try_next_token() {
                 if kind == token::Eof {
                     break;
@@ -46,7 +47,7 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> {
             } else {
                 Ok(())
             }
-        });
+        };
 
         if let Err(errors) = errors {
            let mut diag = if let Some(sp) =
diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs
index 97d3fc002e9b0..988f1aa38d926 100644
--- a/src/libsyntax/parse/lexer/comments.rs
+++ b/src/libsyntax/parse/lexer/comments.rs
@@ -268,7 +268,7 @@ fn read_block_comment(rdr: &mut StringReader<'_>,
     while level > 0 {
         debug!("=== block comment level {}", level);
         if rdr.is_eof() {
-            rdr.fatal("unterminated block comment").raise();
+            rdr.fatal_span_(rdr.pos, rdr.pos, "unterminated block comment").raise();
         }
         if rdr.ch_is('\n') {
             trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col);
@@ -346,7 +346,7 @@ pub fn gather_comments(sess: &ParseSess, path: FileName, srdr: &mut dyn Read) -> Vec<Comment>
     srdr.read_to_string(&mut src).unwrap();
     let cm = SourceMap::new(sess.source_map().path_mapping().clone());
     let source_file = cm.new_source_file(path, src);
-    let mut rdr = lexer::StringReader::new_raw(sess, source_file, None);
+    let mut rdr = lexer::StringReader::new(sess, source_file, None);
 
     let mut comments: Vec<Comment> = Vec::new();
     let mut code_to_the_left = false; // Only code
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 49f714e4e4654..2ad562485eb7c 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -38,9 +38,6 @@ pub struct StringReader<'a> {
     crate source_file: Lrc<SourceFile>,
     /// Stop reading src at this index.
     crate end_src_index: usize,
-    // cached:
-    peek_token: Token,
-    peek_span_src_raw: Span,
     fatal_errs: Vec<DiagnosticBuilder<'a>>,
     // cache a direct reference to the source text, so that we don't have to
     // retrieve it via `self.source_file.src.as_ref().unwrap()` all the time.
@@ -49,15 +46,59 @@ pub struct StringReader<'a> {
 }
 
 impl<'a> StringReader<'a> {
-    fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span {
-        self.mk_sp_and_raw(lo, hi).0
+    pub fn new(sess: &'a ParseSess,
+               source_file: Lrc<SourceFile>,
+               override_span: Option<Span>) -> Self {
+        let mut sr = StringReader::new_internal(sess, source_file, override_span);
+        sr.bump();
+        sr
+    }
+
+    pub fn retokenize(sess: &'a ParseSess, mut span: Span) -> Self {
+        let begin = sess.source_map().lookup_byte_offset(span.lo());
+        let end = sess.source_map().lookup_byte_offset(span.hi());
+
+        // Make the range zero-length if the span is invalid.
+        if span.lo() > span.hi() || begin.sf.start_pos != end.sf.start_pos {
+            span = span.shrink_to_lo();
+        }
+
+        let mut sr = StringReader::new_internal(sess, begin.sf, None);
+
+        // Seek the lexer to the right byte range.
+        sr.next_pos = span.lo();
+        sr.end_src_index = sr.src_index(span.hi());
+
+        sr.bump();
+
+        sr
     }
 
-    fn mk_sp_and_raw(&self, lo: BytePos, hi: BytePos) -> (Span, Span) {
-        let raw = Span::new(lo, hi, NO_EXPANSION);
-        let real = self.override_span.unwrap_or(raw);
+    fn new_internal(sess: &'a ParseSess, source_file: Lrc<SourceFile>,
+                    override_span: Option<Span>) -> Self
+    {
+        if source_file.src.is_none() {
+            sess.span_diagnostic.bug(&format!("Cannot lex source_file without source: {}",
+                                              source_file.name));
+        }
+
+        let src = (*source_file.src.as_ref().unwrap()).clone();
+
+        StringReader {
+            sess,
+            next_pos: source_file.start_pos,
+            pos: source_file.start_pos,
+            ch: Some('\n'),
+            source_file,
+            end_src_index: src.len(),
+            src,
+            fatal_errs: Vec::new(),
+            override_span,
+        }
+    }
 
-        (real, raw)
+    fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span {
+        self.override_span.unwrap_or_else(|| Span::new(lo, hi, NO_EXPANSION))
     }
 
     fn unwrap_or_abort(&mut self, res: Result<Token, ()>) -> Token {
@@ -70,35 +111,32 @@ impl<'a> StringReader<'a> {
         }
     }
 
-    fn next_token(&mut self) -> Token where Self: Sized {
-        let res = self.try_next_token();
-        self.unwrap_or_abort(res)
-    }
-
-    /// Returns the next token. EFFECT: advances the string_reader.
+    /// Returns the next token, including trivia like whitespace or comments.
+    ///
+    /// `Err(())` means that some errors were encountered, which can be
+    /// retrieved using `buffer_fatal_errors`.
     pub fn try_next_token(&mut self) -> Result<Token, ()> {
         assert!(self.fatal_errs.is_empty());
-        let ret_val = self.peek_token.take();
-        self.advance_token()?;
-        Ok(ret_val)
-    }
-
-    fn try_real_token(&mut self) -> Result<Token, ()> {
-        let mut t = self.try_next_token()?;
-        loop {
-            match t.kind {
-                token::Whitespace | token::Comment | token::Shebang(_) => {
-                    t = self.try_next_token()?;
-                }
-                _ => break,
+        match self.scan_whitespace_or_comment() {
+            Some(comment) => Ok(comment),
+            None => {
+                let (kind, start_pos, end_pos) = if self.is_eof() {
+                    (token::Eof, self.source_file.end_pos, self.source_file.end_pos)
+                } else {
+                    let start_pos = self.pos;
+                    (self.next_token_inner()?, start_pos, self.pos)
+                };
+                let span = self.mk_sp(start_pos, end_pos);
+                Ok(Token::new(kind, span))
             }
         }
-
-        Ok(t)
     }
 
-    pub fn real_token(&mut self) -> Token {
-        let res = self.try_real_token();
+    /// Returns the next token, including trivia like whitespace or comments.
+    ///
+    /// Aborts in case of an error.
+    pub fn next_token(&mut self) -> Token {
+        let res = self.try_next_token();
         self.unwrap_or_abort(res)
     }
 
@@ -120,10 +158,6 @@ impl<'a> StringReader<'a> {
         FatalError.raise();
     }
 
-    fn fatal(&self, m: &str) -> FatalError {
-        self.fatal_span(self.peek_token.span, m)
-    }
-
     crate fn emit_fatal_errors(&mut self) {
         for err in &mut self.fatal_errs {
             err.emit();
@@ -142,81 +176,6 @@ impl<'a> StringReader<'a> {
         buffer
     }
 
-    pub fn peek(&self) -> &Token {
-        &self.peek_token
-    }
-
-    /// For comments.rs, which hackily pokes into next_pos and ch
-    fn new_raw(sess: &'a ParseSess,
-               source_file: Lrc<SourceFile>,
-               override_span: Option<Span>) -> Self {
-        let mut sr = StringReader::new_raw_internal(sess, source_file, override_span);
-        sr.bump();
-
-        sr
-    }
-
-    fn new_raw_internal(sess: &'a ParseSess, source_file: Lrc<SourceFile>,
-                        override_span: Option<Span>) -> Self
-    {
-        if source_file.src.is_none() {
-            sess.span_diagnostic.bug(&format!("Cannot lex source_file without source: {}",
-                                              source_file.name));
-        }
-
-        let src = (*source_file.src.as_ref().unwrap()).clone();
-
-        StringReader {
-            sess,
-            next_pos: source_file.start_pos,
-            pos: source_file.start_pos,
-            ch: Some('\n'),
-            source_file,
-            end_src_index: src.len(),
-            peek_token: Token::dummy(),
-            peek_span_src_raw: syntax_pos::DUMMY_SP,
-            src,
-            fatal_errs: Vec::new(),
-            override_span,
-        }
-    }
-
-    pub fn new_or_buffered_errs(sess: &'a ParseSess,
-                                source_file: Lrc<SourceFile>,
-                                override_span: Option<Span>) -> Result<Self, Vec<Diagnostic>> {
-        let mut sr = StringReader::new_raw(sess, source_file, override_span);
-        if sr.advance_token().is_err() {
-            Err(sr.buffer_fatal_errors())
-        } else {
-            Ok(sr)
-        }
-    }
-
-    pub fn retokenize(sess: &'a ParseSess, mut span: Span) -> Self {
-        let begin = sess.source_map().lookup_byte_offset(span.lo());
-        let end = sess.source_map().lookup_byte_offset(span.hi());
-
-        // Make the range zero-length if the span is invalid.
-        if span.lo() > span.hi() || begin.sf.start_pos != end.sf.start_pos {
-            span = span.shrink_to_lo();
-        }
-
-        let mut sr = StringReader::new_raw_internal(sess, begin.sf, None);
-
-        // Seek the lexer to the right byte range.
-        sr.next_pos = span.lo();
-        sr.end_src_index = sr.src_index(span.hi());
-
-        sr.bump();
-
-        if sr.advance_token().is_err() {
-            sr.emit_fatal_errors();
-            FatalError.raise();
-        }
-
-        sr
-    }
-
     #[inline]
     fn ch_is(&self, c: char) -> bool {
         self.ch == Some(c)
@@ -269,30 +228,6 @@ impl<'a> StringReader<'a> {
         self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..])
     }
 
-    /// Advance peek_token to refer to the next token, and
-    /// possibly update the interner.
-    fn advance_token(&mut self) -> Result<(), ()> {
-        match self.scan_whitespace_or_comment() {
-            Some(comment) => {
-                self.peek_span_src_raw = comment.span;
-                self.peek_token = comment;
-            }
-            None => {
-                let (kind, start_pos, end_pos) = if self.is_eof() {
-                    (token::Eof, self.source_file.end_pos, self.source_file.end_pos)
-                } else {
-                    let start_pos = self.pos;
-                    (self.next_token_inner()?, start_pos, self.pos)
-                };
-                let (real, raw) = self.mk_sp_and_raw(start_pos, end_pos);
-                self.peek_token = Token::new(kind, real);
-                self.peek_span_src_raw = raw;
-            }
-        }
-
-        Ok(())
-    }
-
     #[inline]
     fn src_index(&self, pos: BytePos) -> usize {
         (pos - self.source_file.start_pos).to_usize()
@@ -1461,12 +1396,7 @@ mod tests {
                  teststr: String)
                  -> StringReader<'a> {
         let sf = sm.new_source_file(PathBuf::from(teststr.clone()).into(), teststr);
-        let mut sr = StringReader::new_raw(sess, sf, None);
-        if sr.advance_token().is_err() {
-            sr.emit_fatal_errors();
-            FatalError.raise();
-        }
-        sr
+        StringReader::new(sess, sf, None)
     }
 
     #[test]
@@ -1488,17 +1418,17 @@ mod tests {
             assert_eq!(tok1.kind, tok2.kind);
             assert_eq!(tok1.span, tok2.span);
             assert_eq!(string_reader.next_token(), token::Whitespace);
-            // the 'main' id is already read:
-            assert_eq!(string_reader.pos.clone(), BytePos(28));
             // read another token:
             let tok3 = string_reader.next_token();
+            assert_eq!(string_reader.pos.clone(), BytePos(28));
             let tok4 = Token::new(
                 mk_ident("main"),
                 Span::new(BytePos(24), BytePos(28), NO_EXPANSION),
             );
             assert_eq!(tok3.kind, tok4.kind);
             assert_eq!(tok3.span, tok4.span);
-            // the lparen is already read:
+
+            assert_eq!(string_reader.next_token(), token::OpenDelim(token::Paren));
             assert_eq!(string_reader.pos.clone(), BytePos(29))
         })
     }
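With the eagerly cached `peek_token` gone, constructing a `StringReader` can no longer fail — nothing is lexed until the first `next_token` call — which is why `new_raw` and `new_or_buffered_errs` collapse into a plain `new` above. A toy model of the resulting error contract; the method names mirror the diff, but the types are stand-ins, not the rustc implementation:

    #[derive(Debug, PartialEq)]
    enum TokenKind { Ident, Eof }

    struct Lexer {
        // Scripted input standing in for real lexing; `Err` simulates a
        // fatal lexing error at that position.
        script: std::vec::IntoIter<Result<TokenKind, String>>,
        fatal_errs: Vec<String>,
    }

    impl Lexer {
        // Fallible form: park the diagnostic and return `Err(())`,
        // the same shape as `try_next_token` above.
        fn try_next_token(&mut self) -> Result<TokenKind, ()> {
            assert!(self.fatal_errs.is_empty());
            match self.script.next().unwrap_or(Ok(TokenKind::Eof)) {
                Ok(kind) => Ok(kind),
                Err(msg) => {
                    self.fatal_errs.push(msg);
                    Err(())
                }
            }
        }

        fn buffer_fatal_errors(&mut self) -> Vec<String> {
            std::mem::take(&mut self.fatal_errs)
        }

        // Infallible wrapper: abort (here: panic) instead of returning
        // `Err`, mirroring `next_token`/`unwrap_or_abort`.
        fn next_token(&mut self) -> TokenKind {
            match self.try_next_token() {
                Ok(kind) => kind,
                Err(()) => panic!("fatal lex errors: {:?}", self.buffer_fatal_errors()),
            }
        }
    }

    fn main() {
        let script = vec![Ok(TokenKind::Ident)];
        let mut lexer = Lexer { script: script.into_iter(), fatal_errs: Vec::new() };
        assert_eq!(lexer.next_token(), TokenKind::Ident);
        assert_eq!(lexer.next_token(), TokenKind::Eof);
    }

Since `new` no longer lexes anything up front, the only way to hit the error path is to actually pull tokens — which is exactly what the updated `check_code_block_syntax` loop does with `try_next_token`.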
diff --git a/src/libsyntax/parse/lexer/tokentrees.rs b/src/libsyntax/parse/lexer/tokentrees.rs
index 99d9d40a45b93..830fbec58ded9 100644
--- a/src/libsyntax/parse/lexer/tokentrees.rs
+++ b/src/libsyntax/parse/lexer/tokentrees.rs
@@ -4,13 +4,14 @@ use crate::print::pprust::token_to_string;
 use crate::parse::lexer::{StringReader, UnmatchedBrace};
 use crate::parse::token::{self, Token};
 use crate::parse::PResult;
-use crate::tokenstream::{DelimSpan, IsJoint::*, TokenStream, TokenTree, TreeAndJoint};
+use crate::tokenstream::{DelimSpan, IsJoint::{self, *}, TokenStream, TokenTree, TreeAndJoint};
 
 impl<'a> StringReader<'a> {
     crate fn into_token_trees(self) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) {
         let mut tt_reader = TokenTreesReader {
             string_reader: self,
             token: Token::dummy(),
+            joint_to_prev: Joint,
             open_braces: Vec::new(),
             unmatched_braces: Vec::new(),
             matching_delim_spans: Vec::new(),
@@ -24,6 +25,7 @@ impl<'a> StringReader<'a> {
 struct TokenTreesReader<'a> {
     string_reader: StringReader<'a>,
     token: Token,
+    joint_to_prev: IsJoint,
     /// Stack of open delimiters and their spans. Used for error message.
     open_braces: Vec<(token::DelimToken, Span)>,
     unmatched_braces: Vec<UnmatchedBrace>,
@@ -203,21 +205,26 @@ impl<'a> TokenTreesReader<'a> {
             },
             _ => {
                 let tt = TokenTree::Token(self.token.take());
-                // Note that testing for joint-ness here is done via the raw
-                // source span as the joint-ness is a property of the raw source
-                // rather than wanting to take `override_span` into account.
-                // Additionally, we actually check if the *next* pair of tokens
-                // is joint, but this is equivalent to checking the current pair.
-                let raw = self.string_reader.peek_span_src_raw;
                 self.real_token();
-                let is_joint = raw.hi() == self.string_reader.peek_span_src_raw.lo()
-                    && self.token.is_op();
+                let is_joint = self.joint_to_prev == Joint && self.token.is_op();
                 Ok((tt, if is_joint { Joint } else { NonJoint }))
             }
         }
     }
 
     fn real_token(&mut self) {
-        self.token = self.string_reader.real_token();
+        self.joint_to_prev = Joint;
+        loop {
+            let token = self.string_reader.next_token();
+            match token.kind {
+                token::Whitespace | token::Comment | token::Shebang(_) => {
+                    self.joint_to_prev = NonJoint;
+                }
+                _ => {
+                    self.token = token;
+                    return;
+                },
+            }
+        }
     }
 }
diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs
index e19eab371f44e..ff2275ca348bc 100644
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@@ -305,7 +305,7 @@ pub fn maybe_file_to_stream(
     source_file: Lrc<SourceFile>,
     override_span: Option<Span>,
 ) -> Result<(TokenStream, Vec<lexer::UnmatchedBrace>), Vec<Diagnostic>> {
-    let srdr = lexer::StringReader::new_or_buffered_errs(sess, source_file, override_span)?;
+    let srdr = lexer::StringReader::new(sess, source_file, override_span);
     let (token_trees, unmatched_braces) = srdr.into_token_trees();
 
     match token_trees {
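Finally, `TokenTreesReader` now tracks jointness with an explicit flag instead of comparing raw spans: each fetch starts out `Joint` and is downgraded to `NonJoint` as soon as any trivia is skipped (the real code additionally requires `self.token.is_op()`). A stand-alone sketch of that bookkeeping with toy tokens, not the rustc types:

    #[derive(Clone, Copy, Debug, PartialEq)]
    enum IsJoint { Joint, NonJoint }
    use IsJoint::*;

    #[derive(Clone, Copy, Debug, PartialEq)]
    enum Tok { Op(char), Ws, Eof }

    struct TokenTreesReader {
        src: Vec<Tok>,
        pos: usize,
        token: Tok,
        joint_to_prev: IsJoint,
    }

    impl TokenTreesReader {
        // Mirrors the new `real_token`: start each fetch optimistically
        // `Joint`, downgrade to `NonJoint` if any trivia is skipped.
        fn real_token(&mut self) {
            self.joint_to_prev = Joint;
            loop {
                let token = if self.pos < self.src.len() { self.src[self.pos] } else { Tok::Eof };
                self.pos += 1;
                match token {
                    Tok::Ws => self.joint_to_prev = NonJoint,
                    _ => {
                        self.token = token;
                        return;
                    }
                }
            }
        }
    }

    fn main() {
        // Three operator tokens: the first two are adjacent in the
        // source, the third is separated by whitespace.
        let mut r = TokenTreesReader {
            src: vec![Tok::Op('+'), Tok::Op('='), Tok::Ws, Tok::Op('=')],
            pos: 0,
            token: Tok::Eof,
            joint_to_prev: Joint,
        };
        r.real_token(); // reads `+`
        r.real_token(); // reads `=`, no trivia in between
        assert_eq!((r.token, r.joint_to_prev), (Tok::Op('='), Joint));
        r.real_token(); // skips the space before the final `=`
        assert_eq!((r.token, r.joint_to_prev), (Tok::Op('='), NonJoint));
    }

This substitutes information the token-tree reader already has while skipping trivia for the old span arithmetic (`raw.hi() == ...lo()`), which is what allows `peek_span_src_raw` to be deleted from the lexer entirely.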