From 830ff4a592cf6a5adc0e5482d4294779d7a91177 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Tue, 2 Jul 2019 13:44:38 +0300
Subject: [PATCH 1/9] remove StringReader::peek

The reader itself doesn't need the ability to peek at tokens, so it's
better if clients implement this functionality themselves. This should
become especially easy once the lexer exposes an iterator interface,
but that is not straightforward at the moment because of buffered
errors.
---
 src/librustdoc/html/highlight.rs | 29 ++++++++++++++++++++---------
 src/libsyntax/parse/lexer/mod.rs |  4 ----
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs
index 99ca8c43cfbe2..852c1e031de4a 100644
--- a/src/librustdoc/html/highlight.rs
+++ b/src/librustdoc/html/highlight.rs
@@ -79,6 +79,7 @@ pub fn render_with_highlighting(
 /// each span of text in sequence.
 struct Classifier<'a> {
     lexer: lexer::StringReader<'a>,
+    peek_token: Option<Token>,
     source_map: &'a SourceMap,
 
     // State of the classifier.
@@ -178,6 +179,7 @@ impl<'a> Classifier<'a> {
     fn new(lexer: lexer::StringReader<'a>, source_map: &'a SourceMap) -> Classifier<'a> {
         Classifier {
             lexer,
+            peek_token: None,
             source_map,
             in_attribute: false,
             in_macro: false,
@@ -187,10 +189,19 @@ impl<'a> Classifier<'a> {
 
     /// Gets the next token out of the lexer.
     fn try_next_token(&mut self) -> Result<Token, HighlightError> {
-        match self.lexer.try_next_token() {
-            Ok(token) => Ok(token),
-            Err(_) => Err(HighlightError::LexError),
+        if let Some(token) = self.peek_token.take() {
+            return Ok(token);
         }
+        self.lexer.try_next_token().map_err(|()| HighlightError::LexError)
+    }
+
+    fn peek(&mut self) -> Result<&Token, HighlightError> {
+        if self.peek_token.is_none() {
+            self.peek_token = Some(
+                self.lexer.try_next_token().map_err(|()| HighlightError::LexError)?
+            );
+        }
+        Ok(self.peek_token.as_ref().unwrap())
     }
 
     /// Exhausts the `lexer` writing the output into `out`.
@@ -234,7 +245,7 @@ impl<'a> Classifier<'a> {
             // reference or dereference operator or a reference or pointer type, instead of the
             // bit-and or multiplication operator.
             token::BinOp(token::And) | token::BinOp(token::Star)
-                if self.lexer.peek() != &token::Whitespace => Class::RefKeyWord,
+                if self.peek()? != &token::Whitespace => Class::RefKeyWord,
 
             // Consider this as part of a macro invocation if there was a
             // leading identifier.
@@ -257,7 +268,7 @@ impl<'a> Classifier<'a> {
             token::Question => Class::QuestionMark,
 
             token::Dollar => {
-                if self.lexer.peek().is_ident() {
+                if self.peek()?.is_ident() {
                     self.in_macro_nonterminal = true;
                     Class::MacroNonTerminal
                 } else {
@@ -280,9 +291,9 @@ impl<'a> Classifier<'a> {
                 // as an attribute.
 
                 // Case 1: #![inner_attribute]
-                if self.lexer.peek() == &token::Not {
+                if self.peek()? == &token::Not {
                     self.try_next_token()?; // NOTE: consumes `!` token!
-                    if self.lexer.peek() == &token::OpenDelim(token::Bracket) {
+                    if self.peek()? == &token::OpenDelim(token::Bracket) {
                         self.in_attribute = true;
                         out.enter_span(Class::Attribute)?;
                     }
@@ -292,7 +303,7 @@ impl<'a> Classifier<'a> {
                 }
 
                 // Case 2: #[outer_attribute]
-                if self.lexer.peek() == &token::OpenDelim(token::Bracket) {
+                if self.peek()? == &token::OpenDelim(token::Bracket) {
                     self.in_attribute = true;
                     out.enter_span(Class::Attribute)?;
                 }
@@ -341,7 +352,7 @@ impl<'a> Classifier<'a> {
                 if self.in_macro_nonterminal {
                     self.in_macro_nonterminal = false;
                     Class::MacroNonTerminal
-                } else if self.lexer.peek() == &token::Not {
+                } else if self.peek()? == &token::Not {
                     self.in_macro = true;
                     Class::Macro
                 } else {
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 49f714e4e4654..021b623d509c7 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -142,10 +142,6 @@ impl<'a> StringReader<'a> {
         buffer
     }
 
-    pub fn peek(&self) -> &Token {
-        &self.peek_token
-    }
-
     /// For comments.rs, which hackily pokes into next_pos and ch
     fn new_raw(sess: &'a ParseSess,
                source_file: Lrc<SourceFile>,
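
The pattern this patch moves into rustdoc's Classifier is single-token
lookahead implemented on the client side: buffer at most one token on top
of a source that can only step forward. A minimal sketch of that shape,
outside rustc (the name PeekOne and the char tokens are illustrative, not
from the patch):

    // Client-side one-token lookahead over a forward-only source.
    struct PeekOne<I: Iterator<Item = char>> {
        source: I,
        peeked: Option<char>,
    }

    impl<I: Iterator<Item = char>> PeekOne<I> {
        fn next_token(&mut self) -> Option<char> {
            // Hand out the buffered token first, if there is one.
            self.peeked.take().or_else(|| self.source.next())
        }

        fn peek(&mut self) -> Option<&char> {
            // Pull one token ahead and park it until next_token is called.
            if self.peeked.is_none() {
                self.peeked = self.source.next();
            }
            self.peeked.as_ref()
        }
    }

std's Peekable packages the same idea for plain iterators; the Classifier
cannot reuse it directly because its token source is fallible.
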
From e9dc95c86ecb296e0a2067ca5813043f380b9ea6 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Tue, 2 Jul 2019 17:08:11 +0300
Subject: [PATCH 2/9] remove peek_token from StringReader

---
 src/libsyntax/parse/lexer/comments.rs |  2 +-
 src/libsyntax/parse/lexer/mod.rs      | 24 +++++++-----------------
 2 files changed, 8 insertions(+), 18 deletions(-)

diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs
index 97d3fc002e9b0..2ab0bebf92927 100644
--- a/src/libsyntax/parse/lexer/comments.rs
+++ b/src/libsyntax/parse/lexer/comments.rs
@@ -268,7 +268,7 @@ fn read_block_comment(rdr: &mut StringReader<'_>,
     while level > 0 {
         debug!("=== block comment level {}", level);
         if rdr.is_eof() {
-            rdr.fatal("unterminated block comment").raise();
+            rdr.fatal_span_(rdr.pos, rdr.pos, "unterminated block comment").raise();
         }
         if rdr.ch_is('\n') {
             trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col);
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 021b623d509c7..a24c72ecc24f1 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -39,7 +39,6 @@ pub struct StringReader<'a> {
     /// Stop reading src at this index.
     crate end_src_index: usize,
     // cached:
-    peek_token: Token,
     peek_span_src_raw: Span,
     fatal_errs: Vec<DiagnosticBuilder<'a>>,
     // cache a direct reference to the source text, so that we don't have to
@@ -78,9 +77,7 @@ impl<'a> StringReader<'a> {
     /// Returns the next token. EFFECT: advances the string_reader.
     pub fn try_next_token(&mut self) -> Result<Token, ()> {
         assert!(self.fatal_errs.is_empty());
-        let ret_val = self.peek_token.take();
-        self.advance_token()?;
-        Ok(ret_val)
+        self.advance_token()
     }
 
     fn try_real_token(&mut self) -> Result<Token, ()> {
@@ -120,10 +117,6 @@ impl<'a> StringReader<'a> {
         FatalError.raise();
     }
 
-    fn fatal(&self, m: &str) -> FatalError {
-        self.fatal_span(self.peek_token.span, m)
-    }
-
     crate fn emit_fatal_errors(&mut self) {
         for err in &mut self.fatal_errs {
             err.emit();
@@ -169,7 +162,6 @@ impl<'a> StringReader<'a> {
             ch: Some('\n'),
             source_file,
             end_src_index: src.len(),
-            peek_token: Token::dummy(),
             peek_span_src_raw: syntax_pos::DUMMY_SP,
             src,
             fatal_errs: Vec::new(),
@@ -267,11 +259,11 @@ impl<'a> StringReader<'a> {
 
     /// Advance peek_token to refer to the next token, and
     /// possibly update the interner.
-    fn advance_token(&mut self) -> Result<(), ()> {
+    fn advance_token(&mut self) -> Result<Token, ()> {
         match self.scan_whitespace_or_comment() {
             Some(comment) => {
                 self.peek_span_src_raw = comment.span;
-                self.peek_token = comment;
+                Ok(comment)
             }
             None => {
                 let (kind, start_pos, end_pos) = if self.is_eof() {
@@ -281,12 +273,10 @@ impl<'a> StringReader<'a> {
                     (self.next_token_inner()?, start_pos, self.pos)
                 };
                 let (real, raw) = self.mk_sp_and_raw(start_pos, end_pos);
-                self.peek_token = Token::new(kind, real);
                 self.peek_span_src_raw = raw;
+                Ok(Token::new(kind, real))
             }
         }
-
-        Ok(())
     }
 
     #[inline]
@@ -1484,17 +1474,17 @@ mod tests {
             assert_eq!(tok1.kind, tok2.kind);
             assert_eq!(tok1.span, tok2.span);
             assert_eq!(string_reader.next_token(), token::Whitespace);
-            // the 'main' id is already read:
-            assert_eq!(string_reader.pos.clone(), BytePos(28));
             // read another token:
             let tok3 = string_reader.next_token();
+            assert_eq!(string_reader.pos.clone(), BytePos(28));
             let tok4 = Token::new(
                 mk_ident("main"),
                 Span::new(BytePos(24), BytePos(28), NO_EXPANSION),
             );
             assert_eq!(tok3.kind, tok4.kind);
             assert_eq!(tok3.span, tok4.span);
-            // the lparen is already read:
+
+            assert_eq!(string_reader.next_token(), token::OpenDelim(token::Paren));
             assert_eq!(string_reader.pos.clone(), BytePos(29))
         })
     }
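
With the only peeking client now buffering for itself, the reader no
longer has to keep a peek_token field one step ahead of the caller: the
scanning routine can hand its result straight back. A sketch of the
signature change on an illustrative scanner (not rustc code):

    struct Scanner {
        src: Vec<u8>,
        pos: usize,
    }

    impl Scanner {
        // Old convention: fn advance(&mut self) -> Result<(), ()> wrote
        // the result into a peek_token field for the caller to fetch.
        // New convention: produce and return the token in one step.
        fn next_byte(&mut self) -> Result<u8, ()> {
            let b = *self.src.get(self.pos).ok_or(())?;
            self.pos += 1;
            Ok(b)
        }
    }

The test change above falls out of this: positions are now observed after
a token is returned, not while it sits pre-read in the cache.
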
From 256df83f642ff3cfff82b266edc7d9bbe3fd2ecc Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Wed, 3 Jul 2019 12:52:22 +0300
Subject: [PATCH 3/9] remove peek_span_src_raw from StringReader

---
 src/librustc_save_analysis/span_utils.rs |  2 +-
 src/libsyntax/parse/lexer/mod.rs         | 98 +++++++++---------------
 src/libsyntax/parse/lexer/tokentrees.rs  | 15 ++--
 3 files changed, 46 insertions(+), 69 deletions(-)

diff --git a/src/librustc_save_analysis/span_utils.rs b/src/librustc_save_analysis/span_utils.rs
index 8905f475647ba..fb9919d777db1 100644
--- a/src/librustc_save_analysis/span_utils.rs
+++ b/src/librustc_save_analysis/span_utils.rs
@@ -53,7 +53,7 @@ impl<'a> SpanUtils<'a> {
     pub fn sub_span_of_token(&self, span: Span, tok: TokenKind) -> Option<Span> {
         let mut toks = self.retokenise_span(span);
         loop {
-            let next = toks.real_token();
+            let next = toks.next_token();
             if next == token::Eof {
                 return None;
             }
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index a24c72ecc24f1..8b43b88fbac9a 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -38,8 +38,6 @@ pub struct StringReader<'a> {
     crate source_file: Lrc<SourceFile>,
     /// Stop reading src at this index.
     crate end_src_index: usize,
-    // cached:
-    peek_span_src_raw: Span,
     fatal_errs: Vec<DiagnosticBuilder<'a>>,
     // cache a direct reference to the source text, so that we don't have to
     // retrieve it via `self.source_file.src.as_ref().unwrap()` all the time.
@@ -59,7 +57,7 @@ impl<'a> StringReader<'a> {
         (real, raw)
     }
 
-    fn unwrap_or_abort(&mut self, res: Result<Token, ()>) -> Token {
+    fn unwrap_or_abort<T>(&mut self, res: Result<T, ()>) -> T {
         match res {
             Ok(tok) => tok,
             Err(_) => {
@@ -69,36 +67,52 @@ impl<'a> StringReader<'a> {
         }
     }
 
-    fn next_token(&mut self) -> Token where Self: Sized {
-        let res = self.try_next_token();
-        self.unwrap_or_abort(res)
-    }
-
     /// Returns the next token. EFFECT: advances the string_reader.
     pub fn try_next_token(&mut self) -> Result<Token, ()> {
-        assert!(self.fatal_errs.is_empty());
-        self.advance_token()
+        let (token, _raw_span) = self.try_next_token_with_raw_span()?;
+        Ok(token)
     }
 
-    fn try_real_token(&mut self) -> Result<Token, ()> {
-        let mut t = self.try_next_token()?;
+    pub fn next_token(&mut self) -> Token {
+        let res = self.try_next_token();
+        self.unwrap_or_abort(res)
+    }
+
+    fn try_real_token(&mut self) -> Result<(Token, Span), ()> {
         loop {
-            match t.kind {
-                token::Whitespace | token::Comment | token::Shebang(_) => {
-                    t = self.try_next_token()?;
-                }
-                _ => break,
+            let t = self.try_next_token_with_raw_span()?;
+            match t.0.kind {
+                token::Whitespace | token::Comment | token::Shebang(_) => continue,
+                _ => return Ok(t),
             }
         }
-
-        Ok(t)
     }
 
-    pub fn real_token(&mut self) -> Token {
+    fn real_token(&mut self) -> (Token, Span) {
         let res = self.try_real_token();
         self.unwrap_or_abort(res)
     }
 
+    fn try_next_token_with_raw_span(&mut self) -> Result<(Token, Span), ()> {
+        assert!(self.fatal_errs.is_empty());
+        match self.scan_whitespace_or_comment() {
+            Some(comment) => {
+                let raw_span = comment.span;
+                Ok((comment, raw_span))
+            }
+            None => {
+                let (kind, start_pos, end_pos) = if self.is_eof() {
+                    (token::Eof, self.source_file.end_pos, self.source_file.end_pos)
+                } else {
+                    let start_pos = self.pos;
+                    (self.next_token_inner()?, start_pos, self.pos)
+                };
+                let (real, raw) = self.mk_sp_and_raw(start_pos, end_pos);
+                Ok((Token::new(kind, real), raw))
+            }
+        }
+    }
+
     #[inline]
     fn is_eof(&self) -> bool {
         self.ch.is_none()
@@ -141,7 +155,6 @@ impl<'a> StringReader<'a> {
                override_span: Option<Span>) -> Self {
         let mut sr = StringReader::new_raw_internal(sess, source_file, override_span);
         sr.bump();
-
         sr
     }
 
@@ -162,7 +175,6 @@ impl<'a> StringReader<'a> {
             ch: Some('\n'),
             source_file,
             end_src_index: src.len(),
-            peek_span_src_raw: syntax_pos::DUMMY_SP,
             src,
             fatal_errs: Vec::new(),
             override_span,
@@ -172,12 +184,8 @@ impl<'a> StringReader<'a> {
     pub fn new_or_buffered_errs(sess: &'a ParseSess,
                                 source_file: Lrc<SourceFile>,
                                 override_span: Option<Span>) -> Result<Self, Vec<Diagnostic>> {
-        let mut sr = StringReader::new_raw(sess, source_file, override_span);
-        if sr.advance_token().is_err() {
-            Err(sr.buffer_fatal_errors())
-        } else {
-            Ok(sr)
-        }
+        let sr = StringReader::new_raw(sess, source_file, override_span);
+        Ok(sr)
     }
 
     pub fn retokenize(sess: &'a ParseSess, mut span: Span) -> Self {
@@ -197,11 +205,6 @@ impl<'a> StringReader<'a> {
 
         sr.bump();
 
-        if sr.advance_token().is_err() {
-            sr.emit_fatal_errors();
-            FatalError.raise();
-        }
-
         sr
     }
 
@@ -257,28 +260,6 @@ impl<'a> StringReader<'a> {
         self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..])
     }
 
-    /// Advance peek_token to refer to the next token, and
-    /// possibly update the interner.
-    fn advance_token(&mut self) -> Result<Token, ()> {
-        match self.scan_whitespace_or_comment() {
-            Some(comment) => {
-                self.peek_span_src_raw = comment.span;
-                Ok(comment)
-            }
-            None => {
-                let (kind, start_pos, end_pos) = if self.is_eof() {
-                    (token::Eof, self.source_file.end_pos, self.source_file.end_pos)
-                } else {
-                    let start_pos = self.pos;
-                    (self.next_token_inner()?, start_pos, self.pos)
-                };
-                let (real, raw) = self.mk_sp_and_raw(start_pos, end_pos);
-                self.peek_span_src_raw = raw;
-                Ok(Token::new(kind, real))
-            }
-        }
-    }
-
     #[inline]
     fn src_index(&self, pos: BytePos) -> usize {
         (pos - self.source_file.start_pos).to_usize()
@@ -1447,12 +1428,7 @@ mod tests {
                     teststr: String)
                     -> StringReader<'a> {
         let sf = sm.new_source_file(PathBuf::from(teststr.clone()).into(), teststr);
-        let mut sr = StringReader::new_raw(sess, sf, None);
-        if sr.advance_token().is_err() {
-            sr.emit_fatal_errors();
-            FatalError.raise();
-        }
-        sr
+        StringReader::new_raw(sess, sf, None)
     }
 
     #[test]
diff --git a/src/libsyntax/parse/lexer/tokentrees.rs b/src/libsyntax/parse/lexer/tokentrees.rs
index 99d9d40a45b93..9593a50bdd2a1 100644
--- a/src/libsyntax/parse/lexer/tokentrees.rs
+++ b/src/libsyntax/parse/lexer/tokentrees.rs
@@ -1,4 +1,4 @@
-use syntax_pos::Span;
+use syntax_pos::{Span, DUMMY_SP};
 
 use crate::print::pprust::token_to_string;
 use crate::parse::lexer::{StringReader, UnmatchedBrace};
@@ -11,6 +11,7 @@ impl<'a> StringReader<'a> {
         let mut tt_reader = TokenTreesReader {
             string_reader: self,
             token: Token::dummy(),
+            raw_span: DUMMY_SP,
             open_braces: Vec::new(),
             unmatched_braces: Vec::new(),
             matching_delim_spans: Vec::new(),
@@ -24,6 +25,7 @@ impl<'a> StringReader<'a> {
 struct TokenTreesReader<'a> {
     string_reader: StringReader<'a>,
     token: Token,
+    raw_span: Span,
     /// Stack of open delimiters and their spans. Used for error message.
     open_braces: Vec<(token::DelimToken, Span)>,
     unmatched_braces: Vec<UnmatchedBrace>,
@@ -206,18 +208,17 @@ impl<'a> TokenTreesReader<'a> {
                 // Note that testing for joint-ness here is done via the raw
                 // source span as the joint-ness is a property of the raw source
                 // rather than wanting to take `override_span` into account.
-                // Additionally, we actually check if the *next* pair of tokens
-                // is joint, but this is equivalent to checking the current pair.
-                let raw = self.string_reader.peek_span_src_raw;
+                let raw_span = self.raw_span;
                 self.real_token();
-                let is_joint = raw.hi() == self.string_reader.peek_span_src_raw.lo()
-                    && self.token.is_op();
+                let is_joint = raw_span.hi() == self.raw_span.lo() && self.token.is_op();
                 Ok((tt, if is_joint { Joint } else { NonJoint }))
             }
         }
     }
 
     fn real_token(&mut self) {
-        self.token = self.string_reader.real_token();
+        let (token, raw_span) = self.string_reader.real_token();
+        self.token = token;
+        self.raw_span = raw_span;
    }
 }
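
Instead of caching the raw (unoverridden) span in a reader field, the
lexer now threads it alongside the token, and only the one caller that
cares about jointness keeps the second component. A sketch of that
tuple-threading shape with illustrative types (RawSpan and both function
names are hypothetical):

    type RawSpan = (usize, usize);

    // Produce the token together with its raw span. Assumes `pos` lies
    // on a char boundary within `src`.
    fn next_char_with_raw_span(src: &str, pos: usize) -> (Option<char>, RawSpan) {
        match src[pos..].chars().next() {
            Some(ch) => (Some(ch), (pos, pos + ch.len_utf8())),
            None => (None, (pos, pos)),
        }
    }

    // Callers that do not need the span simply drop the second field.
    fn next_char(src: &str, pos: usize) -> Option<char> {
        let (ch, _raw_span) = next_char_with_raw_span(src, pos);
        ch
    }
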
From 601bad86b227a73970a6912d1efea48553728b3d Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Wed, 3 Jul 2019 13:30:12 +0300
Subject: [PATCH 4/9] cleanup lexer constructors

---
 src/librustdoc/html/highlight.rs              | 22 +++++++++----------
 .../passes/check_code_block_syntax.rs         |  5 +++--
 src/libsyntax/parse/lexer/comments.rs         |  2 +-
 src/libsyntax/parse/lexer/mod.rs              | 18 +++++----------
 src/libsyntax/parse/mod.rs                    |  2 +-
 5 files changed, 21 insertions(+), 28 deletions(-)

diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs
index 852c1e031de4a..8132074d6e0e7 100644
--- a/src/librustdoc/html/highlight.rs
+++ b/src/librustdoc/html/highlight.rs
@@ -38,17 +38,17 @@ pub fn render_with_highlighting(
         FileName::Custom(String::from("rustdoc-highlighting")),
         src.to_owned(),
     );
-    let highlight_result =
-        lexer::StringReader::new_or_buffered_errs(&sess, fm, None).and_then(|lexer| {
-            let mut classifier = Classifier::new(lexer, sess.source_map());
-
-            let mut highlighted_source = vec![];
-            if classifier.write_source(&mut highlighted_source).is_err() {
-                Err(classifier.lexer.buffer_fatal_errors())
-            } else {
-                Ok(String::from_utf8_lossy(&highlighted_source).into_owned())
-            }
-        });
+    let highlight_result = {
+        let lexer = lexer::StringReader::new(&sess, fm, None);
+        let mut classifier = Classifier::new(lexer, sess.source_map());
+
+        let mut highlighted_source = vec![];
+        if classifier.write_source(&mut highlighted_source).is_err() {
+            Err(classifier.lexer.buffer_fatal_errors())
+        } else {
+            Ok(String::from_utf8_lossy(&highlighted_source).into_owned())
+        }
+    };
 
     match highlight_result {
         Ok(highlighted_source) => {
diff --git a/src/librustdoc/passes/check_code_block_syntax.rs b/src/librustdoc/passes/check_code_block_syntax.rs
index f6ab1290da37c..0488153e7cb73 100644
--- a/src/librustdoc/passes/check_code_block_syntax.rs
+++ b/src/librustdoc/passes/check_code_block_syntax.rs
@@ -32,7 +32,8 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> {
             dox[code_block.code].to_owned(),
         );
 
-        let errors = Lexer::new_or_buffered_errs(&sess, source_file, None).and_then(|mut lexer| {
+        let errors = {
+            let mut lexer = Lexer::new(&sess, source_file, None);
             while let Ok(token::Token { kind, .. }) = lexer.try_next_token() {
                 if kind == token::Eof {
                     break;
@@ -46,7 +47,7 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> {
             } else {
                 Ok(())
             }
-        });
+        };
 
         if let Err(errors) = errors {
             let mut diag = if let Some(sp) =
diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs
index 2ab0bebf92927..988f1aa38d926 100644
--- a/src/libsyntax/parse/lexer/comments.rs
+++ b/src/libsyntax/parse/lexer/comments.rs
@@ -346,7 +346,7 @@ pub fn gather_comments(sess: &ParseSess, path: FileName, srdr: &mut dyn Read) ->
     srdr.read_to_string(&mut src).unwrap();
     let cm = SourceMap::new(sess.source_map().path_mapping().clone());
     let source_file = cm.new_source_file(path, src);
-    let mut rdr = lexer::StringReader::new_raw(sess, source_file, None);
+    let mut rdr = lexer::StringReader::new(sess, source_file, None);
 
     let mut comments: Vec<Comment> = Vec::new();
     let mut code_to_the_left = false; // Only code
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 8b43b88fbac9a..fd593fb0d090a 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -149,16 +149,15 @@ impl<'a> StringReader<'a> {
         buffer
     }
 
-    /// For comments.rs, which hackily pokes into next_pos and ch
-    fn new_raw(sess: &'a ParseSess,
+    pub fn new(sess: &'a ParseSess,
                source_file: Lrc<SourceFile>,
                override_span: Option<Span>) -> Self {
-        let mut sr = StringReader::new_raw_internal(sess, source_file, override_span);
+        let mut sr = StringReader::new_internal(sess, source_file, override_span);
         sr.bump();
         sr
     }
 
-    fn new_raw_internal(sess: &'a ParseSess, source_file: Lrc<SourceFile>,
+    fn new_internal(sess: &'a ParseSess, source_file: Lrc<SourceFile>,
                     override_span: Option<Span>) -> Self
     {
         if source_file.src.is_none() {
@@ -181,13 +180,6 @@ impl<'a> StringReader<'a> {
         }
     }
 
-    pub fn new_or_buffered_errs(sess: &'a ParseSess,
-                                source_file: Lrc<SourceFile>,
-                                override_span: Option<Span>) -> Result<Self, Vec<Diagnostic>> {
-        let sr = StringReader::new_raw(sess, source_file, override_span);
-        Ok(sr)
-    }
-
     pub fn retokenize(sess: &'a ParseSess, mut span: Span) -> Self {
         let begin = sess.source_map().lookup_byte_offset(span.lo());
         let end = sess.source_map().lookup_byte_offset(span.hi());
@@ -197,7 +189,7 @@ impl<'a> StringReader<'a> {
             span = span.shrink_to_lo();
         }
 
-        let mut sr = StringReader::new_raw_internal(sess, begin.sf, None);
+        let mut sr = StringReader::new_internal(sess, begin.sf, None);
 
         // Seek the lexer to the right byte range.
         sr.next_pos = span.lo();
@@ -1428,7 +1420,7 @@ mod tests {
                     teststr: String)
                     -> StringReader<'a> {
         let sf = sm.new_source_file(PathBuf::from(teststr.clone()).into(), teststr);
-        StringReader::new_raw(sess, sf, None)
+        StringReader::new(sess, sf, None)
     }
 
     #[test]
diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs
index e19eab371f44e..ff2275ca348bc 100644
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@@ -305,7 +305,7 @@ pub fn maybe_file_to_stream(
     source_file: Lrc<SourceFile>,
     override_span: Option<Span>,
 ) -> Result<(TokenStream, Vec<lexer::UnmatchedBrace>), Vec<Diagnostic>> {
-    let srdr = lexer::StringReader::new_or_buffered_errs(sess, source_file, override_span)?;
+    let srdr = lexer::StringReader::new(sess, source_file, override_span);
     let (token_trees, unmatched_braces) = srdr.into_token_trees();
 
     match token_trees {
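
After the previous patch, new_or_buffered_errs could no longer fail, so
its Result wrapper was dead weight; this patch collapses the constructors
into a public new plus a private new_internal. A rough sketch of the
resulting convention (Reader and its String errors are illustrative, not
rustc types): construction is infallible, and fatal errors stay buffered
until a caller asks for them.

    struct Reader {
        src: String,
        fatal_errs: Vec<String>,
    }

    impl Reader {
        // No Result: building the reader cannot fail.
        fn new(src: String) -> Reader {
            Reader { src, fatal_errs: Vec::new() }
        }

        // Errors accumulated while scanning are handed over on demand.
        fn buffer_fatal_errors(&mut self) -> Vec<String> {
            std::mem::take(&mut self.fatal_errs)
        }
    }
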
From 30fa99e5b8c89df2c27d10a5d38a7c0d50f155a7 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Wed, 3 Jul 2019 13:31:52 +0300
Subject: [PATCH 5/9] move constructors to top

---
 src/libsyntax/parse/lexer/mod.rs | 102 +++++++++++++++----------------
 1 file changed, 51 insertions(+), 51 deletions(-)

diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index fd593fb0d090a..b764f9678c5b4 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -46,6 +46,57 @@ pub struct StringReader<'a> {
 }
 
 impl<'a> StringReader<'a> {
+    pub fn new(sess: &'a ParseSess,
+               source_file: Lrc<SourceFile>,
+               override_span: Option<Span>) -> Self {
+        let mut sr = StringReader::new_internal(sess, source_file, override_span);
+        sr.bump();
+        sr
+    }
+
+    pub fn retokenize(sess: &'a ParseSess, mut span: Span) -> Self {
+        let begin = sess.source_map().lookup_byte_offset(span.lo());
+        let end = sess.source_map().lookup_byte_offset(span.hi());
+
+        // Make the range zero-length if the span is invalid.
+        if span.lo() > span.hi() || begin.sf.start_pos != end.sf.start_pos {
+            span = span.shrink_to_lo();
+        }
+
+        let mut sr = StringReader::new_internal(sess, begin.sf, None);
+
+        // Seek the lexer to the right byte range.
+        sr.next_pos = span.lo();
+        sr.end_src_index = sr.src_index(span.hi());
+
+        sr.bump();
+
+        sr
+    }
+
+    fn new_internal(sess: &'a ParseSess, source_file: Lrc<SourceFile>,
+                    override_span: Option<Span>) -> Self
+    {
+        if source_file.src.is_none() {
+            sess.span_diagnostic.bug(&format!("Cannot lex source_file without source: {}",
+                                              source_file.name));
+        }
+
+        let src = (*source_file.src.as_ref().unwrap()).clone();
+
+        StringReader {
+            sess,
+            next_pos: source_file.start_pos,
+            pos: source_file.start_pos,
+            ch: Some('\n'),
+            source_file,
+            end_src_index: src.len(),
+            src,
+            fatal_errs: Vec::new(),
+            override_span,
+        }
+    }
+
     fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span {
         self.mk_sp_and_raw(lo, hi).0
     }
@@ -149,57 +200,6 @@ impl<'a> StringReader<'a> {
         buffer
     }
 
-    pub fn new(sess: &'a ParseSess,
-               source_file: Lrc<SourceFile>,
-               override_span: Option<Span>) -> Self {
-        let mut sr = StringReader::new_internal(sess, source_file, override_span);
-        sr.bump();
-        sr
-    }
-
-    fn new_internal(sess: &'a ParseSess, source_file: Lrc<SourceFile>,
-                    override_span: Option<Span>) -> Self
-    {
-        if source_file.src.is_none() {
-            sess.span_diagnostic.bug(&format!("Cannot lex source_file without source: {}",
-                                              source_file.name));
-        }
-
-        let src = (*source_file.src.as_ref().unwrap()).clone();
-
-        StringReader {
-            sess,
-            next_pos: source_file.start_pos,
-            pos: source_file.start_pos,
-            ch: Some('\n'),
-            source_file,
-            end_src_index: src.len(),
-            src,
-            fatal_errs: Vec::new(),
-            override_span,
-        }
-    }
-
-    pub fn retokenize(sess: &'a ParseSess, mut span: Span) -> Self {
-        let begin = sess.source_map().lookup_byte_offset(span.lo());
-        let end = sess.source_map().lookup_byte_offset(span.hi());
-
-        // Make the range zero-length if the span is invalid.
-        if span.lo() > span.hi() || begin.sf.start_pos != end.sf.start_pos {
-            span = span.shrink_to_lo();
-        }
-
-        let mut sr = StringReader::new_internal(sess, begin.sf, None);
-
-        // Seek the lexer to the right byte range.
-        sr.next_pos = span.lo();
-        sr.end_src_index = sr.src_index(span.hi());
-
-        sr.bump();
-
-        sr
-    }
-
     #[inline]
     fn ch_is(&self, c: char) -> bool {
         self.ch == Some(c)
From 1c6eb19d2fd9be130b6265f6bdbf8da3ba49c513 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Wed, 3 Jul 2019 14:06:10 +0300
Subject: [PATCH 6/9] slightly comment lexer API

---
 src/libsyntax/parse/lexer/mod.rs | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index b764f9678c5b4..829083fe4f7d0 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -118,29 +118,36 @@ impl<'a> StringReader<'a> {
         }
     }
 
-    /// Returns the next token. EFFECT: advances the string_reader.
+    /// Returns the next token, including trivia like whitespace or comments.
+    ///
+    /// `Err(())` means that some errors were encountered, which can be
+    /// retrieved using `buffer_fatal_errors`.
     pub fn try_next_token(&mut self) -> Result<Token, ()> {
         let (token, _raw_span) = self.try_next_token_with_raw_span()?;
         Ok(token)
     }
 
+    /// Returns the next token, including trivia like whitespace or comments.
+    ///
+    /// Aborts in case of an error.
     pub fn next_token(&mut self) -> Token {
         let res = self.try_next_token();
         self.unwrap_or_abort(res)
    }
 
-    fn try_real_token(&mut self) -> Result<(Token, Span), ()> {
-        loop {
-            let t = self.try_next_token_with_raw_span()?;
-            match t.0.kind {
-                token::Whitespace | token::Comment | token::Shebang(_) => continue,
-                _ => return Ok(t),
+    /// Returns the next token, skipping over trivia.
+    /// Also returns an unoverridden span which can be used to check tokens for jointness.
+    fn real_token(&mut self) -> (Token, Span) {
+        let res = try {
+            loop {
+                let t = self.try_next_token_with_raw_span()?;
+                match t.0.kind {
+                    token::Whitespace | token::Comment | token::Shebang(_) => continue,
+                    _ => break t,
+                }
             }
-        }
-    }
+        };
 
-    fn real_token(&mut self) -> (Token, Span) {
-        let res = self.try_real_token();
         self.unwrap_or_abort(res)
     }
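
The new real_token body relies on a `try { ... }` block, a nightly
feature (`#![feature(try_blocks)]`) that rustc's own crates can use. On
stable Rust the same control flow, with `?` bailing out to a local
boundary and `break` carrying the loop's value, can be written as an
immediately invoked closure; a sketch with illustrative names:

    fn first_non_space(mut next: impl FnMut() -> Result<char, ()>) -> Result<char, ()> {
        (|| {
            loop {
                let c = next()?;  // `?` exits only the closure, not the caller
                if c != ' ' {
                    break Ok(c);  // the loop's value becomes the closure's result
                }
            }
        })()
    }
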
From 8bea334a266dcf439ca2f61f448a15770a3766b7 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Wed, 3 Jul 2019 15:07:41 +0300
Subject: [PATCH 7/9] don't rely on spans when checking tokens for jointness

---
 src/libsyntax/parse/lexer/mod.rs        | 46 ++++++-----------------
 src/libsyntax/parse/lexer/tokentrees.rs | 30 +++++++++-------
 2 files changed, 29 insertions(+), 47 deletions(-)

diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 829083fe4f7d0..f9b9c85fb5602 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -123,41 +123,9 @@ impl<'a> StringReader<'a> {
     /// `Err(())` means that some errors were encountered, which can be
     /// retrieved using `buffer_fatal_errors`.
     pub fn try_next_token(&mut self) -> Result<Token, ()> {
-        let (token, _raw_span) = self.try_next_token_with_raw_span()?;
-        Ok(token)
-    }
-
-    /// Returns the next token, including trivia like whitespace or comments.
-    ///
-    /// Aborts in case of an error.
-    pub fn next_token(&mut self) -> Token {
-        let res = self.try_next_token();
-        self.unwrap_or_abort(res)
-    }
-
-    /// Returns the next token, skipping over trivia.
-    /// Also returns an unoverridden span which can be used to check tokens for jointness.
-    fn real_token(&mut self) -> (Token, Span) {
-        let res = try {
-            loop {
-                let t = self.try_next_token_with_raw_span()?;
-                match t.0.kind {
-                    token::Whitespace | token::Comment | token::Shebang(_) => continue,
-                    _ => break t,
-                }
-            }
-        };
-
-        self.unwrap_or_abort(res)
-    }
-
-    fn try_next_token_with_raw_span(&mut self) -> Result<(Token, Span), ()> {
         assert!(self.fatal_errs.is_empty());
         match self.scan_whitespace_or_comment() {
-            Some(comment) => {
-                let raw_span = comment.span;
-                Ok((comment, raw_span))
-            }
+            Some(comment) => Ok(comment),
             None => {
                 let (kind, start_pos, end_pos) = if self.is_eof() {
                     (token::Eof, self.source_file.end_pos, self.source_file.end_pos)
@@ -165,12 +133,20 @@ impl<'a> StringReader<'a> {
                     let start_pos = self.pos;
                     (self.next_token_inner()?, start_pos, self.pos)
                 };
-                let (real, raw) = self.mk_sp_and_raw(start_pos, end_pos);
-                Ok((Token::new(kind, real), raw))
+                let (real, _raw) = self.mk_sp_and_raw(start_pos, end_pos);
+                Ok(Token::new(kind, real))
             }
         }
     }
 
+    /// Returns the next token, including trivia like whitespace or comments.
+    ///
+    /// Aborts in case of an error.
+    pub fn next_token(&mut self) -> Token {
+        let res = self.try_next_token();
+        self.unwrap_or_abort(res)
+    }
+
     #[inline]
     fn is_eof(&self) -> bool {
         self.ch.is_none()
diff --git a/src/libsyntax/parse/lexer/tokentrees.rs b/src/libsyntax/parse/lexer/tokentrees.rs
index 9593a50bdd2a1..830fbec58ded9 100644
--- a/src/libsyntax/parse/lexer/tokentrees.rs
+++ b/src/libsyntax/parse/lexer/tokentrees.rs
@@ -1,17 +1,17 @@
-use syntax_pos::{Span, DUMMY_SP};
+use syntax_pos::Span;
 
 use crate::print::pprust::token_to_string;
 use crate::parse::lexer::{StringReader, UnmatchedBrace};
 use crate::parse::token::{self, Token};
 use crate::parse::PResult;
-use crate::tokenstream::{DelimSpan, IsJoint::*, TokenStream, TokenTree, TreeAndJoint};
+use crate::tokenstream::{DelimSpan, IsJoint::{self, *}, TokenStream, TokenTree, TreeAndJoint};
 
 impl<'a> StringReader<'a> {
     crate fn into_token_trees(self) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) {
         let mut tt_reader = TokenTreesReader {
             string_reader: self,
             token: Token::dummy(),
-            raw_span: DUMMY_SP,
+            joint_to_prev: Joint,
             open_braces: Vec::new(),
             unmatched_braces: Vec::new(),
             matching_delim_spans: Vec::new(),
@@ -25,7 +25,7 @@ impl<'a> StringReader<'a> {
 struct TokenTreesReader<'a> {
     string_reader: StringReader<'a>,
     token: Token,
-    raw_span: Span,
+    joint_to_prev: IsJoint,
     /// Stack of open delimiters and their spans. Used for error message.
     open_braces: Vec<(token::DelimToken, Span)>,
     unmatched_braces: Vec<UnmatchedBrace>,
@@ -205,20 +205,26 @@ impl<'a> TokenTreesReader<'a> {
             },
             _ => {
                 let tt = TokenTree::Token(self.token.take());
-                // Note that testing for joint-ness here is done via the raw
-                // source span as the joint-ness is a property of the raw source
-                // rather than wanting to take `override_span` into account.
-                let raw_span = self.raw_span;
                 self.real_token();
-                let is_joint = raw_span.hi() == self.raw_span.lo() && self.token.is_op();
+                let is_joint = self.joint_to_prev == Joint && self.token.is_op();
                 Ok((tt, if is_joint { Joint } else { NonJoint }))
             }
         }
     }
 
     fn real_token(&mut self) {
-        let (token, raw_span) = self.string_reader.real_token();
-        self.token = token;
-        self.raw_span = raw_span;
+        self.joint_to_prev = Joint;
+        loop {
+            let token = self.string_reader.next_token();
+            match token.kind {
+                token::Whitespace | token::Comment | token::Shebang(_) => {
+                    self.joint_to_prev = NonJoint;
+                }
+                _ => {
+                    self.token = token;
+                    return;
+                },
+            }
+        }
    }
 }
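
The jointness rule after this patch: two tokens are joint exactly when no
trivia (whitespace, comment) was skipped between them, tracked with a
flag set while skipping rather than by comparing raw span endpoints. A
standalone sketch with illustrative token types:

    #[derive(Clone, Copy, PartialEq)]
    enum Tok {
        Op(char),
        Space,
    }

    // Returns, for each operator token, whether it is joint to the
    // previous token (i.e. no trivia intervened).
    fn jointness(tokens: &[Tok]) -> Vec<bool> {
        let mut out = Vec::new();
        let mut joint_to_prev = true;
        for &t in tokens {
            match t {
                Tok::Space => joint_to_prev = false,
                Tok::Op(_) => {
                    out.push(joint_to_prev);
                    joint_to_prev = true;
                }
            }
        }
        out
    }

This is the property that lets the parser glue `>` `>` back together into
`>>` only when the two characters were adjacent in the source.
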
From 3035a05a74db3a1a7f95e139c4d683cc7be51159 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Wed, 3 Jul 2019 15:09:06 +0300
Subject: [PATCH 8/9] remove unused mk_sp_and_raw

---
 src/libsyntax/parse/lexer/mod.rs | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index f9b9c85fb5602..a56a4ce097e20 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -98,14 +98,7 @@ impl<'a> StringReader<'a> {
     }
 
     fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span {
-        self.mk_sp_and_raw(lo, hi).0
-    }
-
-    fn mk_sp_and_raw(&self, lo: BytePos, hi: BytePos) -> (Span, Span) {
-        let raw = Span::new(lo, hi, NO_EXPANSION);
-        let real = self.override_span.unwrap_or(raw);
-
-        (real, raw)
+        self.override_span.unwrap_or_else(|| Span::new(lo, hi, NO_EXPANSION))
     }
 
     fn unwrap_or_abort<T>(&mut self, res: Result<T, ()>) -> T {
@@ -133,8 +126,8 @@ impl<'a> StringReader<'a> {
                     let start_pos = self.pos;
                     (self.next_token_inner()?, start_pos, self.pos)
                 };
-                let (real, _raw) = self.mk_sp_and_raw(start_pos, end_pos);
-                Ok(Token::new(kind, real))
+                let span = self.mk_sp(start_pos, end_pos);
+                Ok(Token::new(kind, span))
             }
         }
     }
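
mk_sp now uses unwrap_or_else, so the fallback span is only constructed
when there is no override_span. The same laziness argument in miniature
(pick and expensive are illustrative stand-ins):

    fn pick(override_val: Option<u32>, lo: u32, hi: u32) -> u32 {
        // unwrap_or_else takes a closure, so expensive(lo, hi) runs only
        // in the None case; unwrap_or(expensive(lo, hi)) would evaluate
        // it eagerly on every call.
        override_val.unwrap_or_else(|| expensive(lo, hi))
    }

    fn expensive(lo: u32, hi: u32) -> u32 {
        lo.saturating_add(hi)
    }
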
From 3e362a4800932186c7351972753ecdf715050983 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Wed, 3 Jul 2019 15:31:59 +0300
Subject: [PATCH 9/9] make unwrap_or_abort non-generic again

---
 src/libsyntax/parse/lexer/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index a56a4ce097e20..2ad562485eb7c 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -101,7 +101,7 @@ impl<'a> StringReader<'a> {
         self.override_span.unwrap_or_else(|| Span::new(lo, hi, NO_EXPANSION))
     }
 
-    fn unwrap_or_abort<T>(&mut self, res: Result<T, ()>) -> T {
+    fn unwrap_or_abort(&mut self, res: Result<Token, ()>) -> Token {
         match res {
             Ok(tok) => tok,
             Err(_) => {
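
Patch 3 made unwrap_or_abort generic so it could serve both Token and
(Token, Span) callers; patch 7 removed the second caller, so the type
parameter no longer earns its keep, and a concrete signature documents
the single remaining use. The shape in miniature (illustrative types):

    // One caller, one type: no need for fn unwrap_or_abort<T>(..) here.
    fn unwrap_or_abort(res: Result<u32, ()>) -> u32 {
        res.unwrap_or_else(|_| std::process::abort())
    }
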