From a06a837d3bffa1404b8faadc3f15ad5e400847d5 Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Thu, 29 Dec 2022 15:06:09 +0800 Subject: [PATCH] Fix `pair.line_col` to support skipped characters, and add a test for rev iter. --- pest/src/iterators/flat_pairs.rs | 4 +- pest/src/iterators/pair.rs | 11 ++-- pest/src/iterators/pairs.rs | 90 +++++++++++++++++++++++--------- pest/src/macros.rs | 2 + 4 files changed, 73 insertions(+), 34 deletions(-) diff --git a/pest/src/iterators/flat_pairs.rs b/pest/src/iterators/flat_pairs.rs index 5c132e6e..411d88b2 100644 --- a/pest/src/iterators/flat_pairs.rs +++ b/pest/src/iterators/flat_pairs.rs @@ -107,7 +107,7 @@ impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> { return None; } - let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.start, None) }; + let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.start) }; self.next_start(); Some(pair) @@ -122,7 +122,7 @@ impl<'i, R: RuleType> DoubleEndedIterator for FlatPairs<'i, R> { self.next_start_from_end(); - let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.end, None) }; + let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.end) }; Some(pair) } diff --git a/pest/src/iterators/pair.rs b/pest/src/iterators/pair.rs index 0dc95027..2c813478 100644 --- a/pest/src/iterators/pair.rs +++ b/pest/src/iterators/pair.rs @@ -20,7 +20,7 @@ use core::str; #[cfg(feature = "pretty-print")] use serde::ser::SerializeStruct; -use super::pairs::{self, Cursor, Pairs}; +use super::pairs::{self, Pairs}; use super::queueable_token::QueueableToken; use super::tokens::{self, Tokens}; use crate::span::{self, Span}; @@ -43,7 +43,7 @@ pub struct Pair<'i, R> { input: &'i str, /// Token index into `queue`. 
start: usize, - cursor: Option, + pub(crate) line_col: Option<(usize, usize)>, } /// # Safety @@ -53,13 +53,12 @@ pub unsafe fn new( queue: Rc>>, input: &str, start: usize, - cursor: Option, ) -> Pair<'_, R> { Pair { queue, input, start, - cursor, + line_col: None, } } @@ -246,8 +245,8 @@ impl<'i, R: RuleType> Pair<'i, R> { /// Returns the `line`, `col` of this pair start. pub fn line_col(&self) -> (usize, usize) { - match &self.cursor { - Some(cursor) => (cursor.line, cursor.col), + match &self.line_col { + Some(line_col) => (line_col.0, line_col.1), None => self.as_span().start_pos().line_col(), } } diff --git a/pest/src/iterators/pairs.rs b/pest/src/iterators/pairs.rs index bbcf10cb..ef88555a 100644 --- a/pest/src/iterators/pairs.rs +++ b/pest/src/iterators/pairs.rs @@ -29,17 +29,16 @@ use crate::{position, RuleType}; pub struct Cursor { pub line: usize, pub col: usize, + pub end: usize, } impl Default for Cursor { fn default() -> Cursor { - Cursor { line: 1, col: 1 } - } -} - -impl Cursor { - pub(crate) fn get(&self) -> (usize, usize) { - (self.line, self.col) + Cursor { + line: 1, + col: 1, + end: 0, + } } } @@ -48,14 +47,24 @@ pub trait CursorPairs { fn cursor_mut(&mut self) -> &mut Cursor; /// Move the (line, col) with string part - fn move_cursor(&mut self, part: &str) -> (usize, usize) { + fn move_cursor(&mut self, input: &str, start: usize, end: usize) -> (usize, usize) { + // Move cursor for some skiped characters (by skip(n)) + let prev_end = self.cursor().end; + if prev_end != start { + self.move_cursor(input, prev_end, start); + } + + let (prev_line, prev_col) = (self.cursor().line, self.cursor().col); + + let part = &input[self.cursor().end..end]; let (l, c) = position::line_col(part, part.len()); // because original_line_col line, col start from 1 let l = l - 1; - let c = c - 1; - - let (prev_line, prev_col) = self.cursor().get(); + let mut c = c - 1; + if c < 1 { + c = 1 + } self.cursor_mut().line += l; // Has new line @@ -64,6 +73,8 @@ pub trait 
CursorPairs { } else { self.cursor_mut().col += c; } + self.cursor_mut().end = end; + (prev_line, prev_col) } } @@ -226,14 +237,7 @@ impl<'i, R: RuleType> Pairs<'i, R> { #[inline] pub fn peek(&self) -> Option> { if self.start < self.end { - Some(unsafe { - pair::new( - Rc::clone(&self.queue), - self.input, - self.start, - Some(self.cursor.clone()), - ) - }) + Some(unsafe { pair::new(Rc::clone(&self.queue), self.input, self.start) }) } else { None } @@ -287,8 +291,12 @@ impl<'i, R: RuleType> Iterator for Pairs<'i, R> { type Item = Pair<'i, R>; fn next(&mut self) -> Option { - let pair = self.peek()?; - self.move_cursor(pair.as_str()); + let mut pair = self.peek()?; + let span = pair.as_span(); + + let (l, c) = self.move_cursor(self.input, span.start(), span.end()); + pair.line_col = Some((l, c)); + self.start = self.pair() + 1; Some(pair) } @@ -302,7 +310,7 @@ impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> { self.end = self.pair_from_end(); - let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.end, None) }; + let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.end) }; Some(pair) } @@ -489,15 +497,45 @@ mod tests { #[test] fn test_line_col() { - let mut pairs = AbcParser::parse(Rule::a, "abcde\nabcde").unwrap(); + let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap(); let pair = pairs.next().unwrap(); assert_eq!(pair.as_str(), "abc"); assert_eq!(pair.line_col(), (1, 1)); - assert_eq!(pairs.cursor.get(), (1, 4)); + assert_eq!( + (pairs.cursor.line, pairs.cursor.col, pairs.cursor.end), + (1, 4, 3) + ); let pair = pairs.next().unwrap(); assert_eq!(pair.as_str(), "e"); - assert_eq!(pair.line_col(), (1, 4)); - assert_eq!(pairs.cursor.get(), (1, 5)); + assert_eq!(pair.line_col(), (2, 1)); + assert_eq!( + (pairs.cursor.line, pairs.cursor.col, pairs.cursor.end), + (2, 2, 5) + ); + + let pair = pairs.next().unwrap(); + assert_eq!(pair.as_str(), "fgh"); + assert_eq!(pair.line_col(), (2, 2)); + assert_eq!( + 
(pairs.cursor.line, pairs.cursor.col, pairs.cursor.end), + (2, 5, 8) + ); + } + + #[test] + fn test_rev_iter_line_col() { + let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().rev(); + let pair = pairs.next().unwrap(); + assert_eq!(pair.as_str(), "fgh"); + assert_eq!(pair.line_col(), (2, 2)); + + let pair = pairs.next().unwrap(); + assert_eq!(pair.as_str(), "e"); + assert_eq!(pair.line_col(), (2, 1)); + + let pair = pairs.next().unwrap(); + assert_eq!(pair.as_str(), "abc"); + assert_eq!(pair.line_col(), (1, 1)); } } diff --git a/pest/src/macros.rs b/pest/src/macros.rs index 0ca6d9e0..1b83f388 100644 --- a/pest/src/macros.rs +++ b/pest/src/macros.rs @@ -329,6 +329,7 @@ pub mod tests { a, b, c, + d, } pub struct AbcParser; @@ -345,6 +346,7 @@ pub mod tests { .skip(1) }) .and_then(|s| s.skip(1).unwrap().rule(Rule::c, |s| s.match_string("e"))) + .and_then(|s| s.optional(|s| s.rule(Rule::d, |s| s.match_string("fgh")))) }) } }