Skip to content

Commit

Permalink
Use regex::Match instead of regex::Captures to boost performance
Browse files Browse the repository at this point in the history
* Performance guideline from the author: https://github.com/rust-lang/regex/blob/master/PERFORMANCE.md
* This reduced the runtime of the "example" program from 10s to 8s on a 2017 MacBook (2.5 GHz Intel Core i7)
  • Loading branch information
MnO2 committed Jun 4, 2019
1 parent f7fa656 commit 3d01321
Showing 1 changed file with 11 additions and 12 deletions.
23 changes: 11 additions & 12 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use std::io::{self, BufRead, BufReader};
use std::collections::BTreeMap;
use std::cmp::Ordering;

use regex::{Regex, Captures, CaptureMatches};
use regex::{Regex, Match, Matches};
use smallvec::SmallVec;

mod hmm;
Expand All @@ -47,36 +47,36 @@ lazy_static! {
}

/// Iterator state that walks `text` and yields both the text lying between
/// regex matches and the matches themselves (see the `Iterator` impl).
///
/// NOTE(review): this listing is a diff view with before/after lines shown
/// back to back; the `CaptureMatches`/`caps` lines appear to be the
/// pre-change versions of the `Matches`/`matched` lines that follow them.
/// Confirm against the real file.
struct SplitCaptures<'r, 't> {
// Underlying regex iterator over `text`; built in `new`.
finder: CaptureMatches<'r, 't>,
finder: Matches<'r, 't>,
// The full input being split; unmatched pieces are sliced out of it.
text: &'t str,
// Offset in `text` where the most recently seen match ended (starts at 0).
last: usize,
// Match found by a previous `next` call, held back so it is yielded right
// after the unmatched text that precedes it.
caps: Option<Captures<'t>>,
matched: Option<Match<'t>>,
}

impl<'r, 't> SplitCaptures<'r, 't> {
/// Creates a splitter over `text` driven by the matches of `re`.
///
/// The splitter starts at offset 0 with no pending match.
///
/// NOTE(review): diff view. `captures_iter`/`caps` look like the pre-change
/// lines and `find_iter`/`matched` their replacements; confirm against the
/// real file.
#[inline]
fn new(re: &'r Regex, text: &'t str) -> SplitCaptures<'r, 't> {
SplitCaptures {
finder: re.captures_iter(text),
finder: re.find_iter(text),
text,
// Nothing has been consumed yet.
last: 0,
// No match is waiting to be yielded.
caps: None,
matched: None,
}
}
}

/// One piece produced by `SplitCaptures`: either text outside any match or a
/// regex match.
///
/// NOTE(review): diff view. `Captured` looks like the pre-change variant and
/// `Matched` its replacement; confirm against the real file.
#[derive(Debug)]
pub(crate) enum SplitState<'t> {
// Slice of the input that precedes the next match; it is yielded even when
// empty (a match starting exactly where the previous one ended).
Unmatched(&'t str),
// A regex match within the input.
Captured(Captures<'t>),
Matched(Match<'t>),
}

impl<'t> SplitState<'t> {
/// Consumes the state and returns the input text it covers: the slice itself
/// for `Unmatched`, or the full text of the match otherwise.
///
/// NOTE(review): diff view. The `Captured` arm looks like the pre-change
/// line and the `Matched` arm its replacement; confirm against the real file.
#[inline]
fn into_str(self) -> &'t str {
match self {
SplitState::Unmatched(t) => t,
// Capture group 0 is the whole match.
SplitState::Captured(caps) => caps.get(0).unwrap().as_str(),
SplitState::Matched(matched) => matched.as_str(),
}
}
}
Expand All @@ -85,8 +85,8 @@ impl<'r, 't> Iterator for SplitCaptures<'r, 't> {
type Item = SplitState<'t>;

fn next(&mut self) -> Option<SplitState<'t>> {
if let Some(caps) = self.caps.take() {
return Some(SplitState::Captured(caps));
if let Some(matched) = self.matched.take() {
return Some(SplitState::Matched(matched));
}
match self.finder.next() {
None => {
Expand All @@ -98,11 +98,10 @@ impl<'r, 't> Iterator for SplitCaptures<'r, 't> {
Some(SplitState::Unmatched(s))
}
}
Some(caps) => {
let m = caps.get(0).unwrap();
Some(m) => {
let unmatched = &self.text[self.last..m.start()];
self.last = m.end();
self.caps = Some(caps);
self.matched = Some(m);
Some(SplitState::Unmatched(unmatched))
}
}
Expand Down

0 comments on commit 3d01321

Please sign in to comment.