From 2885f5ccdc59ea2ee4d6c76c4fd5d3cde1024bda Mon Sep 17 00:00:00 2001 From: defuz Date: Tue, 2 Feb 2016 01:23:19 +0200 Subject: [PATCH 1/5] Storing mapping from names to group indices into Regex --- regex_macros/src/lib.rs | 16 ++++++ src/program.rs | 5 ++ src/re.rs | 116 +++++++++++++++++++--------------------- 3 files changed, 75 insertions(+), 62 deletions(-) diff --git a/regex_macros/src/lib.rs b/regex_macros/src/lib.rs index 8e11047553..d96aae9ea2 100644 --- a/regex_macros/src/lib.rs +++ b/regex_macros/src/lib.rs @@ -109,6 +109,19 @@ impl<'a> NfaGen<'a> { None => cx.expr_none(self.sp), } ); + let named_groups = { + let mut named_groups = ::std::collections::BTreeMap::new(); + for (i, name) in self.names.iter().enumerate() { + if let Some(ref name) = *name { + named_groups.insert(name.to_owned(), i); + } + } + self.vec_expr(named_groups.iter(), + &mut |cx, (name, group_idx)| + quote_expr!(cx, ($name, $group_idx)) + ) + }; + let prefix_anchor = self.prog.anchored_begin; let step_insts = self.step_insts(); @@ -123,6 +136,8 @@ impl<'a> NfaGen<'a> { // unused code generated by regex!. See #14185 for an example. #[allow(dead_code)] static CAP_NAMES: &'static [Option<&'static str>] = &$cap_names; +#[allow(dead_code)] +static NAMED_GROUPS: &'static [(&'static str, usize)] = &$named_groups; #[allow(dead_code)] fn exec<'t>( @@ -308,6 +323,7 @@ fn exec<'t>( ::regex::Regex::Native(::regex::internal::ExNative { original: $regex, names: &CAP_NAMES, + groups: &NAMED_GROUPS, prog: exec, }) }) diff --git a/src/program.rs b/src/program.rs index 184ca6f948..d1d0b6c4ba 100644 --- a/src/program.rs +++ b/src/program.rs @@ -8,6 +8,8 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. + + use syntax; use backtrack::BacktrackCache; @@ -39,6 +41,9 @@ pub struct Program { /// The sequence of capture group names. There is an entry for each capture /// group index and a name exists only if the capture group is named. pub cap_names: Vec>, + /// The map of named capture groups. The keys are group names and + /// the values are group indices. + pub named_groups: ::std::collections::HashMap, /// If the regular expression requires a literal prefix in order to have a /// match, that prefix is stored here as a DFA. pub prefixes: Literals, diff --git a/src/re.rs b/src/re.rs index 43185abe28..fef1496317 100644 --- a/src/re.rs +++ b/src/re.rs @@ -9,8 +9,6 @@ // except according to those terms. use std::borrow::Cow; -use std::collections::HashMap; -use std::collections::hash_map::Iter; use std::fmt; use std::ops::Index; #[cfg(feature = "pattern")] @@ -186,6 +184,8 @@ pub struct ExNative { #[doc(hidden)] pub names: &'static &'static [Option<&'static str>], #[doc(hidden)] + pub groups: &'static &'static [(&'static str, usize)], + #[doc(hidden)] pub prog: fn(&mut CaptureIdxs, &str, usize) -> bool, } @@ -394,10 +394,14 @@ impl Regex { /// /// The `0`th capture group is always unnamed, so it must always be /// accessed with `at(0)` or `[0]`. - pub fn captures<'t>(&self, text: &'t str) -> Option> { - let mut caps = self.alloc_captures(); - if exec(self, &mut caps, text, 0) { - Some(Captures::new(self, text, caps)) + pub fn captures<'r, 't>(&'r self, text: &'t str) -> Option> { + let mut locs = self.alloc_captures(); + if exec(self, &mut locs, text, 0) { + Some(Captures { + regex: self, + text: text, + locs: locs, + }) } else { None } @@ -815,37 +819,13 @@ impl<'r, 't> Iterator for RegexSplitsN<'r, 't> { /// Positions returned from a capture group are always byte indices. /// /// `'t` is the lifetime of the matched text. -pub struct Captures<'t> { +pub struct Captures<'r, 't> { + regex: &'r Regex, text: &'t str, locs: Vec>, - named: Option>, } -impl<'t> Captures<'t> { - fn new( - re: &Regex, - search: &'t str, - locs: Vec>, - ) -> Captures<'t> { - let named = - if re.captures_len() == 0 { - None - } else { - let mut named = HashMap::new(); - for (i, name) in re.capture_names().enumerate() { - if let Some(name) = name { - named.insert(name.to_owned(), i); - } - } - Some(named) - }; - Captures { - text: search, - locs: locs, - named: named, - } - } - +impl<'r, 't> Captures<'r, 't> { /// Returns the start and end positions of the Nth capture group. /// Returns `None` if `i` is not a valid capture group or if the capture /// group did not match anything. @@ -874,37 +854,49 @@ impl<'t> Captures<'t> { /// `name` isn't a valid capture group or didn't match anything, then /// `None` is returned. pub fn name(&self, name: &str) -> Option<&'t str> { - match self.named { - None => None, - Some(ref h) => { - match h.get(name) { - None => None, - Some(i) => self.at(*i), + match *self.regex { + Regex::Native(ExNative { ref groups, .. }) => { + match groups.binary_search_by(|&(n, _)| n.cmp(name)) { + Ok(i) => self.at(groups[i].1), + Err(_) => None } - } + }, + Regex::Dynamic(Program { ref named_groups, .. }) => { + named_groups.get(name).and_then(|i| self.at(*i)) + }, } } /// Creates an iterator of all the capture groups in order of appearance /// in the regular expression. - pub fn iter(&'t self) -> SubCaptures<'t> { + pub fn iter<'c>(&'c self) -> SubCaptures<'c, 'r, 't> { SubCaptures { idx: 0, caps: self, } } /// Creates an iterator of all the capture group positions in order of /// appearance in the regular expression. Positions are byte indices /// in terms of the original string matched. - pub fn iter_pos(&'t self) -> SubCapturesPos<'t> { + pub fn iter_pos<'c>(&'c self) -> SubCapturesPos<'c, 'r, 't> { SubCapturesPos { idx: 0, caps: self, } } /// Creates an iterator of all named groups as an tuple with the group /// name and the value. The iterator returns these values in arbitrary /// order. - pub fn iter_named(&'t self) -> SubCapturesNamed<'t> { + pub fn iter_named<'c>(&'c self) -> SubCapturesNamed<'c, 'r, 't> { + let iter = match *self.regex { + Regex::Native(ExNative { ref groups, .. }) => { + Box::new(groups.iter().map(|&v| v)) + as Box + 'r> + }, + Regex::Dynamic(Program { ref named_groups, .. }) => { + Box::new(named_groups.iter().map(|(s, i)| (&s[..], *i))) + as Box + 'r> + }, + }; SubCapturesNamed { caps: self, - inner: self.named.as_ref().map(|n| n.iter()), + inner: iter } } @@ -948,7 +940,7 @@ impl<'t> Captures<'t> { /// /// # Panics /// If there is no group at the given index. -impl<'t> Index for Captures<'t> { +impl<'r, 't> Index for Captures<'r, 't> { type Output = str; @@ -962,7 +954,7 @@ impl<'t> Index for Captures<'t> { /// /// # Panics /// If there is no group named by the given value. -impl<'t> Index<&'t str> for Captures<'t> { +impl<'r, 't> Index<&'t str> for Captures<'r, 't> { type Output = str; @@ -979,12 +971,12 @@ impl<'t> Index<&'t str> for Captures<'t> { /// expression. /// /// `'t` is the lifetime of the matched text. -pub struct SubCaptures<'t> { +pub struct SubCaptures<'c, 'r: 'c, 't: 'c> { idx: usize, - caps: &'t Captures<'t>, + caps: &'c Captures<'r, 't>, } -impl<'t> Iterator for SubCaptures<'t> { +impl<'c, 'r, 't> Iterator for SubCaptures<'c, 'r, 't> { type Item = Option<&'t str>; fn next(&mut self) -> Option> { @@ -1003,12 +995,12 @@ impl<'t> Iterator for SubCaptures<'t> { /// Positions are byte indices in terms of the original string matched. /// /// `'t` is the lifetime of the matched text. -pub struct SubCapturesPos<'t> { +pub struct SubCapturesPos<'c, 'r: 'c, 't: 'c> { idx: usize, - caps: &'t Captures<'t>, + caps: &'c Captures<'r, 't>, } -impl<'t> Iterator for SubCapturesPos<'t> { +impl<'c, 'r, 't> Iterator for SubCapturesPos<'c, 'r, 't> { type Item = Option<(usize, usize)>; fn next(&mut self) -> Option> { @@ -1025,17 +1017,17 @@ impl<'t> Iterator for SubCapturesPos<'t> { /// name and the value. /// /// `'t` is the lifetime of the matched text. -pub struct SubCapturesNamed<'t>{ - caps: &'t Captures<'t>, - inner: Option>, +pub struct SubCapturesNamed<'c, 'r: 'c, 't: 'c> { + caps: &'c Captures<'r, 't>, + inner: Box + 'r>, } -impl<'t> Iterator for SubCapturesNamed<'t> { - type Item = (&'t str, Option<&'t str>); +impl<'c, 'r, 't> Iterator for SubCapturesNamed<'c, 'r, 't> { + type Item = (&'r str, Option<&'t str>); - fn next(&mut self) -> Option<(&'t str, Option<&'t str>)> { - match self.inner.as_mut().map_or(None, |it| it.next()) { - Some((name, pos)) => Some((name, self.caps.at(*pos))), + fn next(&mut self) -> Option<(&'r str, Option<&'t str>)> { + match self.inner.next() { + Some((name, pos)) => Some((name, self.caps.at(pos))), None => None } } @@ -1056,9 +1048,9 @@ pub struct FindCaptures<'r, 't> { } impl<'r, 't> Iterator for FindCaptures<'r, 't> { - type Item = Captures<'t>; + type Item = Captures<'r, 't>; - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { if self.last_end > self.search.len() { return None } From 4bc2ceff5f0800cbfd285db605b228760e3fd773 Mon Sep 17 00:00:00 2001 From: defuz Date: Thu, 18 Feb 2016 03:03:43 +0200 Subject: [PATCH 2/5] Using Arc for named groups --- src/exec.rs | 9 ++++ src/program.rs | 12 ++++- src/re.rs | 142 +++++++++++++++++++++++++++++-------------------- 3 files changed, 104 insertions(+), 59 deletions(-) diff --git a/src/exec.rs b/src/exec.rs index d68c14ec1c..f048c0572f 100644 --- a/src/exec.rs +++ b/src/exec.rs @@ -8,6 +8,9 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +use std::collections::HashMap; +use std::sync::Arc; + use backtrack::{self, Backtrack}; use dfa::{self, Dfa, DfaResult}; use input::{ByteInput, CharInput}; @@ -375,6 +378,12 @@ impl Exec { &self.prog.cap_names } + /// Return a reference to named groups mapping (from group name to + /// group position). + pub fn named_groups(&self) -> &Arc> { + &self.prog.named_groups + } + /// Return a fresh allocation for storing all possible captures in the /// underlying regular expression. pub fn alloc_captures(&self) -> Vec> { diff --git a/src/program.rs b/src/program.rs index d1d0b6c4ba..9b5dcddc17 100644 --- a/src/program.rs +++ b/src/program.rs @@ -8,7 +8,8 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. - +use std::collections::HashMap; +use std::sync::Arc; use syntax; @@ -43,7 +44,7 @@ pub struct Program { pub cap_names: Vec>, /// The map of named capture groups. The keys are group names and /// the values are group indices. - pub named_groups: ::std::collections::HashMap, + pub named_groups: Arc>, /// If the regular expression requires a literal prefix in order to have a /// match, that prefix is stored here as a DFA. pub prefixes: Literals, @@ -120,10 +121,17 @@ impl ProgramBuilder { insts.anchored_begin(), insts.anchored_end(), ); + let mut named_groups = HashMap::new(); + for (i, name) in cap_names.iter().enumerate() { + if let Some(ref name) = *name { + named_groups.insert(name.to_owned(), i); + } + } Ok(Program { original: self.re, insts: insts, cap_names: cap_names, + named_groups: Arc::new(named_groups), prefixes: prefixes, anchored_begin: anchored_begin, anchored_end: anchored_end, diff --git a/src/re.rs b/src/re.rs index fef1496317..41a618c2e5 100644 --- a/src/re.rs +++ b/src/re.rs @@ -14,6 +14,8 @@ use std::ops::Index; #[cfg(feature = "pattern")] use std::str::pattern::{Pattern, Searcher, SearchStep}; use std::str::FromStr; +use std::collections::HashMap; +use std::sync::Arc; use exec::{Exec, ExecBuilder}; use syntax; @@ -394,13 +396,13 @@ impl Regex { /// /// The `0`th capture group is always unnamed, so it must always be /// accessed with `at(0)` or `[0]`. - pub fn captures<'r, 't>(&'r self, text: &'t str) -> Option> { + pub fn captures<'t>(&self, text: &'t str) -> Option> { let mut locs = self.alloc_captures(); if exec(self, &mut locs, text, 0) { Some(Captures { - regex: self, text: text, locs: locs, + named_groups: NamedGroups::from_regex(self) }) } else { None @@ -808,6 +810,47 @@ impl<'r, 't> Iterator for RegexSplitsN<'r, 't> { } } +enum NamedGroups { + Native(&'static [(&'static str, usize)]), + Dynamic(Arc>), +} + +impl NamedGroups { + fn from_regex(regex: &Regex) -> NamedGroups { + match *regex { + Regex::Native(ExNative { ref groups, .. }) => + NamedGroups::Native(groups), + Regex::Dynamic(ref exec) => + NamedGroups::Dynamic(exec.named_groups().clone()) + } + } + + fn pos(&self, name: &str) -> Option { + match *self { + NamedGroups::Native(groups) => { + groups.binary_search_by(|&(n, _)| n.cmp(name)) + .ok().map(|i| groups[i].1) + }, + NamedGroups::Dynamic(ref groups) => { + groups.get(name).map(|i| *i) + }, + } + } + + fn iter<'n>(&'n self) -> Box + 'n> { + match *self { + NamedGroups::Native(groups) => { + Box::new(groups.iter().map(|&v| v)) + as Box + 'n> + }, + NamedGroups::Dynamic(ref groups) => { + Box::new(groups.iter().map(|(s, i)| (&s[..], *i))) + as Box + 'n> + }, + } + } +} + /// Captures represents a group of captured strings for a single match. /// /// The 0th capture always corresponds to the entire match. Each subsequent @@ -819,13 +862,13 @@ impl<'r, 't> Iterator for RegexSplitsN<'r, 't> { /// Positions returned from a capture group are always byte indices. /// /// `'t` is the lifetime of the matched text. -pub struct Captures<'r, 't> { - regex: &'r Regex, +pub struct Captures<'t> { text: &'t str, locs: Vec>, + named_groups: NamedGroups, } -impl<'r, 't> Captures<'r, 't> { +impl<'t> Captures<'t> { /// Returns the start and end positions of the Nth capture group. /// Returns `None` if `i` is not a valid capture group or if the capture /// group did not match anything. @@ -854,49 +897,29 @@ impl<'r, 't> Captures<'r, 't> { /// `name` isn't a valid capture group or didn't match anything, then /// `None` is returned. pub fn name(&self, name: &str) -> Option<&'t str> { - match *self.regex { - Regex::Native(ExNative { ref groups, .. }) => { - match groups.binary_search_by(|&(n, _)| n.cmp(name)) { - Ok(i) => self.at(groups[i].1), - Err(_) => None - } - }, - Regex::Dynamic(Program { ref named_groups, .. }) => { - named_groups.get(name).and_then(|i| self.at(*i)) - }, - } + self.named_groups.pos(name).and_then(|i| self.at(i)) } /// Creates an iterator of all the capture groups in order of appearance /// in the regular expression. - pub fn iter<'c>(&'c self) -> SubCaptures<'c, 'r, 't> { + pub fn iter<'c>(&'c self) -> SubCaptures<'c, 't> { SubCaptures { idx: 0, caps: self, } } /// Creates an iterator of all the capture group positions in order of /// appearance in the regular expression. Positions are byte indices /// in terms of the original string matched. - pub fn iter_pos<'c>(&'c self) -> SubCapturesPos<'c, 'r, 't> { - SubCapturesPos { idx: 0, caps: self, } + pub fn iter_pos<'c>(&'c self) -> SubCapturesPos<'c> { + SubCapturesPos { idx: 0, locs: &self.locs } } /// Creates an iterator of all named groups as an tuple with the group /// name and the value. The iterator returns these values in arbitrary /// order. - pub fn iter_named<'c>(&'c self) -> SubCapturesNamed<'c, 'r, 't> { - let iter = match *self.regex { - Regex::Native(ExNative { ref groups, .. }) => { - Box::new(groups.iter().map(|&v| v)) - as Box + 'r> - }, - Regex::Dynamic(Program { ref named_groups, .. }) => { - Box::new(named_groups.iter().map(|(s, i)| (&s[..], *i))) - as Box + 'r> - }, - }; + pub fn iter_named<'c: 't>(&'c self) -> SubCapturesNamed<'c, 't> { SubCapturesNamed { caps: self, - inner: iter + names: self.named_groups.iter() } } @@ -940,7 +963,7 @@ impl<'r, 't> Captures<'r, 't> { /// /// # Panics /// If there is no group at the given index. -impl<'r, 't> Index for Captures<'r, 't> { +impl<'t> Index for Captures<'t> { type Output = str; @@ -954,7 +977,7 @@ impl<'r, 't> Index for Captures<'r, 't> { /// /// # Panics /// If there is no group named by the given value. -impl<'r, 't> Index<&'t str> for Captures<'r, 't> { +impl<'t> Index<&'t str> for Captures<'t> { type Output = str; @@ -971,12 +994,12 @@ impl<'r, 't> Index<&'t str> for Captures<'r, 't> { /// expression. /// /// `'t` is the lifetime of the matched text. -pub struct SubCaptures<'c, 'r: 'c, 't: 'c> { +pub struct SubCaptures<'c, 't: 'c> { idx: usize, - caps: &'c Captures<'r, 't>, + caps: &'c Captures<'t>, } -impl<'c, 'r, 't> Iterator for SubCaptures<'c, 'r, 't> { +impl<'c, 't> Iterator for SubCaptures<'c, 't> { type Item = Option<&'t str>; fn next(&mut self) -> Option> { @@ -995,21 +1018,25 @@ impl<'c, 'r, 't> Iterator for SubCaptures<'c, 'r, 't> { /// Positions are byte indices in terms of the original string matched. /// /// `'t` is the lifetime of the matched text. -pub struct SubCapturesPos<'c, 'r: 'c, 't: 'c> { +pub struct SubCapturesPos<'c> { idx: usize, - caps: &'c Captures<'r, 't>, + locs: &'c [Option] } -impl<'c, 'r, 't> Iterator for SubCapturesPos<'c, 'r, 't> { +impl<'c> Iterator for SubCapturesPos<'c> { type Item = Option<(usize, usize)>; fn next(&mut self) -> Option> { - if self.idx < self.caps.len() { - self.idx += 1; - Some(self.caps.pos(self.idx - 1)) - } else { - None + if self.idx >= self.locs.len() { + return None } + let r = match (self.locs[self.idx], self.locs[self.idx + 1]) { + (Some(s), Some(e)) => Some((s, e)), + (None, None) => None, + _ => unreachable!() + }; + self.idx += 2; + Some(r) } } @@ -1017,19 +1044,16 @@ impl<'c, 'r, 't> Iterator for SubCapturesPos<'c, 'r, 't> { /// name and the value. /// /// `'t` is the lifetime of the matched text. -pub struct SubCapturesNamed<'c, 'r: 'c, 't: 'c> { - caps: &'c Captures<'r, 't>, - inner: Box + 'r>, +pub struct SubCapturesNamed<'c, 't: 'c> { + caps: &'c Captures<'t>, + names: Box + 'c>, } -impl<'c, 'r, 't> Iterator for SubCapturesNamed<'c, 'r, 't> { - type Item = (&'r str, Option<&'t str>); +impl<'c, 't: 'c> Iterator for SubCapturesNamed<'c, 't> { + type Item = (&'c str, Option<&'t str>); - fn next(&mut self) -> Option<(&'r str, Option<&'t str>)> { - match self.inner.next() { - Some((name, pos)) => Some((name, self.caps.at(pos))), - None => None - } + fn next(&mut self) -> Option<(&'c str, Option<&'t str>)> { + self.names.next().map(|(name, pos)| (name, self.caps.at(pos))) } } @@ -1048,9 +1072,9 @@ pub struct FindCaptures<'r, 't> { } impl<'r, 't> Iterator for FindCaptures<'r, 't> { - type Item = Captures<'r, 't>; + type Item = Captures<'t>; - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { if self.last_end > self.search.len() { return None } @@ -1073,7 +1097,11 @@ impl<'r, 't> Iterator for FindCaptures<'r, 't> { } self.last_end = e; self.last_match = Some(self.last_end); - Some(Captures::new(self.re, self.search, caps)) + Some(Captures { + text: self.search, + locs: caps, + named_groups: NamedGroups::from_regex(self.re), + }) } } From e628d68e6de2246e8da4714a2507be9a3c21df4a Mon Sep 17 00:00:00 2001 From: defuz Date: Fri, 19 Feb 2016 23:52:28 +0200 Subject: [PATCH 3/5] Getting rid of boxed iterator, fix documentation --- src/re.rs | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/src/re.rs b/src/re.rs index 41a618c2e5..9ab8fad272 100644 --- a/src/re.rs +++ b/src/re.rs @@ -9,12 +9,12 @@ // except according to those terms. use std::borrow::Cow; +use std::collections::HashMap; use std::fmt; use std::ops::Index; #[cfg(feature = "pattern")] use std::str::pattern::{Pattern, Searcher, SearchStep}; use std::str::FromStr; -use std::collections::HashMap; use std::sync::Arc; use exec::{Exec, ExecBuilder}; @@ -837,16 +837,28 @@ impl NamedGroups { } } - fn iter<'n>(&'n self) -> Box + 'n> { + fn iter<'n>(&'n self) -> NamedGroupsIter<'n> { match *self { - NamedGroups::Native(groups) => { - Box::new(groups.iter().map(|&v| v)) - as Box + 'n> - }, - NamedGroups::Dynamic(ref groups) => { - Box::new(groups.iter().map(|(s, i)| (&s[..], *i))) - as Box + 'n> - }, + NamedGroups::Native(g) => NamedGroupsIter::Native(g.iter()), + NamedGroups::Dynamic(ref g) => NamedGroupsIter::Dynamic(g.iter()), + } + } +} + +enum NamedGroupsIter<'n> { + Native(::std::slice::Iter<'static, (&'static str, usize)>), + Dynamic(::std::collections::hash_map::Iter<'n, String, usize>), +} + +impl<'n> Iterator for NamedGroupsIter<'n> { + type Item = (&'n str, usize); + + fn next(&mut self) -> Option { + match *self { + NamedGroupsIter::Native(ref mut it) => + it.next().map(|&v| v), + NamedGroupsIter::Dynamic(ref mut it) => + it.next().map(|(s, i)| (s.as_ref(), *i)) } } } @@ -994,6 +1006,7 @@ impl<'t> Index<&'t str> for Captures<'t> { /// expression. /// /// `'t` is the lifetime of the matched text. +/// `'c` is the lifetime of the captures. pub struct SubCaptures<'c, 't: 'c> { idx: usize, caps: &'c Captures<'t>, @@ -1017,7 +1030,7 @@ impl<'c, 't> Iterator for SubCaptures<'c, 't> { /// /// Positions are byte indices in terms of the original string matched. /// -/// `'t` is the lifetime of the matched text. +/// `'c` is the lifetime of the captures. pub struct SubCapturesPos<'c> { idx: usize, locs: &'c [Option] @@ -1044,9 +1057,10 @@ impl<'c> Iterator for SubCapturesPos<'c> { /// name and the value. /// /// `'t` is the lifetime of the matched text. +/// `'c` is the lifetime of the captures. pub struct SubCapturesNamed<'c, 't: 'c> { caps: &'c Captures<'t>, - names: Box + 'c>, + names: NamedGroupsIter<'c>, } impl<'c, 't: 'c> Iterator for SubCapturesNamed<'c, 't> { From cf1fdb840a45d673a0c585f4efb2ddb1101241d2 Mon Sep 17 00:00:00 2001 From: defuz Date: Sat, 20 Feb 2016 00:37:14 +0200 Subject: [PATCH 4/5] Add Empty variant for NamedGroups to avoid cloning empty Arc --- src/re.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/re.rs b/src/re.rs index 9ab8fad272..3fe4ef560d 100644 --- a/src/re.rs +++ b/src/re.rs @@ -811,6 +811,7 @@ impl<'r, 't> Iterator for RegexSplitsN<'r, 't> { } enum NamedGroups { + Empty, Native(&'static [(&'static str, usize)]), Dynamic(Arc>), } @@ -821,12 +822,17 @@ impl NamedGroups { Regex::Native(ExNative { ref groups, .. }) => NamedGroups::Native(groups), Regex::Dynamic(ref exec) => - NamedGroups::Dynamic(exec.named_groups().clone()) + if exec.named_groups().is_empty() { + NamedGroups::Empty + } else { + NamedGroups::Dynamic(exec.named_groups().clone()) + } } } fn pos(&self, name: &str) -> Option { match *self { + NamedGroups::Empty => None, NamedGroups::Native(groups) => { groups.binary_search_by(|&(n, _)| n.cmp(name)) .ok().map(|i| groups[i].1) @@ -839,6 +845,7 @@ impl NamedGroups { fn iter<'n>(&'n self) -> NamedGroupsIter<'n> { match *self { + NamedGroups::Empty => NamedGroupsIter::Empty, NamedGroups::Native(g) => NamedGroupsIter::Native(g.iter()), NamedGroups::Dynamic(ref g) => NamedGroupsIter::Dynamic(g.iter()), } @@ -846,6 +853,7 @@ impl NamedGroups { } enum NamedGroupsIter<'n> { + Empty, Native(::std::slice::Iter<'static, (&'static str, usize)>), Dynamic(::std::collections::hash_map::Iter<'n, String, usize>), } @@ -855,6 +863,8 @@ impl<'n> Iterator for NamedGroupsIter<'n> { fn next(&mut self) -> Option { match *self { + NamedGroupsIter::Empty => + None, NamedGroupsIter::Native(ref mut it) => it.next().map(|&v| v), NamedGroupsIter::Dynamic(ref mut it) => From 014461342641800d6a94e226e53fdf7998fe4e65 Mon Sep 17 00:00:00 2001 From: defuz Date: Sat, 20 Feb 2016 00:50:49 +0200 Subject: [PATCH 5/5] Revert incompatible changes --- src/re.rs | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/re.rs b/src/re.rs index 3fe4ef560d..9ce1ef4561 100644 --- a/src/re.rs +++ b/src/re.rs @@ -821,12 +821,14 @@ impl NamedGroups { match *regex { Regex::Native(ExNative { ref groups, .. }) => NamedGroups::Native(groups), - Regex::Dynamic(ref exec) => - if exec.named_groups().is_empty() { + Regex::Dynamic(ref exec) => { + let groups = exec.named_groups(); + if groups.is_empty() { NamedGroups::Empty } else { - NamedGroups::Dynamic(exec.named_groups().clone()) + NamedGroups::Dynamic(groups.clone()) } + } } } @@ -924,21 +926,21 @@ impl<'t> Captures<'t> { /// Creates an iterator of all the capture groups in order of appearance /// in the regular expression. - pub fn iter<'c>(&'c self) -> SubCaptures<'c, 't> { + pub fn iter(&'t self) -> SubCaptures<'t> { SubCaptures { idx: 0, caps: self, } } /// Creates an iterator of all the capture group positions in order of /// appearance in the regular expression. Positions are byte indices /// in terms of the original string matched. - pub fn iter_pos<'c>(&'c self) -> SubCapturesPos<'c> { + pub fn iter_pos(&'t self) -> SubCapturesPos<'t> { SubCapturesPos { idx: 0, locs: &self.locs } } /// Creates an iterator of all named groups as an tuple with the group /// name and the value. The iterator returns these values in arbitrary /// order. - pub fn iter_named<'c: 't>(&'c self) -> SubCapturesNamed<'c, 't> { + pub fn iter_named(&'t self) -> SubCapturesNamed<'t> { SubCapturesNamed { caps: self, names: self.named_groups.iter() @@ -1015,17 +1017,16 @@ impl<'t> Index<&'t str> for Captures<'t> { /// An iterator over capture groups for a particular match of a regular /// expression. /// -/// `'t` is the lifetime of the matched text. /// `'c` is the lifetime of the captures. -pub struct SubCaptures<'c, 't: 'c> { +pub struct SubCaptures<'c> { idx: usize, - caps: &'c Captures<'t>, + caps: &'c Captures<'c>, } -impl<'c, 't> Iterator for SubCaptures<'c, 't> { - type Item = Option<&'t str>; +impl<'c> Iterator for SubCaptures<'c> { + type Item = Option<&'c str>; - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { if self.idx < self.caps.len() { self.idx += 1; Some(self.caps.at(self.idx - 1)) @@ -1066,17 +1067,16 @@ impl<'c> Iterator for SubCapturesPos<'c> { /// An Iterator over named capture groups as a tuple with the group /// name and the value. /// -/// `'t` is the lifetime of the matched text. /// `'c` is the lifetime of the captures. -pub struct SubCapturesNamed<'c, 't: 'c> { - caps: &'c Captures<'t>, +pub struct SubCapturesNamed<'c> { + caps: &'c Captures<'c>, names: NamedGroupsIter<'c>, } -impl<'c, 't: 'c> Iterator for SubCapturesNamed<'c, 't> { - type Item = (&'c str, Option<&'t str>); +impl<'c> Iterator for SubCapturesNamed<'c> { + type Item = (&'c str, Option<&'c str>); - fn next(&mut self) -> Option<(&'c str, Option<&'t str>)> { + fn next(&mut self) -> Option<(&'c str, Option<&'c str>)> { self.names.next().map(|(name, pos)| (name, self.caps.at(pos))) } }