diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index c22b6fb9286d1..28ba7369d52a3 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -58,6 +58,8 @@ use core::iter::{Iterator, Extend}; use core::option::Option::{self, Some, None}; use core::result::Result; use core::str as core_str; +use core::str::pattern::Pattern; +use core::str::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher}; use unicode::str::{UnicodeStr, Utf16Encoder}; use core::convert::AsRef; @@ -69,14 +71,16 @@ use vec::Vec; use slice::SliceConcatExt; pub use core::str::{FromStr, Utf8Error, Str}; -pub use core::str::{Lines, LinesAny, MatchIndices, CharRange}; -pub use core::str::{Split, SplitTerminator, SplitN}; -pub use core::str::{RSplit, RSplitN}; +pub use core::str::{Lines, LinesAny, CharRange}; +pub use core::str::{Split, RSplit}; +pub use core::str::{SplitN, RSplitN}; +pub use core::str::{SplitTerminator, RSplitTerminator}; +pub use core::str::{Matches, RMatches}; +pub use core::str::{MatchIndices, RMatchIndices}; pub use core::str::{from_utf8, Chars, CharIndices, Bytes}; pub use core::str::{from_utf8_unchecked, ParseBoolError}; pub use unicode::str::{Words, Graphemes, GraphemeIndices}; -pub use core::str::Pattern; -pub use core::str::{Searcher, ReverseSearcher, DoubleEndedSearcher, SearchStep}; +pub use core::str::pattern; /* Section: Creating a string @@ -429,7 +433,8 @@ impl str { /// Replaces all occurrences of one string with another. /// - /// `replace` takes two arguments, a sub-`&str` to find in `self`, and a second `&str` to + /// `replace` takes two arguments, a sub-`&str` to find in `self`, and a + /// second `&str` to /// replace it with. If the original `&str` isn't found, no change occurs. /// /// # Examples @@ -581,12 +586,24 @@ impl str { /// An iterator over substrings of `self`, separated by characters /// matched by a pattern. /// - /// The pattern can be a simple `&str`, or a closure that determines - /// the split. 
+ /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. + /// + /// # Iterator behavior + /// + /// The returned iterator will be double ended if the pattern allows a + /// reverse search and forward/reverse search yields the same elements. + /// This is true for, eg, `char` but not + /// for `&str`. + /// + /// If the pattern allows a reverse search but its results might differ + /// from a forward search, `rsplit()` can be used. /// /// # Examples /// - /// Simple `&str` patterns: + /// Simple patterns: /// /// ``` /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect(); @@ -594,81 +611,116 @@ impl str { /// /// let v: Vec<&str> = "".split('X').collect(); /// assert_eq!(v, [""]); + /// + /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect(); + /// assert_eq!(v, ["lion", "", "tiger", "leopard"]); + /// + /// let v: Vec<&str> = "lion::tiger::leopard".split("::").collect(); + /// assert_eq!(v, ["lion", "tiger", "leopard"]); /// ``` /// - /// More complex patterns with a lambda: + /// More complex patterns with closures: /// /// ``` /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_numeric()).collect(); /// assert_eq!(v, ["abc", "def", "ghi"]); /// - /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect(); - /// assert_eq!(v, ["lion", "", "tiger", "leopard"]); + /// let v: Vec<&str> = "lionXtigerXleopard".split(char::is_uppercase).collect(); + /// assert_eq!(v, ["lion", "tiger", "leopard"]); /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> { core_str::StrExt::split(&self[..], pat) } - /// An iterator over substrings of `self`, separated by characters matched - /// by a pattern, returning most `count` items. 
+ /// An iterator over substrings of `self`, separated by characters + /// matched by a pattern and yielded in reverse order. /// - /// The pattern can be a simple `&str`, or a closure that determines - /// the split. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. /// - /// The last element returned, if any, will contain the remainder of the - /// string. + /// # Iterator behavior /// - /// # Examples + /// The returned iterator requires that the pattern supports a + /// reverse search, + /// and it will be double ended if a forward/reverse search yields + /// the same elements. /// - /// Simple `&str` patterns: + /// For iterating from the front, `split()` can be used. /// - /// ``` - /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect(); - /// assert_eq!(v, ["Mary", "had a little lambda"]); + /// # Examples /// - /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect(); - /// assert_eq!(v, ["lion", "XtigerXleopard"]); + /// Simple patterns: /// - /// let v: Vec<&str> = "abcXdef".splitn(1, 'X').collect(); - /// assert_eq!(v, ["abcXdef"]); + /// ```rust + /// let v: Vec<&str> = "Mary had a little lamb".rsplit(' ').collect(); + /// assert_eq!(v, ["lamb", "little", "a", "had", "Mary"]); /// - /// let v: Vec<&str> = "".splitn(1, 'X').collect(); + /// let v: Vec<&str> = "".rsplit('X').collect(); /// assert_eq!(v, [""]); - /// ``` /// - /// More complex patterns with a lambda: + /// let v: Vec<&str> = "lionXXtigerXleopard".rsplit('X').collect(); + /// assert_eq!(v, ["leopard", "tiger", "", "lion"]); /// + /// let v: Vec<&str> = "lion::tiger::leopard".rsplit("::").collect(); + /// assert_eq!(v, ["leopard", "tiger", "lion"]); /// ``` - /// let v: Vec<&str> = "abc1def2ghi".splitn(2, |c: char| c.is_numeric()).collect(); - /// assert_eq!(v, ["abc", "def2ghi"]); + /// + /// More complex patterns with 
closures: + /// + /// ```rust + /// let v: Vec<&str> = "abc1def2ghi".rsplit(|c: char| c.is_numeric()).collect(); + /// assert_eq!(v, ["ghi", "def", "abc"]); + /// + /// let v: Vec<&str> = "lionXtigerXleopard".rsplit(char::is_uppercase).collect(); + /// assert_eq!(v, ["leopard", "tiger", "lion"]); /// ``` #[stable(feature = "rust1", since = "1.0.0")] - pub fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> { - core_str::StrExt::splitn(&self[..], count, pat) + pub fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P> + where P::Searcher: ReverseSearcher<'a> + { + core_str::StrExt::rsplit(&self[..], pat) } /// An iterator over substrings of `self`, separated by characters /// matched by a pattern. /// - /// Equivalent to `split`, except that the trailing substring is skipped if empty. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. + /// Additional libraries might provide more complex patterns + /// like regular expressions. /// - /// The pattern can be a simple `&str`, or a closure that determines - /// the split. + /// Equivalent to `split`, except that the trailing substring + /// is skipped if empty. + /// + /// This method can be used for string data that is _terminated_, + /// rather than _separated_ by a pattern. + /// + /// # Iterator behavior + /// + /// The returned iterator will be double ended if the pattern allows a + /// reverse search + /// and forward/reverse search yields the same elements. This is true + /// for, eg, `char` but not for `&str`. + /// + /// If the pattern allows a reverse search but its results might differ + /// from a forward search, `rsplit_terminator()` can be used. 
/// /// # Examples /// - /// Simple `&str` patterns: + /// Simple patterns: /// /// ``` /// let v: Vec<&str> = "A.B.".split_terminator('.').collect(); /// assert_eq!(v, ["A", "B"]); /// - /// let v: Vec<&str> = "A..B..".split_terminator('.').collect(); + /// let v: Vec<&str> = "A..B..".split_terminator(".").collect(); /// assert_eq!(v, ["A", "", "B", ""]); /// ``` /// - /// More complex patterns with a lambda: + /// More complex patterns with closures: /// /// ``` /// let v: Vec<&str> = "abc1def2ghi3".split_terminator(|c: char| c.is_numeric()).collect(); @@ -679,32 +731,98 @@ impl str { core_str::StrExt::split_terminator(&self[..], pat) } - /// An iterator over substrings of `self`, separated by a pattern, - /// starting from the end of the string. + /// An iterator over substrings of `self`, separated by characters + /// matched by a pattern and yielded in reverse order. + /// + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. + /// + /// Equivalent to `split`, except that the trailing substring is + /// skipped if empty. + /// + /// This method can be used for string data that is _terminated_, + /// rather than _separated_ by a pattern. + /// + /// # Iterator behavior + /// + /// The returned iterator requires that the pattern supports a + /// reverse search, and it will be double ended if a forward/reverse + /// search yields the same elements. + /// + /// For iterating from the front, `split_terminator()` can be used. 
/// /// # Examples /// /// Simple patterns: /// /// ``` - /// let v: Vec<&str> = "Mary had a little lamb".rsplit(' ').collect(); - /// assert_eq!(v, ["lamb", "little", "a", "had", "Mary"]); + /// let v: Vec<&str> = "A.B.".rsplit_terminator('.').collect(); + /// assert_eq!(v, ["B", "A"]); /// - /// let v: Vec<&str> = "lion::tiger::leopard".rsplit("::").collect(); - /// assert_eq!(v, ["leopard", "tiger", "lion"]); + /// let v: Vec<&str> = "A..B..".rsplit_terminator(".").collect(); + /// assert_eq!(v, ["", "B", "", "A"]); /// ``` /// - /// More complex patterns with a lambda: + /// More complex patterns with closures: /// /// ``` - /// let v: Vec<&str> = "abc1def2ghi".rsplit(|c: char| c.is_numeric()).collect(); + /// let v: Vec<&str> = "abc1def2ghi3".rsplit_terminator(|c: char| c.is_numeric()).collect(); /// assert_eq!(v, ["ghi", "def", "abc"]); /// ``` #[stable(feature = "rust1", since = "1.0.0")] - pub fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P> + pub fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P> where P::Searcher: ReverseSearcher<'a> { - core_str::StrExt::rsplit(&self[..], pat) + core_str::StrExt::rsplit_terminator(&self[..], pat) + } + + /// An iterator over substrings of `self`, separated by a pattern, + /// restricted to returning + /// at most `count` items. + /// + /// The last element returned, if any, will contain the remainder of the + /// string. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. + /// + /// # Iterator behavior + /// + /// The returned iterator will not be double ended, because it is + /// not efficient to support. + /// + /// If the pattern allows a reverse search, `rsplitn()` can be used. 
+ /// + /// # Examples + /// + /// Simple patterns: + /// + /// ``` + /// let v: Vec<&str> = "Mary had a little lambda".splitn(3, ' ').collect(); + /// assert_eq!(v, ["Mary", "had", "a little lambda"]); + /// + /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(3, "X").collect(); + /// assert_eq!(v, ["lion", "", "tigerXleopard"]); + /// + /// let v: Vec<&str> = "abcXdef".splitn(1, 'X').collect(); + /// assert_eq!(v, ["abcXdef"]); + /// + /// let v: Vec<&str> = "".splitn(1, 'X').collect(); + /// assert_eq!(v, [""]); + /// ``` + /// + /// More complex patterns with closures: + /// + /// ``` + /// let v: Vec<&str> = "abc1def2ghi".splitn(2, |c: char| c.is_numeric()).collect(); + /// assert_eq!(v, ["abc", "def2ghi"]); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> { + core_str::StrExt::splitn(&self[..], count, pat) } /// An iterator over substrings of `self`, separated by a pattern, @@ -714,6 +832,18 @@ impl str { /// The last element returned, if any, will contain the remainder of the /// string. /// + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. + /// + /// # Iterator behavior + /// + /// The returned iterator will not be double ended, because it is not + /// efficient to support. + /// + /// `splitn()` can be used for splitting from the front. 
+ /// /// # Examples /// /// Simple patterns: @@ -722,11 +852,14 @@ impl str { /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(3, ' ').collect(); /// assert_eq!(v, ["lamb", "little", "Mary had a"]); /// + /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(3, 'X').collect(); + /// assert_eq!(v, ["leopard", "tiger", "lionX"]); + /// /// let v: Vec<&str> = "lion::tiger::leopard".rsplitn(2, "::").collect(); /// assert_eq!(v, ["leopard", "lion::tiger"]); /// ``` /// - /// More complex patterns with a lambda: + /// More complex patterns with closures: /// /// ``` /// let v: Vec<&str> = "abc1def2ghi".rsplitn(2, |c: char| c.is_numeric()).collect(); @@ -739,34 +872,166 @@ impl str { core_str::StrExt::rsplitn(&self[..], count, pat) } - /// An iterator over the start and end indices of the disjoint matches of a `&str` within - /// `self`. + /// An iterator over the matches of a pattern within `self`. + /// + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. + /// + /// # Iterator behavior + /// + /// The returned iterator will be double ended if the pattern allows + /// a reverse search + /// and forward/reverse search yields the same elements. This is true + /// for, eg, `char` but not + /// for `&str`. + /// + /// If the pattern allows a reverse search but its results might differ + /// from a forward search, `rmatches()` can be used. 
+ /// + /// # Examples + /// + /// ``` + /// # #![feature(collections)] + /// let v: Vec<&str> = "abcXXXabcYYYabc".matches("abc").collect(); + /// assert_eq!(v, ["abc", "abc", "abc"]); + /// + /// let v: Vec<&str> = "1abc2abc3".matches(|c: char| c.is_numeric()).collect(); + /// assert_eq!(v, ["1", "2", "3"]); + /// ``` + #[unstable(feature = "collections", + reason = "method got recently added")] + pub fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> { + core_str::StrExt::matches(&self[..], pat) + } + + /// An iterator over the matches of a pattern within `self`, yielded in + /// reverse order. + /// + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. + /// + /// # Iterator behavior + /// + /// The returned iterator requires that the pattern supports a + /// reverse search, + /// and it will be double ended if a forward/reverse search yields + /// the same elements. + /// + /// For iterating from the front, `matches()` can be used. + /// + /// # Examples + /// + /// ``` + /// # #![feature(collections)] + /// let v: Vec<&str> = "abcXXXabcYYYabc".rmatches("abc").collect(); + /// assert_eq!(v, ["abc", "abc", "abc"]); + /// + /// let v: Vec<&str> = "1abc2abc3".rmatches(|c: char| c.is_numeric()).collect(); + /// assert_eq!(v, ["3", "2", "1"]); + /// ``` + #[unstable(feature = "collections", + reason = "method got recently added")] + pub fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P> + where P::Searcher: ReverseSearcher<'a> + { + core_str::StrExt::rmatches(&self[..], pat) + } + + /// An iterator over the start and end indices of the disjoint matches + /// of a pattern within `self`. /// - /// That is, each returned value `(start, end)` satisfies `self.slice(start, end) == sep`. 
For - /// matches of `sep` within `self` that overlap, only the indices corresponding to the first + /// For matches of `pat` within `self` that overlap, only the indices + /// corresponding to the first /// match are returned. /// + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines + /// the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. + /// + /// # Iterator behavior + /// + /// The returned iterator will be double ended if the pattern allows a + /// reverse search + /// and forward/reverse search yields the same elements. This is true for, + /// eg, `char` but not + /// for `&str`. + /// + /// If the pattern allows a reverse search but its results might differ + /// from a forward search, `rmatch_indices()` can be used. + /// /// # Examples /// /// ``` /// # #![feature(collections)] /// let v: Vec<(usize, usize)> = "abcXXXabcYYYabc".match_indices("abc").collect(); - /// assert_eq!(v, [(0,3), (6,9), (12,15)]); + /// assert_eq!(v, [(0, 3), (6, 9), (12, 15)]); /// /// let v: Vec<(usize, usize)> = "1abcabc2".match_indices("abc").collect(); - /// assert_eq!(v, [(1,4), (4,7)]); + /// assert_eq!(v, [(1, 4), (4, 7)]); /// /// let v: Vec<(usize, usize)> = "ababa".match_indices("aba").collect(); /// assert_eq!(v, [(0, 3)]); // only the first `aba` /// ``` #[unstable(feature = "collections", reason = "might have its iterator type changed")] - // NB: Right now MatchIndices yields `(usize, usize)`, - // but it would be more consistent and useful to return `(usize, &str)` + // NB: Right now MatchIndices yields `(usize, usize)`, but it would + // be more consistent with `matches` and `char_indices` to return `(usize, &str)` pub fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> { core_str::StrExt::match_indices(&self[..], pat) } + /// An iterator over the start and end indices of the disjoint matches of + /// a pattern within + /// `self`, yielded in reverse 
order. + /// + /// For matches of `pat` within `self` that overlap, only the indices + /// corresponding to the last + /// match are returned. + /// + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines + /// the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. + /// + /// # Iterator behavior + /// + /// The returned iterator requires that the pattern supports a + /// reverse search, + /// and it will be double ended if a forward/reverse search yields + /// the same elements. + /// + /// For iterating from the front, `match_indices()` can be used. + /// + /// # Examples + /// + /// ``` + /// # #![feature(collections)] + /// let v: Vec<(usize, usize)> = "abcXXXabcYYYabc".rmatch_indices("abc").collect(); + /// assert_eq!(v, [(12, 15), (6, 9), (0, 3)]); + /// + /// let v: Vec<(usize, usize)> = "1abcabc2".rmatch_indices("abc").collect(); + /// assert_eq!(v, [(4, 7), (1, 4)]); + /// + /// let v: Vec<(usize, usize)> = "ababa".rmatch_indices("aba").collect(); + /// assert_eq!(v, [(2, 5)]); // only the last `aba` + /// ``` + #[unstable(feature = "collections", + reason = "might have its iterator type changed")] + // NB: Right now RMatchIndices yields `(usize, usize)`, but it would + // be more consistent with `rmatches` and `char_indices` to return `(usize, &str)` + pub fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P> + where P::Searcher: ReverseSearcher<'a> + { + core_str::StrExt::rmatch_indices(&self[..], pat) + } + /// An iterator over the lines of a string, separated by `\n`. /// /// This does not include the empty string after a trailing `\n`. @@ -793,7 +1058,8 @@ impl str { core_str::StrExt::lines(&self[..]) } - /// An iterator over the lines of a string, separated by either `\n` or `\r\n`. + /// An iterator over the lines of a string, separated by either + /// `\n` or `\r\n`. /// /// As with `.lines()`, this does not include an empty trailing line. 
/// @@ -855,7 +1121,8 @@ impl str { /// /// # Unsafety /// - /// Caller must check both UTF-8 character boundaries and the boundaries of the entire slice as + /// Caller must check both UTF-8 character boundaries and the boundaries + /// of the entire slice as /// well. /// /// # Examples @@ -898,13 +1165,15 @@ impl str { core_str::StrExt::ends_with(&self[..], pat) } - /// Returns a string with all pre- and suffixes that match a pattern repeatedly removed. + /// Returns a string with all pre- and suffixes that match a pattern + /// repeatedly removed. /// - /// The pattern can be a simple `&str`, or a closure that determines the split. + /// The pattern can be a simple `char`, or a closure that determines + /// the split. /// /// # Examples /// - /// Simple `&str` patterns: + /// Simple patterns: /// /// ``` /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar"); @@ -913,7 +1182,7 @@ impl str { /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar"); /// ``` /// - /// More complex patterns with a lambda: + /// More complex patterns with closures: /// /// ``` /// assert_eq!("123foo1bar123".trim_matches(|c: char| c.is_numeric()), "foo1bar"); @@ -925,13 +1194,15 @@ impl str { core_str::StrExt::trim_matches(&self[..], pat) } - /// Returns a string with all prefixes that match a pattern repeatedly removed. + /// Returns a string with all prefixes that match a pattern + /// repeatedly removed. /// - /// The pattern can be a simple `&str`, or a closure that determines the split. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. 
/// /// # Examples /// - /// Simple `&str` patterns: + /// Simple patterns: /// /// ``` /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11"); @@ -940,7 +1211,7 @@ impl str { /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12"); /// ``` /// - /// More complex patterns with a lambda: + /// More complex patterns with closures: /// /// ``` /// assert_eq!("123foo1bar123".trim_left_matches(|c: char| c.is_numeric()), "foo1bar123"); @@ -950,13 +1221,15 @@ impl str { core_str::StrExt::trim_left_matches(&self[..], pat) } - /// Returns a string with all suffixes that match a pattern repeatedly removed. + /// Returns a string with all suffixes that match a pattern + /// repeatedly removed. /// - /// The pattern can be a simple `&str`, or a closure that determines the split. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. /// /// # Examples /// - /// Simple `&str` patterns: + /// Simple patterns: /// /// ``` /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar"); @@ -964,7 +1237,7 @@ impl str { /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar"); /// ``` /// - /// More complex patterns with a lambda: + /// More complex patterns with closures: /// /// ``` /// assert_eq!("123foo1bar123".trim_right_matches(|c: char| c.is_numeric()), "123foo1bar"); @@ -976,9 +1249,11 @@ impl str { core_str::StrExt::trim_right_matches(&self[..], pat) } - /// Check that `index`-th byte lies at the start and/or end of a UTF-8 code point sequence. + /// Check that `index`-th byte lies at the start and/or end of a + /// UTF-8 code point sequence. /// - /// The start and end of the string (when `index == self.len()`) are considered to be + /// The start and end of the string (when `index == self.len()`) are + /// considered to be /// boundaries. 
/// /// # Panics @@ -1021,7 +1296,8 @@ impl str { /// /// # Examples /// - /// This example manually iterates through the characters of a string; this should normally be + /// This example manually iterates through the characters of a string; + /// this should normally be /// done by `.chars()` or `.char_indices()`. /// /// ``` @@ -1072,7 +1348,8 @@ impl str { /// /// # Examples /// - /// This example manually iterates through the characters of a string; this should normally be + /// This example manually iterates through the characters of a string; + /// this should normally be /// done by `.chars().rev()` or `.char_indices()`. /// /// ``` @@ -1135,7 +1412,8 @@ impl str { core_str::StrExt::char_at(&self[..], i) } - /// Given a byte position, return the `char` at that position, counting from the end. + /// Given a byte position, return the `char` at that position, counting + /// from the end. /// /// # Panics /// @@ -1170,31 +1448,36 @@ impl str { core_str::StrExt::as_bytes(&self[..]) } - /// Returns the byte index of the first character of `self` that matches the pattern, if it + /// Returns the byte index of the first character of `self` that matches + /// the pattern, if it /// exists. /// /// Returns `None` if it doesn't exist. /// - /// The pattern can be a simple `&str`, or a closure that determines the split. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the + /// split. 
/// /// # Examples /// - /// Simple `&str` patterns: + /// Simple patterns: /// /// ``` /// let s = "Löwe 老虎 Léopard"; /// /// assert_eq!(s.find('L'), Some(0)); /// assert_eq!(s.find('é'), Some(14)); + /// assert_eq!(s.find("Léopard"), Some(13)); /// /// ``` /// - /// More complex patterns with a lambda: + /// More complex patterns with closures: /// /// ``` /// let s = "Löwe 老虎 Léopard"; /// /// assert_eq!(s.find(|c: char| c.is_whitespace()), Some(5)); + /// assert_eq!(s.find(char::is_lowercase), Some(1)); /// ``` /// /// Not finding the pattern: @@ -1210,16 +1493,18 @@ impl str { core_str::StrExt::find(&self[..], pat) } - /// Returns the byte index of the last character of `self` that matches the pattern, if it + /// Returns the byte index of the last character of `self` that + /// matches the pattern, if it /// exists. /// /// Returns `None` if it doesn't exist. /// - /// The pattern can be a simple `&str`, or a closure that determines the split. + /// The pattern can be a simple `&str`, `char`, + /// or a closure that determines the split. /// /// # Examples /// - /// Simple `&str` patterns: + /// Simple patterns: /// /// ``` /// let s = "Löwe 老虎 Léopard"; @@ -1228,12 +1513,13 @@ impl str { /// assert_eq!(s.rfind('é'), Some(14)); /// ``` /// - /// More complex patterns with a lambda: + /// More complex patterns with closures: /// /// ``` /// let s = "Löwe 老虎 Léopard"; /// /// assert_eq!(s.rfind(|c: char| c.is_whitespace()), Some(12)); + /// assert_eq!(s.rfind(char::is_lowercase), Some(20)); /// ``` /// /// Not finding the pattern: @@ -1253,7 +1539,8 @@ impl str { /// Retrieves the first character from a `&str` and returns it. /// - /// This does not allocate a new string; instead, it returns a slice that points one character + /// This does not allocate a new string; instead, it returns a slice that + /// points one character /// beyond the character that was shifted. /// /// If the slice does not contain any characters, None is returned instead. 
@@ -1281,7 +1568,8 @@ impl str { core_str::StrExt::slice_shift_char(&self[..]) } - /// Returns the byte offset of an inner slice relative to an enclosing outer slice. + /// Returns the byte offset of an inner slice relative to an enclosing + /// outer slice. /// /// # Panics /// @@ -1306,7 +1594,8 @@ impl str { /// Return an unsafe pointer to the `&str`'s buffer. /// - /// The caller must ensure that the string outlives this pointer, and that it is not + /// The caller must ensure that the string outlives this pointer, and + /// that it is not /// reallocated (e.g. by pushing to the string). /// /// # Examples @@ -1382,7 +1671,8 @@ impl str { /// /// [graphemes]: http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries /// - /// If `is_extended` is true, the iterator is over the *extended grapheme clusters*; + /// If `is_extended` is true, the iterator is over the + /// *extended grapheme clusters*; /// otherwise, the iterator is over the *legacy grapheme clusters*. /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries) /// recommends extended grapheme cluster boundaries for general processing. @@ -1407,7 +1697,8 @@ impl str { UnicodeStr::graphemes(&self[..], is_extended) } - /// Returns an iterator over the grapheme clusters of `self` and their byte offsets. See + /// Returns an iterator over the grapheme clusters of `self` and their + /// byte offsets. See /// `graphemes()` for more information. /// /// # Examples @@ -1427,7 +1718,8 @@ impl str { /// An iterator over the non-empty words of `self`. /// - /// A 'word' is a subsequence separated by any sequence of whitespace. Sequences of whitespace + /// A 'word' is a subsequence separated by any sequence of whitespace. + /// Sequences of whitespace /// are collapsed, so empty "words" are not included. /// /// # Examples @@ -1449,11 +1741,15 @@ impl str { /// /// Control characters have zero width. 
/// - /// `is_cjk` determines behavior for characters in the Ambiguous category: if `is_cjk` is - /// `true`, these are 2 columns wide; otherwise, they are 1. In CJK locales, `is_cjk` should be + /// `is_cjk` determines behavior for characters in the Ambiguous category: + /// if `is_cjk` is + /// `true`, these are 2 columns wide; otherwise, they are 1. + /// In CJK locales, `is_cjk` should be /// `true`, else it should be `false`. - /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) recommends that these - /// characters be treated as 1 column (i.e., `is_cjk = false`) if the locale is unknown. + /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) + /// recommends that these + /// characters be treated as 1 column (i.e., `is_cjk = false`) if the + /// locale is unknown. #[unstable(feature = "unicode", reason = "this functionality may only be provided by libunicode")] pub fn width(&self, is_cjk: bool) -> usize { diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs index 8da8cad98a705..441d0f2c5df79 100644 --- a/src/libcollections/string.rs +++ b/src/libcollections/string.rs @@ -24,7 +24,7 @@ use core::mem; use core::ops::{self, Deref, Add, Index}; use core::ptr; use core::slice; -use core::str::Pattern; +use core::str::pattern::Pattern; use unicode::str as unicode_str; use unicode::str::Utf16Item; diff --git a/src/libcollectionstest/str.rs b/src/libcollectionstest/str.rs index 495a961fa360e..bc07c9b65a588 100644 --- a/src/libcollectionstest/str.rs +++ b/src/libcollectionstest/str.rs @@ -1,4 +1,4 @@ -// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT +// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. 
// @@ -1506,6 +1506,403 @@ fn test_str_from_utf8() { assert_eq!(from_utf8(xs), Err(Utf8Error::TooShort)); } +#[test] +fn test_pattern_deref_forward() { + let data = "aabcdaa"; + assert!(data.contains("bcd")); + assert!(data.contains(&"bcd")); + assert!(data.contains(&"bcd".to_string())); +} + +#[test] +fn test_empty_match_indices() { + let data = "aä中!"; + let vec: Vec<_> = data.match_indices("").collect(); + assert_eq!(vec, [(0, 0), (1, 1), (3, 3), (6, 6), (7, 7)]); +} + +#[test] +fn test_bool_from_str() { + assert_eq!("true".parse().ok(), Some(true)); + assert_eq!("false".parse().ok(), Some(false)); + assert_eq!("not even a boolean".parse::().ok(), None); +} + +fn check_contains_all_substrings(s: &str) { + assert!(s.contains("")); + for i in 0..s.len() { + for j in i+1..s.len() + 1 { + assert!(s.contains(&s[i..j])); + } + } +} + +#[test] +fn strslice_issue_16589() { + assert!("bananas".contains("nana")); + + // prior to the fix for #16589, x.contains("abcdabcd") returned false + // test all substrings for good measure + check_contains_all_substrings("012345678901234567890123456789bcdabcdabcd"); +} + +#[test] +fn strslice_issue_16878() { + assert!(!"1234567ah012345678901ah".contains("hah")); + assert!(!"00abc01234567890123456789abc".contains("bcabc")); +} + + +#[test] +fn test_strslice_contains() { + let x = "There are moments, Jeeves, when one asks oneself, 'Do trousers matter?'"; + check_contains_all_substrings(x); +} + +#[test] +fn test_rsplitn_char_iterator() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let mut split: Vec<&str> = data.rsplitn(4, ' ').collect(); + split.reverse(); + assert_eq!(split, ["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]); + + let mut split: Vec<&str> = data.rsplitn(4, |c: char| c == ' ').collect(); + split.reverse(); + assert_eq!(split, ["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]); + + // Unicode + let mut split: Vec<&str> = data.rsplitn(4, 'ä').collect(); + split.reverse(); + assert_eq!(split, ["\nMäry 
häd ", " little l", "mb\nLittle l", "mb\n"]); + + let mut split: Vec<&str> = data.rsplitn(4, |c: char| c == 'ä').collect(); + split.reverse(); + assert_eq!(split, ["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]); +} + +#[test] +fn test_split_char_iterator() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let split: Vec<&str> = data.split(' ').collect(); + assert_eq!( split, ["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]); + + let mut rsplit: Vec<&str> = data.split(' ').rev().collect(); + rsplit.reverse(); + assert_eq!(rsplit, ["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]); + + let split: Vec<&str> = data.split(|c: char| c == ' ').collect(); + assert_eq!( split, ["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]); + + let mut rsplit: Vec<&str> = data.split(|c: char| c == ' ').rev().collect(); + rsplit.reverse(); + assert_eq!(rsplit, ["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]); + + // Unicode + let split: Vec<&str> = data.split('ä').collect(); + assert_eq!( split, ["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]); + + let mut rsplit: Vec<&str> = data.split('ä').rev().collect(); + rsplit.reverse(); + assert_eq!(rsplit, ["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]); + + let split: Vec<&str> = data.split(|c: char| c == 'ä').collect(); + assert_eq!( split, ["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]); + + let mut rsplit: Vec<&str> = data.split(|c: char| c == 'ä').rev().collect(); + rsplit.reverse(); + assert_eq!(rsplit, ["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]); +} + +#[test] +fn test_rev_split_char_iterator_no_trailing() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let mut split: Vec<&str> = data.split('\n').rev().collect(); + split.reverse(); + assert_eq!(split, ["", "Märy häd ä little lämb", "Little lämb", ""]); + + let mut split: Vec<&str> = data.split_terminator('\n').rev().collect(); + split.reverse(); + assert_eq!(split, 
["", "Märy häd ä little lämb", "Little lämb"]); +} + +#[test] +fn test_utf16_code_units() { + use unicode::str::Utf16Encoder; + assert_eq!(Utf16Encoder::new(vec!['é', '\u{1F4A9}'].into_iter()).collect::>(), + [0xE9, 0xD83D, 0xDCA9]) +} + +#[test] +fn starts_with_in_unicode() { + assert!(!"├── Cargo.toml".starts_with("# ")); +} + +#[test] +fn starts_short_long() { + assert!(!"".starts_with("##")); + assert!(!"##".starts_with("####")); + assert!("####".starts_with("##")); + assert!(!"##ä".starts_with("####")); + assert!("####ä".starts_with("##")); + assert!(!"##".starts_with("####ä")); + assert!("##ä##".starts_with("##ä")); + + assert!("".starts_with("")); + assert!("ä".starts_with("")); + assert!("#ä".starts_with("")); + assert!("##ä".starts_with("")); + assert!("ä###".starts_with("")); + assert!("#ä##".starts_with("")); + assert!("##ä#".starts_with("")); +} + +#[test] +fn contains_weird_cases() { + assert!("* \t".contains(' ')); + assert!(!"* \t".contains('?')); + assert!(!"* \t".contains('\u{1F4A9}')); +} + +#[test] +fn trim_ws() { + assert_eq!(" \t a \t ".trim_left_matches(|c: char| c.is_whitespace()), + "a \t "); + assert_eq!(" \t a \t ".trim_right_matches(|c: char| c.is_whitespace()), + " \t a"); + assert_eq!(" \t a \t ".trim_matches(|c: char| c.is_whitespace()), + "a"); + assert_eq!(" \t \t ".trim_left_matches(|c: char| c.is_whitespace()), + ""); + assert_eq!(" \t \t ".trim_right_matches(|c: char| c.is_whitespace()), + ""); + assert_eq!(" \t \t ".trim_matches(|c: char| c.is_whitespace()), + ""); +} + +mod pattern { + use std::str::pattern::Pattern; + use std::str::pattern::{Searcher, ReverseSearcher}; + use std::str::pattern::SearchStep::{self, Match, Reject, Done}; + + macro_rules! 
make_test { + ($name:ident, $p:expr, $h:expr, [$($e:expr,)*]) => { + mod $name { + use std::str::pattern::SearchStep::{Match, Reject}; + use super::{cmp_search_to_vec}; + #[test] + fn fwd() { + cmp_search_to_vec(false, $p, $h, vec![$($e),*]); + } + #[test] + fn bwd() { + cmp_search_to_vec(true, $p, $h, vec![$($e),*]); + } + } + } + } + + fn cmp_search_to_vec<'a, P: Pattern<'a>>(rev: bool, pat: P, haystack: &'a str, + right: Vec) + where P::Searcher: ReverseSearcher<'a> + { + let mut searcher = pat.into_searcher(haystack); + let mut v = vec![]; + loop { + match if !rev {searcher.next()} else {searcher.next_back()} { + Match(a, b) => v.push(Match(a, b)), + Reject(a, b) => v.push(Reject(a, b)), + Done => break, + } + } + if rev { + v.reverse(); + } + + let mut first_index = 0; + let mut err = None; + + for (i, e) in right.iter().enumerate() { + match *e { + Match(a, b) | Reject(a, b) + if a <= b && a == first_index => { + first_index = b; + } + _ => { + err = Some(i); + break; + } + } + } + + if let Some(err) = err { + panic!("Input skipped range at {}", err); + } + + if first_index != haystack.len() { + panic!("Did not cover whole input"); + } + + assert_eq!(v, right); + } + + make_test!(str_searcher_ascii_haystack, "bb", "abbcbbd", [ + Reject(0, 1), + Match (1, 3), + Reject(3, 4), + Match (4, 6), + Reject(6, 7), + ]); + make_test!(str_searcher_empty_needle_ascii_haystack, "", "abbcbbd", [ + Match (0, 0), + Reject(0, 1), + Match (1, 1), + Reject(1, 2), + Match (2, 2), + Reject(2, 3), + Match (3, 3), + Reject(3, 4), + Match (4, 4), + Reject(4, 5), + Match (5, 5), + Reject(5, 6), + Match (6, 6), + Reject(6, 7), + Match (7, 7), + ]); + make_test!(str_searcher_mulibyte_haystack, " ", "├──", [ + Reject(0, 3), + Reject(3, 6), + Reject(6, 9), + ]); + make_test!(str_searcher_empty_needle_mulibyte_haystack, "", "├──", [ + Match (0, 0), + Reject(0, 3), + Match (3, 3), + Reject(3, 6), + Match (6, 6), + Reject(6, 9), + Match (9, 9), + ]); + 
make_test!(str_searcher_empty_needle_empty_haystack, "", "", [ + Match(0, 0), + ]); + make_test!(str_searcher_nonempty_needle_empty_haystack, "├", "", [ + ]); + make_test!(char_searcher_ascii_haystack, 'b', "abbcbbd", [ + Reject(0, 1), + Match (1, 2), + Match (2, 3), + Reject(3, 4), + Match (4, 5), + Match (5, 6), + Reject(6, 7), + ]); + make_test!(char_searcher_mulibyte_haystack, ' ', "├──", [ + Reject(0, 3), + Reject(3, 6), + Reject(6, 9), + ]); + make_test!(char_searcher_short_haystack, '\u{1F4A9}', "* \t", [ + Reject(0, 1), + Reject(1, 2), + Reject(2, 3), + ]); + +} + +macro_rules! generate_iterator_test { + { + $name:ident { + $( + ($($arg:expr),*) -> [$($t:tt)*]; + )* + } + with $fwd:expr, $bwd:expr; + } => { + #[test] + fn $name() { + $( + { + let res = vec![$($t)*]; + + let fwd_vec: Vec<_> = ($fwd)($($arg),*).collect(); + assert_eq!(fwd_vec, res); + + let mut bwd_vec: Vec<_> = ($bwd)($($arg),*).collect(); + bwd_vec.reverse(); + assert_eq!(bwd_vec, res); + } + )* + } + }; + { + $name:ident { + $( + ($($arg:expr),*) -> [$($t:tt)*]; + )* + } + with $fwd:expr; + } => { + #[test] + fn $name() { + $( + { + let res = vec![$($t)*]; + + let fwd_vec: Vec<_> = ($fwd)($($arg),*).collect(); + assert_eq!(fwd_vec, res); + } + )* + } + } +} + +generate_iterator_test! { + double_ended_split { + ("foo.bar.baz", '.') -> ["foo", "bar", "baz"]; + ("foo::bar::baz", "::") -> ["foo", "bar", "baz"]; + } + with str::split, str::rsplit; +} + +generate_iterator_test! { + double_ended_split_terminator { + ("foo;bar;baz;", ';') -> ["foo", "bar", "baz"]; + } + with str::split_terminator, str::rsplit_terminator; +} + +generate_iterator_test! { + double_ended_matches { + ("a1b2c3", char::is_numeric) -> ["1", "2", "3"]; + } + with str::matches, str::rmatches; +} + +generate_iterator_test! { + double_ended_match_indices { + ("a1b2c3", char::is_numeric) -> [(1, 2), (3, 4), (5, 6)]; + } + with str::match_indices, str::rmatch_indices; +} + +generate_iterator_test! 
{ + not_double_ended_splitn { + ("foo::bar::baz", 2, "::") -> ["foo", "bar::baz"]; + } + with str::splitn; +} + +generate_iterator_test! { + not_double_ended_rsplitn { + ("foo::bar::baz", 2, "::") -> ["baz", "foo::bar"]; + } + with str::rsplitn; +} + mod bench { use test::{Bencher, black_box}; @@ -1693,4 +2090,106 @@ malesuada sollicitudin quam eu fermentum."; assert!(haystack.contains(needle)); }) } + + macro_rules! make_test_inner { + ($s:ident, $code:expr, $name:ident, $str:expr) => { + #[bench] + fn $name(bencher: &mut Bencher) { + let mut $s = $str; + black_box(&mut $s); + bencher.iter(|| $code); + } + } + } + + macro_rules! make_test { + ($name:ident, $s:ident, $code:expr) => { + mod $name { + use test::Bencher; + use test::black_box; + + // Short strings: 65 bytes each + make_test_inner!($s, $code, short_ascii, + "Mary had a little lamb, Little lamb Mary had a littl lamb, lamb!"); + make_test_inner!($s, $code, short_mixed, + "ศไทย中华Việt Nam; Mary had a little lamb, Little lam!"); + make_test_inner!($s, $code, short_pile_of_poo, + "💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩!"); + make_test_inner!($s, $code, long_lorem_ipsum,"\ +Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \ +ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \ +eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \ +sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \ +tempus vel, gravida nec quam. + +In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \ +sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \ +diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \ +lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \ +eleifend felis. 
Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \ +interdum. Curabitur ut nisi justo. + +Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \ +mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \ +lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \ +est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \ +felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \ +ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \ +feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \ +Aliquam sit amet placerat lorem. + +Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \ +mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \ +Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \ +lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \ +suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \ +cursus accumsan. + +Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \ +feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \ +vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \ +leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. 
Nulla \ +malesuada sollicitudin quam eu fermentum!"); + } + } + } + + make_test!(chars_count, s, s.chars().count()); + + make_test!(contains_bang_str, s, s.contains("!")); + make_test!(contains_bang_char, s, s.contains('!')); + + make_test!(match_indices_a_str, s, s.match_indices("a").count()); + + make_test!(split_a_str, s, s.split("a").count()); + + make_test!(trim_ascii_char, s, { + use std::ascii::AsciiExt; + s.trim_matches(|c: char| c.is_ascii()) + }); + make_test!(trim_left_ascii_char, s, { + use std::ascii::AsciiExt; + s.trim_left_matches(|c: char| c.is_ascii()) + }); + make_test!(trim_right_ascii_char, s, { + use std::ascii::AsciiExt; + s.trim_right_matches(|c: char| c.is_ascii()) + }); + + make_test!(find_underscore_char, s, s.find('_')); + make_test!(rfind_underscore_char, s, s.rfind('_')); + make_test!(find_underscore_str, s, s.find("_")); + + make_test!(find_zzz_char, s, s.find('\u{1F4A4}')); + make_test!(rfind_zzz_char, s, s.rfind('\u{1F4A4}')); + make_test!(find_zzz_str, s, s.find("\u{1F4A4}")); + + make_test!(split_space_char, s, s.split(' ').count()); + make_test!(split_terminator_space_char, s, s.split_terminator(' ').count()); + + make_test!(splitn_space_char, s, s.splitn(10, ' ').count()); + make_test!(rsplitn_space_char, s, s.rsplitn(10, ' ').count()); + + make_test!(split_space_str, s, s.split(" ").count()); + make_test!(split_ad_str, s, s.split("ad").count()); } diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs index 471e3c5ea6736..5c8b6a774cd8b 100644 --- a/src/libcore/str/mod.rs +++ b/src/libcore/str/mod.rs @@ -17,6 +17,8 @@ #![doc(primitive = "str")] use self::OldSearcher::{TwoWay, TwoWayLong}; +use self::pattern::Pattern; +use self::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher}; use char::CharExt; use clone::Clone; @@ -34,100 +36,7 @@ use result::Result::{self, Ok, Err}; use slice::{self, SliceExt}; use usize; -pub use self::pattern::Pattern; -pub use self::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher, 
SearchStep}; - -mod pattern; - -macro_rules! delegate_iter { - (exact $te:ty : $ti:ty) => { - delegate_iter!{$te : $ti} - impl<'a> ExactSizeIterator for $ti { - #[inline] - fn len(&self) -> usize { - self.0.len() - } - } - }; - ($te:ty : $ti:ty) => { - #[stable(feature = "rust1", since = "1.0.0")] - impl<'a> Iterator for $ti { - type Item = $te; - - #[inline] - fn next(&mut self) -> Option<$te> { - self.0.next() - } - #[inline] - fn size_hint(&self) -> (usize, Option) { - self.0.size_hint() - } - } - #[stable(feature = "rust1", since = "1.0.0")] - impl<'a> DoubleEndedIterator for $ti { - #[inline] - fn next_back(&mut self) -> Option<$te> { - self.0.next_back() - } - } - }; - (pattern $te:ty : $ti:ty) => { - #[stable(feature = "rust1", since = "1.0.0")] - impl<'a, P: Pattern<'a>> Iterator for $ti { - type Item = $te; - - #[inline] - fn next(&mut self) -> Option<$te> { - self.0.next() - } - #[inline] - fn size_hint(&self) -> (usize, Option) { - self.0.size_hint() - } - } - #[stable(feature = "rust1", since = "1.0.0")] - impl<'a, P: Pattern<'a>> DoubleEndedIterator for $ti - where P::Searcher: DoubleEndedSearcher<'a> { - #[inline] - fn next_back(&mut self) -> Option<$te> { - self.0.next_back() - } - } - }; - (pattern forward $te:ty : $ti:ty) => { - #[stable(feature = "rust1", since = "1.0.0")] - impl<'a, P: Pattern<'a>> Iterator for $ti - where P::Searcher: DoubleEndedSearcher<'a> { - type Item = $te; - - #[inline] - fn next(&mut self) -> Option<$te> { - self.0.next() - } - #[inline] - fn size_hint(&self) -> (usize, Option) { - self.0.size_hint() - } - } - }; - (pattern reverse $te:ty : $ti:ty) => { - #[stable(feature = "rust1", since = "1.0.0")] - impl<'a, P: Pattern<'a>> Iterator for $ti - where P::Searcher: ReverseSearcher<'a> - { - type Item = $te; - - #[inline] - fn next(&mut self) -> Option<$te> { - self.0.next() - } - #[inline] - fn size_hint(&self) -> (usize, Option) { - self.0.size_hint() - } - } - }; -} +pub mod pattern; /// A trait to abstract the idea of 
creating a new instance of a type from a /// string. @@ -443,11 +352,9 @@ impl<'a> DoubleEndedIterator for CharIndices<'a> { #[stable(feature = "rust1", since = "1.0.0")] #[derive(Clone)] pub struct Bytes<'a>(Map, BytesDeref>); -delegate_iter!{exact u8 : Bytes<'a>} -/// A temporary fn new type that ensures that the `Bytes` iterator -/// is cloneable. -#[derive(Copy, Clone)] +/// A nameable, clonable fn type +#[derive(Clone)] struct BytesDeref; impl<'a> Fn<(&'a u8,)> for BytesDeref { @@ -473,58 +380,210 @@ impl<'a> FnOnce<(&'a u8,)> for BytesDeref { } } -/// An iterator over the substrings of a string, separated by `sep`. -struct CharSplits<'a, P: Pattern<'a>> { - /// The slice remaining to be iterated - start: usize, - end: usize, - matcher: P::Searcher, - /// Whether an empty string at the end is allowed - allow_trailing_empty: bool, - finished: bool, +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> Iterator for Bytes<'a> { + type Item = u8; + + #[inline] + fn next(&mut self) -> Option { + self.0.next() + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.0.size_hint() + } } -/// An iterator over the substrings of a string, separated by `sep`, -/// splitting at most `count` times. -struct CharSplitsN<'a, P: Pattern<'a>> { - iter: CharSplits<'a, P>, - /// The number of items remaining - count: usize, +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> DoubleEndedIterator for Bytes<'a> { + #[inline] + fn next_back(&mut self) -> Option { + self.0.next_back() + } } -/// An iterator over the substrings of a string, separated by a -/// pattern, in reverse order. 
-struct RCharSplits<'a, P: Pattern<'a>> { - /// The slice remaining to be iterated - start: usize, - end: usize, - matcher: P::Searcher, - /// Whether an empty string at the end of iteration is allowed - allow_final_empty: bool, - finished: bool, +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> ExactSizeIterator for Bytes<'a> { + #[inline] + fn len(&self) -> usize { + self.0.len() + } } -/// An iterator over the substrings of a string, separated by a -/// pattern, splitting at most `count` times, in reverse order. -struct RCharSplitsN<'a, P: Pattern<'a>> { - iter: RCharSplits<'a, P>, - /// The number of splits remaining - count: usize, +/// This macro generates a Clone impl for string pattern API +/// wrapper types of the form X<'a, P> +macro_rules! derive_pattern_clone { + (clone $t:ident with |$s:ident| $e:expr) => { + impl<'a, P: Pattern<'a>> Clone for $t<'a, P> + where P::Searcher: Clone + { + fn clone(&self) -> Self { + let $s = self; + $e + } + } + } } -/// An iterator over the lines of a string, separated by `\n`. -#[stable(feature = "rust1", since = "1.0.0")] -pub struct Lines<'a> { - inner: CharSplits<'a, char>, +/// This macro generates two public iterator structs +/// wrapping a private internal one that makes use of the `Pattern` API. 
+/// +/// For all patterns `P: Pattern<'a>` the following items will be +/// generated (generics omitted): +/// +/// struct $forward_iterator($internal_iterator); +/// struct $reverse_iterator($internal_iterator); +/// +/// impl Iterator for $forward_iterator +/// { /* internal ends up calling Searcher::next_match() */ } +/// +/// impl DoubleEndedIterator for $forward_iterator +/// where P::Searcher: DoubleEndedSearcher +/// { /* internal ends up calling Searcher::next_match_back() */ } +/// +/// impl Iterator for $reverse_iterator +/// where P::Searcher: ReverseSearcher +/// { /* internal ends up calling Searcher::next_match_back() */ } +/// +/// impl DoubleEndedIterator for $reverse_iterator +/// where P::Searcher: DoubleEndedSearcher +/// { /* internal ends up calling Searcher::next_match() */ } +/// +/// The internal one is defined outside the macro, and has almost the same +/// semantics as a DoubleEndedIterator by delegating to `pattern::Searcher` and +/// `pattern::ReverseSearcher` for both forward and reverse iteration. +/// +/// "Almost", because a `Searcher` and a `ReverseSearcher` for a given +/// `Pattern` might not return the same elements, so actually implementing +/// `DoubleEndedIterator` for it would be incorrect. +/// (See the docs in `str::pattern` for more details) +/// +/// However, the internal struct still represents a single ended iterator from +/// either end, and depending on pattern is also a valid double ended iterator, +/// so the two wrapper structs implement `Iterator` +/// and `DoubleEndedIterator` depending on the concrete pattern type, leading +/// to the complex impls seen above. +macro_rules! 
generate_pattern_iterators { + { + // Forward iterator + forward: + $(#[$forward_iterator_attribute:meta])* + struct $forward_iterator:ident; + + // Reverse iterator + reverse: + $(#[$reverse_iterator_attribute:meta])* + struct $reverse_iterator:ident; + + // Stability of all generated items + stability: + $(#[$common_stability_attribute:meta])* + + // Internal almost-iterator that is being delegated to + internal: + $internal_iterator:ident yielding ($iterty:ty); + + // Kind of delegation - either single ended or double ended + delegate $($t:tt)* + } => { + $(#[$forward_iterator_attribute])* + $(#[$common_stability_attribute])* + pub struct $forward_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>); + + $(#[$common_stability_attribute])* + impl<'a, P: Pattern<'a>> Iterator for $forward_iterator<'a, P> { + type Item = $iterty; + + #[inline] + fn next(&mut self) -> Option<$iterty> { + self.0.next() + } + } + + $(#[$common_stability_attribute])* + impl<'a, P: Pattern<'a>> Clone for $forward_iterator<'a, P> + where P::Searcher: Clone + { + fn clone(&self) -> Self { + $forward_iterator(self.0.clone()) + } + } + + $(#[$reverse_iterator_attribute])* + $(#[$common_stability_attribute])* + pub struct $reverse_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>); + + $(#[$common_stability_attribute])* + impl<'a, P: Pattern<'a>> Iterator for $reverse_iterator<'a, P> + where P::Searcher: ReverseSearcher<'a> + { + type Item = $iterty; + + #[inline] + fn next(&mut self) -> Option<$iterty> { + self.0.next_back() + } + } + + $(#[$common_stability_attribute])* + impl<'a, P: Pattern<'a>> Clone for $reverse_iterator<'a, P> + where P::Searcher: Clone + { + fn clone(&self) -> Self { + $reverse_iterator(self.0.clone()) + } + } + + generate_pattern_iterators!($($t)* with $(#[$common_stability_attribute])*, + $forward_iterator, + $reverse_iterator, $iterty); + }; + { + double ended; with $(#[$common_stability_attribute:meta])*, + $forward_iterator:ident, + 
$reverse_iterator:ident, $iterty:ty + } => { + $(#[$common_stability_attribute])* + impl<'a, P: Pattern<'a>> DoubleEndedIterator for $forward_iterator<'a, P> + where P::Searcher: DoubleEndedSearcher<'a> + { + #[inline] + fn next_back(&mut self) -> Option<$iterty> { + self.0.next_back() + } + } + + $(#[$common_stability_attribute])* + impl<'a, P: Pattern<'a>> DoubleEndedIterator for $reverse_iterator<'a, P> + where P::Searcher: DoubleEndedSearcher<'a> + { + #[inline] + fn next_back(&mut self) -> Option<$iterty> { + self.0.next() + } + } + }; + { + single ended; with $(#[$common_stability_attribute:meta])*, + $forward_iterator:ident, + $reverse_iterator:ident, $iterty:ty + } => {} } -/// An iterator over the lines of a string, separated by either `\n` or (`\r\n`). -#[stable(feature = "rust1", since = "1.0.0")] -pub struct LinesAny<'a> { - inner: Map, fn(&str) -> &str>, +derive_pattern_clone!{ + clone SplitInternal + with |s| SplitInternal { matcher: s.matcher.clone(), ..*s } +} +struct SplitInternal<'a, P: Pattern<'a>> { + start: usize, + end: usize, + matcher: P::Searcher, + allow_trailing_empty: bool, + finished: bool, } -impl<'a, P: Pattern<'a>> CharSplits<'a, P> { +impl<'a, P: Pattern<'a>> SplitInternal<'a, P> { #[inline] fn get_end(&mut self) -> Option<&'a str> { if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) { @@ -537,11 +596,6 @@ impl<'a, P: Pattern<'a>> CharSplits<'a, P> { None } } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a, P: Pattern<'a>> Iterator for CharSplits<'a, P> { - type Item = &'a str; #[inline] fn next(&mut self) -> Option<&'a str> { @@ -557,13 +611,11 @@ impl<'a, P: Pattern<'a>> Iterator for CharSplits<'a, P> { None => self.get_end(), } } -} -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a, P: Pattern<'a>> DoubleEndedIterator for CharSplits<'a, P> -where P::Searcher: DoubleEndedSearcher<'a> { #[inline] - fn next_back(&mut self) -> Option<&'a str> { + fn next_back(&mut self) -> Option<&'a str> 
+ where P::Searcher: ReverseSearcher<'a> + { if self.finished { return None } if !self.allow_trailing_empty { @@ -589,10 +641,45 @@ where P::Searcher: DoubleEndedSearcher<'a> { } } -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a, P: Pattern<'a>> Iterator for CharSplitsN<'a, P> { - type Item = &'a str; +generate_pattern_iterators! { + forward: + /// Return type of `str::split()` + struct Split; + reverse: + /// Return type of `str::rsplit()` + struct RSplit; + stability: + #[stable(feature = "rust1", since = "1.0.0")] + internal: + SplitInternal yielding (&'a str); + delegate double ended; +} + +generate_pattern_iterators! { + forward: + /// Return type of `str::split_terminator()` + struct SplitTerminator; + reverse: + /// Return type of `str::rsplit_terminator()` + struct RSplitTerminator; + stability: + #[stable(feature = "rust1", since = "1.0.0")] + internal: + SplitInternal yielding (&'a str); + delegate double ended; +} +derive_pattern_clone!{ + clone SplitNInternal + with |s| SplitNInternal { iter: s.iter.clone(), ..*s } +} +struct SplitNInternal<'a, P: Pattern<'a>> { + iter: SplitInternal<'a, P>, + /// The number of splits remaining + count: usize, +} + +impl<'a, P: Pattern<'a>> SplitNInternal<'a, P> { #[inline] fn next(&mut self) -> Option<&'a str> { match self.count { @@ -601,58 +688,190 @@ impl<'a, P: Pattern<'a>> Iterator for CharSplitsN<'a, P> { _ => { self.count -= 1; self.iter.next() } } } -} -impl<'a, P: Pattern<'a>> RCharSplits<'a, P> { #[inline] - fn get_remainder(&mut self) -> Option<&'a str> { - if !self.finished && (self.allow_final_empty || self.end - self.start > 0) { - self.finished = true; - unsafe { - let string = self.matcher.haystack().slice_unchecked(self.start, self.end); - Some(string) - } - } else { - None + fn next_back(&mut self) -> Option<&'a str> + where P::Searcher: ReverseSearcher<'a> + { + match self.count { + 0 => None, + 1 => { self.count = 0; self.iter.get_end() } + _ => { self.count -= 1; self.iter.next_back() } } } 
} +generate_pattern_iterators! { + forward: + /// Return type of `str::splitn()` + struct SplitN; + reverse: + /// Return type of `str::rsplitn()` + struct RSplitN; + stability: + #[stable(feature = "rust1", since = "1.0.0")] + internal: + SplitNInternal yielding (&'a str); + delegate single ended; +} + +derive_pattern_clone!{ + clone MatchIndicesInternal + with |s| MatchIndicesInternal(s.0.clone()) +} +struct MatchIndicesInternal<'a, P: Pattern<'a>>(P::Searcher); + +impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> { + #[inline] + fn next(&mut self) -> Option<(usize, usize)> { + self.0.next_match() + } + + #[inline] + fn next_back(&mut self) -> Option<(usize, usize)> + where P::Searcher: ReverseSearcher<'a> + { + self.0.next_match_back() + } +} + +generate_pattern_iterators! { + forward: + /// Return type of `str::match_indices()` + struct MatchIndices; + reverse: + /// Return type of `str::rmatch_indices()` + struct RMatchIndices; + stability: + #[unstable(feature = "core", + reason = "type may be removed or have its iterator impl changed")] + internal: + MatchIndicesInternal yielding ((usize, usize)); + delegate double ended; +} + +derive_pattern_clone!{ + clone MatchesInternal + with |s| MatchesInternal(s.0.clone()) +} +struct MatchesInternal<'a, P: Pattern<'a>>(P::Searcher); + +impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> { + #[inline] + fn next(&mut self) -> Option<&'a str> { + self.0.next_match().map(|(a, b)| unsafe { + // Indices are known to be on utf8 boundaries + self.0.haystack().slice_unchecked(a, b) + }) + } + + #[inline] + fn next_back(&mut self) -> Option<&'a str> + where P::Searcher: ReverseSearcher<'a> + { + self.0.next_match_back().map(|(a, b)| unsafe { + // Indices are known to be on utf8 boundaries + self.0.haystack().slice_unchecked(a, b) + }) + } +} + +generate_pattern_iterators! 
{ + forward: + /// Return type of `str::matches()` + struct Matches; + reverse: + /// Return type of `str::rmatches()` + struct RMatches; + stability: + #[unstable(feature = "core", reason = "type got recently added")] + internal: + MatchesInternal yielding (&'a str); + delegate double ended; +} + +/// Return type of `str::lines()` +#[stable(feature = "rust1", since = "1.0.0")] +#[derive(Clone)] +pub struct Lines<'a>(SplitTerminator<'a, char>); + #[stable(feature = "rust1", since = "1.0.0")] -impl<'a, P: Pattern<'a>> Iterator for RCharSplits<'a, P> - where P::Searcher: ReverseSearcher<'a> -{ +impl<'a> Iterator for Lines<'a> { type Item = &'a str; #[inline] fn next(&mut self) -> Option<&'a str> { - if self.finished { return None } + self.0.next() + } - let haystack = self.matcher.haystack(); - match self.matcher.next_match_back() { - Some((a, b)) => unsafe { - let elt = haystack.slice_unchecked(b, self.end); - self.end = a; - Some(elt) - }, - None => self.get_remainder(), - } + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.0.size_hint() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> DoubleEndedIterator for Lines<'a> { + #[inline] + fn next_back(&mut self) -> Option<&'a str> { + self.0.next_back() + } +} + +/// Return type of `str::lines_any()` +#[stable(feature = "rust1", since = "1.0.0")] +#[derive(Clone)] +pub struct LinesAny<'a>(Map, LinesAnyMap>); + +/// A nameable, clonable fn type +#[derive(Clone)] +struct LinesAnyMap; + +impl<'a> Fn<(&'a str,)> for LinesAnyMap { + #[inline] + extern "rust-call" fn call(&self, (line,): (&'a str,)) -> &'a str { + let l = line.len(); + if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. 
l - 1] } + else { line } + } +} + +impl<'a> FnMut<(&'a str,)> for LinesAnyMap { + #[inline] + extern "rust-call" fn call_mut(&mut self, (line,): (&'a str,)) -> &'a str { + Fn::call(&*self, (line,)) + } +} + +impl<'a> FnOnce<(&'a str,)> for LinesAnyMap { + type Output = &'a str; + + #[inline] + extern "rust-call" fn call_once(self, (line,): (&'a str,)) -> &'a str { + Fn::call(&self, (line,)) } } #[stable(feature = "rust1", since = "1.0.0")] -impl<'a, P: Pattern<'a>> Iterator for RCharSplitsN<'a, P> - where P::Searcher: ReverseSearcher<'a> -{ +impl<'a> Iterator for LinesAny<'a> { type Item = &'a str; #[inline] fn next(&mut self) -> Option<&'a str> { - match self.count { - 0 => None, - 1 => { self.count -= 1; self.iter.get_remainder() } - _ => { self.count -= 1; self.iter.next() } - } + self.0.next() + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.0.size_hint() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> DoubleEndedIterator for LinesAny<'a> { + #[inline] + fn next_back(&mut self) -> Option<&'a str> { + self.0.next_back() } } @@ -938,22 +1157,6 @@ struct OldMatchIndices<'a, 'b> { searcher: OldSearcher } -// FIXME: #21637 Prevents a Clone impl -/// An iterator over the start and end indices of the matches of a -/// substring within a larger string -#[unstable(feature = "core", reason = "type may be removed")] -pub struct MatchIndices<'a, P: Pattern<'a>>(P::Searcher); - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a, P: Pattern<'a>> Iterator for MatchIndices<'a, P> { - type Item = (usize, usize); - - #[inline] - fn next(&mut self) -> Option<(usize, usize)> { - self.0.next_match() - } -} - impl<'a, 'b> OldMatchIndices<'a, 'b> { #[inline] #[allow(dead_code)] @@ -1291,31 +1494,6 @@ impl<'a, S: ?Sized> Str for &'a S where S: Str { fn as_slice(&self) -> &str { Str::as_slice(*self) } } -/// Return type of `str::split` -#[stable(feature = "rust1", since = "1.0.0")] -pub struct Split<'a, P: Pattern<'a>>(CharSplits<'a, P>); 
-delegate_iter!{pattern &'a str : Split<'a, P>} - -/// Return type of `str::split_terminator` -#[stable(feature = "rust1", since = "1.0.0")] -pub struct SplitTerminator<'a, P: Pattern<'a>>(CharSplits<'a, P>); -delegate_iter!{pattern &'a str : SplitTerminator<'a, P>} - -/// Return type of `str::splitn` -#[stable(feature = "rust1", since = "1.0.0")] -pub struct SplitN<'a, P: Pattern<'a>>(CharSplitsN<'a, P>); -delegate_iter!{pattern forward &'a str : SplitN<'a, P>} - -/// Return type of `str::rsplit` -#[stable(feature = "rust1", since = "1.0.0")] -pub struct RSplit<'a, P: Pattern<'a>>(RCharSplits<'a, P>); -delegate_iter!{pattern reverse &'a str : RSplit<'a, P>} - -/// Return type of `str::rsplitn` -#[stable(feature = "rust1", since = "1.0.0")] -pub struct RSplitN<'a, P: Pattern<'a>>(RCharSplitsN<'a, P>); -delegate_iter!{pattern reverse &'a str : RSplitN<'a, P>} - /// Methods for string slices #[allow(missing_docs)] pub trait StrExt { @@ -1328,13 +1506,20 @@ pub trait StrExt { fn bytes<'a>(&'a self) -> Bytes<'a>; fn char_indices<'a>(&'a self) -> CharIndices<'a>; fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P>; - fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P>; - fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>; fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P> where P::Searcher: ReverseSearcher<'a>; + fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P>; fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> where P::Searcher: ReverseSearcher<'a>; + fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>; + fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P> + where P::Searcher: ReverseSearcher<'a>; + fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P>; + fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P> + where P::Searcher: 
ReverseSearcher<'a>; fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P>; + fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P> + where P::Searcher: ReverseSearcher<'a>; fn lines<'a>(&'a self) -> Lines<'a>; fn lines_any<'a>(&'a self) -> LinesAny<'a>; fn char_len(&self) -> usize; @@ -1401,7 +1586,7 @@ impl StrExt for str { #[inline] fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> { - Split(CharSplits { + Split(SplitInternal { start: 0, end: self.len(), matcher: pat.into_searcher(self), @@ -1410,64 +1595,74 @@ impl StrExt for str { }) } + #[inline] + fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P> + where P::Searcher: ReverseSearcher<'a> + { + RSplit(self.split(pat).0) + } + #[inline] fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> { - SplitN(CharSplitsN { + SplitN(SplitNInternal { iter: self.split(pat).0, count: count, }) } + #[inline] + fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> + where P::Searcher: ReverseSearcher<'a> + { + RSplitN(self.splitn(count, pat).0) + } + #[inline] fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> { - SplitTerminator(CharSplits { + SplitTerminator(SplitInternal { allow_trailing_empty: false, ..self.split(pat).0 }) } #[inline] - fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P> + fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P> where P::Searcher: ReverseSearcher<'a> { - RSplit(RCharSplits { - start: 0, - end: self.len(), - matcher: pat.into_searcher(self), - allow_final_empty: true, - finished: false, - }) + RSplitTerminator(self.split_terminator(pat).0) } #[inline] - fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> + fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> { + Matches(MatchesInternal(pat.into_searcher(self))) + } + + #[inline] + fn rmatches<'a, 
P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P> where P::Searcher: ReverseSearcher<'a> { - RSplitN(RCharSplitsN { - iter: self.rsplit(pat).0, - count: count, - }) + RMatches(self.matches(pat).0) } #[inline] fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> { - MatchIndices(pat.into_searcher(self)) + MatchIndices(MatchIndicesInternal(pat.into_searcher(self))) } + #[inline] + fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P> + where P::Searcher: ReverseSearcher<'a> + { + RMatchIndices(self.match_indices(pat).0) + } #[inline] fn lines(&self) -> Lines { - Lines { inner: self.split_terminator('\n').0 } + Lines(self.split_terminator('\n')) } + #[inline] fn lines_any(&self) -> LinesAny { - fn f(line: &str) -> &str { - let l = line.len(); - if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] } - else { line } - } - - let f: fn(&str) -> &str = f; // coerce to fn pointer - LinesAny { inner: self.lines().map(f) } + LinesAny(self.lines().map(LinesAnyMap)) } #[inline] @@ -1708,35 +1903,3 @@ impl<'a> Default for &'a str { #[stable(feature = "rust1", since = "1.0.0")] fn default() -> &'a str { "" } } - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a> Iterator for Lines<'a> { - type Item = &'a str; - - #[inline] - fn next(&mut self) -> Option<&'a str> { self.inner.next() } - #[inline] - fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a> DoubleEndedIterator for Lines<'a> { - #[inline] - fn next_back(&mut self) -> Option<&'a str> { self.inner.next_back() } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a> Iterator for LinesAny<'a> { - type Item = &'a str; - - #[inline] - fn next(&mut self) -> Option<&'a str> { self.inner.next() } - #[inline] - fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a> DoubleEndedIterator for LinesAny<'a> { - 
#[inline] - fn next_back(&mut self) -> Option<&'a str> { self.inner.next_back() } -} diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs index 922ab2c14a6b7..9f701e1b03181 100644 --- a/src/libcore/str/pattern.rs +++ b/src/libcore/str/pattern.rs @@ -8,6 +8,11 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +//! The string Pattern API. +//! +//! For more details, see the traits `Pattern`, `Searcher`, +//! `ReverseSearcher` and `DoubleEndedSearcher`. + use prelude::*; // Pattern @@ -223,7 +228,9 @@ pub unsafe trait ReverseSearcher<'a>: Searcher<'a> { /// `"[aa]a"` or `"a[aa]"`, depending from which side it is searched. pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {} +///////////////////////////////////////////////////////////////////////////// // Impl for a CharEq wrapper +///////////////////////////////////////////////////////////////////////////// #[doc(hidden)] trait CharEq { @@ -261,6 +268,7 @@ impl<'a> CharEq for &'a [char] { struct CharEqPattern(C); +#[derive(Clone)] struct CharEqSearcher<'a, C: CharEq> { char_eq: C, haystack: &'a str, @@ -330,17 +338,27 @@ unsafe impl<'a, C: CharEq> ReverseSearcher<'a> for CharEqSearcher<'a, C> { impl<'a, C: CharEq> DoubleEndedSearcher<'a> for CharEqSearcher<'a, C> {} +///////////////////////////////////////////////////////////////////////////// // Impl for &str +///////////////////////////////////////////////////////////////////////////// // Todo: Optimize the naive implementation here +/// Associated type for `<&str as Pattern<'a>>::Searcher`. 
#[derive(Clone)] -struct StrSearcher<'a, 'b> { +pub struct StrSearcher<'a, 'b> { haystack: &'a str, needle: &'b str, start: usize, end: usize, - done: bool, + state: State, +} + +#[derive(Clone, PartialEq)] +enum State { Done, NotDone, Reject(usize, usize) } +impl State { + #[inline] fn done(&self) -> bool { *self == State::Done } + #[inline] fn take(&mut self) -> State { ::mem::replace(self, State::NotDone) } } /// Non-allocating substring search. @@ -357,7 +375,7 @@ impl<'a, 'b> Pattern<'a> for &'b str { needle: self, start: 0, end: haystack.len(), - done: false, + state: State::NotDone, } } } @@ -374,8 +392,9 @@ unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> { |m: &mut StrSearcher| { // Forward step for empty needle let current_start = m.start; - if !m.done { + if !m.state.done() { m.start = m.haystack.char_range_at(current_start).next; + m.state = State::Reject(current_start, m.start); } SearchStep::Match(current_start, current_start) }, @@ -404,8 +423,9 @@ unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> { |m: &mut StrSearcher| { // Backward step for empty needle let current_end = m.end; - if !m.done { + if !m.state.done() { m.end = m.haystack.char_range_at_reverse(current_end).next; + m.state = State::Reject(m.end, current_end); } SearchStep::Match(current_end, current_end) }, @@ -435,137 +455,178 @@ fn str_search_step(mut m: &mut StrSearcher, where F: FnOnce(&mut StrSearcher) -> SearchStep, G: FnOnce(&mut StrSearcher) -> SearchStep { - if m.done { + if m.state.done() { SearchStep::Done } else if m.needle.len() == 0 && m.start <= m.end { // Case for needle == "" - if m.start == m.end { - m.done = true; + if let State::Reject(a, b) = m.state.take() { + SearchStep::Reject(a, b) + } else { + if m.start == m.end { + m.state = State::Done; + } + empty_needle_step(&mut m) } - empty_needle_step(&mut m) } else if m.start + m.needle.len() <= m.end { // Case for needle != "" nonempty_needle_step(&mut m) } else if m.start < m.end { // Remaining 
slice shorter than needle, reject it - m.done = true; + m.state = State::Done; SearchStep::Reject(m.start, m.end) } else { - m.done = true; + m.state = State::Done; SearchStep::Done } } -macro_rules! char_eq_pattern_impl { - ($wrapper:ty, $wrapper_ident:ident) => { - fn into_searcher(self, haystack: &'a str) -> $wrapper { - $wrapper_ident(CharEqPattern(self).into_searcher(haystack)) +///////////////////////////////////////////////////////////////////////////// + +macro_rules! pattern_methods { + ($t:ty, $pmap:expr, $smap:expr) => { + type Searcher = $t; + + #[inline] + fn into_searcher(self, haystack: &'a str) -> $t { + ($smap)(($pmap)(self).into_searcher(haystack)) } + #[inline] fn is_contained_in(self, haystack: &'a str) -> bool { - CharEqPattern(self).is_contained_in(haystack) + ($pmap)(self).is_contained_in(haystack) } + #[inline] fn is_prefix_of(self, haystack: &'a str) -> bool { - CharEqPattern(self).is_prefix_of(haystack) + ($pmap)(self).is_prefix_of(haystack) } + #[inline] fn is_suffix_of(self, haystack: &'a str) -> bool - where $wrapper: ReverseSearcher<'a> + where $t: ReverseSearcher<'a> { - CharEqPattern(self).is_suffix_of(haystack) + ($pmap)(self).is_suffix_of(haystack) } } } -// Pattern for char - -impl<'a> Pattern<'a> for char { - type Searcher = CharSearcher<'a>; - char_eq_pattern_impl!(CharSearcher<'a>, CharSearcher); +macro_rules! 
searcher_methods { + (forward) => { + #[inline] + fn haystack(&self) -> &'a str { + self.0.haystack() + } + #[inline] + fn next(&mut self) -> SearchStep { + self.0.next() + } + #[inline] + fn next_match(&mut self) -> Option<(usize, usize)> { + self.0.next_match() + } + #[inline] + fn next_reject(&mut self) -> Option<(usize, usize)> { + self.0.next_reject() + } + }; + (reverse) => { + #[inline] + fn next_back(&mut self) -> SearchStep { + self.0.next_back() + } + #[inline] + fn next_match_back(&mut self) -> Option<(usize, usize)> { + self.0.next_match_back() + } + #[inline] + fn next_reject_back(&mut self) -> Option<(usize, usize)> { + self.0.next_reject_back() + } + } } -pub struct CharSearcher<'a>(CharEqSearcher<'a, char>); +///////////////////////////////////////////////////////////////////////////// +// Impl for char +///////////////////////////////////////////////////////////////////////////// + +/// Associated type for `>::Searcher`. +#[derive(Clone)] +pub struct CharSearcher<'a>( as Pattern<'a>>::Searcher); unsafe impl<'a> Searcher<'a> for CharSearcher<'a> { - #[inline] - fn haystack(&self) -> &'a str { self.0.haystack() } - #[inline] - fn next(&mut self) -> SearchStep { self.0.next() } + searcher_methods!(forward); } + unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> { - #[inline] - fn next_back(&mut self) -> SearchStep { self.0.next_back() } + searcher_methods!(reverse); } -impl<'a> DoubleEndedSearcher<'a> for CharSearcher<'a> {} -// Pattern for &[char] +impl<'a> DoubleEndedSearcher<'a> for CharSearcher<'a> {} -impl<'a, 'b> Pattern<'a> for &'b [char] { - type Searcher = CharSliceSearcher<'a, 'b>; - char_eq_pattern_impl!(CharSliceSearcher<'a, 'b>, CharSliceSearcher); +/// Searches for chars that are equal to a given char +impl<'a> Pattern<'a> for char { + pattern_methods!(CharSearcher<'a>, CharEqPattern, CharSearcher); } -pub struct CharSliceSearcher<'a, 'b>(CharEqSearcher<'a, &'b [char]>); 
+///////////////////////////////////////////////////////////////////////////// +// Impl for &[char] +///////////////////////////////////////////////////////////////////////////// + +// Todo: Change / Remove due to ambiguity in meaning. + +/// Associated type for `<&[char] as Pattern<'a>>::Searcher`. +#[derive(Clone)] +pub struct CharSliceSearcher<'a, 'b>( as Pattern<'a>>::Searcher); unsafe impl<'a, 'b> Searcher<'a> for CharSliceSearcher<'a, 'b> { - #[inline] - fn haystack(&self) -> &'a str { self.0.haystack() } - #[inline] - fn next(&mut self) -> SearchStep { self.0.next() } + searcher_methods!(forward); } + unsafe impl<'a, 'b> ReverseSearcher<'a> for CharSliceSearcher<'a, 'b> { - #[inline] - fn next_back(&mut self) -> SearchStep { self.0.next_back() } + searcher_methods!(reverse); } -impl<'a, 'b> DoubleEndedSearcher<'a> for CharSliceSearcher<'a, 'b> {} -// Pattern for predicates +impl<'a, 'b> DoubleEndedSearcher<'a> for CharSliceSearcher<'a, 'b> {} -impl<'a, F: FnMut(char) -> bool> Pattern<'a> for F { - type Searcher = CharPredSearcher<'a, F>; - char_eq_pattern_impl!(CharPredSearcher<'a, F>, CharPredSearcher); +/// Searches for chars that are equal to any of the chars in the array +impl<'a, 'b> Pattern<'a> for &'b [char] { + pattern_methods!(CharSliceSearcher<'a, 'b>, CharEqPattern, CharSliceSearcher); } -pub struct CharPredSearcher<'a, F: FnMut(char) -> bool>(CharEqSearcher<'a, F>); +///////////////////////////////////////////////////////////////////////////// +// Impl for F: FnMut(char) -> bool +///////////////////////////////////////////////////////////////////////////// + +/// Associated type for `>::Searcher`. 
+#[derive(Clone)] +pub struct CharPredicateSearcher<'a, F>( as Pattern<'a>>::Searcher) + where F: FnMut(char) -> bool; -unsafe impl<'a, F> Searcher<'a> for CharPredSearcher<'a, F> +unsafe impl<'a, F> Searcher<'a> for CharPredicateSearcher<'a, F> where F: FnMut(char) -> bool { - #[inline] - fn haystack(&self) -> &'a str { self.0.haystack() } - #[inline] - fn next(&mut self) -> SearchStep { self.0.next() } + searcher_methods!(forward); } -unsafe impl<'a, F> ReverseSearcher<'a> for CharPredSearcher<'a, F> + +unsafe impl<'a, F> ReverseSearcher<'a> for CharPredicateSearcher<'a, F> where F: FnMut(char) -> bool { - #[inline] - fn next_back(&mut self) -> SearchStep { self.0.next_back() } + searcher_methods!(reverse); } -impl<'a, F> DoubleEndedSearcher<'a> for CharPredSearcher<'a, F> - where F: FnMut(char) -> bool -{} -// Pattern for &&str +impl<'a, F> DoubleEndedSearcher<'a> for CharPredicateSearcher<'a, F> + where F: FnMut(char) -> bool {} +/// Searches for chars that match the given predicate +impl<'a, F> Pattern<'a> for F where F: FnMut(char) -> bool { + pattern_methods!(CharPredicateSearcher<'a, F>, CharEqPattern, CharPredicateSearcher); +} + +///////////////////////////////////////////////////////////////////////////// +// Impl for &&str +///////////////////////////////////////////////////////////////////////////// + +/// Delegates to the `&str` impl. 
impl<'a, 'b> Pattern<'a> for &'b &'b str { - type Searcher = <&'b str as Pattern<'a>>::Searcher; - #[inline] - fn into_searcher(self, haystack: &'a str) - -> <&'b str as Pattern<'a>>::Searcher { - (*self).into_searcher(haystack) - } - #[inline] - fn is_contained_in(self, haystack: &'a str) -> bool { - (*self).is_contained_in(haystack) - } - #[inline] - fn is_prefix_of(self, haystack: &'a str) -> bool { - (*self).is_prefix_of(haystack) - } - #[inline] - fn is_suffix_of(self, haystack: &'a str) -> bool { - (*self).is_suffix_of(haystack) - } + pattern_methods!(StrSearcher<'a, 'b>, |&s| s, |s| s); } diff --git a/src/libcoretest/str.rs b/src/libcoretest/str.rs index 5fce527d9798d..b7d9ba4463d98 100644 --- a/src/libcoretest/str.rs +++ b/src/libcoretest/str.rs @@ -8,378 +8,4 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#[test] -fn test_pattern_deref_forward() { - let data = "aabcdaa"; - assert!(data.contains("bcd")); - assert!(data.contains(&"bcd")); - assert!(data.contains(&"bcd".to_string())); -} - -#[test] -fn test_empty_match_indices() { - let data = "aä中!"; - let vec: Vec<_> = data.match_indices("").collect(); - assert_eq!(vec, [(0, 0), (1, 1), (3, 3), (6, 6), (7, 7)]); -} - -#[test] -fn test_bool_from_str() { - assert_eq!("true".parse().ok(), Some(true)); - assert_eq!("false".parse().ok(), Some(false)); - assert_eq!("not even a boolean".parse::().ok(), None); -} - -fn check_contains_all_substrings(s: &str) { - assert!(s.contains("")); - for i in 0..s.len() { - for j in i+1..s.len() + 1 { - assert!(s.contains(&s[i..j])); - } - } -} - -#[test] -fn strslice_issue_16589() { - assert!("bananas".contains("nana")); - - // prior to the fix for #16589, x.contains("abcdabcd") returned false - // test all substrings for good measure - check_contains_all_substrings("012345678901234567890123456789bcdabcdabcd"); -} - -#[test] -fn strslice_issue_16878() { - assert!(!"1234567ah012345678901ah".contains("hah")); - 
assert!(!"00abc01234567890123456789abc".contains("bcabc")); -} - - -#[test] -fn test_strslice_contains() { - let x = "There are moments, Jeeves, when one asks oneself, 'Do trousers matter?'"; - check_contains_all_substrings(x); -} - -#[test] -fn test_rsplitn_char_iterator() { - let data = "\nMäry häd ä little lämb\nLittle lämb\n"; - - let mut split: Vec<&str> = data.rsplitn(4, ' ').collect(); - split.reverse(); - assert_eq!(split, ["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]); - - let mut split: Vec<&str> = data.rsplitn(4, |c: char| c == ' ').collect(); - split.reverse(); - assert_eq!(split, ["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]); - - // Unicode - let mut split: Vec<&str> = data.rsplitn(4, 'ä').collect(); - split.reverse(); - assert_eq!(split, ["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]); - - let mut split: Vec<&str> = data.rsplitn(4, |c: char| c == 'ä').collect(); - split.reverse(); - assert_eq!(split, ["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]); -} - -#[test] -fn test_split_char_iterator() { - let data = "\nMäry häd ä little lämb\nLittle lämb\n"; - - let split: Vec<&str> = data.split(' ').collect(); - assert_eq!( split, ["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]); - - let mut rsplit: Vec<&str> = data.split(' ').rev().collect(); - rsplit.reverse(); - assert_eq!(rsplit, ["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]); - - let split: Vec<&str> = data.split(|c: char| c == ' ').collect(); - assert_eq!( split, ["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]); - - let mut rsplit: Vec<&str> = data.split(|c: char| c == ' ').rev().collect(); - rsplit.reverse(); - assert_eq!(rsplit, ["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]); - - // Unicode - let split: Vec<&str> = data.split('ä').collect(); - assert_eq!( split, ["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]); - - let mut rsplit: Vec<&str> = data.split('ä').rev().collect(); - rsplit.reverse(); - assert_eq!(rsplit, 
["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]); - - let split: Vec<&str> = data.split(|c: char| c == 'ä').collect(); - assert_eq!( split, ["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]); - - let mut rsplit: Vec<&str> = data.split(|c: char| c == 'ä').rev().collect(); - rsplit.reverse(); - assert_eq!(rsplit, ["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]); -} - -#[test] -fn test_rev_split_char_iterator_no_trailing() { - let data = "\nMäry häd ä little lämb\nLittle lämb\n"; - - let mut split: Vec<&str> = data.split('\n').rev().collect(); - split.reverse(); - assert_eq!(split, ["", "Märy häd ä little lämb", "Little lämb", ""]); - - let mut split: Vec<&str> = data.split_terminator('\n').rev().collect(); - split.reverse(); - assert_eq!(split, ["", "Märy häd ä little lämb", "Little lämb"]); -} - -#[test] -fn test_utf16_code_units() { - use unicode::str::Utf16Encoder; - assert_eq!(Utf16Encoder::new(vec!['é', '\u{1F4A9}'].into_iter()).collect::>(), - [0xE9, 0xD83D, 0xDCA9]) -} - -#[test] -fn starts_with_in_unicode() { - assert!(!"├── Cargo.toml".starts_with("# ")); -} - -#[test] -fn starts_short_long() { - assert!(!"".starts_with("##")); - assert!(!"##".starts_with("####")); - assert!("####".starts_with("##")); - assert!(!"##ä".starts_with("####")); - assert!("####ä".starts_with("##")); - assert!(!"##".starts_with("####ä")); - assert!("##ä##".starts_with("##ä")); - - assert!("".starts_with("")); - assert!("ä".starts_with("")); - assert!("#ä".starts_with("")); - assert!("##ä".starts_with("")); - assert!("ä###".starts_with("")); - assert!("#ä##".starts_with("")); - assert!("##ä#".starts_with("")); -} - -#[test] -fn contains_weird_cases() { - assert!("* \t".contains(' ')); - assert!(!"* \t".contains('?')); - assert!(!"* \t".contains('\u{1F4A9}')); -} - -#[test] -fn trim_ws() { - assert_eq!(" \t a \t ".trim_left_matches(|c: char| c.is_whitespace()), - "a \t "); - assert_eq!(" \t a \t ".trim_right_matches(|c: char| c.is_whitespace()), - " \t 
a"); - assert_eq!(" \t a \t ".trim_matches(|c: char| c.is_whitespace()), - "a"); - assert_eq!(" \t \t ".trim_left_matches(|c: char| c.is_whitespace()), - ""); - assert_eq!(" \t \t ".trim_right_matches(|c: char| c.is_whitespace()), - ""); - assert_eq!(" \t \t ".trim_matches(|c: char| c.is_whitespace()), - ""); -} - -mod pattern { - use std::str::Pattern; - use std::str::{Searcher, ReverseSearcher}; - use std::str::SearchStep::{self, Match, Reject, Done}; - - macro_rules! make_test { - ($name:ident, $p:expr, $h:expr, [$($e:expr,)*]) => { - mod $name { - use std::str::SearchStep::{Match, Reject}; - use super::{cmp_search_to_vec}; - #[test] - fn fwd() { - cmp_search_to_vec(false, $p, $h, vec![$($e),*]); - } - #[test] - fn bwd() { - cmp_search_to_vec(true, $p, $h, vec![$($e),*]); - } - } - } - } - - fn cmp_search_to_vec<'a, P: Pattern<'a>>(rev: bool, pat: P, haystack: &'a str, - right: Vec) - where P::Searcher: ReverseSearcher<'a> - { - let mut searcher = pat.into_searcher(haystack); - let mut v = vec![]; - loop { - match if !rev {searcher.next()} else {searcher.next_back()} { - Match(a, b) => v.push(Match(a, b)), - Reject(a, b) => v.push(Reject(a, b)), - Done => break, - } - } - if rev { - v.reverse(); - } - assert_eq!(v, right); - } - - make_test!(str_searcher_ascii_haystack, "bb", "abbcbbd", [ - Reject(0, 1), - Match (1, 3), - Reject(3, 4), - Match (4, 6), - Reject(6, 7), - ]); - make_test!(str_searcher_empty_needle_ascii_haystack, "", "abbcbbd", [ - Match(0, 0), - Match(1, 1), - Match(2, 2), - Match(3, 3), - Match(4, 4), - Match(5, 5), - Match(6, 6), - Match(7, 7), - ]); - make_test!(str_searcher_mulibyte_haystack, " ", "├──", [ - Reject(0, 3), - Reject(3, 6), - Reject(6, 9), - ]); - make_test!(str_searcher_empty_needle_mulibyte_haystack, "", "├──", [ - Match(0, 0), - Match(3, 3), - Match(6, 6), - Match(9, 9), - ]); - make_test!(str_searcher_empty_needle_empty_haystack, "", "", [ - Match(0, 0), - ]); - make_test!(str_searcher_nonempty_needle_empty_haystack, "├", "", 
[ - ]); - make_test!(char_searcher_ascii_haystack, 'b', "abbcbbd", [ - Reject(0, 1), - Match (1, 2), - Match (2, 3), - Reject(3, 4), - Match (4, 5), - Match (5, 6), - Reject(6, 7), - ]); - make_test!(char_searcher_mulibyte_haystack, ' ', "├──", [ - Reject(0, 3), - Reject(3, 6), - Reject(6, 9), - ]); - make_test!(char_searcher_short_haystack, '\u{1F4A9}', "* \t", [ - Reject(0, 1), - Reject(1, 2), - Reject(2, 3), - ]); - -} - -mod bench { - macro_rules! make_test_inner { - ($s:ident, $code:expr, $name:ident, $str:expr) => { - #[bench] - fn $name(bencher: &mut Bencher) { - let mut $s = $str; - black_box(&mut $s); - bencher.iter(|| $code); - } - } - } - - macro_rules! make_test { - ($name:ident, $s:ident, $code:expr) => { - mod $name { - use test::Bencher; - use test::black_box; - - // Short strings: 65 bytes each - make_test_inner!($s, $code, short_ascii, - "Mary had a little lamb, Little lamb Mary had a littl lamb, lamb!"); - make_test_inner!($s, $code, short_mixed, - "ศไทย中华Việt Nam; Mary had a little lamb, Little lam!"); - make_test_inner!($s, $code, short_pile_of_poo, - "💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩!"); - make_test_inner!($s, $code, long_lorem_ipsum,"\ -Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \ -ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \ -eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \ -sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \ -tempus vel, gravida nec quam. - -In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \ -sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \ -diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \ -lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. 
In vel \ -eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \ -interdum. Curabitur ut nisi justo. - -Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \ -mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \ -lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \ -est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \ -felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \ -ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \ -feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \ -Aliquam sit amet placerat lorem. - -Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \ -mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \ -Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \ -lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \ -suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \ -cursus accumsan. - -Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \ -feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \ -vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \ -leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. 
Nulla \ -malesuada sollicitudin quam eu fermentum!"); - } - } - } - - make_test!(chars_count, s, s.chars().count()); - - make_test!(contains_bang_str, s, s.contains("!")); - make_test!(contains_bang_char, s, s.contains('!')); - - make_test!(match_indices_a_str, s, s.match_indices("a").count()); - - make_test!(split_a_str, s, s.split("a").count()); - - make_test!(trim_ascii_char, s, { - use std::ascii::AsciiExt; - s.trim_matches(|c: char| c.is_ascii()) - }); - make_test!(trim_left_ascii_char, s, { - use std::ascii::AsciiExt; - s.trim_left_matches(|c: char| c.is_ascii()) - }); - make_test!(trim_right_ascii_char, s, { - use std::ascii::AsciiExt; - s.trim_right_matches(|c: char| c.is_ascii()) - }); - - make_test!(find_underscore_char, s, s.find('_')); - make_test!(rfind_underscore_char, s, s.rfind('_')); - make_test!(find_underscore_str, s, s.find("_")); - - make_test!(find_zzz_char, s, s.find('\u{1F4A4}')); - make_test!(rfind_zzz_char, s, s.rfind('\u{1F4A4}')); - make_test!(find_zzz_str, s, s.find("\u{1F4A4}")); - - make_test!(split_space_char, s, s.split(' ').count()); - make_test!(split_terminator_space_char, s, s.split_terminator(' ').count()); - - make_test!(splitn_space_char, s, s.splitn(10, ' ').count()); - make_test!(rsplitn_space_char, s, s.rsplitn(10, ' ').count()); - - make_test!(split_space_str, s, s.split(" ").count()); - make_test!(split_ad_str, s, s.split("ad").count()); -} +// All `str` tests live in libcollectiontest::str