Permalink
Browse files

Regex improvements

This adds findAllMatchIn to Regex to mirror other similar methods.

It also overloads StringLike's "r", adding a version that accepts
group names.

It includes test cases for both methods.

Closes SI-2460.
  • Loading branch information...
dcsobral authored and paulp committed Jan 25, 2012
1 parent 8f42361 commit e3dec9f006ac2631281fb936c4ca206daa8fda5d
@@ -207,9 +207,20 @@ self =>
/** You can follow a string with `.r`, turning it into a `Regex`. E.g.
 *
 *  `"""A\w*""".r` is the regular expression for identifiers starting with `A`.
 *
 *  This is the no-group-names form: it delegates to the `r(groupNames: String*)`
 *  overload with an empty list of names.
 */
def r: Regex = r()
/** Turns this string into a `Regex` whose groups are given names: follow the
 *  string with `.r(g1, ... , gn)` to name the groups g1 through gn.
 *
 *  For example, `"""(\d\d)-(\d\d)-(\d\d\d\d)""".r("month", "day", "year")`
 *  matches dates and exposes the three subcomponents through groups named
 *  "month", "day" and "year".
 *
 *  @param groupNames The names of the groups in the pattern, in the order they appear.
 *  @return           A `Regex` built from this string's `toString` value and the given names.
 */
def r(groupNames: String*): Regex = {
  val patternText = toString
  new Regex(patternText, groupNames: _*)
}
/** Converts this string to a `Boolean` by passing its `toString` value to `parseBoolean`. */
def toBoolean: Boolean = parseBoolean(this.toString)
/** Converts this string to a `Byte` by passing its `toString` value to
 *  `java.lang.Byte.parseByte`.
 *
 *  @throws java.lang.NumberFormatException if the string does not contain a parsable `Byte`.
 */
def toByte: Byte = java.lang.Byte.parseByte(this.toString)
@@ -180,7 +180,7 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
None
}
/** Return all matches of this regexp in given character sequence as a [[scala.util.mathcing.Regex.MatchIterator]],
/** Return all matches of this regexp in given character sequence as a [[scala.util.matching.Regex.MatchIterator]],
* which is a special [[scala.collection.Iterator]] that returns the
* matched strings, but can also be converted into a normal iterator
* that returns objects of type [[scala.util.matching.Regex.Match]]
@@ -193,6 +193,25 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
*/
def findAllIn(source: java.lang.CharSequence): Regex.MatchIterator =
  new Regex.MatchIterator(source, this, groupNames)
/** Return all matches of this regexp in given character sequence as a
 *  [[scala.collection.Iterator]] of [[scala.util.matching.Regex.Match]].
 *
 *  @param source The text to match against.
 *  @return       A [[scala.collection.Iterator]] of [[scala.util.matching.Regex.Match]] for all matches.
 *  @example      {{{for (words <- """\w+""".r findAllMatchIn "A simple example.") yield words.start}}}
 */
def findAllMatchIn(source: java.lang.CharSequence): Iterator[Match] = {
  val matchIterator = findAllIn(source)
  new Iterator[Match] {
    def hasNext: Boolean = matchIterator.hasNext

    def next(): Match = {
      // Advance the underlying iterator; the matched string it returns is not
      // needed here, only the matcher position it leaves behind.
      matchIterator.next()
      // Build a Match from the iterator's current source, matcher and group
      // names. NOTE(review): `force` presumably snapshots the matcher state so
      // the Match stays valid after the iterator advances -- confirm in Match.
      new Match(matchIterator.source, matchIterator.matcher, matchIterator.groupNames).force
    }
  }
}
/** Return optionally first matching string of this regexp in given character sequence,
* or None if it does not exist.
*
@@ -505,7 +524,7 @@ object Regex {
class MatchIterator(val source: java.lang.CharSequence, val regex: Regex, val groupNames: Seq[String])
extends AbstractIterator[String] with Iterator[String] with MatchData { self =>
protected val matcher = regex.pattern.matcher(source)
protected[Regex] val matcher = regex.pattern.matcher(source)
private var nextSeen = false
/** Is there another match? */
@@ -0,0 +1,32 @@
import org.scalacheck.Prop.forAll
import org.scalacheck.Properties
import org.scalacheck.ConsoleReporter.testStatsEx
import org.scalacheck.{Test => SCTest}
import org.scalacheck.Gen
/** ScalaCheck properties for ticket 2460: `Regex.findAllMatchIn` and the
 *  `r(groupNames)` overload that builds a `Regex` with named groups.
 */
object Test extends Properties("Regex : Ticket 2460") {

  // Generates one of the single-letter strings "a" or "z".
  // (The name is historical: "z" is not actually a vowel.)
  val vowel = Gen.oneOf("a", "z")

  // Twenty concatenated copies of the generated letter must yield twenty matches.
  val numberOfMatch = forAll(vowel) { (s: String) =>
    "\\s*([a-z])\\s*".r("data").findAllMatchIn((1 to 20).map(_ => s).mkString).size == 20
  }

  // The pattern has two capturing groups, so the first match must report a group count of 2.
  val numberOfGroup = forAll(vowel) { (s: String) =>
    "\\s*([a-z])\\s*([a-z])\\s*".r("data").findAllMatchIn((1 to 20).map(_ => s).mkString).next.groupCount == 2
  }

  // The group named "data" of the first match must be the input letter itself.
  val nameOfGroup = forAll(vowel) { (s: String) =>
    "([a-z])".r("data").findAllMatchIn(s).next.group("data") == s
  }

  val tests = List(
    ("numberOfMatch", numberOfMatch),
    ("numberOfGroup", numberOfGroup),
    ("nameOfGroup", nameOfGroup)
  )

  // Register every property with this `Properties` collection. Without this the
  // properties above are only stored in `tests` and are never checked when the
  // object is run.
  tests foreach {
    case (name, p) => property(name) = p
  }
}

0 comments on commit e3dec9f

Please sign in to comment.