Permalink
Browse files

Unanchored regex extractors.

This patch is really by Lanny Ripple <lanny@spotinfluence.com>,
but I reworked it because I didn't want to put any more methods
onto String.  Instead, there is a method on Regex which removes
the anchoring quality.

  """\d\d""".r.unanchored
  • Loading branch information...
paulp committed May 2, 2012
1 parent 4b8c54c commit 1475df9bedc03417708f20d94b5e3db5c80f3036
Showing with 84 additions and 7 deletions.
  1. +32 −7 src/library/scala/util/matching/Regex.scala
  2. +6 −0 test/files/run/si5045.check
  3. +46 −0 test/files/run/si5045.scala
@@ -145,6 +145,7 @@ import java.util.regex.{ Pattern, Matcher }
*/
@SerialVersionUID(-2094783597747625537L)
class Regex(regex: String, groupNames: String*) extends Serializable {
outer =>
import Regex._
@@ -179,15 +180,14 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
* @return The matches
*/
def unapplySeq(target: Any): Option[List[String]] = target match {
// NOTE(review): this span is a rendered diff hunk whose +/- markers were lost,
// so the pre-commit and post-commit bodies appear interleaved and the text does
// not parse as written. The lines tagged "old" below add up to the 7 deletions
// the file summary reports for Regex.scala.
// old: the matcher had to cover the whole input (`m.matches` called directly).
case s: java.lang.CharSequence =>
val m = pattern.matcher(s)
if (m.matches) Some((1 to m.groupCount).toList map m.group)
// new: the success test is delegated to `runMatcher`, which a subclass may override.
case s: CharSequence =>
val m = pattern matcher s
if (runMatcher(m)) Some((1 to m.groupCount).toList map m.group)
// `else None` is shared context: it closes whichever `if` precedes it.
else None
// old: a Match was destructured with the `Match` extractor and re-dispatched.
case Match(s) =>
unapplySeq(s)
// old: fallback for every other target.
case _ =>
None
// new: a Match is re-dispatched on its matched text; anything else is None.
case m: Match => unapplySeq(m.matched)
case _ => None
}
/** Hook consulted by `unapplySeq` to decide whether the matcher succeeded.
 *  This default requires the entire input to match (`Matcher.matches`).
 *
 *  @param m  a matcher already bound to the input being tested
 *  @return   `true` if the input counts as a match for extractor patterns
 */
protected def runMatcher(m: Matcher): Boolean = m.matches()
/** Return all matches of this regexp in given character sequence as a [[scala.util.matching.Regex.MatchIterator]],
* which is a special [[scala.collection.Iterator]] that returns the
@@ -373,10 +373,35 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
def split(toSplit: java.lang.CharSequence): Array[String] = {
  // Pure delegation: the underlying `pattern` does the actual splitting.
  pattern split toSplit
}
/** Builds a new Regex from the same pattern string and group names whose
 *  extractor patterns do not require the entire input to match. The strings
 *  below match successfully only because the regex is unanchored:
 *
 *  {{{
 *  val dateP1 = """(\d\d\d\d)-(\d\d)-(\d\d)""".r.unanchored
 *
 *  val dateP1(year, month, day) = "Date 2011-07-15"
 *
 *  val copyright: String = "Date of this document: 2011-07-15" match {
 *    case dateP1(year, month, day) => "Copyright "+year
 *    case _                        => "No copyright"
 *  }
 *  }}}
 *
 *  @return  the new unanchored regex; calling `anchored` on it yields this
 *           original regex again
 */
def unanchored: UnanchoredRegex = {
  new Regex(regex, groupNames: _*) with UnanchoredRegex {
    // Point back at the regex we were derived from instead of at ourselves.
    override def anchored = outer
  }
}
/** The anchored counterpart of this regex. With the default `runMatcher`
 *  the whole input already has to match, so this simply returns `this`.
 *
 *  NOTE(review): an instance made by mixing `UnanchoredRegex` in directly
 *  (rather than via `unanchored`) inherits this definition and therefore
 *  also returns itself - confirm that this is intended.
 *
 *  @return this regex
 */
def anchored: Regex = this
/** Renders this regex as the pattern string it was constructed from. */
override def toString: String = regex
}
/** A [[Regex]] whose extractor patterns succeed when the pattern is found
 *  anywhere in the input, instead of having to match all of it.
 */
trait UnanchoredRegex extends Regex {
  /** Accepts a match located anywhere in the input (`Matcher.find`). */
  override protected def runMatcher(m: Matcher): Boolean = m.find()

  /** Already unanchored, so the same instance is returned. */
  override def unanchored: UnanchoredRegex = this
}
/** This object defines inner classes that describe
* regex matches and helper objects. The class hierarchy
* is as follows:
@@ -0,0 +1,6 @@
extract an exact match 2011-07-15 2011-07-15
extract from middle of string 2011-07-15 2011-07-15
extract from middle of string (P2) 2011-07-15 2011-07-15
extract from middle of string (P3) 2011-07-15 2011-07-15
copyright example has date Copyright 2011 Copyright 2011
copyright example missing date No copyright No copyright
@@ -0,0 +1,46 @@
/** Run-test for unanchored regex extractors.
 *
 *  Each case prints one line holding a label, the value actually extracted and
 *  the value expected, so the output can be compared against a check file.
 */
object Test extends App {
  import scala.util.matching.{ Regex, UnanchoredRegex }

  // Three ways of obtaining an unanchored regex: from a plain `.r`, from `.r`
  // with group names, and by mixing the trait into a `new Regex` directly.
  val dateP1 = """(\d\d\d\d)-(\d\d)-(\d\d)""".r.unanchored
  val dateP2 = """(\d\d\d\d)-(\d\d)-(\d\d)""".r("year", "month", "day").unanchored
  val dateP3 = new Regex("""(\d\d\d\d)-(\d\d)-(\d\d)""", "year", "month", "day") with UnanchoredRegex

  val yearStr = "2011"
  val dateStr = List(yearStr,"07","15").mkString("-")

  /** Prints `msg` right-aligned in 40 columns followed by the space-separated `strs`. */
  def test(msg: String)(strs: Seq[String]): Unit = println("%40s %s".format(msg, strs mkString " "))

  test("extract an exact match") {
    val dateP1(y,m,d) = dateStr
    Seq(List(y,m,d).mkString("-"), dateStr)
  }

  test("extract from middle of string") {
    val dateP1(y,m,d) = "Tested on "+dateStr+"."
    Seq(List(y,m,d).mkString("-"), dateStr)
  }

  test("extract from middle of string (P2)") {
    val dateP2(y,m,d) = "Tested on "+dateStr+"."
    Seq(List(y,m,d).mkString("-"), dateStr)
  }

  test("extract from middle of string (P3)") {
    // Must use dateP3: this case is the only one covering the direct
    // `new Regex(...) with UnanchoredRegex` mixin.
    val dateP3(y,m,d) = "Tested on "+dateStr+"."
    Seq(List(y,m,d).mkString("-"), dateStr)
  }

  /** Returns "Copyright <year>" when `in` contains a date, otherwise "No copyright". */
  def copyright(in: String): String = in match {
    case dateP1(year, month, day) => "Copyright "+year
    case _                        => "No copyright"
  }

  test("copyright example has date") {
    Seq(copyright("Date of this document: "+dateStr), "Copyright "+yearStr)
  }

  test("copyright example missing date") {
    Seq(copyright("Date of this document: unknown"), "No copyright")
  }
}

0 comments on commit 1475df9

Please sign in to comment.