
## Useful links

* https://docs.scala-lang.org/tour/regular-expression-patterns.html
* https://www.scala-lang.org/api/current/scala/util/matching/Regex.html
* https://www.regular-expressions.info/
* https://regex101.com/
* https://regexr.com/
* https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions

In [1]:
import scala.util.matching.Regex

[32mimport [39m[36mscala.util.matching.Regex[39m

In [2]:
 def matchesAny(regex: Regex, s: String): Boolean =
    regex.findFirstMatchIn(s) match {
      case Some(_) => true
      case None    => false
    }

  def matchesAll(regex: Regex, s: String): Boolean =
    regex.pattern.matcher(s).matches

  def test(regex: Regex, s: String): Unit =
    println(s""""$s" match $regex ==> ${matchesAny(regex, s)}""")

  def testAll(regex: Regex, s: String): Unit =
    println(s""""$s" match $regex\t==>\t${matchesAll(regex, s)}""")


defined [32mfunction[39m [36mmatchesAny[39m
defined [32mfunction[39m [36mmatchesAll[39m
defined [32mfunction[39m [36mtest[39m
defined [32mfunction[39m [36mtestAll[39m

In [3]:

  // The raw interpolator
  // normal strings use backslash '\' as an escape character.
  // so the string "\n" is one character, end-of-line (EOL), rather than two characters, "\" and "n"
  // the raw interpolator turns this behavior off, so that raw"\n" means "\" + "n" rather than EOL
  // this is useful with regexes that need to match literal \ or that need to use backslash to change the
  // meaning of a character in the regex. An example of this is "(" could either mean to match a literal
  // parenthesis, or the start of a capturing group.  Also, regex character classes are preceded by a backslash,
  // so it can get confusing as to how many backslashes you need to apply.  Many IDEs will incorrectly give
  // errors on regexes because they violated what a "normal" string should look like.

  // even though the type on this is Regex, we call this the Pattern because that's what the same type of
  // thing is called in Java (java.util.Pattern).

  val digitsPattern: Regex = "[0-9]".r

  val digitsPatternRaw: Regex = "[0-9]".r

  // \d isn't a valid escape sequence, so this won't even compile, giving an "invalid escape character" error
  // val digitsPatternCharacterClass: Regex = "\d".r

  // the first backslash escapes the second backslash, to give the character class for digit (\d)
  val digitsPatternCharacterClass: Regex = "\\d".r

  // a raw string doesn't need escapes
  // IntelliJ has a bug https://youtrack.jetbrains.com/issue/SCL-18824 and shows this as an error
  val digitsPatternCharacterClassRaw: Regex = raw"\d".r

  // a raw string doesn't need escapes
  // IntelliJ has a bug https://youtrack.jetbrains.com/issue/SCL-18824 and shows this as an error
  val digitsPatternCharacterClassRawTriple: Regex = raw"""\d""".r

  val x  = raw".+?\(foobar\).+".r
  val x2 = raw""".+?\(foobar\).+""".r

  test(digitsPattern, "abc")
  test(digitsPattern, "121")

  test(x2, "aaksldjasljdf(foobar)laksjfdlaks")

  // character class with -
  // character classes can express ranges, for example, [0-9a-fA-F] would be a case-insensitive match on a hex number
  // however, this gets confusing if you want to match a string with a literal - in it.
  // http://www.asciitable.com/   46 to 95
  val wrong = raw"[0-9A-Za-z.-_]+".r


  testAll(wrong, "1<")

  val correct = raw"[-._0-9A-Za-z]+".r

  testAll(correct, "123_ABC_abc_<")
  testAll(correct, "123_ABC_abc_.....-------abc-.__a")

  val correct(someofit) = "123_ABC_abc_<"
  println(someofit)

  val notOneOf = raw"[^-._0-9A-Za-z]+".r

  // [-.\w]

  // inside character class - right bracket ], backslash \, caret ^, and hyphen -

  // hyphen between two characters is a range -- so needs to be at very beginning or very end
  // convention -- just put non-alphanumeric characters at the beginning

  // backslash needs to be escaped with another backslash
  // caret at the beginning negates
  //
  // right bracket -- at beginning or escaped -- just escape it
  //
  // escape hyphen?

  // [^-\]

  val matchClosingBracket = "[]a]"

  //back references ([0-9])\1+

//  shorthand character classes

  // \d digits
  // \w [A-Za-z0-9_]
  // \s [ \t\r\n\f]

//  \D \W \S

  // reverse interpolation

  val s"$foo.$bar" = "1.2"

"abc" match [0-9] ==> false
"121" match [0-9] ==> true
"aaksldjasljdf(foobar)laksjfdlaks" match .+?\(foobar\).+ ==> true
"1<" match [0-9A-Za-z.-_]+	==>	true
"123_ABC_abc_<" match [-._0-9A-Za-z]+	==>	false
"123_ABC_abc_.....-------abc-.__a" match [-._0-9A-Za-z]+	==>	true


: 

In [None]:
import scala.util.Try

Try(raw"""[0-9A-Za-z_-.]+""".r).recover{e => println(e)}


In [None]:
testAll(raw"""[0-9A-Za-z.-_]+""".r, "1Aa<>:=;._") // 46 to 95

In [None]:
testAll(raw"""[A-Za-z.-_]+""".r, "1Aa<>:=;._") // numbers are 48-57

In [None]:
testAll(raw"""[a-z.-_]+""".r, "1Aa<>:=;._") // upper case is 41-90

In [None]:
testAll(raw"""[.-z]+""".r, "1Aa<>:=;._") // only adds backtick!

In [None]:
val moreCorrect = raw"[-.\w]+".r

testAll(moreCorrect, "123_ABC_abc_<")
testAll(moreCorrect, "123_ABC_abc_.....-------abc-.__a")

In [None]:
val r1 = raw"""[0-9A-Za-z.-_]+""".r 
val r2 = raw"""[.-z]+""".r
val s = "\u0060\u002d\u002c"

println(s)
println()

testAll(r1, "\u0060\u0060\u0060")
testAll(r2, "\u0060\u0060\u0060")

testAll(r1, s)
testAll(r2, s)
