Skip to content

Commit

Permalink
Adding tests for the new string implicits
Browse files Browse the repository at this point in the history
Also fixing charBigrams implicit method
  • Loading branch information
eponvert committed Dec 8, 2015
1 parent a54844f commit a5ffcc1
Show file tree
Hide file tree
Showing 3 changed files with 179 additions and 3 deletions.
2 changes: 1 addition & 1 deletion src/main/scala/com/peoplepattern/text/Implicits.scala
Expand Up @@ -51,7 +51,7 @@ object Implicits {
def charNgrams(min: Int, max: Int): Map[String, Int] = StringUtil.charNgrams(str, min, max)

/** Extract counts of the char bigrams in the string */
- def charBigrams(s: String): Map[String, Int] = charNgrams(2, 2)
+ def charBigrams: Map[String, Int] = charNgrams(2, 2)

/** The term as a URL, if it can be parsed as such */
lazy val asUrl: Option[URL] = try {
Expand Down
5 changes: 3 additions & 2 deletions src/test/scala/com/peoplepattern/text/StringUtilSpec.scala
Expand Up @@ -52,12 +52,12 @@ class StringUtilSpec extends FlatSpec {
val expected = Map(
" \t" -> 1,
"\t " -> 1,
+ "\nf" -> 1,
".\n" -> 1,
" K" -> 1,
" b" -> 1,
- " f" -> 1,
" i" -> 2,
- " o" -> 1,
+ "\to" -> 1,
" s" -> 1,
"Kn" -> 1,
"bo" -> 1,
Expand All @@ -76,6 +76,7 @@ class StringUtilSpec extends FlatSpec {
"x " -> 1,
"x\t" -> 1,
"x." -> 1)
+ assert(StringUtil.charNgrams(testStr, 2, 2) == expected)
}

it should "extract bigrams and trigrams with (2, 3)" in {
Expand Down
Expand Up @@ -80,4 +80,179 @@ class StringWithAnalysisImplicitSpec extends FunSpec with Matchers {
assert(tweet.lang == Some("en"))
}
}

// Spec for the `termsPlus` string implicit, exercised on a realistic tweet.
describe("str.termsPlus") {
  it("should get terms, @-mentions and hashtags from a tweet") {
    val text = "Gronk makes history: 1st player to have multiple games of 3 or more receiving TDs @RobGronkowski #crazyfootballmomma @NFL 🔥🏈🔥🏈 #ballout"

    // The expected terms are all lower-cased. Tokens such as "1st", "to",
    // "have", "of", "3", "or", "more" and the emoji are not expected back.
    val plainTerms = Set("gronk", "makes", "history", "player", "multiple", "games", "receiving", "tds")
    val mentions = Set("@robgronkowski", "@nfl")
    val hashtags = Set("#crazyfootballmomma", "#ballout")

    text.termsPlus shouldBe (plainTerms ++ mentions ++ hashtags)
  }
}

// Spec for the `isBlank` string implicit: empty, whitespace-only and null
// strings are expected to be blank; a string with visible characters is not.
describe("str.isBlank") {
  it("should identify \"\" as blank") {
    assert("".isBlank)
  }

  it("should identify all white-space strings as blank") {
    assert(" \n\t \t".isBlank)
  }

  it("should not identify a non-empty string as blank") {
    assert(!"hello".isBlank)
  }

  it("should identify null as blank") {
    // Giving the null reference the static type String lets the implicit
    // extension apply, so the call is made on the wrapper, not on null itself.
    // NOTE(review): JDK 11+ has a native `String.isBlank`; a real member is
    // chosen before any implicit conversion, so there this call would hit the
    // JDK method and throw a NullPointerException. Confirm the target JDK.
    val missing: String = null
    assert(missing.isBlank)
  }
}

// Spec for the `nonBlank` string implicit: only a string with visible
// characters is expected to be non-blank; "", whitespace-only and null are not.
describe("str.nonBlank") {
  it("should not identify \"\" as non-blank") {
    assert(!"".nonBlank)
  }

  it("should not identify all white-space strings as non-blank") {
    assert(!" \n \t \t\t".nonBlank)
  }

  it("should identify non-empty string as non-blank") {
    assert("hello".nonBlank)
  }

  it("should not identify null as non-blank") {
    // Giving the null reference the static type String lets the implicit
    // extension apply, so the call is made on the wrapper, not on null itself.
    val missing: String = null
    assert(!missing.nonBlank)
  }
}

// Spec for the `charNgrams(min, max)` string implicit. Every test here calls
// the method on a String (`str.charNgrams`), so the group is labelled like the
// sibling implicit specs rather than after the underlying StringUtil helper.
describe("str.charNgrams") {
  // Shared fixture for the tests that contain only letters, single spaces and
  // a trailing period.
  val sentence = "Knox on fox in socks in box."

  it("should extract bigrams with (2, 2)") {
    // One entry per distinct 2-character window, with its occurrence count.
    val expected = Map(
      " b" -> 1,
      " f" -> 1,
      " i" -> 2,
      " o" -> 1,
      " s" -> 1,
      "Kn" -> 1,
      "bo" -> 1,
      "ck" -> 1,
      "fo" -> 1,
      "in" -> 2,
      "ks" -> 1,
      "n " -> 3,
      "no" -> 1,
      "oc" -> 1,
      "on" -> 1,
      "ox" -> 3,
      "s " -> 1,
      "so" -> 1,
      "x " -> 2,
      "x." -> 1)
    assert(sentence.charNgrams(2, 2) == expected)
  }

  it("should not throw out \\t and \\n etc") {
    // Tabs, newlines and leading/trailing whitespace must show up inside the
    // n-grams exactly as they occur in the input.
    val testStr = " Knox\ton\nfox \t in socks in box.\n"
    val expected = Map(
      " \t" -> 1,
      "\t " -> 1,
      "\nf" -> 1,
      ".\n" -> 1,
      " K" -> 1,
      " b" -> 1,
      " i" -> 2,
      "\to" -> 1,
      " s" -> 1,
      "Kn" -> 1,
      "bo" -> 1,
      "ck" -> 1,
      "fo" -> 1,
      "in" -> 2,
      "ks" -> 1,
      "n\n" -> 1,
      "n " -> 2,
      "no" -> 1,
      "oc" -> 1,
      "on" -> 1,
      "ox" -> 3,
      "s " -> 1,
      "so" -> 1,
      "x " -> 1,
      "x\t" -> 1,
      "x." -> 1)
    assert(testStr.charNgrams(2, 2) == expected)
  }

  it("should extract bigrams and trigrams with (2, 3)") {
    // With (2, 3) both the 2-character and the 3-character windows are counted
    // into the same map.
    val expected = Map(
      " b" -> 1,
      " bo" -> 1,
      " f" -> 1,
      " fo" -> 1,
      " i" -> 2,
      " in" -> 2,
      " o" -> 1,
      " on" -> 1,
      " s" -> 1,
      " so" -> 1,
      "Kn" -> 1,
      "Kno" -> 1,
      "bo" -> 1,
      "box" -> 1,
      "ck" -> 1,
      "cks" -> 1,
      "fo" -> 1,
      "fox" -> 1,
      "in" -> 2,
      "in " -> 2,
      "ks" -> 1,
      "ks " -> 1,
      "n " -> 3,
      "n b" -> 1,
      "n f" -> 1,
      "n s" -> 1,
      "no" -> 1,
      "nox" -> 1,
      "oc" -> 1,
      "ock" -> 1,
      "on" -> 1,
      "on " -> 1,
      "ox" -> 3,
      "ox " -> 2,
      "ox." -> 1,
      "s " -> 1,
      "s i" -> 1,
      "so" -> 1,
      "soc" -> 1,
      "x " -> 2,
      "x i" -> 1,
      "x o" -> 1,
      "x." -> 1)
    assert(sentence.charNgrams(2, 3) == expected)
  }
}

// Spec for the `charBigrams` string implicit: it must agree with the general
// n-gram extractor when that is restricted to n = 2.
describe("str.charBigrams") {
  it("should produce the same output as charNgrams(2, 2)") {
    val sample = "Knox on fox in socks in box."
    val viaNgrams = sample.charNgrams(2, 2)
    sample.charBigrams shouldBe viaNgrams
  }
}
}

0 comments on commit a5ffcc1

Please sign in to comment.