Skip to content

Commit

Permalink
refine unittest
Browse files (browse the repository at this point in the history)
  • Loading branch information
zhichao-li committed Jul 22, 2015
1 parent d92951b commit 12e108f
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ case class Substring_index(strExpr: Expression, delimExpr: Expression, countExpr
} else {
val idx = lastOrdinalIndexOf(strUtf8, delimUtf8, -count)
if (idx != -1) {
strUtf8.substring(idx + 1, strUtf8.numChars())
strUtf8.substring(idx + delimUtf8.numChars(), strUtf8.numChars())
} else {
strUtf8
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,33 +157,60 @@ class StringFunctionsSuite extends QueryTest {
}

test("string substring_index function") {
val df = Seq(("ac,ab,ad,ab,cc", "aa", "zz")).toDF("a", "b", "c")
val df = Seq(("www.apache.org", ".", "zz")).toDF("a", "b", "c")
checkAnswer(
df.select(substring_index($"a", ",", 2)),
Row("ac,ab"))
df.select(substring_index($"a", ".", 3)),
Row("www.apache.org"))
checkAnswer(
df.select(substring_index($"a", "ab", 2)),
Row("ac,ab,ad,"))
df.select(substring_index($"a", ".", 2)),
Row("www.apache"))
checkAnswer(
df.select(substring_index(lit(""), "ab", 2)),
df.select(substring_index($"a", ".", 1)),
Row("www"))
checkAnswer(
df.select(substring_index($"a", ".", 0)),
Row(""))
checkAnswer(
df.select(substring_index(lit("www.apache.org"), ".", -1)),
Row("org"))
checkAnswer(
df.select(substring_index(lit("www.apache.org"), ".", -2)),
Row("apache.org"))
checkAnswer(
df.select(substring_index(lit("www.apache.org"), ".", -3)),
Row("www.apache.org"))
// str is empty string
checkAnswer(
df.select(substring_index(lit(""), ".", 1)),
Row(""))
// empty string delim
checkAnswer(
df.select(substring_index(lit("www.apache.org"), "", 1)),
Row(""))
// delim does not exist in str
checkAnswer(
df.select(substring_index(lit(null), "ab", 2)),
df.select(substring_index(lit("www.apache.org"), "#", 1)),
Row("www.apache.org"))
// delim is 2 chars
checkAnswer(
df.select(substring_index(lit("www||apache||org"), "||", 2)),
Row("www||apache"))
checkAnswer(
df.select(substring_index(lit("www||apache||org"), "||", -2)),
Row("apache||org"))
// null
checkAnswer(
df.select(substring_index(lit(null), "||", 2)),
Row(null))
checkAnswer(
df.select(substring_index(lit("www.apache.org"), null, 2)),
Row(null))
// non ascii chars
// scalastyle:off
checkAnswer(
df.select(substring_index(lit("大千世界大千世界"), "", 2)),
df.selectExpr("""substring_index("大千世界大千世界", "千", 2)"""),
Row("大千世界大"))
// scalastyle:on
checkAnswer(
df.selectExpr("""substring_index(a, ",", 2)"""),
Row("ac,ab"))
checkAnswer(
df.selectExpr("""substring_index(a, ",", -2)"""),
Row("ab,cc"))
checkAnswer(
df.selectExpr("""substring_index(a, ",", 10)"""),
Row("ac,ab,ad,ab,cc"))
}

test("string locate function") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ private int firstOfCurrentCodePoint(int bytePos) {
throw new RuntimeException("Invalid utf8 string");
}

private int endByte(int startCodePoint) {
private int indexEnd(int startCodePoint) {
int i = numBytes -1; // position in byte
int c = numChars() - 1; // position in character
while (i >=0 && c > startCodePoint) {
Expand All @@ -398,7 +398,7 @@ public int lastIndexOf(UTF8String v, int startCodePoint) {
if (numBytes == 0) {
return -1;
}
int fromIndexEnd = endByte(startCodePoint);
int fromIndexEnd = indexEnd(startCodePoint);
int count = startCodePoint;
int vNumChars = v.numChars();
do {
Expand Down

0 comments on commit 12e108f

Please sign in to comment.