Skip to content

Commit

Permalink
HBASE-27488 [hbase-connectors] Duplicate result when searching HBase …
Browse files Browse the repository at this point in the history
…by Spark (apache#106)

Signed-off-by: Reid Chan <reidchan@apache.org>
  • Loading branch information
ILuffZhe authored and subrat-mishra committed Aug 8, 2023
1 parent b64b638 commit aba779d
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -114,16 +114,20 @@ class HBaseTableScanRDD(relation: HBaseRelation,
hbaseContext: HBaseContext): Iterator[Result] = {
g.grouped(relation.bulkGetSize).flatMap{ x =>
val gets = new ArrayList[Get](x.size)
val rowkeySet = new mutable.HashSet[String]()
x.foreach{ y =>
val g = new Get(y)
handleTimeSemantics(g)
columns.foreach { d =>
if (!d.isRowKey) {
g.addColumn(d.cfBytes, d.colBytes)
if (!rowkeySet.contains(y.mkString("Array(", ", ", ")"))) {
val g = new Get(y)
handleTimeSemantics(g)
columns.foreach { d =>
if (!d.isRowKey) {
g.addColumn(d.cfBytes, d.colBytes)
}
}
filter.foreach(g.setFilter(_))
gets.add(g)
rowkeySet.add(y.mkString("Array(", ", ", ")"))
}
filter.foreach(g.setFilter(_))
gets.add(g)
}
hbaseContext.applyCreds()
val tmp = tbr.get(gets)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,22 @@ BeforeAndAfterEach with BeforeAndAfterAll with Logging {
assert(executionRules.rowKeyFilter.ranges.size == 0)
}

/**
* An example of querying three fields using only rowkey points for the filter,
* where some of the rowkey points are duplicated.
*/
test("Test rowKey point only rowKey query, which contains duplicate rowkey") {
  // The predicate deliberately names 'get1' twice, so the same rowkey point
  // appears more than once in the WHERE clause.
  val query = "SELECT KEY_FIELD, B_FIELD, A_FIELD FROM hbaseTable1 " +
    "WHERE " +
    "(KEY_FIELD = 'get1' or KEY_FIELD = 'get2' or KEY_FIELD = 'get1')"
  val rows = sqlContext.sql(query).take(10)
  val rules = DefaultSourceStaticUtils.lastFiveExecutionRules.poll()

  // Two distinct keys were requested, so exactly two rows are expected back.
  assert(rows.length == 2)

  // The expression and point-set assertions below expect the repeated key
  // to have been collapsed into a single point.
  val expression = rules.dynamicLogicExpression.toExpressionString
  assert(expression.equals("( KEY_FIELD == 0 OR KEY_FIELD == 1 )"))

  // Only point lookups are expected; no range scans.
  assert(rules.rowKeyFilter.points.size == 2)
  assert(rules.rowKeyFilter.ranges.size == 0)
}

/**
* An example of querying three fields while using only cell points for the filter
*/
Expand Down

0 comments on commit aba779d

Please sign in to comment.