[SPARK-8010] [SQL] Promote types to StringType as implicit conversion in non-binary expression of HiveTypeCoercion

1. The query `select coalesce(null, 1, '1') from dual` fails with:
   `java.lang.RuntimeException: Could not determine return type of Coalesce for IntegerType,StringType`
2. The query `select case when true then 1 else '1' end from dual` fails with:
   `java.lang.RuntimeException: Types in CASE WHEN must be the same or coercible to a common type: StringType != IntegerType`

I checked the code; the root cause is that HiveTypeCoercion does not apply an implicit conversion when an IntegerType and a StringType appear together in a non-binary expression.
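
For illustration, here is a minimal standalone Scala sketch of the idea behind the fix, not Spark's actual code: the `DataType` objects and helper functions below are simplified stand-ins. The fold looks for the tightest common type of all argument types and, when that fails for a string/non-string pair, falls back to promoting to StringType.

```scala
object PromoteToStringSketch {
  sealed trait DataType
  case object NullType extends DataType
  case object IntegerType extends DataType
  case object DoubleType extends DataType
  case object StringType extends DataType

  // Simplified stand-in for findTightestCommonTypeOfTwo: only the cases needed
  // for this illustration (NullType absorption and Int -> Double widening).
  def tightestCommonTypeOfTwo(a: DataType, b: DataType): Option[DataType] = (a, b) match {
    case (NullType, t) => Some(t)
    case (t, NullType) => Some(t)
    case (x, y) if x == y => Some(x)
    case (IntegerType, DoubleType) | (DoubleType, IntegerType) => Some(DoubleType)
    case _ => None
  }

  // Fallback used when no tighter type exists: a string paired with any other
  // primitive type is promoted to StringType, mirroring Hive's behaviour.
  def tightestCommonTypeToString(a: DataType, b: DataType): Option[DataType] = (a, b) match {
    case (StringType, _) | (_, StringType) => Some(StringType)
    case _ => None
  }

  // Same fold shape as the patch: try the tightest common type first,
  // then fall back to string promotion.
  def commonType(types: Seq[DataType]): Option[DataType] =
    types.foldLeft[Option[DataType]](Some(NullType))((r, c) => r match {
      case None => None
      case Some(d) => tightestCommonTypeOfTwo(d, c).orElse(tightestCommonTypeToString(d, c))
    })

  def main(args: Array[String]): Unit = {
    // coalesce(null, 1, '1') -> NullType, IntegerType, StringType
    println(commonType(Seq(NullType, IntegerType, StringType))) // Some(StringType)
    // case when true then 1 else '1' end -> IntegerType, StringType
    println(commonType(Seq(IntegerType, StringType)))           // Some(StringType)
  }
}
```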

Numeric types can be promoted to string type; Hive always performs this implicit conversion.
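
As a usage sketch, the fixed behaviour can be exercised roughly as follows, mirroring the new test below. This assumes a Spark 1.4-era setup; the `PromoteToStringUsage` object, the local master setting, and the app name are illustrative and not part of the patch.

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

object PromoteToStringUsage {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("SPARK-8010-demo").setMaster("local[1]"))
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    // A one-row table to query against, like the `src` table in the new test.
    sc.parallelize(Seq((1, 1))).toDF("key", "value").registerTempTable("src")

    // Before this patch both queries failed during type coercion; with the
    // promotion to StringType they analyze and return string results.
    sqlContext.sql("select coalesce(null, 1, '1') from src").show()
    sqlContext.sql("select case when true then 1.0 else '1' end from src").show()

    sc.stop()
  }
}
```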

Author: OopsOutOfMemory <victorshengli@126.com>

Closes apache#6551 from OopsOutOfMemory/pnts and squashes the following commits:

7a209d7 [OopsOutOfMemory] rebase master
6018613 [OopsOutOfMemory] convert function to method
4cd5618 [OopsOutOfMemory] limit the data type to primitive type
df365d2 [OopsOutOfMemory] refine
95cbd58 [OopsOutOfMemory] fix style
403809c [OopsOutOfMemory] promote non-string to string when can not found tighestCommonTypeOfTwo
OopsOutOfMemory authored and nemccarthy committed Jun 19, 2015
1 parent 21bee41 commit 53a9495
Showing 2 changed files with 27 additions and 3 deletions.
20 changes: 17 additions & 3 deletions sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
@@ -67,6 +67,19 @@ object HiveTypeCoercion {
})
}

/**
* Similar to [[findTightestCommonType]], but if the tightest common type cannot be found,
* fall back to [[findTightestCommonTypeToString]] to promote the types to StringType.
*/
private def findTightestCommonTypeAndPromoteToString(types: Seq[DataType]): Option[DataType] = {
types.foldLeft[Option[DataType]](Some(NullType))((r, c) => r match {
case None => None
case Some(d) =>
findTightestCommonTypeOfTwo(d, c).orElse(findTightestCommonTypeToString(d, c))
})
}


/**
* Find the tightest common type of a set of types by continuously applying
* `findTightestCommonTypeOfTwo` on these types.
@@ -599,7 +612,7 @@ trait HiveTypeCoercion {
// compatible with every child column.
case Coalesce(es) if es.map(_.dataType).distinct.size > 1 =>
val types = es.map(_.dataType)
- findTightestCommonType(types) match {
+ findTightestCommonTypeAndPromoteToString(types) match {
case Some(finalDataType) => Coalesce(es.map(Cast(_, finalDataType)))
case None =>
sys.error(s"Could not determine return type of Coalesce for ${types.mkString(",")}")
@@ -634,7 +647,7 @@ trait HiveTypeCoercion {
def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions {
case c: CaseWhenLike if c.childrenResolved && !c.valueTypesEqual =>
logDebug(s"Input values for null casting ${c.valueTypes.mkString(",")}")
- val maybeCommonType = findTightestCommonType(c.valueTypes)
+ val maybeCommonType = findTightestCommonTypeAndPromoteToString(c.valueTypes)
maybeCommonType.map { commonType =>
val castedBranches = c.branches.grouped(2).map {
case Seq(when, value) if value.dataType != commonType =>
@@ -650,7 +663,8 @@ trait HiveTypeCoercion {
}.getOrElse(c)

case c: CaseKeyWhen if c.childrenResolved && !c.resolved =>
- val maybeCommonType = findTightestCommonType((c.key +: c.whenList).map(_.dataType))
+ val maybeCommonType =
+ findTightestCommonTypeAndPromoteToString((c.key +: c.whenList).map(_.dataType))
maybeCommonType.map { commonType =>
val castedBranches = c.branches.grouped(2).map {
case Seq(when, then) if when.dataType != commonType =>
10 changes: 10 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -45,6 +45,16 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll with SQLTestUtils {
Row("one", 6) :: Row("three", 3) :: Nil)
}

test("SPARK-8010: promote numeric to string") {
val df = Seq((1, 1)).toDF("key", "value")
df.registerTempTable("src")
val queryCaseWhen = sql("select case when true then 1.0 else '1' end from src ")
val queryCoalesce = sql("select coalesce(null, 1, '1') from src ")

checkAnswer(queryCaseWhen, Row("1.0") :: Nil)
checkAnswer(queryCoalesce, Row("1") :: Nil)
}

test("SPARK-6743: no columns from cache") {
Seq(
(83, 0, 38),
