Skip to content

Commit

Permalink
[SPARK-8214] [SQL] Add function hex
Browse files Browse the repository at this point in the history
cc chenghao-intel  adrian-wang

Author: zhichao.li <zhichao.li@intel.com>

Closes apache#6976 from zhichao-li/hex and squashes the following commits:

e218d1b [zhichao.li] turn off scalastyle for non-ascii
de3f5ea [zhichao.li] non-ascii char
cf9c936 [zhichao.li] give separated buffer for each hex method
967ec90 [zhichao.li] Make 'value' a field of Hex
3b2fa13 [zhichao.li] tiny fix
a647641 [zhichao.li] remove duplicate null check
7cab020 [zhichao.li] tiny refactoring
35ecfe5 [zhichao.li] add function hex
  • Loading branch information
zhichao-li authored and Davies Liu committed Jun 29, 2015
1 parent 94e040d commit 637b4ee
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ object FunctionRegistry {
expression[Expm1]("expm1"),
expression[Floor]("floor"),
expression[Hypot]("hypot"),
expression[Hex]("hex"),
expression[Logarithm]("log"),
expression[Log]("ln"),
expression[Log10]("log10"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
package org.apache.spark.sql.catalyst.expressions

import java.lang.{Long => JLong}
import java.util.Arrays

import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.expressions.codegen._
import org.apache.spark.sql.types.{DataType, DoubleType, LongType, StringType}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String

/**
Expand Down Expand Up @@ -273,9 +275,6 @@ case class Atan2(left: Expression, right: Expression)
}
}

/**
 * Binary math expression over `left` and `right` that hands `math.hypot` and the name
 * "HYPOT" to [[BinaryMathExpression]].
 */
case class Hypot(left: Expression, right: Expression)
extends BinaryMathExpression(math.hypot, "HYPOT")

case class Pow(left: Expression, right: Expression)
extends BinaryMathExpression(math.pow, "POWER") {
override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
Expand All @@ -287,6 +286,85 @@ case class Pow(left: Expression, right: Expression)
}
}

/**
 * Returns the upper-case hexadecimal representation of the child's value as a STRING.
 *
 *  - INT / LONG input: the number is written in hexadecimal without leading zeros. Integers are
 *    widened to 64 bits first, so negative numbers appear as 16-digit two's complement
 *    (e.g. -28 becomes "FFFFFFFFFFFFFFE4").
 *  - BINARY input: every byte is converted to two hex digits.
 *  - STRING input: every byte of the string is converted to two hex digits.
 *  - A null input value evaluates to null.
 */
case class Hex(child: Expression)
  extends UnaryExpression with Serializable {

  override def dataType: DataType = StringType

  /**
   * Accepts string, integer, long, binary and null-typed children; any other type is rejected
   * with a failure message naming the offending type.
   */
  override def checkInputDataTypes(): TypeCheckResult = child.dataType match {
    case StringType | IntegerType | LongType | BinaryType | NullType =>
      TypeCheckResult.TypeCheckSuccess
    case other =>
      TypeCheckResult.TypeCheckFailure(s"hex doesn't accept $other type")
  }

  /**
   * Evaluates the child and dispatches on its declared data type.
   *
   * @return the hex digits as a [[UTF8String]], or null when the child evaluates to null
   */
  override def eval(input: InternalRow): Any = {
    val value = child.eval(input)
    if (value == null) {
      null
    } else {
      child.dataType match {
        case LongType => hex(value.asInstanceOf[Long])
        // Widening to Long sign-extends, which yields the 64-bit two's complement form.
        case IntegerType => hex(value.asInstanceOf[Integer].toLong)
        case BinaryType => hex(value.asInstanceOf[Array[Byte]])
        case StringType => hex(value.asInstanceOf[UTF8String])
      }
    }
  }

  /** Maps a value in the range 0..15 to its upper-case ASCII hex digit. */
  private def hexDigit(nibble: Int): Byte = "0123456789ABCDEF".charAt(nibble).toByte

  /**
   * Converts every byte of the string to two hex digits.
   */
  private def hex(str: UTF8String): UTF8String = {
    hex(str.getBytes)
  }

  /**
   * Converts every byte to two hex digits, high nibble first.
   */
  private def hex(bytes: Array[Byte]): UTF8String = {
    val digits = new Array[Byte](bytes.length * 2)
    var i = 0
    while (i < bytes.length) {
      val b = bytes(i)
      // Mask before shifting so the sign extension of a negative byte is discarded.
      digits(i * 2) = hexDigit((b & 0xF0) >>> 4)
      digits(i * 2 + 1) = hexDigit(b & 0x0F)
      i += 1
    }
    UTF8String.fromBytes(digits)
  }

  /**
   * Formats a 64-bit value as hex without leading zeros; zero itself is rendered as "0".
   */
  private def hex(num: Long): UTF8String = {
    // A Long has at most 16 hex digits; fill the buffer from right to left.
    val digits = new Array[Byte](16)
    var remaining = num
    var len = 0
    do {
      len += 1
      digits(digits.length - len) = hexDigit((remaining & 0xF).toInt)
      // Unsigned shift so negative inputs terminate after exactly 16 digits.
      remaining >>>= 4
    } while (remaining != 0)
    UTF8String.fromBytes(Arrays.copyOfRange(digits, digits.length - len, digits.length))
  }
}

/**
 * Binary math expression over `left` and `right` that hands `math.hypot` and the name
 * "HYPOT" to [[BinaryMathExpression]].
 */
case class Hypot(left: Expression, right: Expression)
extends BinaryMathExpression(math.hypot, "HYPOT")

case class Logarithm(left: Expression, right: Expression)
extends BinaryMathExpression((c1, c2) => math.log(c2) / math.log(c1), "LOG") {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.types.{DataType, DoubleType, LongType}
Expand Down Expand Up @@ -226,6 +225,19 @@ class MathFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
testBinary(Pow, math.pow, Seq((-1.0, 0.9), (-2.2, 1.7), (-2.2, -1.7)), expectNull = true)
}

test("hex") {
  // Int inputs: negative values are expected as 64-bit two's complement.
  val intCases = Seq(28 -> "1C", -28 -> "FFFFFFFFFFFFFFE4")
  intCases.foreach { case (input, expected) =>
    checkEvaluation(Hex(Literal(input)), expected)
  }

  // Long inputs.
  val longCases = Seq(
    100800200404L -> "177828FED4",
    -100800200404L -> "FFFFFFE887D7012C")
  longCases.foreach { case (input, expected) =>
    checkEvaluation(Hex(Literal(input)), expected)
  }

  // The same text as a string and as raw bytes must produce the same digits.
  val ascii = "helloHex"
  val asciiHex = "68656C6C6F486578"
  checkEvaluation(Hex(Literal(ascii)), asciiHex)
  checkEvaluation(Hex(Literal(ascii.getBytes())), asciiHex)

  // scalastyle:off
  // Scalastyle is disabled here because the literal contains non-ascii characters
  checkEvaluation(Hex(Literal("三重的")), "E4B889E9878DE79A84")
  // scalastyle:on
}

// Passes the Hypot constructor and math.hypot to the testBinary helper.
// NOTE(review): presumably the helper compares the expression's results with the
// given function; confirm against testBinary's definition.
test("hypot") {
testBinary(Hypot, math.hypot)
}
Expand Down
16 changes: 16 additions & 0 deletions sql/core/src/main/scala/org/apache/spark/sql/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1046,6 +1046,22 @@ object functions {
*/
def floor(columnName: String): Column = floor(Column(columnName))

/**
 * Computes the hexadecimal string representation of the given column by wrapping the
 * column's expression in a `Hex` expression.
 *
 * @param column the column whose values are converted
 * @return a column built from the resulting `Hex` expression
 *
 * @group math_funcs
 * @since 1.5.0
 */
def hex(column: Column): Column = Hex(column.expr)

/**
 * Computes the hex value of the column with the given name. Delegates to the
 * `Column`-based overload of `hex`.
 *
 * @param colName name of the column whose values are converted
 *
 * @group math_funcs
 * @since 1.5.0
 */
def hex(colName: String): Column = hex(Column(colName))

/**
* Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,19 @@ class MathExpressionsSuite extends QueryTest {
)
}

test("hex") {
  val df = Seq((28, -28, 100800200404L, "hello")).toDF("a", "b", "c", "d")

  // Expected hex rendering of the single row, keyed by column name.
  val expectedByColumn = Seq(
    "a" -> "1C",
    "b" -> "FFFFFFFFFFFFFFE4",
    "c" -> "177828FED4",
    "d" -> "68656C6C6F")

  // DataFrame function API.
  expectedByColumn.foreach { case (name, expected) =>
    checkAnswer(df.select(hex(Symbol(name))), Seq(Row(expected)))
  }

  // SQL expression strings.
  expectedByColumn.foreach { case (name, expected) =>
    checkAnswer(df.selectExpr(s"hex($name)"), Seq(Row(expected)))
  }

  // Binary input gives the same digits as the equivalent string.
  checkAnswer(df.selectExpr("hex(cast(d as binary))"), Seq(Row("68656C6C6F")))
}

// Passes `hypot` twice and math.hypot to the testTwoToOneMathFunction helper.
// NOTE(review): presumably the helper checks the DataFrame function against the
// given reference function; confirm against the helper's definition.
test("hypot") {
testTwoToOneMathFunction(hypot, hypot, math.hypot)
}
Expand Down

0 comments on commit 637b4ee

Please sign in to comment.