From a451184d7fe8fe7ec2b6212e6ba80d01926214e7 Mon Sep 17 00:00:00 2001 From: Christian Kadner Date: Wed, 24 Jun 2015 04:21:15 -0700 Subject: [PATCH] [SPARK-6785][SQL] fix DateTimeUtils.fromJavaDate(java.util.Date) for Dates before 1970 --- .../sql/catalyst/util/DateTimeUtils.scala | 7 ++- .../expressions/MiscFunctionsSuite.scala | 43 ++++++++++++++++++- .../sql/ScalaReflectionRelationSuite.scala | 2 +- ...te cast-0-a7cd69b80c77a771a2c955db666be53d | 1 - ... test 2-0-dc1b267f1d79d49e6675afe4fd2a34a5 | 1 - .../sql/hive/execution/HiveQuerySuite.scala | 14 ------ .../sql/hive/execution/SQLQuerySuite.scala | 31 ++++++++++++- 7 files changed, 78 insertions(+), 21 deletions(-) delete mode 100644 sql/hive/src/test/resources/golden/Date cast-0-a7cd69b80c77a771a2c955db666be53d delete mode 100644 sql/hive/src/test/resources/golden/Date comparison test 2-0-dc1b267f1d79d49e6675afe4fd2a34a5 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index ff79884a44d00..ca23a3e27b034 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -52,8 +52,11 @@ object DateTimeUtils { } // we should use the exact day as Int, for example, (year, month, day) -> day - def millisToDays(millisLocal: Long): Int = { - ((millisLocal + LOCAL_TIMEZONE.get().getOffset(millisLocal)) / MILLIS_PER_DAY).toInt + def millisToDays(millisUtc: Long): Int = { + // SPARK-6785: floor instead of truncating so dates before 1970 (negative millis) map to + // the day count toJavaDate(Int) expects; Long arithmetic keeps the division exact + val millisLocal = millisUtc + LOCAL_TIMEZONE.get().getOffset(millisUtc) + (millisLocal / MILLIS_PER_DAY - (if (millisLocal % MILLIS_PER_DAY < 0) 1 else 0)).toInt } def toMillisSinceEpoch(days: Int): Long = { diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala index 48b84130b4556..bcaeb8387d93e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala @@ -17,8 +17,12 @@ package org.apache.spark.sql.catalyst.expressions +import java.sql.Date +import java.text.SimpleDateFormat + import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.types.{StringType, BinaryType} +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.types.BinaryType class MiscFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper { @@ -29,4 +33,41 @@ class MiscFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Md5(Literal.create(null, BinaryType)), null) } + test("SPARK-6785: java date conversion before and after epoch") { + def checkFromToJavaDate(d1: Date): Unit = { + val d2 = DateTimeUtils.toJavaDate(DateTimeUtils.fromJavaDate(d1)) + assert(d2.toString === d1.toString) + } + + val df1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + val df2 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss z") + + checkFromToJavaDate(new Date(100)) + + checkFromToJavaDate(Date.valueOf("1970-01-01")) + + checkFromToJavaDate(new Date(df1.parse("1970-01-01 00:00:00").getTime)) + checkFromToJavaDate(new Date(df2.parse("1970-01-01 00:00:00 UTC").getTime)) + + checkFromToJavaDate(new Date(df1.parse("1970-01-01 00:00:01").getTime)) + checkFromToJavaDate(new Date(df2.parse("1970-01-01 00:00:01 UTC").getTime)) + + checkFromToJavaDate(new Date(df1.parse("1969-12-31 23:59:59").getTime)) + checkFromToJavaDate(new Date(df2.parse("1969-12-31 23:59:59 UTC").getTime)) + + checkFromToJavaDate(Date.valueOf("1969-01-01")) + + checkFromToJavaDate(new Date(df1.parse("1969-01-01 
00:00:00").getTime)) + checkFromToJavaDate(new Date(df2.parse("1969-01-01 00:00:00 UTC").getTime)) + + checkFromToJavaDate(new Date(df1.parse("1969-01-01 00:00:01").getTime)) + checkFromToJavaDate(new Date(df2.parse("1969-01-01 00:00:01 UTC").getTime)) + + checkFromToJavaDate(new Date(df1.parse("1989-11-09 11:59:59").getTime)) + checkFromToJavaDate(new Date(df2.parse("1989-11-09 19:59:59 UTC").getTime)) + + checkFromToJavaDate(new Date(df1.parse("1776-07-04 10:30:00").getTime)) + checkFromToJavaDate(new Date(df2.parse("1776-07-04 18:30:00 UTC").getTime)) + } + } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ScalaReflectionRelationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ScalaReflectionRelationSuite.scala index ece3d6fdf2af5..2845efac29e6c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ScalaReflectionRelationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ScalaReflectionRelationSuite.scala @@ -79,7 +79,7 @@ class ScalaReflectionRelationSuite extends SparkFunSuite { test("query case class RDD") { val data = ReflectData("a", 1, 1L, 1.toFloat, 1.toDouble, 1.toShort, 1.toByte, true, - new java.math.BigDecimal(1), new Date(12345), new Timestamp(12345), Seq(1, 2, 3)) + new java.math.BigDecimal(1), Date.valueOf("1970-01-01"), new Timestamp(12345), Seq(1, 2, 3)) Seq(data).toDF().registerTempTable("reflectData") assert(ctx.sql("SELECT * FROM reflectData").collect().head === diff --git a/sql/hive/src/test/resources/golden/Date cast-0-a7cd69b80c77a771a2c955db666be53d b/sql/hive/src/test/resources/golden/Date cast-0-a7cd69b80c77a771a2c955db666be53d deleted file mode 100644 index 98da82fa89386..0000000000000 --- a/sql/hive/src/test/resources/golden/Date cast-0-a7cd69b80c77a771a2c955db666be53d +++ /dev/null @@ -1 +0,0 @@ -1970-01-01 1970-01-01 1969-12-31 16:00:00 1969-12-31 16:00:00 1970-01-01 00:00:00 diff --git a/sql/hive/src/test/resources/golden/Date comparison test 2-0-dc1b267f1d79d49e6675afe4fd2a34a5 
b/sql/hive/src/test/resources/golden/Date comparison test 2-0-dc1b267f1d79d49e6675afe4fd2a34a5 deleted file mode 100644 index 27ba77ddaf615..0000000000000 --- a/sql/hive/src/test/resources/golden/Date comparison test 2-0-dc1b267f1d79d49e6675afe4fd2a34a5 +++ /dev/null @@ -1 +0,0 @@ -true diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 51dabc67fa7c1..4cdba03b27022 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -324,20 +324,6 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { | FROM src LIMIT 1 """.stripMargin) - createQueryTest("Date comparison test 2", - "SELECT CAST(CAST(0 AS timestamp) AS date) > CAST(0 AS timestamp) FROM src LIMIT 1") - - createQueryTest("Date cast", - """ - | SELECT - | CAST(CAST(0 AS timestamp) AS date), - | CAST(CAST(CAST(0 AS timestamp) AS date) AS string), - | CAST(0 AS timestamp), - | CAST(CAST(0 AS timestamp) AS string), - | CAST(CAST(CAST('1970-01-01 23:00:00' AS timestamp) AS date) AS timestamp) - | FROM src LIMIT 1 - """.stripMargin) - createQueryTest("Simple Average", "SELECT AVG(key) FROM src") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index a2e666586c186..c7b1717c50849 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -17,10 +17,12 @@ package org.apache.spark.sql.hive.execution +import java.sql.{Date, Timestamp} + +import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.DefaultParserDialect import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries import 
org.apache.spark.sql.catalyst.errors.DialectException -import org.apache.spark.sql._ import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ @@ -962,4 +964,31 @@ class SQLQuerySuite extends QueryTest { case None => // OK } } + + test("SPARK-6785: HiveQuerySuite - Date comparison test 2") { + checkAnswer( + sql("SELECT CAST(CAST(0 AS timestamp) AS date) > CAST(0 AS timestamp) FROM src LIMIT 1"), + Row(false)) + } + + test("SPARK-6785: HiveQuerySuite - Date cast") { + // new Date(0) == 1970-01-01 00:00:00.0 GMT == 1969-12-31 16:00:00.0 PST + checkAnswer( + sql( + """ + | SELECT + | CAST(CAST(0 AS timestamp) AS date), + | CAST(CAST(CAST(0 AS timestamp) AS date) AS string), + | CAST(0 AS timestamp), + | CAST(CAST(0 AS timestamp) AS string), + | CAST(CAST(CAST('1970-01-01 23:00:00' AS timestamp) AS date) AS timestamp) + | FROM src LIMIT 1 + """.stripMargin), + Row( + Date.valueOf("1969-12-31"), + "1969-12-31", + Timestamp.valueOf("1969-12-31 16:00:00"), + "1969-12-31 16:00:00", + Timestamp.valueOf("1970-01-01 00:00:00"))) + } }