From 0bf6e2132c348fa94a8b93d08d4be2d61aa25ad1 Mon Sep 17 00:00:00 2001 From: Milan Dankovic Date: Mon, 11 Mar 2024 19:33:05 +0500 Subject: [PATCH] [SPARK-47255][SQL] Assign names to the error classes _LEGACY_ERROR_TEMP_323[6-7] and _LEGACY_ERROR_TEMP_324[7-9] ### What changes were proposed in this pull request? In the PR, I propose to assign the proper names to the legacy error classes _LEGACY_ERROR_TEMP_323[6-7] and _LEGACY_ERROR_TEMP_324[7-9], and modify tests in testing suites to reflect these changes and use checkError() function. Also this PR improves the error messages. ### Why are the changes needed? Proper names improve user experience w/ Spark SQL. ### Does this PR introduce _any_ user-facing change? Yes, the PR changes a user-facing error message. ### How was this patch tested? Error _LEGACY_ERROR_TEMP_3249 is tested by running the modified test suite: `$ build/sbt "catalyst/testOnly *RowJsonSuite"` Errors _LEGACY_ERROR_TEMP_323[6-7] and _LEGACY_ERROR_TEMP_324[7-8] are tested by running the modified test suite: `$ build/sbt "catalyst/testOnly *CSVExprUtilsSuite"` Golden files are regenerated using: `$ SPARK_GENERATE_GOLDEN_FILES=1` `$ build/sbt "core/testOnly *SparkThrowableSuite"` ### Was this patch authored or co-authored using generative AI tooling? No Closes #45423 from miland-db/miland-db/miland-legacy-error-class. 
Authored-by: Milan Dankovic Signed-off-by: Max Gekk --- .../main/resources/error/error-classes.json | 59 +++++++++++-------- ...ons-invalid-delimiter-value-error-class.md | 49 +++++++++++++++ docs/sql-error-conditions.md | 14 +++++ .../main/scala/org/apache/spark/sql/Row.scala | 9 +-- .../spark/sql/catalyst/csv/CSVExprUtils.scala | 13 ++-- .../org/apache/spark/sql/RowJsonSuite.scala | 11 ++-- .../sql/catalyst/csv/CSVExprUtilsSuite.scala | 10 ++-- 7 files changed, 123 insertions(+), 42 deletions(-) create mode 100644 docs/sql-error-conditions-invalid-delimiter-value-error-class.md diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index 9717ff2ed49c4..afe81b8e9bea7 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -1277,6 +1277,12 @@ ], "sqlState" : "58030" }, + "FAILED_ROW_TO_JSON" : { + "message" : [ + "Failed to convert the row value of the class to the target SQL type in the JSON format." + ], + "sqlState" : "2203G" + }, "FIELDS_ALREADY_EXISTS" : { "message" : [ "Cannot column, because already exists in ." @@ -1862,6 +1868,34 @@ }, "sqlState" : "42623" }, + "INVALID_DELIMITER_VALUE" : { + "message" : [ + "Invalid value for delimiter." + ], + "subClass" : { + "DELIMITER_LONGER_THAN_EXPECTED" : { + "message" : [ + "Delimiter cannot be more than one character: ." + ] + }, + "EMPTY_STRING" : { + "message" : [ + "Delimiter cannot be empty string." + ] + }, + "SINGLE_BACKSLASH" : { + "message" : [ + "Single backslash is prohibited. It has special meaning as beginning of an escape sequence. To get the backslash character, pass a string with two backslashes as the delimiter." + ] + }, + "UNSUPPORTED_SPECIAL_CHARACTER" : { + "message" : [ + "Unsupported special character for delimiter: ." 
+ ] + } + }, + "sqlState" : "42602" + }, "INVALID_DRIVER_MEMORY" : { "message" : [ "System memory must be at least .", @@ -7767,16 +7801,6 @@ "The numbers of zipped arrays and field names should be the same" ] }, - "_LEGACY_ERROR_TEMP_3236" : { - "message" : [ - "Unsupported special character for delimiter: " - ] - }, - "_LEGACY_ERROR_TEMP_3237" : { - "message" : [ - "Delimiter cannot be more than one character: " - ] - }, "_LEGACY_ERROR_TEMP_3238" : { "message" : [ "Failed to convert value (class of ) in type
to XML." @@ -7822,21 +7846,6 @@ "Failed to parse a value for data type ." ] }, - "_LEGACY_ERROR_TEMP_3247" : { - "message" : [ - "Delimiter cannot be empty string" - ] - }, - "_LEGACY_ERROR_TEMP_3248" : { - "message" : [ - "Single backslash is prohibited. It has special meaning as beginning of an escape sequence. To get the backslash character, pass a string with two backslashes as the delimiter." - ] - }, - "_LEGACY_ERROR_TEMP_3249" : { - "message" : [ - "Failed to convert value (class of }) with the type of to JSON." - ] - }, "_LEGACY_ERROR_TEMP_3250" : { "message" : [ "Failed to convert the JSON string '' to a field." diff --git a/docs/sql-error-conditions-invalid-delimiter-value-error-class.md b/docs/sql-error-conditions-invalid-delimiter-value-error-class.md new file mode 100644 index 0000000000000..815fe78bce945 --- /dev/null +++ b/docs/sql-error-conditions-invalid-delimiter-value-error-class.md @@ -0,0 +1,49 @@ +--- +layout: global +title: INVALID_DELIMITER_VALUE error class +displayTitle: INVALID_DELIMITER_VALUE error class +license: | + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--- + + + +[SQLSTATE: 42602](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) + +Invalid value for delimiter. 
+ +This error class has the following derived error classes: + +## DELIMITER_LONGER_THAN_EXPECTED + +Delimiter cannot be more than one character: ``. + +## EMPTY_STRING + +Delimiter cannot be empty string. + +## SINGLE_BACKSLASH + +Single backslash is prohibited. It has special meaning as beginning of an escape sequence. To get the backslash character, pass a string with two backslashes as the delimiter. + +## UNSUPPORTED_SPECIAL_CHARACTER + +Unsupported special character for delimiter: ``. + + diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md index 0be75cde968fb..0695ed28b7fc8 100644 --- a/docs/sql-error-conditions.md +++ b/docs/sql-error-conditions.md @@ -756,6 +756,12 @@ SQLSTATE: 58030 Failed to rename temp file `` to `` as FileSystem.rename returned false. +### FAILED_ROW_TO_JSON + +[SQLSTATE: 2203G](sql-error-conditions-sqlstates.html#class-22-data-exception) + +Failed to convert the row value `` of the class `` to the target SQL type `` in the JSON format. + ### FIELDS_ALREADY_EXISTS [SQLSTATE: 42710](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) @@ -1124,6 +1130,14 @@ Failed to execute `` command because the destination column or variab For more details see [INVALID_DEFAULT_VALUE](sql-error-conditions-invalid-default-value-error-class.html) +### [INVALID_DELIMITER_VALUE](sql-error-conditions-invalid-delimiter-value-error-class.html) + +[SQLSTATE: 42602](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) + +Invalid value for delimiter. 
+ +For more details see [INVALID_DELIMITER_VALUE](sql-error-conditions-invalid-delimiter-value-error-class.html) + ### INVALID_DRIVER_MEMORY SQLSTATE: F0000 diff --git a/sql/api/src/main/scala/org/apache/spark/sql/Row.scala b/sql/api/src/main/scala/org/apache/spark/sql/Row.scala index 3d49dbf80c7d1..0c065dd4d4baa 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/Row.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/Row.scala @@ -34,6 +34,7 @@ import org.apache.spark.annotation.{Stable, Unstable} import org.apache.spark.sql.catalyst.expressions.GenericRow import org.apache.spark.sql.catalyst.util.{DateFormatter, SparkDateTimeUtils, TimestampFormatter, UDTUtils} import org.apache.spark.sql.errors.DataTypeErrors +import org.apache.spark.sql.errors.DataTypeErrors.{toSQLType, toSQLValue} import org.apache.spark.sql.internal.SqlApiConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.CalendarInterval @@ -611,11 +612,11 @@ trait Row extends Serializable { case (v: Any, udt: UserDefinedType[Any @unchecked]) => toJson(UDTUtils.toRow(v, udt), udt.sqlType) case _ => throw new SparkIllegalArgumentException( - errorClass = "_LEGACY_ERROR_TEMP_3249", + errorClass = "FAILED_ROW_TO_JSON", messageParameters = Map( - "value" -> value.toString, - "valueClass" -> value.getClass.toString, - "dataType" -> dataType.toString) + "value" -> toSQLValue(value.toString), + "class" -> value.getClass.toString, + "sqlType" -> toSQLType(dataType.toString)) ) } toJson(this, schema) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtils.scala index 60113d6b3e125..62638d70dd904 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtils.scala @@ -68,8 +68,9 @@ object CSVExprUtils { @throws[SparkIllegalArgumentException] def toChar(str: 
String): Char = { (str: Seq[Char]) match { - case Seq() => throw new SparkIllegalArgumentException("_LEGACY_ERROR_TEMP_3247") - case Seq('\\') => throw new SparkIllegalArgumentException("_LEGACY_ERROR_TEMP_3248") + case Seq() => throw new SparkIllegalArgumentException("INVALID_DELIMITER_VALUE.EMPTY_STRING") + case Seq('\\') => + throw new SparkIllegalArgumentException("INVALID_DELIMITER_VALUE.SINGLE_BACKSLASH") case Seq(c) => c case Seq('\\', 't') => '\t' case Seq('\\', 'r') => '\r' @@ -82,10 +83,14 @@ object CSVExprUtils { case _ if str == "\u0000" => '\u0000' case Seq('\\', _) => throw new SparkIllegalArgumentException( - errorClass = "_LEGACY_ERROR_TEMP_3236", messageParameters = Map("str" -> str)) + errorClass = + "INVALID_DELIMITER_VALUE.UNSUPPORTED_SPECIAL_CHARACTER", + messageParameters = Map("str" -> str)) case _ => throw new SparkIllegalArgumentException( - errorClass = "_LEGACY_ERROR_TEMP_3237", messageParameters = Map("str" -> str)) + errorClass = + "INVALID_DELIMITER_VALUE.DELIMITER_LONGER_THAN_EXPECTED", + messageParameters = Map("str" -> str)) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowJsonSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowJsonSuite.scala index e5914d4a88d26..3e72dc7da24b7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowJsonSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowJsonSuite.scala @@ -24,6 +24,7 @@ import org.json4s.JsonAST.{JArray, JBool, JDecimal, JDouble, JLong, JNull, JObje import org.apache.spark.{SparkFunSuite, SparkIllegalArgumentException} import org.apache.spark.sql.catalyst.encoders.{ExamplePoint, ExamplePointUDT} import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema +import org.apache.spark.sql.errors.DataTypeErrors.{toSQLType, toSQLValue} import org.apache.spark.sql.types._ /** @@ -135,10 +136,12 @@ class RowJsonSuite extends SparkFunSuite { new StructType().add("a", ObjectType(classOf[(Int, Int)]))) row.jsonValue }, - 
errorClass = "_LEGACY_ERROR_TEMP_3249", + errorClass = "FAILED_ROW_TO_JSON", parameters = Map( - "value" -> "(1,2)", - "valueClass" -> "class scala.Tuple2$mcII$sp", - "dataType" -> "ObjectType(class scala.Tuple2)")) + "value" -> toSQLValue("(1,2)"), + "class" -> "class scala.Tuple2$mcII$sp", + "sqlType" -> toSQLType("ObjectType(class scala.Tuple2)") + ) + ) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtilsSuite.scala index b2e4cdfeafe2d..2e94c723a6f26 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtilsSuite.scala @@ -38,7 +38,7 @@ class CSVExprUtilsSuite extends SparkFunSuite { exception = intercept[SparkIllegalArgumentException]{ CSVExprUtils.toChar("ab") }, - errorClass = "_LEGACY_ERROR_TEMP_3237", + errorClass = "INVALID_DELIMITER_VALUE.DELIMITER_LONGER_THAN_EXPECTED", parameters = Map("str" -> "ab")) } @@ -47,7 +47,7 @@ class CSVExprUtilsSuite extends SparkFunSuite { exception = intercept[SparkIllegalArgumentException]{ CSVExprUtils.toChar("""\1""") }, - errorClass = "_LEGACY_ERROR_TEMP_3236", + errorClass = "INVALID_DELIMITER_VALUE.UNSUPPORTED_SPECIAL_CHARACTER", parameters = Map("str" -> """\1""")) } @@ -56,7 +56,7 @@ class CSVExprUtilsSuite extends SparkFunSuite { exception = intercept[SparkIllegalArgumentException]{ CSVExprUtils.toChar("""\""") }, - errorClass = "_LEGACY_ERROR_TEMP_3248", + errorClass = "INVALID_DELIMITER_VALUE.SINGLE_BACKSLASH", parameters = Map.empty) } @@ -65,7 +65,7 @@ class CSVExprUtilsSuite extends SparkFunSuite { exception = intercept[SparkIllegalArgumentException]{ CSVExprUtils.toChar("") }, - errorClass = "_LEGACY_ERROR_TEMP_3247", + errorClass = "INVALID_DELIMITER_VALUE.EMPTY_STRING", parameters = Map.empty) } @@ -76,7 +76,7 @@ class CSVExprUtilsSuite extends SparkFunSuite { // 
backslash, then tab ("""\\t""", Some("""\t"""), None), // invalid special character (dot) - ("""\.""", None, Some("_LEGACY_ERROR_TEMP_3236")), + ("""\.""", None, Some("INVALID_DELIMITER_VALUE.UNSUPPORTED_SPECIAL_CHARACTER")), // backslash, then dot ("""\\.""", Some("""\."""), None), // nothing special, just straight conversion