forked from apache/spark
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SPARK-42398][SQL] Refine default column value DS v2 interface
### What changes were proposed in this pull request? The current default value DS V2 API is a bit inconsistent. The `createTable` API only takes `StructType`, so implementations must know the special metadata key of the default value to access it. The `TableChange` API, on the other hand, has the default value as an individual field. This PR adds a new `Column` interface, which holds both the current default (as a SQL string) and the exist default (as a v2 literal). The `createTable` API now takes `Column`. This avoids the need for a special metadata key and is also more extensible when adding more special columns such as generated columns. It is also type-safe and makes sure the exist default is a literal. The implementation is free to decide how to encode and store default values. Note: backward compatibility is taken care of. ### Why are the changes needed? To provide a better DS v2 API for default column values. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes apache#40049 from cloud-fan/table2. Lead-authored-by: Wenchen Fan <wenchen@databricks.com> Co-authored-by: Wenchen Fan <cloud0fan@gmail.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
- Loading branch information
Showing
43 changed files
with
670 additions
and
229 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
90 changes: 90 additions & 0 deletions
90
sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/Column.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.connector.catalog; | ||
|
||
import java.util.Map; | ||
import javax.annotation.Nullable; | ||
|
||
import org.apache.spark.annotation.Evolving; | ||
import org.apache.spark.sql.connector.expressions.Transform; | ||
import org.apache.spark.sql.internal.connector.ColumnImpl; | ||
import org.apache.spark.sql.types.DataType; | ||
|
||
/** | ||
* An interface representing a column of a {@link Table}. It defines basic properties of a column, | ||
* such as name and data type, as well as some advanced ones like default column value. | ||
* <p> | ||
* Data Sources do not need to implement it. They should consume it in APIs like | ||
* {@link TableCatalog#createTable(Identifier, Column[], Transform[], Map)}, and report it in | ||
* {@link Table#columns()} by calling the static {@code create} functions of this interface to | ||
* create it. | ||
*/ | ||
@Evolving | ||
public interface Column { | ||
|
||
static Column create(String name, DataType dataType) { | ||
return create(name, dataType, true); | ||
} | ||
|
||
static Column create(String name, DataType dataType, boolean nullable) { | ||
return create(name, dataType, nullable, null, null, null); | ||
} | ||
|
||
static Column create( | ||
String name, | ||
DataType dataType, | ||
boolean nullable, | ||
String comment, | ||
ColumnDefaultValue defaultValue, | ||
String metadataInJSON) { | ||
return new ColumnImpl(name, dataType, nullable, comment, defaultValue, metadataInJSON); | ||
} | ||
|
||
/** | ||
* Returns the name of this table column. | ||
*/ | ||
String name(); | ||
|
||
/** | ||
* Returns the data type of this table column. | ||
*/ | ||
DataType dataType(); | ||
|
||
/** | ||
* Returns true if this column may produce null values. | ||
*/ | ||
boolean nullable(); | ||
|
||
/** | ||
* Returns the comment of this table column. Null means no comment. | ||
*/ | ||
@Nullable | ||
String comment(); | ||
|
||
/** | ||
* Returns the default value of this table column. Null means no default value. | ||
*/ | ||
@Nullable | ||
ColumnDefaultValue defaultValue(); | ||
|
||
/** | ||
* Returns the column metadata in JSON format. | ||
*/ | ||
@Nullable | ||
String metadataInJSON(); | ||
} |
84 changes: 84 additions & 0 deletions
84
sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ColumnDefaultValue.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.connector.catalog; | ||
|
||
import java.util.Objects; | ||
import javax.annotation.Nonnull; | ||
|
||
import org.apache.spark.annotation.Evolving; | ||
import org.apache.spark.sql.connector.expressions.Literal; | ||
|
||
/** | ||
* A class representing the default value of a column. It contains both the SQL string and literal | ||
* value of the user-specified default value expression. The SQL string should be re-evaluated for | ||
* each table writing command, which may produce different values if the default value expression is | ||
* something like {@code CURRENT_DATE()}. The literal value is used to back-fill existing data if | ||
* new columns with default value are added. Note: the back-fill can be lazy. The data sources can | ||
* remember the column default value and let the reader fill the column value when reading existing | ||
* data that do not have these new columns. | ||
*/ | ||
@Evolving | ||
public class ColumnDefaultValue { | ||
private String sql; | ||
private Literal<?> value; | ||
|
||
public ColumnDefaultValue(String sql, Literal<?> value) { | ||
this.sql = sql; | ||
this.value = value; | ||
} | ||
|
||
/** | ||
* Returns the SQL string (Spark SQL dialect) of the default value expression. This is the | ||
* original string contents of the SQL expression specified at the time the column was created in | ||
* a CREATE TABLE, REPLACE TABLE, or ADD COLUMN command. For example, for | ||
* "CREATE TABLE t (col INT DEFAULT 40 + 2)", this returns the string literal "40 + 2" (without | ||
* quotation marks). | ||
*/ | ||
@Nonnull | ||
public String getSql() { | ||
return sql; | ||
} | ||
|
||
/** | ||
* Returns the default value literal. This is the literal value corresponding to | ||
* {@link #getSql()}. For the example in the doc of {@link #getSql()}, this returns a literal | ||
* integer with a value of 42. | ||
*/ | ||
@Nonnull | ||
public Literal<?> getValue() { | ||
return value; | ||
} | ||
|
||
@Override | ||
public boolean equals(Object o) { | ||
if (this == o) return true; | ||
if (!(o instanceof ColumnDefaultValue)) return false; | ||
ColumnDefaultValue that = (ColumnDefaultValue) o; | ||
return sql.equals(that.sql) && value.equals(that.value); | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
return Objects.hash(sql, value); | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return "ColumnDefaultValue{sql='" + sql + "\', value=" + value + '}'; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.