Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support setting table and column comments in Delta Lake #12971

Merged
merged 2 commits into from Jun 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -135,6 +135,7 @@
import java.util.OptionalLong;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static com.google.common.base.Preconditions.checkArgument;
Expand Down Expand Up @@ -242,6 +243,8 @@ public class DeltaLakeMetadata
public static final String DELETE_OPERATION = "DELETE";
public static final String UPDATE_OPERATION = "UPDATE";
public static final String OPTIMIZE_OPERATION = "OPTIMIZE";
public static final String SET_TBLPROPERTIES_OPERATION = "SET TBLPROPERTIES";
public static final String CHANGE_COLUMN_OPERATION = "CHANGE COLUMN";
public static final String ISOLATION_LEVEL = "WriteSerializable";
private static final int READER_VERSION = 1;
private static final int WRITER_VERSION = 2;
Expand Down Expand Up @@ -955,6 +958,93 @@ private static boolean isCreatedBy(Table table, String queryId)
return tableQueryId.isPresent() && tableQueryId.get().equals(queryId);
}

@Override
public void setTableComment(ConnectorSession session, ConnectorTableHandle tableHandle, Optional<String> comment)
{
    DeltaLakeTableHandle table = (DeltaLakeTableHandle) tableHandle;
    // Reject tables whose protocol requires a writer version we cannot produce
    checkSupportedWriterVersion(session, table.getSchemaTableName());

    ConnectorTableMetadata metadata = getTableMetadata(session, table);

    try {
        // The comment change is committed as the next version on top of the snapshot we read
        long newVersion = table.getReadVersion() + 1;

        List<String> partitionedBy = getPartitionedBy(metadata.getProperties());
        // Re-emit the current schema (hidden columns excluded) unchanged; only the
        // table-level comment in the metadata entry is being replaced
        List<DeltaLakeColumnHandle> dataColumns = metadata.getColumns().stream()
                .filter(columnMetadata -> !columnMetadata.isHidden())
                .map(columnMetadata -> toColumnHandle(columnMetadata, partitionedBy))
                .collect(toImmutableList());

        Optional<Long> checkpointInterval = DeltaLakeTableProperties.getCheckpointInterval(metadata.getProperties());

        TransactionLogWriter logWriter = transactionLogWriterFactory.newWriter(session, table.getLocation());
        appendTableEntries(
                newVersion,
                logWriter,
                table.getMetadataEntry().getId(),
                dataColumns,
                partitionedBy,
                getColumnComments(table.getMetadataEntry()),
                buildDeltaMetadataConfiguration(checkpointInterval),
                SET_TBLPROPERTIES_OPERATION,
                session,
                nodeVersion,
                nodeId,
                comment);
        logWriter.flush();
    }
    catch (Exception e) {
        throw new TrinoException(DELTA_LAKE_BAD_WRITE, format("Unable to comment on table: %s.%s", table.getSchemaName(), table.getTableName()), e);
    }
}

@Override
public void setColumnComment(ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle column, Optional<String> comment)
{
    DeltaLakeTableHandle deltaLakeTableHandle = (DeltaLakeTableHandle) tableHandle;
    DeltaLakeColumnHandle deltaLakeColumnHandle = (DeltaLakeColumnHandle) column;
    // Fail fast before doing any metadata work if the table's writer version is unsupported
    checkSupportedWriterVersion(session, deltaLakeTableHandle.getSchemaTableName());

    ConnectorTableMetadata tableMetadata = getTableMetadata(session, deltaLakeTableHandle);

    try {
        // The new metadata entry is committed as the next version on top of the snapshot we read
        long commitVersion = deltaLakeTableHandle.getReadVersion() + 1;

        List<String> partitionColumns = getPartitionedBy(tableMetadata.getProperties());
        // Re-emit the current schema (hidden columns excluded) unchanged
        List<DeltaLakeColumnHandle> columns = tableMetadata.getColumns().stream()
                .filter(columnMetadata -> !columnMetadata.isHidden())
                .map(columnMetadata -> toColumnHandle(columnMetadata, partitionColumns))
                .collect(toImmutableList());

        // Carry over every existing column comment except the target column's,
        // then set the new comment; when `comment` is empty the target's comment is dropped
        ImmutableMap.Builder<String, String> columnComments = ImmutableMap.builder();
        getColumnComments(deltaLakeTableHandle.getMetadataEntry()).forEach((columnName, columnComment) -> {
            if (!columnName.equals(deltaLakeColumnHandle.getName())) {
                columnComments.put(columnName, columnComment);
            }
        });
        comment.ifPresent(value -> columnComments.put(deltaLakeColumnHandle.getName(), value));

        Optional<Long> checkpointInterval = DeltaLakeTableProperties.getCheckpointInterval(tableMetadata.getProperties());

        TransactionLogWriter transactionLogWriter = transactionLogWriterFactory.newWriter(session, deltaLakeTableHandle.getLocation());
        appendTableEntries(
                commitVersion,
                transactionLogWriter,
                deltaLakeTableHandle.getMetadataEntry().getId(),
                columns,
                partitionColumns,
                columnComments.buildOrThrow(),
                buildDeltaMetadataConfiguration(checkpointInterval),
                CHANGE_COLUMN_OPERATION,
                session,
                nodeVersion,
                nodeId,
                // Preserve the existing table comment; this operation only touches the column comment
                Optional.ofNullable(deltaLakeTableHandle.getMetadataEntry().getDescription()));
        transactionLogWriter.flush();
    }
    catch (Exception e) {
        throw new TrinoException(DELTA_LAKE_BAD_WRITE, format("Unable to add '%s' column comment for: %s.%s", deltaLakeColumnHandle.getName(), deltaLakeTableHandle.getSchemaName(), deltaLakeTableHandle.getTableName()), e);
    }
}

@Override
public void addColumn(ConnectorSession session, ConnectorTableHandle tableHandle, ColumnMetadata newColumnMetadata)
{
Expand Down
Expand Up @@ -111,8 +111,6 @@ protected boolean hasBehavior(TestingConnectorBehavior connectorBehavior)
case SUPPORTS_RENAME_TABLE:
case SUPPORTS_DROP_COLUMN:
case SUPPORTS_RENAME_COLUMN:
case SUPPORTS_COMMENT_ON_TABLE:
case SUPPORTS_COMMENT_ON_COLUMN:
case SUPPORTS_RENAME_SCHEMA:
case SUPPORTS_NOT_NULL_CONSTRAINT:
return false;
Expand Down
Expand Up @@ -7,5 +7,6 @@ hive.non-managed-table-writes-enabled=true
# Required by some product tests
hive.hive-views.enabled=true
hive.allow-comment-table=true
hive.allow-comment-column=true
hive.allow-rename-table=true
hive.delta-lake-catalog-name=delta
Expand Up @@ -10,5 +10,6 @@ hive.s3.ssl.enabled=false
# Required by some product tests
hive.hive-views.enabled=true
hive.allow-comment-table=true
hive.allow-comment-column=true
hive.allow-rename-table=true
hive.delta-lake-catalog-name=delta
Expand Up @@ -15,12 +15,15 @@

import org.testng.annotations.Test;

import static io.trino.tempto.assertions.QueryAssert.Row.row;
import static io.trino.tempto.assertions.QueryAssert.assertQueryFailure;
import static io.trino.tempto.assertions.QueryAssert.assertThat;
import static io.trino.tests.product.TestGroups.DELTA_LAKE_DATABRICKS;
import static io.trino.tests.product.TestGroups.DELTA_LAKE_OSS;
import static io.trino.tests.product.TestGroups.PROFILE_SPECIFIC_TESTS;
import static io.trino.tests.product.deltalake.util.DeltaLakeTestUtils.getColumnCommentOnDelta;
import static io.trino.tests.product.deltalake.util.DeltaLakeTestUtils.getColumnCommentOnTrino;
import static io.trino.tests.product.deltalake.util.DeltaLakeTestUtils.getTableCommentOnDelta;
import static io.trino.tests.product.hive.util.TemporaryHiveTable.randomTableSuffix;
import static io.trino.tests.product.utils.QueryExecutors.onDelta;
import static io.trino.tests.product.utils.QueryExecutors.onTrino;
Expand Down Expand Up @@ -73,4 +76,94 @@ public void testAddColumnUnsupportedWriterVersion()
onDelta().executeQuery("DROP TABLE default." + tableName);
}
}

@Test(groups = {DELTA_LAKE_DATABRICKS, DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS})
public void testCommentOnTable()
{
    String tableName = "test_dl_comment_table_" + randomTableSuffix();
    String tableDirectory = "databricks-compatibility-test-" + tableName;

    onTrino().executeQuery(format(
            "CREATE TABLE delta.default.%s (col INT) WITH (location = 's3://%s/%s')",
            tableName,
            bucketName,
            tableDirectory));

    try {
        // Set the comment through Trino ...
        onTrino().executeQuery(format("COMMENT ON TABLE delta.default.%s IS 'test comment'", tableName));

        // ... and verify it is visible from both Trino and Delta
        assertThat(onTrino().executeQuery(format(
                "SELECT comment FROM system.metadata.table_comments WHERE catalog_name = 'delta' AND schema_name = 'default' AND table_name = '%s'",
                tableName)))
                .containsOnly(row("test comment"));
        assertEquals(getTableCommentOnDelta("default", tableName), "test comment");
    }
    finally {
        onTrino().executeQuery("DROP TABLE delta.default." + tableName);
    }
}

@Test(groups = {DELTA_LAKE_DATABRICKS, DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS})
public void testCommentOnTableUnsupportedWriterVersion()
{
    String tableName = "test_dl_comment_table_unsupported_writer_" + randomTableSuffix();
    String tableDirectory = "databricks-compatibility-test-" + tableName;

    // minWriterVersion 3 is above what the Trino Delta Lake connector can write,
    // so COMMENT ON TABLE must be rejected.
    // Note the trailing space after the LOCATION literal: without it the generated SQL
    // ran the string literal directly into the TBLPROPERTIES keyword.
    onDelta().executeQuery(format(
            "CREATE TABLE default.%s (col int) " +
                    "USING DELTA LOCATION 's3://%s/%s' " +
                    "TBLPROPERTIES ('delta.minWriterVersion'='3')",
            tableName,
            bucketName,
            tableDirectory));

    try {
        assertQueryFailure(() -> onTrino().executeQuery("COMMENT ON TABLE delta.default." + tableName + " IS 'test comment'"))
                .hasMessageMatching(".* Table .* requires Delta Lake writer version 3 which is not supported");
    }
    finally {
        onTrino().executeQuery("DROP TABLE delta.default." + tableName);
    }
}

@Test(groups = {DELTA_LAKE_DATABRICKS, DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS})
public void testCommentOnColumn()
{
    String tableName = "test_dl_comment_column_" + randomTableSuffix();
    String tableDirectory = "databricks-compatibility-test-" + tableName;

    onTrino().executeQuery(format(
            "CREATE TABLE delta.default.%s (col INT) WITH (location = 's3://%s/%s')",
            tableName,
            bucketName,
            tableDirectory));

    try {
        String columnComment = "test column comment";
        onTrino().executeQuery(format("COMMENT ON COLUMN delta.default.%s.col IS '%s'", tableName, columnComment));

        // The comment must round-trip through both engines
        assertEquals(getColumnCommentOnTrino("default", tableName, "col"), columnComment);
        assertEquals(getColumnCommentOnDelta("default", tableName, "col"), columnComment);
    }
    finally {
        onTrino().executeQuery("DROP TABLE delta.default." + tableName);
    }
}

@Test(groups = {DELTA_LAKE_DATABRICKS, DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS})
public void testCommentOnColumnUnsupportedWriterVersion()
{
    String tableName = "test_dl_comment_column_unsupported_writer_" + randomTableSuffix();
    String tableDirectory = "databricks-compatibility-test-" + tableName;

    // minWriterVersion 3 is above what the Trino Delta Lake connector can write,
    // so COMMENT ON COLUMN must be rejected.
    // Note the trailing space after the LOCATION literal: without it the generated SQL
    // ran the string literal directly into the TBLPROPERTIES keyword.
    onDelta().executeQuery(format(
            "CREATE TABLE default.%s (col int) " +
                    "USING DELTA LOCATION 's3://%s/%s' " +
                    "TBLPROPERTIES ('delta.minWriterVersion'='3')",
            tableName,
            bucketName,
            tableDirectory));

    try {
        assertQueryFailure(() -> onTrino().executeQuery("COMMENT ON COLUMN delta.default." + tableName + ".col IS 'test column comment'"))
                .hasMessageMatching(".* Table .* requires Delta Lake writer version 3 which is not supported");
    }
    finally {
        onTrino().executeQuery("DROP TABLE delta.default." + tableName);
    }
}
}
Expand Up @@ -15,7 +15,6 @@

import com.google.common.collect.ImmutableList;
import io.trino.tempto.assertions.QueryAssert;
import io.trino.tempto.query.QueryResult;
import org.testng.annotations.Test;

import java.util.List;
Expand All @@ -26,6 +25,7 @@
import static io.trino.tests.product.TestGroups.PROFILE_SPECIFIC_TESTS;
import static io.trino.tests.product.deltalake.util.DeltaLakeTestUtils.getColumnCommentOnDelta;
import static io.trino.tests.product.deltalake.util.DeltaLakeTestUtils.getColumnCommentOnTrino;
import static io.trino.tests.product.deltalake.util.DeltaLakeTestUtils.getTableCommentOnDelta;
import static io.trino.tests.product.hive.util.TemporaryHiveTable.randomTableSuffix;
import static io.trino.tests.product.utils.QueryExecutors.onDelta;
import static io.trino.tests.product.utils.QueryExecutors.onTrino;
Expand Down Expand Up @@ -198,15 +198,6 @@ public void testCreateTableWithTableComment()
}
}

private static String getTableCommentOnDelta(String schemaName, String tableName)
{
    // DESCRIBE EXTENDED reports the table comment in a row whose first cell is "Comment";
    // fails if the table has no comment row
    QueryResult result = onDelta().executeQuery(format("DESCRIBE EXTENDED %s.%s", schemaName, tableName));
    return (String) result.rows().stream()
            .filter(row -> row.get(0).equals("Comment"))
            .findFirst()
            .map(row -> row.get(1))
            .orElseThrow();
}

@Test(groups = {DELTA_LAKE_DATABRICKS, PROFILE_SPECIFIC_TESTS})
public void testCreateTableWithColumnCommentOnTrino()
{
Expand Down
Expand Up @@ -477,8 +477,56 @@ public void testHiveToDeltaCommentTable()
try {
assertThat(onTrino().executeQuery("SELECT comment FROM system.metadata.table_comments WHERE catalog_name = 'delta' AND schema_name = 'default' AND table_name = '" + tableName + "'"))
.is(new Condition<>(queryResult -> queryResult.row(0).get(0) == null, "Unexpected table comment"));
assertQueryFailure(() -> onTrino().executeQuery("COMMENT ON TABLE hive.default.\"" + tableName + "\" IS 'This is my table, there are many like it but this one is mine'"))
.hasMessageMatching(".*This connector does not support setting table comments");

String tableComment = "This is my table, there are many like it but this one is mine";
onTrino().executeQuery(format("COMMENT ON TABLE hive.default.\"" + tableName + "\" IS '%s'", tableComment));
assertTableComment("hive", "default", tableName).isEqualTo(tableComment);
assertTableComment("delta", "default", tableName).isEqualTo(tableComment);
}
finally {
onDelta().executeQuery("DROP TABLE " + tableName);
}
}

@Test(groups = {DELTA_LAKE_DATABRICKS, DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS})
public void testDeltaToHiveCommentColumn()
{
    String tableName = "hive_comment_column_by_delta_" + randomTableSuffix();
    String columnName = "id";

    onTrino().executeQuery(createTableInHiveConnector("default", tableName, true));
    try {
        // Freshly created column carries no comment in either connector
        assertColumnComment("hive", "default", tableName, columnName).isNull();
        assertColumnComment("delta", "default", tableName, columnName).isNull();

        // Comment through the Delta connector, then verify both connectors agree
        String columnComment = "Internal identifier";
        onTrino().executeQuery(format("COMMENT ON COLUMN delta.default.%s.%s IS '%s'", tableName, columnName, columnComment));
        assertColumnComment("hive", "default", tableName, columnName).isEqualTo(columnComment);
        assertColumnComment("delta", "default", tableName, columnName).isEqualTo(columnComment);
    }
    finally {
        onTrino().executeQuery("DROP TABLE hive.default." + tableName);
    }
}

@Test(groups = {DELTA_LAKE_DATABRICKS, DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS})
public void testHiveToDeltaCommentColumn()
{
String tableName = "delta_comment_column_by_hive_" + randomTableSuffix();
String columnName = "nationkey";

onDelta().executeQuery(createTableInDatabricks(tableName, true));

try {
assertColumnComment("hive", "default", tableName, columnName).isNull();
assertColumnComment("delta", "default", tableName, columnName).isNull();

String columnComment = "Internal identifier for the nation";
onTrino().executeQuery(format("COMMENT ON COLUMN hive.default.%s.%s IS '%s'", tableName, columnName, columnComment));

assertColumnComment("hive", "default", tableName, columnName).isEqualTo(columnComment);
assertColumnComment("delta", "default", tableName, columnName).isEqualTo(columnComment);
}
finally {
onDelta().executeQuery("DROP TABLE " + tableName);
Expand Down Expand Up @@ -836,6 +884,21 @@ private static QueryResult readTableComment(String catalog, String schema, Strin
param(VARCHAR, tableName));
}

private static AbstractStringAssert<?> assertColumnComment(String catalog, String schema, String tableName, String columnName)
{
    // Expect exactly one row with exactly one cell: the comment (possibly null)
    Object comment = getOnlyElement(getOnlyElement(readColumnComment(catalog, schema, tableName, columnName).rows()));
    return Assertions.assertThat((String) comment);
}

private static QueryResult readColumnComment(String catalog, String schema, String tableName, String columnName)
{
    // information_schema exposes column comments uniformly across connectors
    String sql = format(
            "SELECT comment FROM %s.information_schema.columns WHERE table_schema = ? AND table_name = ? AND column_name = ?",
            catalog);
    return onTrino().executeQuery(sql, param(VARCHAR, schema), param(VARCHAR, tableName), param(VARCHAR, columnName));
}

private static void assertResultsEqual(QueryResult first, QueryResult second)
{
assertThat(first).containsOnly(second.rows().stream()
Expand Down
Expand Up @@ -34,4 +34,13 @@ public static String getColumnCommentOnDelta(String schemaName, String tableName
QueryResult result = onDelta().executeQuery(format("DESCRIBE %s.%s %s", schemaName, tableName, columnName));
return (String) result.row(2).get(1);
}

public static String getTableCommentOnDelta(String schemaName, String tableName)
{
    // Scan DESCRIBE EXTENDED output for the row labeled "Comment";
    // throws when the table has no comment row
    QueryResult describeResult = onDelta().executeQuery(format("DESCRIBE EXTENDED %s.%s", schemaName, tableName));
    Object comment = describeResult.rows().stream()
            .filter(row -> row.get(0).equals("Comment"))
            .map(row -> row.get(1))
            .findFirst()
            .orElseThrow();
    return (String) comment;
}
}