Skip to content

Commit

Permalink
Update statistics on insert to empty table in Hive connector
Browse files Browse the repository at this point in the history
  • Loading branch information
wendigo authored and findepi committed Jun 6, 2020
1 parent 9905ef8 commit ca8922e
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 1 deletion.
Expand Up @@ -104,6 +104,7 @@
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.stream.Stream;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Strings.isNullOrEmpty;
Expand All @@ -127,6 +128,7 @@
import static io.prestosql.plugin.hive.metastore.thrift.ThriftMetastoreUtil.fromPrestoPrincipalType;
import static io.prestosql.plugin.hive.metastore.thrift.ThriftMetastoreUtil.fromRolePrincipalGrants;
import static io.prestosql.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics;
import static io.prestosql.plugin.hive.metastore.thrift.ThriftMetastoreUtil.isAvroTableWithSchemaSet;
import static io.prestosql.plugin.hive.metastore.thrift.ThriftMetastoreUtil.parsePrivilege;
import static io.prestosql.plugin.hive.metastore.thrift.ThriftMetastoreUtil.toMetastoreApiPartition;
import static io.prestosql.plugin.hive.metastore.thrift.ThriftMetastoreUtil.updateStatisticsParameters;
Expand Down Expand Up @@ -511,7 +513,17 @@ public void updateTableStatistics(HiveIdentity identity, String databaseName, St
io.prestosql.plugin.hive.metastore.Table table = fromMetastoreApiTable(modifiedTable);
OptionalLong rowCount = basicStatistics.getRowCount();
List<ColumnStatisticsObj> metastoreColumnStatistics = updatedStatistics.getColumnStatistics().entrySet().stream()
.map(entry -> createMetastoreColumnStatistics(entry.getKey(), table.getColumn(entry.getKey()).get().getType(), entry.getValue(), rowCount))
.flatMap(entry -> {
Optional<Column> column = table.getColumn(entry.getKey());
if (column.isEmpty() && isAvroTableWithSchemaSet(modifiedTable)) {
// An Avro table can have a different effective schema than the one declared in the metastore. Still, the metastore
// does not allow storing statistics for a column it does not know about, so such columns are skipped.
return Stream.of();
}

HiveType type = column.orElseThrow(() -> new IllegalStateException("Column not found: " + entry.getKey())).getType();
return Stream.of(createMetastoreColumnStatistics(entry.getKey(), type, entry.getValue(), rowCount));
})
.collect(toImmutableList());
if (!metastoreColumnStatistics.isEmpty()) {
setTableColumnStatistics(identity, databaseName, tableName, metastoreColumnStatistics);
Expand Down
Expand Up @@ -83,6 +83,13 @@ private Statistics() {}

public static PartitionStatistics merge(PartitionStatistics first, PartitionStatistics second)
{
if (first.getBasicStatistics().getRowCount().isPresent() && first.getBasicStatistics().getRowCount().getAsLong() == 0) {
return second;
}
if (second.getBasicStatistics().getRowCount().isPresent() && second.getBasicStatistics().getRowCount().getAsLong() == 0) {
return first;
}

return new PartitionStatistics(
reduce(first.getBasicStatistics(), second.getBasicStatistics(), ADD),
merge(first.getColumnStatistics(), second.getColumnStatistics()));
Expand Down
Expand Up @@ -5329,6 +5329,60 @@ public void testCollectColumnStatisticsOnInsert()
assertUpdate("DROP TABLE " + tableName);
}

// Checks SHOW STATS output for a single-column table right after creation and again after
// inserting five rows into it. The table is dropped at the end of the test.
@Test
public void testCollectColumnStatisticsOnInsertToEmptyTable()
{
    String table = "test_collect_column_statistics_empty_table";
    String showStats = "SHOW STATS FOR " + table;

    // Expected right after CREATE TABLE: the column row is all nulls and the
    // trailing summary row carries 0E0.
    String statsOfNewTable = "SELECT * FROM VALUES " +
            "('col', null, null, null, null, null, null), " +
            "(null, null, null, null, 0E0, null, null)";

    // Expected after inserting the five values 50, 100, 1, 200, 2: the column row
    // ends with 1 and 200 (smallest and largest inserted value) and the summary row carries 5.0.
    String statsAfterInsert = "SELECT * FROM VALUES " +
            "('col', null, 5.0, 0.0, null, 1, 200), " +
            "(null, null, null, null, 5.0, null, null)";

    assertUpdate(format("CREATE TABLE %s (col INT)", table));
    assertQuery(showStats, statsOfNewTable);

    assertUpdate(format("INSERT INTO %s (col) VALUES 50, 100, 1, 200, 2", table), 5);
    assertQuery(showStats, statsAfterInsert);

    assertUpdate("DROP TABLE " + table);
}

// Checks SHOW STATS output for a two-column table at three points: right after creation,
// after ANALYZE restricted to col2 on the still-empty table, and after inserting five rows.
// The table is dropped at the end of the test.
@Test
public void testCollectColumnStatisticsOnInsertToPartiallyAnalyzedTable()
{
    String table = "test_collect_column_statistics_partially_analyzed_table";
    String showStats = "SHOW STATS FOR " + table;

    // Expected right after CREATE TABLE: both column rows are all nulls.
    String statsOfNewTable = "SELECT * FROM VALUES " +
            "('col', null, null, null, null, null, null), " +
            "('col2', null, null, null, null, null, null), " +
            "(null, null, null, null, 0E0, null, null)";

    // Expected after analyzing only col2 on the empty table: col2 gets 0.0 entries,
    // col stays all nulls.
    String statsAfterAnalyze = "SELECT * FROM VALUES " +
            "('col', null, null, null, null, null, null), " +
            "('col2', null, 0.0, 0.0, null, null, null), " +
            "(null, null, null, null, 0E0, null, null)";

    // Expected after the insert: both columns are populated, including the column
    // that was never analyzed.
    String statsAfterInsert = "SELECT * FROM VALUES " +
            "('col', null, 5.0, 0.0, null, 1, 200), " +
            "('col2', null, 5.0, 0.0, null, 0, 199), " +
            "(null, null, null, null, 5.0, null, null)";

    assertUpdate(format("CREATE TABLE %s (col INT, col2 INT)", table));
    assertQuery(showStats, statsOfNewTable);

    assertUpdate(format("ANALYZE %s WITH (columns = ARRAY['col2'])", table), 0);
    assertQuery(showStats, statsAfterAnalyze);

    assertUpdate(format("INSERT INTO %s (col, col2) VALUES (50, 49), (100, 99), (1, 0), (200, 199), (2, 1)", table), 5);
    assertQuery(showStats, statsAfterInsert);

    assertUpdate("DROP TABLE " + table);
}

@Test
public void testAnalyzePropertiesSystemTable()
{
Expand Down

0 comments on commit ca8922e

Please sign in to comment.