Skip to content

Commit

Permalink
Refactor HiveMetastore statistics related interfaces
Browse files Browse the repository at this point in the history
- Remove optional from the Metastore statistics interfaces
- Remove column granularity from getStatistics* metastore methods

Optional.empty() and Optional.of(ImmutableMap.of()) is basically the same.
It introduces a lot of confusion of when to return an empty map, and when
to return an empty optional.

Also currently we never query statistics for a subset of columns. It is not supported
by the ConnectorMetadata interface. So this code just complicates the interfaces,
and increases the amount of memory used by the statistics cache.
  • Loading branch information
arhimondr committed May 29, 2018
1 parent 6efaa26 commit 4a4fbb4
Show file tree
Hide file tree
Showing 10 changed files with 127 additions and 265 deletions.

Large diffs are not rendered by default.

Expand Up @@ -29,9 +29,9 @@ public interface ExtendedHiveMetastore


Optional<Table> getTable(String databaseName, String tableName); Optional<Table> getTable(String databaseName, String tableName);


Optional<Map<String, HiveColumnStatistics>> getTableColumnStatistics(String databaseName, String tableName, Set<String> columnNames); Map<String, HiveColumnStatistics> getTableColumnStatistics(String databaseName, String tableName);


Optional<Map<String, Map<String, HiveColumnStatistics>>> getPartitionColumnStatistics(String databaseName, String tableName, Set<String> partitionNames, Set<String> columnNames); Map<String, Map<String, HiveColumnStatistics>> getPartitionColumnStatistics(String databaseName, String tableName, Set<String> partitionNames);


Optional<List<String>> getAllTables(String databaseName); Optional<List<String>> getAllTables(String databaseName);


Expand Down
Expand Up @@ -150,30 +150,30 @@ public synchronized Optional<Table> getTable(String databaseName, String tableNa
} }
} }


public synchronized Optional<Map<String, HiveColumnStatistics>> getTableColumnStatistics(String databaseName, String tableName, Set<String> columnNames) public synchronized Map<String, HiveColumnStatistics> getTableColumnStatistics(String databaseName, String tableName)
{ {
checkReadable(); checkReadable();
Action<TableAndMore> tableAction = tableActions.get(new SchemaTableName(databaseName, tableName)); Action<TableAndMore> tableAction = tableActions.get(new SchemaTableName(databaseName, tableName));
if (tableAction == null) { if (tableAction == null) {
return delegate.getTableColumnStatistics(databaseName, tableName, columnNames); return delegate.getTableColumnStatistics(databaseName, tableName);
} }
switch (tableAction.getType()) { switch (tableAction.getType()) {
case ADD: case ADD:
case ALTER: case ALTER:
case INSERT_EXISTING: case INSERT_EXISTING:
case DROP: case DROP:
return Optional.empty(); return ImmutableMap.of();
default: default:
throw new IllegalStateException("Unknown action type"); throw new IllegalStateException("Unknown action type");
} }
} }


public synchronized Optional<Map<String, Map<String, HiveColumnStatistics>>> getPartitionColumnStatistics(String databaseName, String tableName, Set<String> partitionNames, Set<String> columnNames) public synchronized Map<String, Map<String, HiveColumnStatistics>> getPartitionColumnStatistics(String databaseName, String tableName, Set<String> partitionNames)
{ {
checkReadable(); checkReadable();
Optional<Table> table = getTable(databaseName, tableName); Optional<Table> table = getTable(databaseName, tableName);
if (!table.isPresent()) { if (!table.isPresent()) {
return Optional.empty(); return ImmutableMap.of();
} }
TableSource tableSource = getTableSource(databaseName, tableName); TableSource tableSource = getTableSource(databaseName, tableName);
Map<List<String>, Action<PartitionAndMore>> partitionActionsOfTable = partitionActions.computeIfAbsent(new SchemaTableName(databaseName, tableName), k -> new HashMap<>()); Map<List<String>, Action<PartitionAndMore>> partitionActionsOfTable = partitionActions.computeIfAbsent(new SchemaTableName(databaseName, tableName), k -> new HashMap<>());
Expand All @@ -199,14 +199,14 @@ public synchronized Optional<Map<String, Map<String, HiveColumnStatistics>>> get
} }
} }


Optional<Map<String, Map<String, HiveColumnStatistics>>> delegateResult = delegate.getPartitionColumnStatistics(databaseName, tableName, partitionNamesToQuery.build(), columnNames); Map<String, Map<String, HiveColumnStatistics>> delegateResult = delegate.getPartitionColumnStatistics(databaseName, tableName, partitionNamesToQuery.build());
if (delegateResult.isPresent()) { if (!delegateResult.isEmpty()) {
resultBuilder.putAll(delegateResult.get()); resultBuilder.putAll(delegateResult);
} }
else { else {
partitionNamesToQuery.build().forEach(partionName -> resultBuilder.put(partionName, ImmutableMap.of())); partitionNamesToQuery.build().forEach(partionName -> resultBuilder.put(partionName, ImmutableMap.of()));
} }
return Optional.of(resultBuilder.build()); return resultBuilder.build();
} }


/** /**
Expand Down
Expand Up @@ -270,15 +270,15 @@ public synchronized Optional<Table> getTable(String databaseName, String tableNa
} }


@Override @Override
public Optional<Map<String, HiveColumnStatistics>> getTableColumnStatistics(String databaseName, String tableName, Set<String> columnNames) public Map<String, HiveColumnStatistics> getTableColumnStatistics(String databaseName, String tableName)
{ {
return Optional.of(ImmutableMap.of()); return ImmutableMap.of();
} }


@Override @Override
public Optional<Map<String, Map<String, HiveColumnStatistics>>> getPartitionColumnStatistics(String databaseName, String tableName, Set<String> partitionNames, Set<String> columnNames) public Map<String, Map<String, HiveColumnStatistics>> getPartitionColumnStatistics(String databaseName, String tableName, Set<String> partitionNames)
{ {
return Optional.of(ImmutableMap.of()); return ImmutableMap.of();
} }


private Table getRequiredTable(String databaseName, String tableName) private Table getRequiredTable(String databaseName, String tableName)
Expand Down
Expand Up @@ -217,15 +217,15 @@ private Table getTableOrElseThrow(String databaseName, String tableName)
} }


@Override @Override
public Optional<Map<String, HiveColumnStatistics>> getTableColumnStatistics(String databaseName, String tableName, Set<String> columnNames) public Map<String, HiveColumnStatistics> getTableColumnStatistics(String databaseName, String tableName)
{ {
return Optional.of(ImmutableMap.of()); return ImmutableMap.of();
} }


@Override @Override
public Optional<Map<String, Map<String, HiveColumnStatistics>>> getPartitionColumnStatistics(String databaseName, String tableName, Set<String> partitionNames, Set<String> columnNames) public Map<String, Map<String, HiveColumnStatistics>> getPartitionColumnStatistics(String databaseName, String tableName, Set<String> partitionNames)
{ {
return Optional.of(ImmutableMap.of()); return ImmutableMap.of();
} }


@Override @Override
Expand Down
Expand Up @@ -16,6 +16,7 @@
import com.facebook.presto.hive.HiveType; import com.facebook.presto.hive.HiveType;
import com.facebook.presto.hive.HiveUtil; import com.facebook.presto.hive.HiveUtil;
import com.facebook.presto.hive.PartitionNotFoundException; import com.facebook.presto.hive.PartitionNotFoundException;
import com.facebook.presto.hive.metastore.Column;
import com.facebook.presto.hive.metastore.Database; import com.facebook.presto.hive.metastore.Database;
import com.facebook.presto.hive.metastore.ExtendedHiveMetastore; import com.facebook.presto.hive.metastore.ExtendedHiveMetastore;
import com.facebook.presto.hive.metastore.HiveColumnStatistics; import com.facebook.presto.hive.metastore.HiveColumnStatistics;
Expand Down Expand Up @@ -47,6 +48,8 @@
import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.toMetastoreApiPrivilegeGrantInfo; import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.toMetastoreApiPrivilegeGrantInfo;
import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.toMetastoreApiTable; import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.toMetastoreApiTable;
import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static java.util.Objects.requireNonNull; import static java.util.Objects.requireNonNull;
import static java.util.function.UnaryOperator.identity; import static java.util.function.UnaryOperator.identity;


Expand Down Expand Up @@ -80,29 +83,35 @@ public Optional<Table> getTable(String databaseName, String tableName)
} }


@Override @Override
public Optional<Map<String, HiveColumnStatistics>> getTableColumnStatistics(String databaseName, String tableName, Set<String> columnNames) public Map<String, HiveColumnStatistics> getTableColumnStatistics(String databaseName, String tableName)
{ {
return delegate.getTableColumnStatistics(databaseName, tableName, columnNames).map(this::groupStatisticsByColumn); Table table = getTable(databaseName, tableName)
.orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));
Set<String> dataColumns = table.getDataColumns().stream()
.map(Column::getName)
.collect(toImmutableSet());
return groupStatisticsByColumn(delegate.getTableColumnStatistics(databaseName, tableName, dataColumns));
} }


@Override @Override
public Optional<Map<String, Map<String, HiveColumnStatistics>>> getPartitionColumnStatistics(String databaseName, String tableName, Set<String> partitionNames, Set<String> columnNames) public Map<String, Map<String, HiveColumnStatistics>> getPartitionColumnStatistics(String databaseName, String tableName, Set<String> partitionNames)
{ {
return delegate.getPartitionColumnStatistics(databaseName, tableName, partitionNames, columnNames).map( Table table = getTable(databaseName, tableName)
statistics -> ImmutableMap.copyOf( .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));
statistics.entrySet().stream() Set<String> dataColumns = table.getDataColumns().stream()
.collect(Collectors.toMap( .map(Column::getName)
Map.Entry::getKey, .collect(toImmutableSet());
entry -> groupStatisticsByColumn(entry.getValue()))))); Map<String, Set<ColumnStatisticsObj>> statistics = delegate.getPartitionColumnStatistics(databaseName, tableName, partitionNames, dataColumns);
return statistics.entrySet()
.stream()
.filter(entry -> !entry.getValue().isEmpty())
.collect(toImmutableMap(Map.Entry::getKey, entry -> groupStatisticsByColumn(entry.getValue())));
} }


private Map<String, HiveColumnStatistics> groupStatisticsByColumn(Set<ColumnStatisticsObj> statistics) private Map<String, HiveColumnStatistics> groupStatisticsByColumn(Set<ColumnStatisticsObj> statistics)
{ {
return ImmutableMap.copyOf( return statistics.stream()
statistics.stream() .collect(toImmutableMap(ColumnStatisticsObj::getColName, ThriftMetastoreUtil::fromMetastoreApiColumnStatistics));
.collect(Collectors.toMap(
ColumnStatisticsObj::getColName,
ThriftMetastoreUtil::fromMetastoreApiColumnStatistics)));
} }


@Override @Override
Expand Down
Expand Up @@ -72,9 +72,9 @@ public interface HiveMetastore


Optional<Table> getTable(String databaseName, String tableName); Optional<Table> getTable(String databaseName, String tableName);


Optional<Set<ColumnStatisticsObj>> getTableColumnStatistics(String databaseName, String tableName, Set<String> columnNames); Set<ColumnStatisticsObj> getTableColumnStatistics(String databaseName, String tableName, Set<String> columnNames);


Optional<Map<String, Set<ColumnStatisticsObj>>> getPartitionColumnStatistics(String databaseName, String tableName, Set<String> partitionNames, Set<String> columnNames); Map<String, Set<ColumnStatisticsObj>> getPartitionColumnStatistics(String databaseName, String tableName, Set<String> partitionNames, Set<String> columnNames);


Set<String> getRoles(String user); Set<String> getRoles(String user);


Expand Down
Expand Up @@ -25,6 +25,7 @@
import com.facebook.presto.spi.SchemaTableName; import com.facebook.presto.spi.SchemaTableName;
import com.facebook.presto.spi.TableNotFoundException; import com.facebook.presto.spi.TableNotFoundException;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables; import com.google.common.collect.Iterables;
import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.TableType;
Expand Down Expand Up @@ -54,6 +55,7 @@
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional; import java.util.Optional;
import java.util.Set; import java.util.Set;
import java.util.concurrent.Callable; import java.util.concurrent.Callable;
Expand All @@ -70,12 +72,12 @@
import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Throwables.throwIfUnchecked; import static com.google.common.base.Throwables.throwIfUnchecked;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static com.google.common.collect.Iterables.getOnlyElement; import static com.google.common.collect.Iterables.getOnlyElement;
import static com.google.common.collect.Sets.newHashSet; import static com.google.common.collect.Sets.newHashSet;
import static java.lang.String.format; import static java.lang.String.format;
import static java.util.Objects.requireNonNull; import static java.util.Objects.requireNonNull;
import static java.util.function.Function.identity; import static java.util.function.Function.identity;
import static java.util.stream.Collectors.toMap;
import static java.util.stream.Collectors.toSet; import static java.util.stream.Collectors.toSet;
import static org.apache.hadoop.hive.metastore.api.HiveObjectType.DATABASE; import static org.apache.hadoop.hive.metastore.api.HiveObjectType.DATABASE;
import static org.apache.hadoop.hive.metastore.api.HiveObjectType.TABLE; import static org.apache.hadoop.hive.metastore.api.HiveObjectType.TABLE;
Expand Down Expand Up @@ -229,20 +231,20 @@ private static boolean isPrestoView(Table table)
} }


@Override @Override
public Optional<Set<ColumnStatisticsObj>> getTableColumnStatistics(String databaseName, String tableName, Set<String> columnNames) public Set<ColumnStatisticsObj> getTableColumnStatistics(String databaseName, String tableName, Set<String> columnNames)
{ {
try { try {
return retry() return retry()
.stopOn(NoSuchObjectException.class, HiveViewNotSupportedException.class) .stopOn(NoSuchObjectException.class, HiveViewNotSupportedException.class)
.stopOnIllegalExceptions() .stopOnIllegalExceptions()
.run("getTableColumnStatistics", stats.getGetTableColumnStatistics().wrap(() -> { .run("getTableColumnStatistics", stats.getGetTableColumnStatistics().wrap(() -> {
try (HiveMetastoreClient client = clientProvider.createMetastoreClient()) { try (HiveMetastoreClient client = clientProvider.createMetastoreClient()) {
return Optional.of(ImmutableSet.copyOf(client.getTableColumnStatistics(databaseName, tableName, ImmutableList.copyOf(columnNames)))); return ImmutableSet.copyOf(client.getTableColumnStatistics(databaseName, tableName, ImmutableList.copyOf(columnNames)));
} }
})); }));
} }
catch (NoSuchObjectException e) { catch (NoSuchObjectException e) {
return Optional.empty(); return ImmutableSet.of();
} }
catch (TException e) { catch (TException e) {
throw new PrestoException(HIVE_METASTORE_ERROR, e); throw new PrestoException(HIVE_METASTORE_ERROR, e);
Expand All @@ -253,7 +255,7 @@ public Optional<Set<ColumnStatisticsObj>> getTableColumnStatistics(String databa
} }


@Override @Override
public Optional<Map<String, Set<ColumnStatisticsObj>>> getPartitionColumnStatistics(String databaseName, String tableName, Set<String> partitionNames, Set<String> columnNames) public Map<String, Set<ColumnStatisticsObj>> getPartitionColumnStatistics(String databaseName, String tableName, Set<String> partitionNames, Set<String> columnNames)
{ {
try { try {
return retry() return retry()
Expand All @@ -262,16 +264,14 @@ public Optional<Map<String, Set<ColumnStatisticsObj>>> getPartitionColumnStatist
.run("getPartitionColumnStatistics", stats.getGetPartitionColumnStatistics().wrap(() -> { .run("getPartitionColumnStatistics", stats.getGetPartitionColumnStatistics().wrap(() -> {
try (HiveMetastoreClient client = clientProvider.createMetastoreClient()) { try (HiveMetastoreClient client = clientProvider.createMetastoreClient()) {
Map<String, List<ColumnStatisticsObj>> partitionColumnStatistics = client.getPartitionColumnStatistics(databaseName, tableName, ImmutableList.copyOf(partitionNames), ImmutableList.copyOf(columnNames)); Map<String, List<ColumnStatisticsObj>> partitionColumnStatistics = client.getPartitionColumnStatistics(databaseName, tableName, ImmutableList.copyOf(partitionNames), ImmutableList.copyOf(columnNames));
return Optional.of(partitionColumnStatistics.entrySet() return partitionColumnStatistics.entrySet()
.stream() .stream()
.collect(toMap( .collect(toImmutableMap(Entry::getKey, entry -> ImmutableSet.copyOf(entry.getValue())));
Map.Entry::getKey,
entry -> ImmutableSet.copyOf(entry.getValue()))));
} }
})); }));
} }
catch (NoSuchObjectException e) { catch (NoSuchObjectException e) {
return Optional.empty(); return ImmutableMap.of();
} }
catch (TException e) { catch (TException e) {
throw new PrestoException(HIVE_METASTORE_ERROR, e); throw new PrestoException(HIVE_METASTORE_ERROR, e);
Expand Down

0 comments on commit 4a4fbb4

Please sign in to comment.