Skip to content

Commit

Permalink
Finer grained cache missing control in caching hive metastore
Browse files Browse the repository at this point in the history
Allow caching missing stats without caching missing tables.  This allows
leveraging caching on un-analyzed Hive tables (with associated caching's
trade-offs) without having to opt-in into caching missing tables, which
has different set of trade-offs.
  • Loading branch information
findepi committed May 6, 2024
1 parent 52b7471 commit 0c16330
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import com.google.common.cache.CacheLoader.InvalidCacheLoadException;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets.SetView;
import com.google.common.util.concurrent.ListenableFuture;
Expand Down Expand Up @@ -89,6 +90,9 @@
import static io.trino.plugin.hive.metastore.HivePartitionName.hivePartitionName;
import static io.trino.plugin.hive.metastore.HiveTableName.hiveTableName;
import static io.trino.plugin.hive.metastore.PartitionFilter.partitionFilter;
import static io.trino.plugin.hive.metastore.cache.CachingHiveMetastore.ObjectType.OTHER;
import static io.trino.plugin.hive.metastore.cache.CachingHiveMetastore.ObjectType.PARTITION;
import static io.trino.plugin.hive.metastore.cache.CachingHiveMetastore.ObjectType.STATS;
import static io.trino.plugin.hive.util.HiveUtil.makePartName;
import static java.util.Collections.unmodifiableSet;
import static java.util.Objects.requireNonNull;
Expand All @@ -107,8 +111,14 @@ public enum StatsRecording
DISABLED
}

public enum ObjectType {
PARTITION,
STATS,
OTHER,
}

private final HiveMetastore delegate;
private final boolean cacheMissing;
private final Set<ObjectType> cacheMissing;
private final LoadingCache<String, Optional<Database>> databaseCache;
private final LoadingCache<String, List<String>> databaseNamesCache;
private final LoadingCache<HiveTableName, Optional<Table>> tableCache;
Expand All @@ -126,7 +136,7 @@ public static CachingHiveMetastore createPerTransactionCache(HiveMetastore deleg
{
return new CachingHiveMetastore(
delegate,
true,
ImmutableSet.copyOf(ObjectType.values()),
new CacheFactory(maximumSize),
new CacheFactory(maximumSize),
new CacheFactory(maximumSize),
Expand All @@ -141,8 +151,8 @@ public static CachingHiveMetastore createCachingHiveMetastore(
Executor refreshExecutor,
long maximumSize,
StatsRecording statsRecording,
boolean cacheMissing,
boolean partitionCacheEnabled)
boolean partitionCacheEnabled,
Set<ObjectType> cacheMissing)
{
// refresh executor is only required when the refresh interval is set, but the executor is
// always set, so it is simpler to just enforce that
Expand Down Expand Up @@ -183,7 +193,7 @@ public static CachingHiveMetastore createCachingHiveMetastore(

private CachingHiveMetastore(
HiveMetastore delegate,
boolean cacheMissing,
Set<ObjectType> cacheMissing,
CacheFactory cacheFactory,
CacheFactory partitionCacheFactory,
CacheFactory statsCacheFactory,
Expand Down Expand Up @@ -261,14 +271,14 @@ private static <K, V> V get(LoadingCache<K, V> cache, K key)
}
}

private <K, V> Optional<V> getOptional(LoadingCache<K, Optional<V>> cache, K key)
private <K, V> Optional<V> getOptional(ObjectType objectType, LoadingCache<K, Optional<V>> cache, K key)
{
try {
Optional<V> value = cache.getIfPresent(key);
@SuppressWarnings("OptionalAssignedToNull")
boolean valueIsPresent = value != null;
if (valueIsPresent) {
if (value.isPresent() || cacheMissing) {
if (value.isPresent() || cacheMissing.contains(objectType)) {
return value;
}
cache.invalidate(key);
Expand Down Expand Up @@ -396,7 +406,7 @@ private static <K, V> Map<K, V> getAll(
@Override
public Optional<Database> getDatabase(String databaseName)
{
return getOptional(databaseCache, databaseName);
return getOptional(OTHER, databaseCache, databaseName);
}

private Optional<Database> loadDatabase(String databaseName)
Expand All @@ -418,7 +428,7 @@ private List<String> loadAllDatabases()
@Override
public Optional<Table> getTable(String databaseName, String tableName)
{
return getOptional(tableCache, hiveTableName(databaseName, tableName));
return getOptional(OTHER, tableCache, hiveTableName(databaseName, tableName));
}

private Optional<Table> loadTable(HiveTableName hiveTableName)
Expand Down Expand Up @@ -481,7 +491,7 @@ private Map<String, HiveColumnStatistics> mergeColumnStatistics(Map<String, Hive
requireNonNull(newStats, "newStats is null");
ImmutableMap.Builder<String, HiveColumnStatistics> columnStatisticsBuilder = ImmutableMap.builder();
// Populate empty statistics for all requested columns to cache absence of column statistics for future requests.
if (cacheMissing) {
if (cacheMissing.contains(STATS)) {
columnStatisticsBuilder.putAll(Iterables.transform(
dataColumns,
column -> new AbstractMap.SimpleEntry<>(column, HiveColumnStatistics.empty())));
Expand Down Expand Up @@ -741,7 +751,7 @@ public Optional<List<String>> getPartitionNamesByFilter(
List<String> columnNames,
TupleDomain<String> partitionKeysFilter)
{
return getOptional(partitionFilterCache, partitionFilter(databaseName, tableName, columnNames, partitionKeysFilter));
return getOptional(PARTITION, partitionFilterCache, partitionFilter(databaseName, tableName, columnNames, partitionKeysFilter));
}

private Optional<List<String>> loadPartitionNamesByFilter(PartitionFilter partitionFilter)
Expand Down Expand Up @@ -949,7 +959,7 @@ public Set<HivePrivilegeInfo> listTablePrivileges(String databaseName, String ta
@Override
public Optional<String> getConfigValue(String name)
{
return getOptional(configValuesCache, name);
return getOptional(OTHER, configValuesCache, name);
}

private Optional<String> loadConfigValue(String name)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import java.util.Optional;

import static com.google.common.base.MoreObjects.firstNonNull;
import static com.google.common.collect.Comparators.max;
import static java.util.concurrent.TimeUnit.MINUTES;
import static java.util.concurrent.TimeUnit.SECONDS;
Expand All @@ -38,8 +39,10 @@ public class CachingHiveMetastoreConfig
private Optional<Duration> metastoreRefreshInterval = Optional.empty();
private long metastoreCacheMaximumSize = 10000;
private int maxMetastoreRefreshThreads = 10;
private boolean cacheMissing = true;
private boolean partitionCacheEnabled = true;
private boolean cacheMissing = true;
private Boolean cacheMissingPartitions;
private Boolean cacheMissingStats;

@NotNull
public Duration getMetastoreCacheTtl()
Expand Down Expand Up @@ -106,6 +109,18 @@ public CachingHiveMetastoreConfig setMaxMetastoreRefreshThreads(int maxMetastore
return this;
}

public boolean isPartitionCacheEnabled()
{
return partitionCacheEnabled;
}

@Config("hive.metastore-cache.cache-partitions")
public CachingHiveMetastoreConfig setPartitionCacheEnabled(boolean enabled)
{
this.partitionCacheEnabled = enabled;
return this;
}

public boolean isCacheMissing()
{
return cacheMissing;
Expand All @@ -118,15 +133,27 @@ public CachingHiveMetastoreConfig setCacheMissing(boolean cacheMissing)
return this;
}

public boolean isPartitionCacheEnabled()
public boolean isCacheMissingPartitions()
{
return partitionCacheEnabled;
return firstNonNull(cacheMissingPartitions, cacheMissing);
}

@Config("hive.metastore-cache.cache-partitions")
public CachingHiveMetastoreConfig setPartitionCacheEnabled(boolean enabled)
@Config("hive.metastore-cache.cache-missing-partitions")
public CachingHiveMetastoreConfig setCacheMissingPartitions(boolean cacheMissingPartitions)
{
this.partitionCacheEnabled = enabled;
this.cacheMissingPartitions = cacheMissingPartitions;
return this;
}

public boolean isCacheMissingStats()
{
return firstNonNull(cacheMissingStats, cacheMissing);
}

@Config("hive.metastore-cache.cache-missing-stats")
public CachingHiveMetastoreConfig setCacheMissingStats(boolean cacheMissingStats)
{
this.cacheMissingStats = cacheMissingStats;
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@
import com.google.common.cache.CacheLoader;
import com.google.common.cache.CacheStats;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.ImmutableSet;
import com.google.common.math.LongMath;
import com.google.common.util.concurrent.UncheckedExecutionException;
import com.google.inject.Inject;
import io.airlift.units.Duration;
import io.trino.plugin.hive.metastore.HiveMetastore;
import io.trino.plugin.hive.metastore.HiveMetastoreFactory;
import io.trino.plugin.hive.metastore.cache.CachingHiveMetastore.ObjectType;
import io.trino.spi.NodeManager;
import io.trino.spi.TrinoException;
import io.trino.spi.catalog.CatalogName;
Expand All @@ -35,6 +37,7 @@
import org.weakref.jmx.Nested;

import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.function.Function;

Expand All @@ -61,7 +64,7 @@ public class SharedHiveMetastoreCache
private final Duration userMetastoreCacheTtl;
private final long userMetastoreCacheMaximumSize;
private final boolean metastorePartitionCacheEnabled;
private final boolean cacheMissing;
private final Set<ObjectType> cacheMissing;

private ExecutorService executorService;

Expand All @@ -83,7 +86,17 @@ public SharedHiveMetastoreCache(
metastoreRefreshInterval = config.getMetastoreRefreshInterval();
metastoreCacheMaximumSize = config.getMetastoreCacheMaximumSize();
metastorePartitionCacheEnabled = config.isPartitionCacheEnabled();
cacheMissing = config.isCacheMissing();
ImmutableSet.Builder<ObjectType> cacheMissing = ImmutableSet.builder();
if (config.isCacheMissing()) {
cacheMissing.add(ObjectType.OTHER);
}
if (config.isCacheMissingPartitions()) {
cacheMissing.add(ObjectType.PARTITION);
}
if (config.isCacheMissingStats()) {
cacheMissing.add(ObjectType.STATS);
}
this.cacheMissing = cacheMissing.build();

userMetastoreCacheTtl = impersonationCachingConfig.getUserMetastoreCacheTtl();
userMetastoreCacheMaximumSize = impersonationCachingConfig.getUserMetastoreCacheMaximumSize();
Expand Down Expand Up @@ -147,8 +160,8 @@ private CachingHiveMetastore createCachingHiveMetastore(HiveMetastoreFactory met
new ReentrantBoundedExecutor(executorService, maxMetastoreRefreshThreads),
metastoreCacheMaximumSize,
CachingHiveMetastore.StatsRecording.ENABLED,
cacheMissing,
metastorePartitionCacheEnabled);
metastorePartitionCacheEnabled,
cacheMissing);
}

public static class CachingHiveMetastoreFactory
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1116,7 +1116,7 @@ private static CachingHiveMetastore createCachingHiveMetastore(HiveMetastore hiv
executor,
1000,
CachingHiveMetastore.StatsRecording.ENABLED,
cacheMissing,
partitionCacheEnabled);
partitionCacheEnabled,
cacheMissing ? ImmutableSet.copyOf(CachingHiveMetastore.ObjectType.values()) : ImmutableSet.of());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,10 @@ public void testDefaults()
.setMetastoreRefreshInterval(null)
.setMetastoreCacheMaximumSize(10000)
.setMaxMetastoreRefreshThreads(10)
.setPartitionCacheEnabled(true)
.setCacheMissing(true)
.setPartitionCacheEnabled(true));
.setCacheMissingPartitions(true)
.setCacheMissingStats(true));
}

@Test
Expand All @@ -56,6 +58,8 @@ public void testExplicitPropertyMappings()
.put("hive.metastore-refresh-max-threads", "2500")
.put("hive.metastore-cache.cache-partitions", "false")
.put("hive.metastore-cache.cache-missing", "false")
.put("hive.metastore-cache.cache-missing-partitions", "false")
.put("hive.metastore-cache.cache-missing-stats", "false")
.buildOrThrow();

CachingHiveMetastoreConfig expected = new CachingHiveMetastoreConfig()
Expand All @@ -64,8 +68,10 @@ public void testExplicitPropertyMappings()
.setMetastoreRefreshInterval(new Duration(30, MINUTES))
.setMetastoreCacheMaximumSize(5000)
.setMaxMetastoreRefreshThreads(2500)
.setPartitionCacheEnabled(false)
.setCacheMissing(false)
.setPartitionCacheEnabled(false);
.setCacheMissingPartitions(false)
.setCacheMissingStats(false);

assertFullMapping(properties, expected);
}
Expand Down

0 comments on commit 0c16330

Please sign in to comment.