Skip to content

Commit

Permalink
Allow using filter when materializing BigQuery views
Browse files Browse the repository at this point in the history
  • Loading branch information
ebyhr committed Apr 10, 2024
1 parent e5050c2 commit 22151a5
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 6 deletions.
1 change: 1 addition & 0 deletions docs/src/main/sphinx/connector/bigquery.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ a few caveats:
| `bigquery.view-materialization-project` | The project where the materialized view is going to be created | The view's project |
| `bigquery.view-materialization-dataset` | The dataset where the materialized view is going to be created | The view's dataset |
| `bigquery.skip-view-materialization` | Use REST API to access views instead of Storage API. BigQuery `BIGNUMERIC` and `TIMESTAMP` types are unsupported. | `false` |
| `bigqueryview-materialization-with-filter` | Use filter condition when materializng views. | `false` |
| `bigquery.views-cache-ttl` | Duration for which the materialization of a view will be cached and reused. Set to `0ms` to disable the cache. | `15m` |
| `bigquery.metadata.cache-ttl` | Duration for which metadata retrieved from BigQuery is cached and reused. Set to `0ms` to disable the cache. | `0ms` |
| `bigquery.max-read-rows-retries` | The number of retries in case of retryable server issues | `3` |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ public class BigQueryClient
private final BigQueryLabelFactory labelFactory;
private final BigQueryTypeManager typeManager;
private final ViewMaterializationCache materializationCache;
private final boolean isViewMaterializationWithFilter;
private final boolean caseInsensitiveNameMatching;
private final LoadingCache<String, List<DatasetId>> remoteDatasetIdCache;
private final Optional<String> configProjectId;
Expand All @@ -98,13 +99,15 @@ public BigQueryClient(
BigQueryTypeManager typeManager,
boolean caseInsensitiveNameMatching,
ViewMaterializationCache materializationCache,
boolean isViewMaterializationWithFilter,
Duration metadataCacheTtl,
Optional<String> configProjectId)
{
this.bigQuery = requireNonNull(bigQuery, "bigQuery is null");
this.labelFactory = requireNonNull(labelFactory, "labelFactory is null");
this.typeManager = requireNonNull(typeManager, "typeManager is null");
this.materializationCache = requireNonNull(materializationCache, "materializationCache is null");
this.isViewMaterializationWithFilter = isViewMaterializationWithFilter;
this.caseInsensitiveNameMatching = caseInsensitiveNameMatching;
this.remoteDatasetIdCache = EvictableCacheBuilder.newBuilder()
.expireAfterWrite(metadataCacheTtl.toMillis(), MILLISECONDS)
Expand Down Expand Up @@ -201,9 +204,9 @@ public Optional<TableInfo> getTable(TableId remoteTableId)
return Optional.ofNullable(bigQuery.getTable(remoteTableId));
}

public TableInfo getCachedTable(Duration viewExpiration, TableInfo remoteTableId, List<String> requiredColumns)
public TableInfo getCachedTable(Duration viewExpiration, TableInfo remoteTableId, List<String> requiredColumns, Optional<String> filter)
{
String query = selectSql(remoteTableId, requiredColumns);
String query = isViewMaterializationWithFilter ? selectSql(remoteTableId.getTableId(), requiredColumns, filter) : selectSql(remoteTableId, requiredColumns);
log.debug("query is %s", query);
return materializationCache.getCachedTable(this, query, viewExpiration, remoteTableId);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

import static io.trino.cache.CacheUtils.uncheckedCacheGet;
import static io.trino.cache.SafeCaches.buildNonEvictableCache;
import static io.trino.plugin.bigquery.BigQuerySessionProperties.isViewMaterializationWithFilter;
import static java.util.Objects.requireNonNull;
import static java.util.concurrent.TimeUnit.MILLISECONDS;

Expand Down Expand Up @@ -75,7 +76,7 @@ public BigQueryClient create(ConnectorSession session)

protected BigQueryClient createBigQueryClient(ConnectorSession session)
{
return new BigQueryClient(createBigQuery(session), labelFactory, typeManager, caseInsensitiveNameMatching, materializationCache, metadataCacheTtl, projectId);
return new BigQueryClient(createBigQuery(session), labelFactory, typeManager, caseInsensitiveNameMatching, materializationCache, isViewMaterializationWithFilter(session), metadataCacheTtl, projectId);
}

protected BigQuery createBigQuery(ConnectorSession session)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ public class BigQueryConfig
private boolean arrowSerializationEnabled;
private Duration viewExpireDuration = new Duration(24, HOURS);
private boolean skipViewMaterialization;
private boolean viewMaterializationWithFilter;
private Optional<String> viewMaterializationProject = Optional.empty();
private Optional<String> viewMaterializationDataset = Optional.empty();
private int maxReadRowsRetries = DEFAULT_MAX_READ_ROWS_RETRIES;
Expand Down Expand Up @@ -153,6 +154,19 @@ public BigQueryConfig setSkipViewMaterialization(boolean skipViewMaterialization
return this;
}

public boolean isViewMaterializationWithFilter()
{
return viewMaterializationWithFilter;
}

@Config("bigquery.view-materialization-with-filter")
@ConfigDescription("Use filter when materializing views")
public BigQueryConfig setViewMaterializationWithFilter(boolean viewMaterializationWithFilter)
{
this.viewMaterializationWithFilter = viewMaterializationWithFilter;
return this;
}

public Optional<String> getViewMaterializationProject()
{
return viewMaterializationProject;
Expand Down Expand Up @@ -332,5 +346,8 @@ public void validate()
if (skipViewMaterialization) {
checkState(viewsEnabled, "%s config property must be enabled when skipping view materialization", VIEWS_ENABLED);
}
if (viewMaterializationWithFilter) {
checkState(viewsEnabled, "%s config property must be enabled when view materialization with filter is enabled", VIEWS_ENABLED);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ public final class BigQuerySessionProperties
implements SessionPropertiesProvider
{
private static final String SKIP_VIEW_MATERIALIZATION = "skip_view_materialization";
private static final String VIEW_MATERIALIZATION_WITH_FILTER = "view_materialization_with_filter";
private static final String QUERY_RESULTS_CACHE_ENABLED = "query_results_cache_enabled";
private static final String CREATE_DISPOSITION_TYPE = "create_disposition_type";

Expand All @@ -43,6 +44,11 @@ public BigQuerySessionProperties(BigQueryConfig config)
"Skip materializing views",
config.isSkipViewMaterialization(),
false))
.add(booleanProperty(
VIEW_MATERIALIZATION_WITH_FILTER,
"Materialize views with filters",
config.isViewMaterializationWithFilter(),
false))
.add(booleanProperty(
QUERY_RESULTS_CACHE_ENABLED,
"Enable query results cache",
Expand All @@ -68,6 +74,11 @@ public static boolean isSkipViewMaterialization(ConnectorSession session)
return session.getProperty(SKIP_VIEW_MATERIALIZATION, Boolean.class);
}

public static boolean isViewMaterializationWithFilter(ConnectorSession session)
{
return session.getProperty(VIEW_MATERIALIZATION_WITH_FILTER, Boolean.class);
}

public static boolean isQueryResultsCacheEnabled(ConnectorSession session)
{
return session.getProperty(QUERY_RESULTS_CACHE_ENABLED, Boolean.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ public ReadSession create(ConnectorSession session, TableId remoteTable, List<St
TableInfo tableDetails = client.getTable(remoteTable)
.orElseThrow(() -> new TableNotFoundException(new SchemaTableName(remoteTable.getDataset(), remoteTable.getTable())));

TableInfo actualTable = getActualTable(client, tableDetails, selectedFields);
TableInfo actualTable = getActualTable(client, tableDetails, selectedFields, filter);

List<String> filteredSelectedFields = selectedFields.stream()
.map(BigQueryUtil::toBigQueryColumnName)
Expand Down Expand Up @@ -145,7 +145,8 @@ String toTableResourceName(TableId tableId)
private TableInfo getActualTable(
BigQueryClient client,
TableInfo remoteTable,
List<String> requiredColumns)
List<String> requiredColumns,
Optional<String> filter)
{
TableDefinition tableDefinition = remoteTable.getDefinition();
TableDefinition.Type tableType = tableDefinition.getType();
Expand All @@ -159,7 +160,7 @@ private TableInfo getActualTable(
BigQueryConfig.VIEWS_ENABLED));
}
// get it from the view
return client.getCachedTable(viewExpiration, remoteTable, requiredColumns);
return client.getCachedTable(viewExpiration, remoteTable, requiredColumns, filter);
}
// Storage API doesn't support reading other table types (materialized views, external)
throw new TrinoException(NOT_SUPPORTED, format("Table type '%s' of table '%s.%s' is not supported",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ public void testDefaults()
.setParallelism(null)
.setViewExpireDuration(new Duration(24, HOURS))
.setSkipViewMaterialization(false)
.setViewMaterializationWithFilter(false)
.setViewMaterializationProject(null)
.setViewMaterializationDataset(null)
.setMaxReadRowsRetries(3)
Expand Down Expand Up @@ -66,6 +67,7 @@ public void testExplicitPropertyMappingsWithCredentialsKey()
.put("bigquery.experimental.arrow-serialization.enabled", "true")
.put("bigquery.view-expire-duration", "30m")
.put("bigquery.skip-view-materialization", "true")
.put("bigquery.view-materialization-with-filter", "true")
.put("bigquery.view-materialization-project", "vmproject")
.put("bigquery.view-materialization-dataset", "vmdataset")
.put("bigquery.max-read-rows-retries", "10")
Expand All @@ -88,6 +90,7 @@ public void testExplicitPropertyMappingsWithCredentialsKey()
.setArrowSerializationEnabled(true)
.setViewExpireDuration(new Duration(30, MINUTES))
.setSkipViewMaterialization(true)
.setViewMaterializationWithFilter(true)
.setViewMaterializationProject("vmproject")
.setViewMaterializationDataset("vmdataset")
.setMaxReadRowsRetries(10)
Expand Down Expand Up @@ -120,5 +123,12 @@ public void testInvalidViewSetting()
.validate())
.isInstanceOf(IllegalStateException.class)
.hasMessage("bigquery.views-enabled config property must be enabled when skipping view materialization");

assertThatThrownBy(() -> new BigQueryConfig()
.setViewMaterializationWithFilter(true)
.setViewsEnabled(false)
.validate())
.isInstanceOf(IllegalStateException.class)
.hasMessage("bigquery.views-enabled config property must be enabled when view materialization with filter is enabled");
}
}

0 comments on commit 22151a5

Please sign in to comment.