Skip to content

Commit

Permalink
[CALCITE-6219] 'Must-filter' columns
Browse files Browse the repository at this point in the history
A table can declare that some of its columns must be filtered
by implementing `interface SemanticTable`. If such columns
are not filtered in a WHERE or HAVING clause, the validator
throws.

These columns serve several purposes, one of which is
to prevent expensive full-table scans (for example, reading
all Orders without restricting on orderDate).

Implementation is via the method
SqlValidatorNamespace.getMustFilterFields(). For a table
namespace, that method returns the declared must-filter
fields. For a query namespace, that method returns any
must-filter fields that have not been filtered in that query;
such fields become the responsibility of the enclosing query.

If a field is must-filter and is not in the SELECT clause of a
sub-query, that is also an error, because of course it is now
impossible for the enclosing query to filter it.

Close apache#3688

Co-authored-by: Julian Hyde <jhyde@apache.org>
  • Loading branch information
olivrlee and julianhyde committed Feb 17, 2024
1 parent c774c31 commit 5f2a20d
Show file tree
Hide file tree
Showing 20 changed files with 653 additions and 84 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import org.apache.calcite.config.CalciteSystemProperty;

import org.checkerframework.checker.nullness.qual.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -55,7 +56,7 @@ public class CalciteException extends RuntimeException {
@SuppressWarnings({"argument.type.incompatible", "method.invocation.invalid"})
public CalciteException(
String message,
Throwable cause) {
@Nullable Throwable cause) {
super(message, cause);

// TODO: Force the caller to pass in a Logger as a trace argument for
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1082,6 +1082,9 @@ ExInst<RuntimeException> multipleCapturingGroupsForRegexpFunctions(String value,
/** Error for a table function that has more than one input table with row
 * semantics; {0} in the message is the table function name. */
@BaseMessage("A table function at most has one input table with row semantics. Table function ''{0}'' has multiple input tables with row semantics")
ExInst<SqlValidatorException> multipleRowSemanticsTables(String funcName);

/** Error for a SQL statement that does not filter some fields; {0} in the
 * message is the text passed as {@code mustFilterFields}. */
@BaseMessage("SQL statement did not contain filters on the following fields: {0}")
ExInst<SqlValidatorException> mustFilterFieldsMissing(String mustFilterFields);

/** Error for a negative position given to BIT_GET/GETBIT; {0} in the message
 * is the position, formatted as a number. */
@BaseMessage("BIT_GET/GETBIT error: negative position {0,number} not allowed")
ExInst<CalciteException> illegalNegativeBitGetPosition(int position);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ private void registerId(SqlIdentifier id, SqlValidatorScope scope) {

@Override protected void validateNamespace(final SqlValidatorNamespace namespace,
RelDataType targetRowType) {
// Only attempt to validate each namespace once. Otherwise if
// Only attempt to validate each namespace once. Otherwise, if
// validation fails, we may end up cycling.
if (activeNamespaces.add(namespace)) {
super.validateNamespace(namespace, targetRowType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.Pair;
import org.apache.calcite.util.Util;

Expand All @@ -29,7 +30,8 @@
import org.checkerframework.checker.nullness.qual.Nullable;

import java.util.List;
import java.util.Objects;

import static java.util.Objects.requireNonNull;

/**
* Abstract implementation of {@link SqlValidatorNamespace}.
Expand All @@ -55,6 +57,10 @@ abstract class AbstractNamespace implements SqlValidatorNamespace {
/** As {@link #rowType}, but not necessarily a struct. */
protected @Nullable RelDataType type;

/** Ordinals of fields that must be filtered. Initially the empty set, but
 * should typically be re-assigned on validate.
 *
 * <p>This is the value handed out by {@link #getMustFilterFields()}. */
protected ImmutableBitSet mustFilterFields = ImmutableBitSet.of();

/** Enclosing node of this namespace; may be null.
 *
 * <p>NOTE(review): meaning inferred from the field name only; confirm
 * against the constructor that assigns it. */
protected final @Nullable SqlNode enclosingNode;

//~ Constructors -----------------------------------------------------------
Expand Down Expand Up @@ -86,8 +92,7 @@ abstract class AbstractNamespace implements SqlValidatorNamespace {
Preconditions.checkArgument(rowType == null,
"Namespace.rowType must be null before validate has been called");
RelDataType type = validateImpl(targetRowType);
Preconditions.checkArgument(type != null,
"validateImpl() returned null");
requireNonNull(type, "validateImpl() returned null");
setType(type);
} finally {
status = SqlValidatorImpl.Status.VALID;
Expand All @@ -107,17 +112,16 @@ abstract class AbstractNamespace implements SqlValidatorNamespace {
* External users should call {@link #validate}, which uses the
* {@link #status} field to protect against cycles.
*
* @return record data type, never null
*
* @param targetRowType Desired row type, must not be null, may be the data
* type 'unknown'.
* @return record data type, never null
*/
protected abstract RelDataType validateImpl(RelDataType targetRowType);

/** Returns the row type of this namespace.
 *
 * <p>If {@code rowType} has not been set yet, first asks the validator to
 * validate this namespace against the validator's unknown type, and then
 * requires that validation has populated {@code rowType}. */
@Override public RelDataType getRowType() {
if (rowType == null) {
// Lazily trigger validation; it is expected to assign rowType.
validator.validateNamespace(this, validator.unknownType);
// Fail fast with a descriptive message if validation left rowType unset.
Objects.requireNonNull(rowType, "validate must set rowType");
requireNonNull(rowType, "validate must set rowType");
}
return rowType;
}
Expand All @@ -128,7 +132,7 @@ abstract class AbstractNamespace implements SqlValidatorNamespace {

/** Returns the type of this namespace.
 *
 * <p>Calls {@link #getRowType()} first and discards its result, then
 * requires that {@code type} is non-null.
 *
 * @throws NullPointerException if {@code type} is still null */
@Override public RelDataType getType() {
// Called for its side effect: getRowType() validates the namespace when
// rowType has not been set yet.
Util.discard(getRowType());
return Objects.requireNonNull(type, "type");
return requireNonNull(type, "type");
}

@Override public void setType(RelDataType type) {
Expand Down Expand Up @@ -159,6 +163,11 @@ abstract class AbstractNamespace implements SqlValidatorNamespace {
return ImmutableList.of();
}

/** Returns the ordinals of the fields of this namespace that must be
 * filtered.
 *
 * @throws NullPointerException if {@code mustFilterFields} is null, with a
 * message hinting that validation may not be complete */
@Override public ImmutableBitSet getMustFilterFields() {
  final ImmutableBitSet fields = mustFilterFields;
  return requireNonNull(fields,
      "mustFilterFields (maybe validation is not complete?)");
}

/** Reports {@link SqlMonotonicity#NOT_MONOTONIC} for every column; the
 * column name is not consulted. */
@Override public SqlMonotonicity getMonotonicity(String columnName) {
  final SqlMonotonicity monotonicity = SqlMonotonicity.NOT_MONOTONIC;
  return monotonicity;
}
Expand All @@ -179,7 +188,7 @@ public String translate(String name) {
return true;
}

/** Returns this namespace cast to the requested class, using
 * {@link Class#cast(Object)}.
 *
 * @param clazz Class to cast this namespace to
 * @throws ClassCastException if this namespace is not an instance of
 * {@code clazz} */
@Override public <T extends Object> T unwrap(Class<T> clazz) {
@Override public <T> T unwrap(Class<T> clazz) {
return clazz.cast(this);
}

Expand Down Expand Up @@ -219,9 +228,7 @@ protected RelDataType toStruct(RelDataType type, @Nullable SqlNode unnest) {
return type;
}
return validator.getTypeFactory().builder()
.add(
SqlValidatorUtil.alias(Objects.requireNonNull(unnest, "unnest"), 0),
type)
.add(SqlValidatorUtil.alias(requireNonNull(unnest, "unnest"), 0), type)
.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ protected DelegatingNamespace(SqlValidatorNamespace namespace) {
/** Does nothing; the body of this implementation is empty. */
@Override public void makeNullable() {
}

@Override public <T extends Object> T unwrap(Class<T> clazz) {
@Override public <T> T unwrap(Class<T> clazz) {
if (clazz.isInstance(this)) {
return clazz.cast(this);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,12 @@
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
import org.checkerframework.checker.nullness.qual.Nullable;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;

import static org.apache.calcite.util.Static.RESOURCE;

import static java.util.Objects.requireNonNull;

/**
* Namespace whose contents are defined by the type of an
* {@link org.apache.calcite.sql.SqlIdentifier identifier}.
Expand Down Expand Up @@ -77,7 +76,7 @@ public class IdentifierNamespace extends AbstractNamespace {
super(validator, enclosingNode);
this.id = id;
this.extendList = extendList;
this.parentScope = Objects.requireNonNull(parentScope, "parentScope");
this.parentScope = requireNonNull(parentScope, "parentScope");
}

IdentifierNamespace(SqlValidatorImpl validator, SqlNode node,
Expand Down Expand Up @@ -187,33 +186,31 @@ private SqlValidatorNamespace resolveImpl(SqlIdentifier id) {

@Override public RelDataType validateImpl(RelDataType targetRowType) {
resolvedNamespace = resolveImpl(id);
if (resolvedNamespace instanceof TableNamespace) {
SqlValidatorTable table = ((TableNamespace) resolvedNamespace).getTable();
if (validator.config().identifierExpansion()) {
validator.validateNamespace(resolvedNamespace, targetRowType);

if (validator.config().identifierExpansion()) {
SqlValidatorTable table = resolvedNamespace.getTable();
if (table != null) {
// TODO: expand qualifiers for column references also
List<String> qualifiedNames = table.getQualifiedName();
if (qualifiedNames != null) {
// Assign positions to the components of the fully-qualified
// identifier, as best we can. We assume that qualification
// adds names to the front, e.g. FOO.BAR becomes BAZ.FOO.BAR.
List<SqlParserPos> poses =
new ArrayList<>(
Collections.nCopies(
qualifiedNames.size(), id.getParserPosition()));
int offset = qualifiedNames.size() - id.names.size();

// Test offset in case catalog supports fewer qualifiers than catalog
// reader.
if (offset >= 0) {
for (int i = 0; i < id.names.size(); i++) {
poses.set(i + offset, id.getComponentParserPosition(i));
}
}
id.setNames(qualifiedNames, poses);
// Assign positions to the components of the fully-qualified
// identifier, as best we can. We assume that qualification
// adds names to the front, e.g. FOO.BAR becomes BAZ.FOO.BAR.
// Test offset in case catalog supports fewer qualifiers than catalog
// reader.
ImmutableList.Builder<SqlParserPos> positions =
ImmutableList.builder();
int offset = qualifiedNames.size() - id.names.size();
for (int i = 0; i < qualifiedNames.size(); i++) {
positions.add(offset >= 0 && i >= offset
? id.getComponentParserPosition(i - offset)
: id.getParserPosition());
}
id.setNames(qualifiedNames, positions.build());
}
}

this.mustFilterFields = resolvedNamespace.getMustFilterFields();
RelDataType rowType = resolvedNamespace.getRowType();

if (extendList != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@

import static org.apache.calcite.util.Static.RESOURCE;

import static java.util.Objects.requireNonNull;

/**
* Abstract base for a scope which is defined by a list of child namespaces and
* which inherits from a parent scope.
Expand All @@ -58,7 +60,7 @@ protected ListScope(SqlValidatorScope parent) {

/** Registers a child namespace in this scope.
 *
 * <p>Wraps the namespace in a new {@code ScopeChild} and adds it to
 * {@code children}. The first constructor argument is the size of
 * {@code children} before the add (presumably the child's ordinal; verify
 * against {@code ScopeChild}).
 *
 * @param ns Namespace of the child
 * @param alias Alias of the child, must not be null
 * @param nullable Passed through to {@code ScopeChild}
 * @throws NullPointerException if {@code alias} is null */
@Override public void addChild(SqlValidatorNamespace ns, String alias,
boolean nullable) {
Objects.requireNonNull(alias, "alias");
requireNonNull(alias, "alias");
children.add(new ScopeChild(children.size(), alias, ns, nullable));
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.sql.validate;

import org.checkerframework.checker.nullness.qual.Nullable;

/**
 * Optional, extra metadata that supplements {@link SqlValidatorTable}.
 *
 * <p>A table implements this interface to flag individual columns as
 * 'must-filter'.
 */
public interface SemanticTable {
  /** Returns whether queries that reference this table are required to
   * filter {@code column}.
   *
   * <p>By default, a column is must-filter exactly when
   * {@link #getFilter(int)} yields a non-null value for it.
   *
   * @param column Column ordinal (0-based)
   *
   * @throws IndexOutOfBoundsException if column ordinal is out of range */
  default boolean mustFilter(int column) {
    final String filter = getFilter(column);
    return filter != null;
  }

  /** Returns the filter expression attached to {@code column} when it is a
   * {@link #mustFilter(int) must-filter} column, and null when it is not.
   *
   * <p>By default, no column has a filter expression.
   *
   * @param column Column ordinal (0-based)
   *
   * @throws IndexOutOfBoundsException if column ordinal is out of range */
  default @Nullable String getFilter(int column) {
    return null;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

import java.util.List;

import static java.util.Objects.hash;

/**
* Fully-qualified identifier.
*
Expand All @@ -46,8 +48,31 @@ private SqlQualified(@Nullable SqlValidatorScope scope, int prefixLength,
this.identifier = identifier;
}

/** Computes a hash from the identifier's names and the prefix length,
 * the same two components that {@code equals} compares. */
@Override public int hashCode() {
  // An Object[] passed to the varargs method is used as the argument array
  // itself, so this hashes exactly the two listed components.
  final Object[] components = {identifier.names, prefixLength};
  return hash(components);
}

/** Compares this qualified identifier with another object.
 *
 * <p>Two {@code SqlQualified} instances are equal when they have the same
 * prefix length and the same identifier names. For example, in
 *
 * <blockquote><pre>
 * SELECT e.address, e.address.zipcode
 * FROM employees AS e</pre></blockquote>
 *
 * <p>"e.address" is {identifier=[e, address], prefixLength=1} and is
 * distinct from "e.address.zipcode".
 *
 * <p>The namespace is deliberately not compared: all instances being
 * compared are assumed to have been resolved from the same
 * {@link SqlValidatorScope}. */
@Override public boolean equals(@Nullable Object obj) {
  if (this == obj) {
    return true;
  }
  if (!(obj instanceof SqlQualified)) {
    return false;
  }
  final SqlQualified that = (SqlQualified) obj;
  // Cheap integer comparison first, then the name lists.
  return prefixLength == that.prefixLength
      && identifier.names.equals(that.identifier.names);
}

/** Returns a description of the form
 * <code>{id: <i>identifier</i>, prefix: <i>prefixLength</i>}</code>. */
@Override public String toString() {
return "{id: " + identifier.toString() + ", prefix: " + prefixLength + "}";
return "{id: " + identifier + ", prefix: " + prefixLength + "}";
}

public static SqlQualified create(@Nullable SqlValidatorScope scope, int prefixLength,
Expand Down

0 comments on commit 5f2a20d

Please sign in to comment.