Skip to content

Commit

Permalink
FIX: Refactor computeMetaData and separate propagateNull() from compu…
Browse files Browse the repository at this point in the history
…tePropagateNulls() API. (facebookincubator#5287)

Summary:
Pull Request resolved: facebookincubator#5287

This fix a set of expression fuzzer failures such as facebookincubator#5059.

The diff split propagateNull() and computePropagateNulls() API. previously propagateNull()
used to retrieve the propagateNull property for some expressions and re-compute it for others.

with this change propagateNull() only accesses the pre-computed property. And
computePropagateNulls() is called from computeMetaData to compute the property
for some expressions.

The diff also refactor computeMetaData for better readability, it used to interleave computations
of different properties and combine things in a hard to track manner.

Following diffs will address two remaining issues:
- The re-compute that happens over and over during query compilations.
- Avoid emptying distinct fields if shared with parents and simplify the API.

Reviewed By: kagamiori

Differential Revision: D46457021

fbshipit-source-id: 5d8479132d3b157a9245dbe761e9e978cf4255fc
  • Loading branch information
laithsakka authored and facebook-github-bot committed Jun 22, 2023
1 parent 866b50c commit 9725f85
Show file tree
Hide file tree
Showing 11 changed files with 125 additions and 69 deletions.
1 change: 0 additions & 1 deletion velox/docs/develop/expression-evaluation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -453,4 +453,3 @@ SWITCH expression evaluation goes through the following steps:
SWITCH expression sets EvalCtx::isFinalSelection flag to false. The expressions
are expected to use this flag to decide whether the partially populated result
vector must be preserved or can be overwritten.

6 changes: 3 additions & 3 deletions velox/expression/CoalesceExpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ class CoalesceExpr : public SpecialForm {
EvalCtx& context,
VectorPtr& result) override;

bool propagatesNulls() const override {
return false;
private:
void computePropagatesNulls() override {
propagatesNulls_ = false;
}

private:
static TypePtr resolveType(const std::vector<TypePtr>& argTypes);

friend class CoalesceCallToSpecialForm;
Expand Down
9 changes: 5 additions & 4 deletions velox/expression/ConjunctExpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,6 @@ class ConjunctExpr : public SpecialForm {
EvalCtx& context,
VectorPtr& result) override;

bool propagatesNulls() const override {
return false;
}

bool isConditional() const override {
return true;
}
Expand All @@ -74,7 +70,12 @@ class ConjunctExpr : public SpecialForm {
private:
static TypePtr resolveType(const std::vector<TypePtr>& argTypes);

void computePropagatesNulls() override {
propagatesNulls_ = false;
}

void maybeReorderInputs();

void updateResult(
BaseVector* inputResult,
EvalCtx& context,
Expand Down
101 changes: 59 additions & 42 deletions velox/expression/Expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "velox/common/base/Fs.h"
#include "velox/common/base/SuccinctPrinter.h"
#include "velox/core/Expressions.h"
#include "velox/expression/CastExpr.h"
#include "velox/expression/ConstantExpr.h"
#include "velox/expression/Expr.h"
#include "velox/expression/ExprCompiler.h"
Expand Down Expand Up @@ -188,64 +189,80 @@ bool Expr::allSupportFlatNoNullsFastPath(
}

void Expr::computeMetadata() {
// Sets propagatesNulls_ if a null in any of the columns this
// depends on makes the Expr null. If the set of fields
// null-propagating arguments depend on is a superset of the fields
// non null-propagating arguments depend on and the function itself
// has default null behavior, then the Expr propagates nulls. Sets
// isDeterministic to false if some subtree is
// non-deterministic. Sets 'distinctFields_' to be the union of
// 'distinctFields_' of inputs. If one of the inputs has the
// identical set of distinct fields, then the input's distinct
// fields are set to empty.
bool isNullPropagatingFunction = false;
if (isSpecialForm()) {
// 'propagatesNulls_' will be adjusted after inputs are processed.
propagatesNulls_ = true;
deterministic_ = true;
} else if (vectorFunction_) {
// Compute metadata for all the inputs.
for (auto& input : inputs_) {
input->computeMetadata();
}

// (1) Compute deterministic_.
// An expression is deterministic if it is a deterministic function call or a
// special form, and all its inputs are also deterministic.
if (vectorFunction_) {
deterministic_ = vectorFunction_->isDeterministic();
isNullPropagatingFunction = vectorFunction_->isDefaultNullBehavior();
propagatesNulls_ = isNullPropagatingFunction;
} else {
VELOX_CHECK(isSpecialForm());
deterministic_ = true;
}

std::vector<FieldReference*> nullPropagatingFields;
std::vector<FieldReference*> nonNullPropagatingFields;
std::unordered_set<FieldReference*> ignore;
for (auto& input : inputs_) {
// Skip computing for inputs already marked as multiply referenced as they
// would have it computed already.
if (!input->isMultiplyReferenced_) {
input->computeMetadata();
}
deterministic_ &= input->deterministic_;
if (!input->distinctFields_.empty()) {
if (!isNullPropagatingFunction) {
propagatesNulls_ &= input->propagatesNulls_;
} else if (input->propagatesNulls_) {
mergeFields(nullPropagatingFields, ignore, input->distinctFields_);
} else {
mergeFields(nonNullPropagatingFields, ignore, input->distinctFields_);
}
}
}

// (2) Compute distinctFields_ and multiplyReferencedFields_.
for (auto& input : inputs_) {
mergeFields(
distinctFields_, multiplyReferencedFields_, input->distinctFields_);
}
if (isSpecialForm()) {
propagatesNulls_ = propagatesNulls();
} else if (isNullPropagatingFunction) {
propagatesNulls_ =
isSubsetOfFields(nonNullPropagatingFields, nullPropagatingFields);

// (3) Compute propagatesNulls_.
// propagatesNulls_ is true iff a null in any of the columns this
// depends on makes the Expr null.

if (isSpecialForm() && !is<ConstantExpr>() && !is<FieldReference>() &&
!is<CastExpr>()) {
as<SpecialForm>()->computePropagatesNulls();
} else {
propagatesNulls_ = false;
if (vectorFunction_ && !vectorFunction_->isDefaultNullBehavior()) {
propagatesNulls_ = false;
} else {
// Logic for handling default-null vector functions.
// cast, constant and fieldReference expressions act as vector functions
// with default null behavior.

// If the function has default null behavior, the Expr propagates nulls if
// the set of fields null-propagating arguments depend on is a superset of
// the fields non null-propagating arguments depend on.
std::unordered_set<FieldReference*> nullPropagating, nonNullPropagating;
for (auto& input : inputs_) {
if (input->propagatesNulls_) {
nullPropagating.insert(
input->distinctFields_.begin(), input->distinctFields_.end());
} else {
nonNullPropagating.insert(
input->distinctFields_.begin(), input->distinctFields_.end());
}
}

// propagatesNulls_ is true if nonNullPropagating is subset of
// nullPropagating.
propagatesNulls_ = true;
for (auto* field : nonNullPropagating) {
if (!nullPropagating.count(field)) {
propagatesNulls_ = false;
break;
}
}
}
}

for (auto& input : inputs_) {
if (!input->isMultiplyReferenced_ &&
isSameFields(distinctFields_, input->distinctFields_)) {
input->distinctFields_.clear();
}
}

// (5) Compute hasConditionals_.
hasConditionals_ = hasConditionals(this);
}

Expand All @@ -272,7 +289,7 @@ void rethrowFirstError(
// has errors, throws the first error in 'argumentErrors' scoped to 'rows'.
// Otherwise sets 'errors()' of 'context' to the union of the errors. This is
// used after all arguments of a function call have been evaluated and
// we decide on whether to throw or what errors to leave in 'context' for the
// we decide on whether to throw or what errors to leave in 'context' for the
// caller.
void mergeOrThrowArgumentErrors(
const SelectivityVector& rows,
Expand Down
17 changes: 16 additions & 1 deletion velox/expression/Expr.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,9 +282,24 @@ class Expr {
isMultiplyReferenced_ = true;
}

template <typename T>
const T* as() const {
return dynamic_cast<const T*>(this);
}

template <typename T>
T* as() {
return dynamic_cast<T*>(this);
}

template <typename T>
bool is() const {
return as<T>() != nullptr;
}

// True if 'this' Expr tree is null for a null in any of the columns
// this depends on.
virtual bool propagatesNulls() const {
bool propagatesNulls() const {
return propagatesNulls_;
}

Expand Down
10 changes: 5 additions & 5 deletions velox/expression/LambdaExpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,6 @@ class LambdaExpr : public SpecialForm {
std::string toSql(
std::vector<VectorPtr>* complexConstants = nullptr) const override;

bool propagatesNulls() const override {
// A null capture does not result in a null function.
return false;
}

void evalSpecialForm(
const SelectivityVector& rows,
EvalCtx& context,
Expand All @@ -71,6 +66,11 @@ class LambdaExpr : public SpecialForm {
/// Used to initialize captureChannels_ and typeWithCapture_ on first use.
void makeTypeWithCapture(EvalCtx& context);

void computePropagatesNulls() override {
// A null capture does not result in a null function.
propagatesNulls_ = false;
}

RowTypePtr signature_;

/// The inner expression that will be applied to the elements of the input
Expand Down
6 changes: 6 additions & 0 deletions velox/expression/SpecialForm.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,11 @@ class SpecialForm : public Expr {
true /* specialForm */,
supportsFlatNoNullsFastPath,
trackCpuUsage) {}

// This is safe to call only after all metadata is computed for input
// expressions.
virtual void computePropagatesNulls() {
VELOX_NYI();
}
};
} // namespace facebook::velox::exec
21 changes: 14 additions & 7 deletions velox/expression/SwitchExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,9 @@ void SwitchExpr::evalSpecialForm(
}
}

bool SwitchExpr::propagatesNulls() const {
// This is safe to call only after all metadata is computed for input
// expressions.
bool SwitchExpr::computePropagatesNulls() {
// The "switch" expression propagates nulls when all of the following
// conditions are met:
// - All "then" clauses and optional "else" clause propagate nulls.
Expand All @@ -166,7 +168,8 @@ bool SwitchExpr::propagatesNulls() const {

for (auto i = 0; i < numCases_; i += 2) {
if (!inputs_[i + 1]->propagatesNulls()) {
return false;
propagatesNulls_ = false;
return;
}
}

Expand All @@ -175,25 +178,29 @@ bool SwitchExpr::propagatesNulls() const {
const auto& condition = inputs_[i * 2];
const auto& thenClause = inputs_[i * 2 + 1];
if (!Expr::isSameFields(firstThenFields, thenClause->distinctFields())) {
return false;
propagatesNulls_ = false;
return;
}

if (!Expr::isSubsetOfFields(condition->distinctFields(), firstThenFields)) {
return false;
propagatesNulls_ = false;
return;
}
}

if (hasElseClause_) {
const auto& elseClause = inputs_.back();
if (!elseClause->propagatesNulls()) {
return false;
propagatesNulls_ = false;
return;
}
if (!Expr::isSameFields(firstThenFields, elseClause->distinctFields())) {
return false;
propagatesNulls_ = false;
return;
}
}

return true;
propagatesNulls_ = true;
}

// static
Expand Down
4 changes: 2 additions & 2 deletions velox/expression/SwitchExpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,15 @@ class SwitchExpr : public SpecialForm {
EvalCtx& context,
VectorPtr& result) override;

bool propagatesNulls() const override;

bool isConditional() const override {
return true;
}

private:
static TypePtr resolveType(const std::vector<TypePtr>& argTypes);

void computePropagatesNulls() override;

const size_t numCases_;
const bool hasElseClause_;
BufferPtr tempValues_;
Expand Down
11 changes: 7 additions & 4 deletions velox/expression/TryExpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,17 @@ class TryExpr : public SpecialForm {
EvalCtx& context,
VectorPtr& result) override;

bool propagatesNulls() const override {
return inputs_[0]->propagatesNulls();
}

void nullOutErrors(
const SelectivityVector& rows,
EvalCtx& context,
VectorPtr& result);

private:
// This is safe to call only after all metadata is computed for input
// expressions.
void computePropagatesNulls() {
propagatesNulls_ = inputs_[0]->propagatesNulls();
}
};

class TryCallToSpecialForm : public FunctionCallToSpecialForm {
Expand Down
8 changes: 8 additions & 0 deletions velox/expression/tests/ExprTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3734,6 +3734,14 @@ TEST_F(ExprTest, nullPropagation) {
ROW({"c0", "c1"}, {VARCHAR(), VARCHAR()}));
EXPECT_TRUE(propagatesNulls(singleString));
EXPECT_FALSE(propagatesNulls(twoStrings));

// Try will call propagateNulls on its child, that should not
// redo the computation.
auto switchInTry = parseExpression(
"try(switch(subscript(c0, array_min(c1)), FALSE, FALSE,"
"FALSE, is_null('6338838335202944843'::BIGINT)))",
ROW({"c0", "c1"}, {ARRAY({BOOLEAN()}), ARRAY({BIGINT()})}));
EXPECT_FALSE(propagatesNulls(switchInTry));
}

TEST_F(ExprTest, peelingWithSmallerConstantInput) {
Expand Down

0 comments on commit 9725f85

Please sign in to comment.