Skip to content

Commit

Permalink
Test NestedLoopJoin without filter in join fuzzer (facebookincubator#…
Browse files Browse the repository at this point in the history
…9923)

Summary:

A correctness bug was found recently in NestedLoopJoin without filter (facebookincubator#9892). 
So this diff extends join fuzzer to cover this case with 10% chance.

Differential Revision: D57761514
  • Loading branch information
kagamiori authored and facebook-github-bot committed May 24, 2024
1 parent d4870eb commit ef2732b
Showing 1 changed file with 76 additions and 2 deletions.
78 changes: 76 additions & 2 deletions velox/exec/fuzzer/JoinFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,13 @@ class JoinFuzzer {
const std::vector<RowVectorPtr>& buildInput,
const core::PlanNodePtr& plan);

// Generates and executes plans using NestedLoopJoin without filters. The
// result is compared to DuckDB.
void testCrossProduct(
core::JoinType joinType,
const std::vector<RowVectorPtr>& probeInput,
const std::vector<RowVectorPtr>& buildInput);

int32_t randInt(int32_t min, int32_t max) {
return boost::random::uniform_int_distribution<int32_t>(min, max)(rng_);
}
Expand Down Expand Up @@ -940,6 +947,53 @@ bool isTableScanSupported(const TypePtr& type) {
return true;
}

void JoinFuzzer::testCrossProduct(
core::JoinType joinType,
const std::vector<RowVectorPtr>& probeInput,
const std::vector<RowVectorPtr>& buildInput) {
auto outputColumns =
concat(asRowType(probeInput[0]->type()), asRowType(buildInput[0]->type()))
->names();

auto planNodeIdGenerator = std::make_shared<core::PlanNodeIdGenerator>();
auto plan = JoinFuzzer::PlanWithSplits{
PlanBuilder(planNodeIdGenerator)
.values(probeInput)
.nestedLoopJoin(
PlanBuilder(planNodeIdGenerator).values(buildInput).planNode(),
"",
outputColumns,
joinType)
.planNode()};

const auto expected = execute(plan, /*injectSpill=*/false);

// If OOM injection is not enabled verify the results against DuckDB.
if (!FLAGS_enable_oom_injection) {
if (auto duckDbResult =
computeDuckDbResult(probeInput, buildInput, plan.plan)) {
VELOX_CHECK(
assertEqualResults(
duckDbResult.value(), plan.plan->outputType(), {expected}),
"Velox and DuckDB results don't match");
}
}

auto joinNode =
std::dynamic_pointer_cast<const core::NestedLoopJoinNode>(plan.plan);
VELOX_CHECK_NOT_NULL(joinNode);

if (auto flippedPlan = tryFlipJoinSides(*joinNode)) {
auto flippedPlanWithSplits = PlanWithSplits{flippedPlan};
auto actual = execute(flippedPlanWithSplits, /*injectSpill=*/false);
if (actual != nullptr && expected != nullptr) {
VELOX_CHECK(
assertEqualResults({expected}, {actual}),
"Logically equivalent plans produced different results");
}
}
}

void JoinFuzzer::verify(core::JoinType joinType) {
const bool nullAware =
isNullAwareSupported(joinType) && vectorFuzzer_.coinToss(0.5);
Expand Down Expand Up @@ -970,6 +1024,18 @@ void JoinFuzzer::verify(core::JoinType joinType) {
}
}

// Test cross product without filter with 10% chance. Avoid testing cross
// product if input size is too large.
if ((core::isInnerJoin(joinType) || core::isLeftJoin(joinType) ||
core::isFullJoin(joinType)) &&
FLAGS_batch_size * FLAGS_num_batches <= 10000) {
if (vectorFuzzer_.coinToss(0.1)) {
testCrossProduct(joinType, probeInput, buildInput);
testCrossProduct(joinType, flatProbeInput, flatBuildInput);
return;
}
}

auto outputColumns =
(core::isLeftSemiProjectJoin(joinType) ||
core::isLeftSemiFilterJoin(joinType) || core::isAntiJoin(joinType))
Expand Down Expand Up @@ -1148,10 +1214,18 @@ void JoinFuzzer::addPlansWithTableScan(
const int32_t numGroups = randInt(1, probeScanSplits.size());
const std::vector<exec::Split> groupedProbeScanSplits =
generateSplitsWithGroup(
tableDir, numGroups, /*isProbe=*/true, probeKeys.size(), probeInput);
tableDir,
numGroups,
/*isProbe=*/true,
probeKeys.size(),
probeInput);
const std::vector<exec::Split> groupedBuildScanSplits =
generateSplitsWithGroup(
tableDir, numGroups, /*isProbe=*/false, buildKeys.size(), buildInput);
tableDir,
numGroups,
/*isProbe=*/false,
buildKeys.size(),
buildInput);

for (const auto& planWithTableScan : plansWithTableScan) {
altPlans.push_back(planWithTableScan);
Expand Down

0 comments on commit ef2732b

Please sign in to comment.