From e77bf059a2bfc5dd242f30b60bb7dd3ea67a3b46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Sun, 19 Apr 2026 20:17:45 +0300 Subject: [PATCH 1/2] test(operator): add aggregation edge case tests --- tests/operator_tests.cpp | 111 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/tests/operator_tests.cpp b/tests/operator_tests.cpp index 6f6574f..0dc3b3c 100644 --- a/tests/operator_tests.cpp +++ b/tests/operator_tests.cpp @@ -59,6 +59,17 @@ AggregateInfo make_agg(AggregateType type, const std::string& name, return info; } +// Helper to create AggregateInfo with distinct flag +AggregateInfo make_agg_distinct(AggregateType type, const std::string& name, + std::unique_ptr expr = nullptr) { + AggregateInfo info; + info.type = type; + info.name = name; + info.expr = std::move(expr); + info.is_distinct = true; + return info; +} + // Helper: create a BufferScanOperator with test data std::unique_ptr make_buffer_scan(const std::string& table_name, const std::vector& data, @@ -769,6 +780,106 @@ TEST_F(OperatorTests, AggregateAvgFractional) { agg->close(); } +TEST_F(OperatorTests, AggregateMultipleAggregates) { + // Test multiple aggregates in single query: SUM, COUNT, AVG + Schema schema = make_schema({{"val", common::ValueType::TYPE_INT64}}); + std::vector data; + data.push_back(make_tuple({common::Value::make_int64(10)})); + data.push_back(make_tuple({common::Value::make_int64(20)})); + data.push_back(make_tuple({common::Value::make_int64(30)})); + + auto scan = make_buffer_scan("test_table", data, schema); + std::vector aggs; + aggs.push_back(make_agg(AggregateType::Sum, "total", col_expr("val"))); + aggs.push_back(make_agg(AggregateType::Count, "cnt")); + aggs.push_back(make_agg(AggregateType::Avg, "avg_val", col_expr("val"))); + auto agg = make_agg_op(std::move(scan), {}, std::move(aggs)); + + ASSERT_TRUE(agg->init()); + ASSERT_TRUE(agg->open()); + + 
Tuple tuple; + EXPECT_TRUE(agg->next(tuple)); + EXPECT_EQ(tuple.get(0).to_int64(), 60); // SUM + EXPECT_EQ(tuple.get(1).to_int64(), 3); // COUNT + EXPECT_EQ(tuple.get(2).to_float64(), 20.0); // AVG + EXPECT_FALSE(agg->next(tuple)); + agg->close(); +} + +TEST_F(OperatorTests, AggregateGroupByMultipleCols) { + // Test GROUP BY on a single key column (dept) over a multi-column input schema + Schema schema = make_schema({{"dept", common::ValueType::TYPE_INT64}, + {"name", common::ValueType::TYPE_TEXT}, + {"salary", common::ValueType::TYPE_INT64}}); + std::vector data; + data.push_back(make_tuple({common::Value::make_int64(1), common::Value::make_text("alice"), + common::Value::make_int64(1000)})); + data.push_back(make_tuple({common::Value::make_int64(1), common::Value::make_text("bob"), + common::Value::make_int64(2000)})); + data.push_back(make_tuple({common::Value::make_int64(2), common::Value::make_text("charlie"), + common::Value::make_int64(1500)})); + + auto scan = make_buffer_scan("test_table", data, schema); + std::vector> group_by; + group_by.push_back(col_expr("dept")); + std::vector aggs; + aggs.push_back(make_agg(AggregateType::Sum, "total_salary", col_expr("salary"))); + auto agg = make_agg_op(std::move(scan), std::move(group_by), std::move(aggs)); + + ASSERT_TRUE(agg->init()); + ASSERT_TRUE(agg->open()); + + // Should get 2 groups: dept 1 (sum=3000), dept 2 (sum=1500) + std::vector> results; + Tuple tuple; + while (agg->next(tuple)) { + results.push_back({tuple.get(0).to_int64(), tuple.get(1).to_int64()}); + } + + EXPECT_EQ(results.size(), 2U); + // Verify both groups appear + bool found_dept1 = false; + bool found_dept2 = false; + for (const auto& r : results) { + if (r.first == 1 && r.second == 3000) { + found_dept1 = true; + } else if (r.first == 2 && r.second == 1500) { + found_dept2 = true; + } + } + EXPECT_TRUE(found_dept1); + EXPECT_TRUE(found_dept2); + agg->close(); +} + +TEST_F(OperatorTests, AggregateWithNulls) { + // Test aggregate functions with NULL values in data + Schema schema = 
make_schema({{"val", common::ValueType::TYPE_INT64}}); + std::vector data; + data.push_back(make_tuple({common::Value::make_int64(10)})); + data.push_back(make_tuple({common::Value()})); // NULL + data.push_back(make_tuple({common::Value::make_int64(20)})); + + auto scan = make_buffer_scan("test_table", data, schema); + std::vector aggs; + aggs.push_back(make_agg(AggregateType::Sum, "total", col_expr("val"))); + aggs.push_back(make_agg(AggregateType::Count, "cnt", col_expr("val"))); + auto agg = make_agg_op(std::move(scan), {}, std::move(aggs)); + + ASSERT_TRUE(agg->init()); + ASSERT_TRUE(agg->open()); + + Tuple tuple; + EXPECT_TRUE(agg->next(tuple)); + // SUM should skip NULL: 10 + 20 = 30 + EXPECT_EQ(tuple.get(0).to_int64(), 30); + // COUNT should skip NULL: only 2 non-null values + EXPECT_EQ(tuple.get(1).to_int64(), 2); + EXPECT_FALSE(agg->next(tuple)); + agg->close(); +} + TEST_F(OperatorTests, HashJoinRightOuter) { // Right table: values 2, 3, 4 (only 2 matches) Schema left_schema = make_schema({{"id", common::ValueType::TYPE_INT64}}); From 3fd5ca128b3d5d7e3766e7c96d20f5317287ba98 Mon Sep 17 00:00:00 2001 From: poyrazK <83272398+poyrazK@users.noreply.github.com> Date: Sun, 19 Apr 2026 17:19:04 +0000 Subject: [PATCH 2/2] style: automated clang-format fixes --- tests/operator_tests.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/operator_tests.cpp b/tests/operator_tests.cpp index 0dc3b3c..3af9f92 100644 --- a/tests/operator_tests.cpp +++ b/tests/operator_tests.cpp @@ -800,9 +800,9 @@ TEST_F(OperatorTests, AggregateMultipleAggregates) { Tuple tuple; EXPECT_TRUE(agg->next(tuple)); - EXPECT_EQ(tuple.get(0).to_int64(), 60); // SUM - EXPECT_EQ(tuple.get(1).to_int64(), 3); // COUNT - EXPECT_EQ(tuple.get(2).to_float64(), 20.0); // AVG + EXPECT_EQ(tuple.get(0).to_int64(), 60); // SUM + EXPECT_EQ(tuple.get(1).to_int64(), 3); // COUNT + EXPECT_EQ(tuple.get(2).to_float64(), 20.0); // AVG EXPECT_FALSE(agg->next(tuple)); 
agg->close(); } @@ -905,7 +905,8 @@ TEST_F(OperatorTests, HashJoinRightOuter) { // RIGHT join output: matched rows + unmatched right rows with NULLs // Matched: (2, 2) // Unmatched right: (NULL, 3), (NULL, 4) - std::vector> results; // (left_value, right_value); use INT64_MIN as sentinel for NULL + std::vector> + results; // (left_value, right_value); use INT64_MIN as sentinel for NULL Tuple tuple; while (join->next(tuple)) { int64_t left_val = tuple.get(0).is_null() ? INT64_MIN : tuple.get(0).to_int64(); @@ -991,11 +992,11 @@ TEST_F(OperatorTests, HashJoinNullKeys) { Schema left_schema = make_schema({{"id", common::ValueType::TYPE_INT64}}); std::vector left_data; left_data.push_back(make_tuple({common::Value::make_int64(1)})); // matches 1 - left_data.push_back(make_tuple({common::Value()})); // NULL - currently matches NULL + left_data.push_back(make_tuple({common::Value()})); // NULL - currently matches NULL Schema right_schema = make_schema({{"id", common::ValueType::TYPE_INT64}}); std::vector right_data; - right_data.push_back(make_tuple({common::Value()})); // NULL - currently matches + right_data.push_back(make_tuple({common::Value()})); // NULL - currently matches right_data.push_back(make_tuple({common::Value::make_int64(1)})); // matches 1 auto left_scan = make_buffer_scan("left_table", left_data, left_schema);