Skip to content

Commit

Permalink
Analyzer: Fix assertion in HashJoin with duplicate columns
Browse files Browse the repository at this point in the history
  • Loading branch information
vdimir committed Jan 15, 2024
1 parent 9cfdff2 commit 4adeb24
Show file tree
Hide file tree
Showing 6 changed files with 322 additions and 48 deletions.
56 changes: 26 additions & 30 deletions src/Interpreters/HashJoin.cpp
Expand Up @@ -1717,18 +1717,14 @@ Block HashJoin::joinBlockImpl(
for (size_t i = 0; i < required_right_keys.columns(); ++i)
{
const auto & right_key = required_right_keys.getByPosition(i);
// renamed ???
if (!block.findByName(right_key.name))
{
/// asof column is already in block.
if (join_features.is_asof_join && right_key.name == table_join->getOnlyClause().key_names_right.back())
continue;
/// asof column is already in block.
if (join_features.is_asof_join && right_key.name == table_join->getOnlyClause().key_names_right.back())
continue;

const auto & left_column = block.getByName(required_right_keys_sources[i]);
const auto & right_col_name = getTableJoin().renamedRightColumnName(right_key.name);
auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column);
block.insert(std::move(right_col));
}
const auto & left_column = block.getByName(required_right_keys_sources[i]);
const auto & right_col_name = getTableJoin().renamedRightColumnName(right_key.name);
auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column);
block.insert(std::move(right_col));
}
}
else if (has_required_right_keys)
Expand All @@ -1738,19 +1734,16 @@ Block HashJoin::joinBlockImpl(
{
const auto & right_key = required_right_keys.getByPosition(i);
auto right_col_name = getTableJoin().renamedRightColumnName(right_key.name);
if (!block.findByName(right_col_name))
{
/// asof column is already in block.
if (join_features.is_asof_join && right_key.name == table_join->getOnlyClause().key_names_right.back())
continue;
/// asof column is already in block.
if (join_features.is_asof_join && right_key.name == table_join->getOnlyClause().key_names_right.back())
continue;

const auto & left_column = block.getByName(required_right_keys_sources[i]);
auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column, &added_columns.filter);
block.insert(std::move(right_col));
const auto & left_column = block.getByName(required_right_keys_sources[i]);
auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column, &added_columns.filter);
block.insert(std::move(right_col));

if constexpr (join_features.need_replication)
right_keys_to_replicate.push_back(block.getPositionByName(right_col_name));
}
if constexpr (join_features.need_replication)
right_keys_to_replicate.push_back(block.getPositionByName(right_col_name));
}
}

Expand Down Expand Up @@ -2009,12 +2002,14 @@ struct AdderNonJoined
/// Based on:
/// - map offsetInternal saved in used_flags for single disjuncts
/// - flags in BlockWithFlags for multiple disjuncts
template <bool multiple_disjuncts>
class NotJoinedHash final : public NotJoinedBlocks::RightColumnsFiller
{
public:
NotJoinedHash(const HashJoin & parent_, UInt64 max_block_size_)
: parent(parent_), max_block_size(max_block_size_), current_block_start(0)
NotJoinedHash(const HashJoin & parent_, UInt64 max_block_size_, bool multiple_disjuncts_)
: parent(parent_)
, max_block_size(max_block_size_)
, multiple_disjuncts(multiple_disjuncts_)
, current_block_start(0)
{
if (parent.data == nullptr)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot join after data has been released");
Expand All @@ -2040,7 +2035,7 @@ class NotJoinedHash final : public NotJoinedBlocks::RightColumnsFiller
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown JOIN strictness '{}' (must be on of: ANY, ALL, ASOF)", parent.strictness);
}

if constexpr (!multiple_disjuncts)
if (!multiple_disjuncts)
{
fillNullsFromBlocks(columns_right, rows_added);
}
Expand All @@ -2051,6 +2046,7 @@ class NotJoinedHash final : public NotJoinedBlocks::RightColumnsFiller
private:
const HashJoin & parent;
UInt64 max_block_size;
bool multiple_disjuncts;

size_t current_block_start;

Expand Down Expand Up @@ -2116,7 +2112,7 @@ class NotJoinedHash final : public NotJoinedBlocks::RightColumnsFiller
{
size_t rows_added = 0;

if constexpr (multiple_disjuncts)
if (multiple_disjuncts)
{
if (!used_position.has_value())
used_position = parent.data->blocks.begin();
Expand Down Expand Up @@ -2213,14 +2209,14 @@ IBlocksStreamPtr HashJoin::getNonJoinedBlocks(const Block & left_sample_block,
{
/// ... calculate `left_columns_count` ...
size_t left_columns_count = left_sample_block.columns();
auto non_joined = std::make_unique<NotJoinedHash<true>>(*this, max_block_size);
auto non_joined = std::make_unique<NotJoinedHash>(*this, max_block_size, multiple_disjuncts);
return std::make_unique<NotJoinedBlocks>(std::move(non_joined), result_sample_block, left_columns_count, *table_join);
}
else
{
size_t left_columns_count = left_sample_block.columns();
assert(left_columns_count == result_sample_block.columns() - required_right_keys.columns() - sample_block_with_columns_to_add.columns());
auto non_joined = std::make_unique<NotJoinedHash<false>>(*this, max_block_size);
chassert(left_columns_count + required_right_keys.columns() + sample_block_with_columns_to_add.columns() == result_sample_block.columns())
auto non_joined = std::make_unique<NotJoinedHash>(*this, max_block_size, multiple_disjuncts);
return std::make_unique<NotJoinedBlocks>(std::move(non_joined), result_sample_block, left_columns_count, *table_join);
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/Interpreters/HashJoin.h
Expand Up @@ -399,7 +399,7 @@ class HashJoin : public IJoin
void setMaxJoinedBlockRows(size_t value) { max_joined_block_rows = value; }

private:
template<bool> friend class NotJoinedHash;
friend class NotJoinedHash;

friend class JoinSource;

Expand Down
2 changes: 1 addition & 1 deletion src/Interpreters/MergeJoin.cpp
Expand Up @@ -1124,7 +1124,7 @@ IBlocksStreamPtr MergeJoin::getNonJoinedBlocks(
if (table_join->strictness() == JoinStrictness::All && (is_right || is_full))
{
size_t left_columns_count = left_sample_block.columns();
assert(left_columns_count == result_sample_block.columns() - right_columns_to_add.columns());
chassert(left_columns_count == result_sample_block.columns() - right_columns_to_add.columns());
auto non_joined = std::make_unique<NotJoinedMerge>(*this, max_block_size);
return std::make_unique<NotJoinedBlocks>(std::move(non_joined), result_sample_block, left_columns_count, *table_join);
}
Expand Down
232 changes: 232 additions & 0 deletions tests/queries/0_stateless/02381_join_dup_columns_in_plan.reference
@@ -1,3 +1,10 @@
-- { echoOn }

SET join_algorithm = 'hash';
EXPLAIN actions=0, description=0, header=1
SELECT * FROM ( SELECT 'key2' AS key ) AS s1
JOIN ( SELECT 'key1' AS key, '1' AS value UNION ALL SELECT 'key2' AS key, '1' AS value ) AS s2
USING (key);
Expression
Header: key String
value String
Expand All @@ -21,6 +28,121 @@ Header: key String
__table3.value String
ReadFromStorage
Header: dummy UInt8
SELECT * FROM ( SELECT 1 AS k ) as t1 FULL JOIN ( SELECT 1 AS k, k ) as t2 ON t1.k = t2.k ORDER BY 1;
1 1 1
SELECT * FROM ( SELECT 1 AS k, 1 as n ) as t1 FULL JOIN ( SELECT 1 AS k, k, 1 as n ) as t2 ON t1.n = t2.n ORDER BY 1;
1 1 1 1 1
SELECT *
FROM ( SELECT number, number, number, number, number FROM numbers(5) ) as t1
FULL JOIN ( SELECT number, number, number, FROM numbers(3, 8) ) as t2
ON t1.number = t2.number
ORDER BY t1.number, t2.number
;
0 0 0 0 0 0 0 0
0 0 0 0 0 5 5 5
0 0 0 0 0 6 6 6
0 0 0 0 0 7 7 7
0 0 0 0 0 8 8 8
0 0 0 0 0 9 9 9
0 0 0 0 0 10 10 10
1 1 1 1 1 0 0 0
2 2 2 2 2 0 0 0
3 3 3 3 3 3 3 3
4 4 4 4 4 4 4 4
SELECT *
FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1
FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2
ON t1.number = t2.number
ORDER BY t1.number, t2.number
;
0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 5 5 5 6 6 6 6 6
0 0 0 0 0 0 0 0 6 6 6 7 7 7 7 7
0 0 0 0 0 0 0 0 7 7 7 8 8 8 8 8
0 0 0 0 0 0 0 0 8 8 8 9 9 9 9 9
0 0 0 0 0 0 0 0 9 9 9 10 10 10 10 10
0 0 0 0 0 0 0 0 10 10 10 11 11 11 11 11
1 1 1 1 1 2 2 2 0 0 0 0 0 0 0 0
2 2 2 2 2 3 3 3 0 0 0 0 0 0 0 0
3 3 3 3 3 4 4 4 3 3 3 4 4 4 4 4
4 4 4 4 4 5 5 5 4 4 4 5 5 5 5 5
SELECT *
FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1
FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2
ON t1.k = t2.k
ORDER BY t1.k, t2.k
;
0 0 0 0 0 0 0 0 5 5 5 6 6 6 6 6
0 0 0 0 0 0 0 0 6 6 6 7 7 7 7 7
0 0 0 0 0 0 0 0 7 7 7 8 8 8 8 8
0 0 0 0 0 0 0 0 8 8 8 9 9 9 9 9
0 0 0 0 0 0 0 0 9 9 9 10 10 10 10 10
0 0 0 0 0 0 0 0 10 10 10 11 11 11 11 11
0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0
1 1 1 1 1 2 2 2 0 0 0 0 0 0 0 0
2 2 2 2 2 3 3 3 0 0 0 0 0 0 0 0
3 3 3 3 3 4 4 4 3 3 3 4 4 4 4 4
4 4 4 4 4 5 5 5 4 4 4 5 5 5 5 5
SELECT * FROM ( SELECT 1 AS k ) as t1 FULL JOIN ( SELECT 1 AS k, k ) as t2 USING (k) ORDER BY 1;
1
SELECT * FROM ( SELECT 1 AS k, 1 as n ) as t1 FULL JOIN ( SELECT 1 AS k, k, 1 as n ) as t2 USING (n) ORDER BY 1;
1 1 1 1
SELECT *
FROM ( SELECT number, number, number, number, number FROM numbers(5) ) as t1
FULL JOIN ( SELECT number, number, number, FROM numbers(3, 8) ) as t2
USING (number)
ORDER BY number
;
0
1
2
3
4
5
6
7
8
9
10
SELECT *
FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1
FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2
USING (number)
ORDER BY number
;
0 1 1 1 0 0 0 0 0
1 2 2 2 0 0 0 0 0
2 3 3 3 0 0 0 0 0
3 4 4 4 4 4 4 4 4
4 5 5 5 5 5 5 5 5
5 0 0 0 6 6 6 6 6
6 0 0 0 7 7 7 7 7
7 0 0 0 8 8 8 8 8
8 0 0 0 9 9 9 9 9
9 0 0 0 10 10 10 10 10
10 0 0 0 11 11 11 11 11
SELECT *
FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1
FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2
USING (k)
ORDER BY k
;
1 0 0 0 0 0 0 0 0
2 1 1 1 1 1 0 0 0
3 2 2 2 2 2 0 0 0
4 3 3 3 3 3 3 3 3
5 4 4 4 4 4 4 4 4
6 0 0 0 0 0 5 5 5
7 0 0 0 0 0 6 6 6
8 0 0 0 0 0 7 7 7
9 0 0 0 0 0 8 8 8
10 0 0 0 0 0 9 9 9
11 0 0 0 0 0 10 10 10
SET join_algorithm = 'full_sorting_merge', max_rows_in_set_to_optimize_join = 0;
EXPLAIN actions=0, description=0, header=1
SELECT * FROM ( SELECT 'key2' AS key ) AS s1
JOIN ( SELECT 'key1' AS key, '1' AS value UNION ALL SELECT 'key2' AS key, '1' AS value ) AS s2
USING (key);
Expression
Header: key String
value String
Expand Down Expand Up @@ -50,3 +172,113 @@ Header: key String
__table3.value String
ReadFromStorage
Header: dummy UInt8
SELECT * FROM ( SELECT 1 AS k ) as t1 FULL JOIN ( SELECT 1 AS k, k ) as t2 ON t1.k = t2.k ORDER BY 1;
1 1 1
SELECT * FROM ( SELECT 1 AS k, 1 as n ) as t1 FULL JOIN ( SELECT 1 AS k, k, 1 as n ) as t2 ON t1.n = t2.n ORDER BY 1;
1 1 1 1 1
SELECT *
FROM ( SELECT number, number, number, number, number FROM numbers(5) ) as t1
FULL JOIN ( SELECT number, number, number, FROM numbers(3, 8) ) as t2
ON t1.number = t2.number
ORDER BY t1.number, t2.number
;
0 0 0 0 0 0 0 0
0 0 0 0 0 5 5 5
0 0 0 0 0 6 6 6
0 0 0 0 0 7 7 7
0 0 0 0 0 8 8 8
0 0 0 0 0 9 9 9
0 0 0 0 0 10 10 10
1 1 1 1 1 0 0 0
2 2 2 2 2 0 0 0
3 3 3 3 3 3 3 3
4 4 4 4 4 4 4 4
SELECT *
FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1
FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2
ON t1.number = t2.number
ORDER BY t1.number, t2.number
;
0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 5 5 5 6 6 6 6 6
0 0 0 0 0 0 0 0 6 6 6 7 7 7 7 7
0 0 0 0 0 0 0 0 7 7 7 8 8 8 8 8
0 0 0 0 0 0 0 0 8 8 8 9 9 9 9 9
0 0 0 0 0 0 0 0 9 9 9 10 10 10 10 10
0 0 0 0 0 0 0 0 10 10 10 11 11 11 11 11
1 1 1 1 1 2 2 2 0 0 0 0 0 0 0 0
2 2 2 2 2 3 3 3 0 0 0 0 0 0 0 0
3 3 3 3 3 4 4 4 3 3 3 4 4 4 4 4
4 4 4 4 4 5 5 5 4 4 4 5 5 5 5 5
SELECT *
FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1
FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2
ON t1.k = t2.k
ORDER BY t1.k, t2.k
;
0 0 0 0 0 0 0 0 5 5 5 6 6 6 6 6
0 0 0 0 0 0 0 0 6 6 6 7 7 7 7 7
0 0 0 0 0 0 0 0 7 7 7 8 8 8 8 8
0 0 0 0 0 0 0 0 8 8 8 9 9 9 9 9
0 0 0 0 0 0 0 0 9 9 9 10 10 10 10 10
0 0 0 0 0 0 0 0 10 10 10 11 11 11 11 11
0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0
1 1 1 1 1 2 2 2 0 0 0 0 0 0 0 0
2 2 2 2 2 3 3 3 0 0 0 0 0 0 0 0
3 3 3 3 3 4 4 4 3 3 3 4 4 4 4 4
4 4 4 4 4 5 5 5 4 4 4 5 5 5 5 5
SELECT * FROM ( SELECT 1 AS k ) as t1 FULL JOIN ( SELECT 1 AS k, k ) as t2 USING (k) ORDER BY 1;
1
SELECT * FROM ( SELECT 1 AS k, 1 as n ) as t1 FULL JOIN ( SELECT 1 AS k, k, 1 as n ) as t2 USING (n) ORDER BY 1;
1 1 1 1
SELECT *
FROM ( SELECT number, number, number, number, number FROM numbers(5) ) as t1
FULL JOIN ( SELECT number, number, number, FROM numbers(3, 8) ) as t2
USING (number)
ORDER BY number
;
0
1
2
3
4
5
6
7
8
9
10
SELECT *
FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1
FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2
USING (number)
ORDER BY number
;
0 1 1 1 0 0 0 0 0
1 2 2 2 0 0 0 0 0
2 3 3 3 0 0 0 0 0
3 4 4 4 4 4 4 4 4
4 5 5 5 5 5 5 5 5
5 0 0 0 6 6 6 6 6
6 0 0 0 7 7 7 7 7
7 0 0 0 8 8 8 8 8
8 0 0 0 9 9 9 9 9
9 0 0 0 10 10 10 10 10
10 0 0 0 11 11 11 11 11
SELECT *
FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1
FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2
USING (k)
ORDER BY k
;
1 0 0 0 0 0 0 0 0
2 1 1 1 1 1 0 0 0
3 2 2 2 2 2 0 0 0
4 3 3 3 3 3 3 3 3
5 4 4 4 4 4 4 4 4
6 0 0 0 0 0 5 5 5
7 0 0 0 0 0 6 6 6
8 0 0 0 0 0 7 7 7
9 0 0 0 0 0 8 8 8
10 0 0 0 0 0 9 9 9
11 0 0 0 0 0 10 10 10
16 changes: 0 additions & 16 deletions tests/queries/0_stateless/02381_join_dup_columns_in_plan.sql

This file was deleted.

0 comments on commit 4adeb24

Please sign in to comment.