Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce virtual function call overhead for constructing columns in join probing #8683

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 1 addition & 8 deletions dbms/src/Columns/ColumnAggregateFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -382,15 +382,8 @@ ColumnPtr ColumnAggregateFunction::replicateRange(size_t start_row, size_t end_r
auto & res_data = res->getData();
res_data.reserve(offsets[end_row - 1]);

IColumn::Offset prev_offset = 0;
for (size_t i = start_row; i < end_row; ++i)
{
size_t size_to_replicate = offsets[i] - prev_offset;
prev_offset = offsets[i];

for (size_t j = 0; j < size_to_replicate; ++j)
res_data.push_back(data[i]);
}
res_data.resize_fill(offsets[i], data[i]);

return res;
}
Expand Down
10 changes: 7 additions & 3 deletions dbms/src/Columns/ColumnAggregateFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,14 @@ class ColumnAggregateFunction final : public COWPtrHelper<IColumn, ColumnAggrega
insertFrom(src_, n);
}

void insertDisjunctFrom(const IColumn & src_, const std::vector<size_t> & position_vec) override
void insertDisjunctManyFrom(const IColumn & src_, const IColumn::Disjuncts & disjuncts) override
{
for (auto position : position_vec)
insertFrom(src_, position);
insertDisjunctManyFromImpl<ColumnAggregateFunction>(src_, disjuncts);
}

/// Gather-range insertion for ColumnAggregateFunction.
/// Forwards `src_` and `gather_ranges` unchanged to insertGatherRangeFromImpl,
/// instantiated with ColumnAggregateFunction as its template argument.
/// NOTE(review): the helper is defined outside this view; how it walks the
/// ranges (and how it treats null entries in `src_`) should be confirmed there.
void insertGatherRangeFrom(ColumnRawPtrs & src_, const IColumn::GatherRanges & gather_ranges) override
{
insertGatherRangeFromImpl<ColumnAggregateFunction>(src_, gather_ranges);
}

void insertFrom(ConstAggregateDataPtr __restrict place);
Expand Down
10 changes: 7 additions & 3 deletions dbms/src/Columns/ColumnArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,14 @@ class ColumnArray final : public COWPtrHelper<IColumn, ColumnArray>
insertFrom(src_, n);
}

void insertDisjunctFrom(const IColumn & src_, const std::vector<size_t> & position_vec) override
void insertDisjunctManyFrom(const IColumn & src, const IColumn::Disjuncts & disjuncts) override
{
for (auto position : position_vec)
insertFrom(src_, position);
insertDisjunctManyFromImpl<ColumnArray>(src, disjuncts);
}

/// Gather-range insertion for ColumnArray.
/// Forwards `src` and `gather_ranges` unchanged to insertGatherRangeFromImpl,
/// instantiated with ColumnArray as its template argument.
/// NOTE(review): the helper is defined outside this view; how it walks the
/// ranges (and how it treats null entries in `src`) should be confirmed there.
void insertGatherRangeFrom(ColumnRawPtrs & src, const IColumn::GatherRanges & gather_ranges) override
{
insertGatherRangeFromImpl<ColumnArray>(src, gather_ranges);
}

void insertDefault() override;
Expand Down
11 changes: 9 additions & 2 deletions dbms/src/Columns/ColumnConst.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,16 @@ class ColumnConst final : public COWPtrHelper<IColumn, ColumnConst>

/// Only the row counter `s` changes: it grows by `length`. The source column
/// and position arguments are unnamed and never read.
void insertManyFrom(const IColumn &, size_t, size_t length) override { s += length; }

void insertDisjunctFrom(const IColumn &, const std::vector<size_t> & position_vec) override
void insertDisjunctManyFrom(const IColumn &, const IColumn::Disjuncts & disjuncts) override
{
s += position_vec.size();
if (!disjuncts.empty())
s += disjuncts.back().count_offset;
}

/// Gather-range insertion for a constant column: no data is copied, only the
/// row counter `s` grows. The source columns are ignored.
/// The increment is the `length_offset` of the LAST range (presumably a running
/// total over all ranges — confirm against IColumn::GatherRanges). An empty
/// range list leaves the column untouched.
void insertGatherRangeFrom(ColumnRawPtrs &, const IColumn::GatherRanges & gather_ranges) override
{
    if (gather_ranges.empty())
        return;
    s += gather_ranges.back().length_offset;
}

/// Appending a default value only increments the row counter `s`.
void insertDefault() override { ++s; }
Expand Down
47 changes: 33 additions & 14 deletions dbms/src/Columns/ColumnDecimal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,14 +318,40 @@ void ColumnDecimal<T>::insertManyFrom(const IColumn & src, size_t position, size
}

template <typename T>
void ColumnDecimal<T>::insertDisjunctFrom(const IColumn & src, const std::vector<size_t> & position_vec)
void ColumnDecimal<T>::insertDisjunctManyFrom(const IColumn & src, const IColumn::Disjuncts & disjuncts)
{
const auto & src_data = static_cast<const ColumnDecimal &>(src).data;
if (disjuncts.empty())
return;
const auto & src_container = static_cast<const Self &>(src).getData();
size_t old_size = data.size();
size_t to_add_size = position_vec.size();
data.resize(old_size + to_add_size);
for (size_t i = 0; i < to_add_size; ++i)
data[i + old_size] = src_data[position_vec[i]];
data.reserve(old_size + disjuncts.back().count_offset);
for (const auto & d : disjuncts)
data.resize_fill(old_size + d.count_offset, src_container[d.position]);
}

/// Appends values gathered from several source columns, one range per source.
///
/// `src[k]` pairs with `gather_ranges[k]` (sizes are asserted equal). Each
/// range's `length_offset` is used as a running total: the number of values
/// taken for range k is `length_offset` minus the previous range's
/// `length_offset`, and the last range's `length_offset` is the total that is
/// reserved up front.
///   - A null `src[k]` pads the column with `T()` up to the running total.
///   - Otherwise the values starting at `start_pos` in the source's `data`
///     are copied.
/// An empty `gather_ranges` is a no-op.
template <typename T>
void ColumnDecimal<T>::insertGatherRangeFrom(ColumnRawPtrs & src, const IColumn::GatherRanges & gather_ranges)
{
    if (gather_ranges.empty())
        return;
    assert(src.size() == gather_ranges.size());

    const size_t base_size = data.size();
    data.reserve(base_size + gather_ranges.back().length_offset);

    size_t consumed = 0; // running total covered by the ranges handled so far
    const size_t source_count = src.size();
    for (size_t idx = 0; idx != source_count; ++idx)
    {
        const auto & range = gather_ranges[idx];
        if (src[idx] != nullptr)
        {
            const auto & typed_src = static_cast<const Self &>(*src[idx]);
            data.insert(
                &typed_src.data[range.start_pos],
                &typed_src.data[range.start_pos + range.length_offset - consumed]);
        }
        else
        {
            // No source for this range: pad with default-constructed values.
            data.resize_fill(base_size + range.length_offset, T());
        }
        consumed = range.length_offset;
    }
}

#pragma GCC diagnostic pop
Expand Down Expand Up @@ -380,15 +406,8 @@ ColumnPtr ColumnDecimal<T>::replicateRange(size_t start_row, size_t end_row, con
typename Self::Container & res_data = res->getData();
res_data.reserve(offsets[end_row - 1]);

IColumn::Offset prev_offset = 0;
for (size_t i = start_row; i < end_row; ++i)
{
size_t size_to_replicate = offsets[i] - prev_offset;
prev_offset = offsets[i];

for (size_t j = 0; j < size_to_replicate; ++j)
res_data.push_back(data[i]);
}
res_data.resize_fill(offsets[i], data[i]);

return res;
}
Expand Down
3 changes: 2 additions & 1 deletion dbms/src/Columns/ColumnDecimal.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@ class ColumnDecimal final : public COWPtrHelper<ColumnVectorHelper, ColumnDecima
/// Appends one value: extracts a NearestFieldType<T>::Type from the Field via
/// DB::get and pushes it onto `data`.
void insert(const Field & x) override { data.push_back(DB::get<typename NearestFieldType<T>::Type>(x)); }
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
void insertManyFrom(const IColumn & src_, size_t position, size_t length) override;
void insertDisjunctFrom(const IColumn & src_, const std::vector<size_t> & position_vec) override;
void insertDisjunctManyFrom(const IColumn & src, const IColumn::Disjuncts & disjuncts) override;
void insertGatherRangeFrom(ColumnRawPtrs & src, const IColumn::GatherRanges & gather_ranges) override;
/// Drops the last `n` values by resizing `data` to `data.size() - n`.
/// NOTE(review): no guard against `n > data.size()` is visible here, so the
/// unsigned subtraction would wrap — callers presumably guarantee n <= size.
void popBack(size_t n) override { data.resize_assume_reserved(data.size() - n); }

StringRef getRawData() const override
Expand Down
30 changes: 17 additions & 13 deletions dbms/src/Columns/ColumnFixedString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ void ColumnFixedString::insertFrom(const IColumn & src_, size_t index)
{
const auto & src = static_cast<const ColumnFixedString &>(src_);

if (n != src.getN())
if unlikely (n != src.getN())
throw Exception("Size of FixedString doesn't match", ErrorCodes::SIZE_OF_FIXED_STRING_DOESNT_MATCH);

size_t old_size = chars.size();
Expand All @@ -77,7 +77,7 @@ void ColumnFixedString::insertFrom(const IColumn & src_, size_t index)
void ColumnFixedString::insertManyFrom(const IColumn & src_, size_t position, size_t length)
{
const auto & src = static_cast<const ColumnFixedString &>(src_);
if (n != src.getN())
if unlikely (n != src.getN())
throw Exception("Size of FixedString doesn't match", ErrorCodes::SIZE_OF_FIXED_STRING_DOESNT_MATCH);
size_t old_size = chars.size();
size_t new_size = old_size + n * length;
Expand All @@ -87,22 +87,17 @@ void ColumnFixedString::insertManyFrom(const IColumn & src_, size_t position, si
memcpySmallAllowReadWriteOverflow15(&chars[i], src_char_ptr, n);
}

void ColumnFixedString::insertDisjunctFrom(const IColumn & src_, const std::vector<size_t> & position_vec)
void ColumnFixedString::insertDisjunctManyFrom(const IColumn & src_, const IColumn::Disjuncts & disjuncts)
{
const auto & src = static_cast<const ColumnFixedString &>(src_);
if (n != src.getN())
throw Exception("Size of FixedString doesn't match", ErrorCodes::SIZE_OF_FIXED_STRING_DOESNT_MATCH);
size_t old_size = chars.size();
size_t new_size = old_size + position_vec.size() * n;
chars.resize(new_size);
const auto & src_chars = src.chars;
for (size_t i = old_size, j = 0; i < new_size; i += n, ++j)
memcpySmallAllowReadWriteOverflow15(&chars[i], &src_chars[position_vec[j] * n], n);
if (disjuncts.empty())
return;
chars.reserve(chars.size() + disjuncts.back().count_offset * n);
insertDisjunctManyFromImpl<ColumnFixedString>(src_, disjuncts);
}

void ColumnFixedString::insertData(const char * pos, size_t length)
{
if (length > n)
if unlikely (length > n)
throw Exception("Too large string for FixedString column", ErrorCodes::TOO_LARGE_STRING_SIZE);

size_t old_size = chars.size();
Expand All @@ -111,6 +106,15 @@ void ColumnFixedString::insertData(const char * pos, size_t length)
memset(chars.data() + old_size + length, 0, n - length);
}

/// Gather-range insertion for ColumnFixedString.
///
/// Reserves room in `chars` for the last range's `length_offset` rows of `n`
/// chars each, then hands the actual copying to insertGatherRangeFromImpl.
/// `src` and `gather_ranges` are asserted to have the same size; an empty
/// `gather_ranges` is a no-op.
/// NOTE(review): the helper is defined outside this view — confirm how it
/// handles null entries in `src`.
void ColumnFixedString::insertGatherRangeFrom(ColumnRawPtrs & src, const IColumn::GatherRanges & gather_ranges)
{
    if (!gather_ranges.empty())
    {
        assert(src.size() == gather_ranges.size());
        const auto extra_chars = gather_ranges.back().length_offset * n;
        chars.reserve(chars.size() + extra_chars);
        insertGatherRangeFromImpl<ColumnFixedString>(src, gather_ranges);
    }
}

StringRef ColumnFixedString::serializeValueIntoArena(
size_t index,
Arena & arena,
Expand Down
4 changes: 3 additions & 1 deletion dbms/src/Columns/ColumnFixedString.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,15 @@ class ColumnFixedString final : public COWPtrHelper<IColumn, ColumnFixedString>

void insertManyFrom(const IColumn & src_, size_t position, size_t length) override;

void insertDisjunctFrom(const IColumn & src_, const std::vector<size_t> & position_vec) override;
void insertDisjunctManyFrom(const IColumn & src, const IColumn::Disjuncts & disjuncts) override;

void insertData(const char * pos, size_t length) override;

/// Appends one default value by growing `chars` by `n` elements via resize_fill.
/// NOTE(review): the fill value is resize_fill's default, not visible here —
/// presumably zero; confirm.
void insertDefault() override { chars.resize_fill(chars.size() + n); }
/// Appends `length` default values at once: grows `chars` by `n * length`
/// elements via resize_fill (fill value is resize_fill's default).
void insertManyDefaults(size_t length) override { chars.resize_fill(chars.size() + n * length); }

void insertGatherRangeFrom(ColumnRawPtrs & src, const IColumn::GatherRanges & gather_ranges) override;

/// Drops the last `elems` values by shrinking `chars` by `n * elems` elements.
/// NOTE(review): no guard against removing more than is stored is visible
/// here — callers presumably guarantee it.
void popBack(size_t elems) override { chars.resize_assume_reserved(chars.size() - n * elems); }

StringRef serializeValueIntoArena(
Expand Down
7 changes: 6 additions & 1 deletion dbms/src/Columns/ColumnFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ class ColumnFunction final : public COWPtrHelper<IColumn, ColumnFunction>
throw Exception("Cannot insert into " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}

void insertDisjunctFrom(const IColumn &, const std::vector<size_t> &) override
void insertDisjunctManyFrom(const IColumn &, const IColumn::Disjuncts &) override
{
throw Exception("Cannot insert into " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
Expand All @@ -104,6 +104,11 @@ class ColumnFunction final : public COWPtrHelper<IColumn, ColumnFunction>
throw Exception("Cannot insert into " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}

/// Gather-range insertion is not supported for this column: both arguments are
/// ignored and the call always throws Exception with
/// ErrorCodes::NOT_IMPLEMENTED ("Cannot insert into " followed by getName()).
void insertGatherRangeFrom(ColumnRawPtrs &, const IColumn::GatherRanges &) override
{
throw Exception("Cannot insert into " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}

StringRef serializeValueIntoArena(size_t, Arena &, char const *&, const TiDB::TiDBCollatorPtr &, String &)
const override
{
Expand Down
41 changes: 32 additions & 9 deletions dbms/src/Columns/ColumnNullable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,17 +257,40 @@ void ColumnNullable::insertManyFrom(const IColumn & src, size_t n, size_t length
map.resize_fill(map.size() + length, src_concrete.getNullMapData()[n]);
}

void ColumnNullable::insertDisjunctFrom(const IColumn & src, const std::vector<size_t> & position_vec)
void ColumnNullable::insertDisjunctManyFrom(const IColumn & src, const IColumn::Disjuncts & disjuncts)
{
const auto & src_concrete = static_cast<const ColumnNullable &>(src);
getNestedColumn().insertDisjunctFrom(src_concrete.getNestedColumn(), position_vec);
auto & map = getNullMapData();
const auto & src_map = src_concrete.getNullMapData();
size_t old_size = map.size();
size_t to_add_size = position_vec.size();
map.resize(old_size + to_add_size);
for (size_t i = 0; i < to_add_size; ++i)
map[i + old_size] = src_map[position_vec[i]];
getNestedColumn().insertDisjunctManyFrom(src_concrete.getNestedColumn(), disjuncts);
getNullMapColumn().insertDisjunctManyFrom(src_concrete.getNullMapColumn(), disjuncts);
}

/// Appends rows gathered from several nullable source columns, one range per
/// source (`src[k]` pairs with `gather_ranges[k]`; sizes are asserted equal).
///
/// Works in two passes:
///   1. The null map is extended range by range. A null `src[k]` marks every
///      row of that range as NULL (fills with 1); otherwise the matching slice
///      of the source's null map is copied. Each range's `length_offset` is
///      used as a running total, so a range's length is its `length_offset`
///      minus the previous one.
///   2. The nested column receives the same ranges via its own
///      insertGatherRangeFrom.
///
/// Side effect: every non-null entry of `src` is overwritten in place with a
/// pointer to that source's nested column, so the caller's vector no longer
/// holds the ColumnNullable pointers afterwards.
/// An empty `gather_ranges` is a no-op.
void ColumnNullable::insertGatherRangeFrom(ColumnRawPtrs & src, const IColumn::GatherRanges & gather_ranges)
{
    if (gather_ranges.empty())
        return;
    assert(src.size() == gather_ranges.size());

    auto & null_map = getNullMapData();
    const size_t base_size = null_map.size();
    null_map.reserve(base_size + gather_ranges.back().length_offset);

    size_t consumed = 0; // running total covered by the ranges handled so far
    const size_t source_count = src.size();
    for (size_t idx = 0; idx != source_count; ++idx)
    {
        const auto & range = gather_ranges[idx];
        if (src[idx] != nullptr)
        {
            const auto & nullable_src = static_cast<const ColumnNullable &>(*src[idx]);
            const auto & src_null_map = nullable_src.getNullMapData();
            null_map.insert(
                &src_null_map[range.start_pos],
                &src_null_map[range.start_pos + range.length_offset - consumed]);
            // Swap in the nested column so the nested insert below sees the right type.
            src[idx] = &nullable_src.getNestedColumn();
        }
        else
        {
            // No source for this range: all of its rows are NULL.
            null_map.resize_fill(base_size + range.length_offset, 1);
        }
        consumed = range.length_offset;
    }

    getNestedColumn().insertGatherRangeFrom(src, gather_ranges);
}

void ColumnNullable::popBack(size_t n)
Expand Down
3 changes: 2 additions & 1 deletion dbms/src/Columns/ColumnNullable.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ class ColumnNullable final : public COWPtrHelper<IColumn, ColumnNullable>
void insert(const Field & x) override;
void insertFrom(const IColumn & src, size_t n) override;
void insertManyFrom(const IColumn & src, size_t n, size_t length) override;
void insertDisjunctFrom(const IColumn & src, const std::vector<size_t> & position_vec) override;
void insertDisjunctManyFrom(const IColumn & src, const IColumn::Disjuncts & disjuncts) override;
void insertGatherRangeFrom(ColumnRawPtrs & src, const IColumn::GatherRanges & gather_ranges) override;

void insertDefault() override
{
Expand Down
19 changes: 14 additions & 5 deletions dbms/src/Columns/ColumnString.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,12 +155,12 @@ class ColumnString final : public COWPtrHelper<IColumn, ColumnString>
insertFromImpl(src, position);
}

void insertDisjunctFrom(const IColumn & src_, const std::vector<size_t> & position_vec) override
void insertDisjunctManyFrom(const IColumn & src, const IColumn::Disjuncts & disjuncts) override
{
const auto & src = static_cast<const ColumnString &>(src_);
offsets.reserve(offsets.size() + position_vec.size());
for (auto position : position_vec)
insertFromImpl(src, position);
if (disjuncts.empty())
return;
offsets.reserve(offsets.size() + disjuncts.back().count_offset);
insertDisjunctManyFromImpl<ColumnString>(src, disjuncts);
}

template <bool add_terminating_zero>
Expand All @@ -179,6 +179,15 @@ class ColumnString final : public COWPtrHelper<IColumn, ColumnString>

void insertData(const char * pos, size_t length) override { return insertDataImpl<true>(pos, length); }

/// Gather-range insertion for ColumnString.
///
/// Reserves room in `offsets` for the last range's `length_offset` additional
/// rows, then hands the actual copying to insertGatherRangeFromImpl.
/// `src` and `gather_ranges` are asserted to have the same size; an empty
/// `gather_ranges` is a no-op.
/// NOTE(review): the helper is defined outside this view — confirm how it
/// handles null entries in `src`.
void insertGatherRangeFrom(ColumnRawPtrs & src, const IColumn::GatherRanges & gather_ranges) override
{
    if (!gather_ranges.empty())
    {
        assert(src.size() == gather_ranges.size());
        const auto extra_rows = gather_ranges.back().length_offset;
        offsets.reserve(offsets.size() + extra_rows);
        insertGatherRangeFromImpl<ColumnString>(src, gather_ranges);
    }
}

bool decodeTiDBRowV2Datum(size_t cursor, const String & raw_value, size_t length, bool /* force_decode */) override
{
insertData(raw_value.c_str() + cursor, length);
Expand Down
10 changes: 7 additions & 3 deletions dbms/src/Columns/ColumnTuple.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,14 @@ class ColumnTuple final : public COWPtrHelper<IColumn, ColumnTuple>
insertFrom(src_, n);
}

void insertDisjunctFrom(const IColumn & src_, const std::vector<size_t> & position_vec) override
void insertDisjunctManyFrom(const IColumn & src, const IColumn::Disjuncts & disjuncts) override
{
for (auto position : position_vec)
insertFrom(src_, position);
insertDisjunctManyFromImpl<ColumnTuple>(src, disjuncts);
}

/// Gather-range insertion for ColumnTuple.
/// Forwards `src` and `gather_ranges` unchanged to insertGatherRangeFromImpl,
/// instantiated with ColumnTuple as its template argument.
/// NOTE(review): the helper is defined outside this view; how it walks the
/// ranges (and how it treats null entries in `src`) should be confirmed there.
void insertGatherRangeFrom(ColumnRawPtrs & src, const IColumn::GatherRanges & gather_ranges) override
{
insertGatherRangeFromImpl<ColumnTuple>(src, gather_ranges);
}

void insertDefault() override;
Expand Down
17 changes: 3 additions & 14 deletions dbms/src/Columns/ColumnVector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -320,26 +320,15 @@ ColumnPtr ColumnVector<T>::replicateRange(size_t start_row, size_t end_row, cons
assert(start_row < end_row);
assert(end_row <= size);

auto res = this->create();
if (0 == size)
return this->create();
return res;

auto res = this->create();
typename Self::Container & res_data = res->getData();

res_data.reserve(offsets[end_row - 1]);

IColumn::Offset prev_offset = 0;

for (size_t i = start_row; i < end_row; ++i)
{
size_t size_to_replicate = offsets[i] - prev_offset;
prev_offset = offsets[i];

for (size_t j = 0; j < size_to_replicate; ++j)
{
res_data.push_back(data[i]);
}
}
res_data.resize_fill(offsets[i], data[i]);

return res;
}
Expand Down
Loading