From 361f546fded2ed899630b68b58d6113777a9a9f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lisen=20=E6=9D=A8?= Date: Tue, 30 Jan 2024 14:36:39 +0800 Subject: [PATCH 1/5] support tracking bucket changes in two level hash table --- src/Common/HashTable/TimeBucketHashMap.h | 14 ++++ src/Common/HashTable/TimeBucketHashTable.h | 57 ++++++++++++++++ src/Common/HashTable/TwoLevelHashMap.h | 14 ++++ src/Common/HashTable/TwoLevelHashTable.h | 46 +++++++++++++ src/Common/HashTable/TwoLevelStringHashMap.h | 14 ++++ .../HashTable/TwoLevelStringHashTable.h | 66 +++++++++++++++++++ 6 files changed, 211 insertions(+) diff --git a/src/Common/HashTable/TimeBucketHashMap.h b/src/Common/HashTable/TimeBucketHashMap.h index 172d1d1c192..685ede30af4 100644 --- a/src/Common/HashTable/TimeBucketHashMap.h +++ b/src/Common/HashTable/TimeBucketHashMap.h @@ -33,6 +33,20 @@ class TimeBucketHashMapTable p.second.forEachValue(func); } + template + void ALWAYS_INLINE forEachValueOfUpdatedBuckets(Func && func, bool reset_updated = false) + { + for (auto & p : this->impls) + { + if (this->isUpdatedBucket(p.first)) + { + p.second.forEachValue(func); + if (reset_updated) + this->resetUpdated(p.first); + } + } + } + typename Cell::Mapped & ALWAYS_INLINE operator[](const Key & x) { LookupResult it; diff --git a/src/Common/HashTable/TimeBucketHashTable.h b/src/Common/HashTable/TimeBucketHashTable.h index 77c1cdbe8aa..9bff2271aa3 100644 --- a/src/Common/HashTable/TimeBucketHashTable.h +++ b/src/Common/HashTable/TimeBucketHashTable.h @@ -108,7 +108,9 @@ class TimeBucketHashTable : private boost::noncopyable, protected Hash /// empty using ConstLookupResult = typename Impl::ConstLookupResult; /// FIXME, choose a better perf data structure + /// Usually we don't have too many time buckets std::map impls; + std::unordered_map bucket_updated_flags; Impl sentinel; TimeBucketHashTable() { } @@ -263,6 +265,7 @@ class TimeBucketHashTable : private boost::noncopyable, protected Hash /// empty { auto window = windowKey(key_holder); impls[window].emplace(key_holder, it, inserted, hash_value); + bucket_updated_flags[window] = true; /// updated } LookupResult ALWAYS_INLINE find(Key x, size_t hash_value) @@ -289,6 +292,7 @@ class TimeBucketHashTable : private boost::noncopyable, protected Hash /// empty { DB::writeIntBinary(p.first); p.second.write(wb); + DB::writeBoolText(bucket_updated_flags[p.first], wb); } } @@ -309,7 +313,12 @@ class TimeBucketHashTable : private boost::noncopyable, protected Hash /// empty /// Write key and key-value separator DB::writeIntText(p.first, wb); DB::writeChar(KEY_VALUE_SEPARATOR, wb); + /// + DB::writeChar('<', wb); p.second.writeText(wb); + DB::writeChar(',', wb); + DB::writeBoolText(bucket_updated_flags[p.first], wb); + DB::writeChar('>', wb); } DB::writeChar(END_BUCKET_MARKER, wb); } @@ -327,6 +336,7 @@ class TimeBucketHashTable : private boost::noncopyable, protected Hash /// empty assert(key != 0); assert(!impls.contains(key)); impls[key].read(rb); + DB::readBoolText(bucket_updated_flags[key], rb); } } @@ -349,7 +359,12 @@ class TimeBucketHashTable : private boost::noncopyable, protected Hash /// empty assert(key != 0); assert(!impls.contains(key)); + /// + DB::assertChar('<', rb); impls[key].readText(rb); + DB::assertChar(',', rb); + DB::readBoolText(bucket_updated_flags[key], rb); + DB::assertChar('>', rb); } DB::assertChar(END_BUCKET_MARKER, rb); } @@ -402,6 +417,7 @@ class TimeBucketHashTable : private boost::noncopyable, protected Hash /// empty last_removed_watermark = it->first; ++removed; + 
bucket_updated_flags.erase(it->first); it = impls.erase(it); } else @@ -438,4 +454,45 @@ class TimeBucketHashTable : private boost::noncopyable, protected Hash /// empty return buckets; } + + bool isUpdatedBucket(Int64 bucket_) const + { + auto it = bucket_updated_flags.find(bucket_); + if (it != bucket_updated_flags.end()) + return it->second; + + return false; + } + + void resetUpdated(Int64 bucket_) + { + auto it = bucket_updated_flags.find(bucket_); + if (it != bucket_updated_flags.end()) + it->second = false; + } + + void writeBucketUpdatedFlags(DB::WriteBuffer & wb) const + { + DB::writeVarUInt(bucket_updated_flags.size(), wb); + for (const auto & [bucket, updated] : bucket_updated_flags) + { + DB::writeIntBinary(bucket, wb); + DB::writeBoolText(updated, wb); + } + } + + void readBucketUpdatedFlags(DB::ReadBuffer & rb) + { + size_t size = 0; + DB::readVarUInt(size, rb); + bucket_updated_flags.clear(); + Int64 bucket = 0; + bool updated = false; + for (size_t i = 0; i < size; ++i) + { + DB::readIntBinary(bucket, rb); + DB::readBoolText(updated, rb); + bucket_updated_flags.emplace(bucket, updated); + } + } }; diff --git a/src/Common/HashTable/TwoLevelHashMap.h b/src/Common/HashTable/TwoLevelHashMap.h index 3e618ca0a50..5c87d5e6eb0 100644 --- a/src/Common/HashTable/TwoLevelHashMap.h +++ b/src/Common/HashTable/TwoLevelHashMap.h @@ -38,6 +38,20 @@ class TwoLevelHashMapTable : public TwoLevelHashTableimpls[i].forEachValue(func); } + template + void ALWAYS_INLINE forEachValueOfUpdatedBuckets(Func && func, bool reset_updated = false) + { + for (auto i = 0u; i < this->NUM_BUCKETS; ++i) + { + if (this->isUpdatedBucket(i)) + { + this->impls[i].forEachValue(func); + if (reset_updated) + this->resetUpdated(i); + } + } + } + template void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func) { diff --git a/src/Common/HashTable/TwoLevelHashTable.h b/src/Common/HashTable/TwoLevelHashTable.h index 7e865cb48da..4dd13e6e7e4 100644 --- a/src/Common/HashTable/TwoLevelHashTable.h +++ b/src/Common/HashTable/TwoLevelHashTable.h @@ -90,6 +90,7 @@ class TwoLevelHashTable : using ConstLookupResult = typename Impl::ConstLookupResult; Impl impls[NUM_BUCKETS]; + bool bucket_updated_flags[NUM_BUCKETS] = {false}; TwoLevelHashTable() = default; @@ -119,6 +120,7 @@ class TwoLevelHashTable : size_t hash_value = cell->getHash(src); size_t buck = getBucketFromHash(hash_value); impls[buck].insertUniqueNonZero(cell, hash_value); + bucket_updated_flags[buck] = true; } } @@ -271,6 +273,7 @@ class TwoLevelHashTable : { size_t buck = getBucketFromHash(hash_value); impls[buck].emplace(key_holder, it, inserted, hash_value); + bucket_updated_flags[buck] = true; } LookupResult ALWAYS_INLINE find(Key x, size_t hash_value) @@ -292,7 +295,10 @@ class TwoLevelHashTable : void write(DB::WriteBuffer & wb) const { for (UInt32 i = 0; i < NUM_BUCKETS; ++i) + { impls[i].write(wb); + DB::writeBoolText(bucket_updated_flags[i], wb); + } } void writeText(DB::WriteBuffer & wb) const @@ -301,14 +307,22 @@ class TwoLevelHashTable : { if (i != 0) DB::writeChar(',', wb); + /// + DB::writeChar('<', wb); impls[i].writeText(wb); + DB::writeChar(',', wb); + DB::writeBoolText(bucket_updated_flags[i], wb); + DB::writeChar('>', wb); } } void read(DB::ReadBuffer & rb) { for (UInt32 i = 0; i < NUM_BUCKETS; ++i) + { impls[i].read(rb); + DB::readBoolText(bucket_updated_flags[i], rb); + } } void readText(DB::ReadBuffer & rb) @@ -317,7 +331,13 @@ class TwoLevelHashTable : { if (i != 0) DB::assertChar(',', rb); + + /// + DB::assertChar('<', rb); 
impls[i].readText(rb); + DB::assertChar(',', rb); + DB::readBoolText(bucket_updated_flags[i], rb); + DB::assertChar('>', rb); } } @@ -365,5 +385,31 @@ class TwoLevelHashTable : std::iota(bucket_ids.begin(), bucket_ids.end(), 0); return bucket_ids; } + + bool isUpdatedBucket(Int64 bucket_) const + { + return bucket_updated_flags[bucket_]; + } + + void resetUpdated(Int64 bucket_) + { + bucket_updated_flags[bucket_] = false; + } + + void writeBucketUpdatedFlags(DB::WriteBuffer & wb) const + { + DB::writeVarUInt(NUM_BUCKETS, wb); + for (const auto & elem : bucket_updated_flags) + DB::writeBoolText(elem, wb); + } + + void readBucketUpdatedFlags(DB::ReadBuffer & rb) + { + size_t size = 0; + DB::readVarUInt(size, rb); + assert(size == NUM_BUCKETS); + for (auto & elem : bucket_updated_flags) + DB::readBoolText(elem, rb); + } /// proton : ends }; diff --git a/src/Common/HashTable/TwoLevelStringHashMap.h b/src/Common/HashTable/TwoLevelStringHashMap.h index a351543edb0..9f2c5ba00d3 100644 --- a/src/Common/HashTable/TwoLevelStringHashMap.h +++ b/src/Common/HashTable/TwoLevelStringHashMap.h @@ -29,6 +29,20 @@ class TwoLevelStringHashMap : public TwoLevelStringHashTableimpls[i].forEachValue(func); } + template + void ALWAYS_INLINE forEachValueOfUpdatedBuckets(Func && func, bool reset_updated = false) + { + for (auto i = 0u; i < this->NUM_BUCKETS; ++i) + { + if (this->isUpdatedBucket(i)) + { + this->impls[i].forEachValue(func); + if (reset_updated) + this->resetUpdated(i); + } + } + } + template void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func) { diff --git a/src/Common/HashTable/TwoLevelStringHashTable.h b/src/Common/HashTable/TwoLevelStringHashTable.h index e0485f5aaa6..e74ae676143 100644 --- a/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/src/Common/HashTable/TwoLevelStringHashTable.h @@ -39,6 +39,7 @@ class TwoLevelStringHashTable : private boost::noncopyable using ConstLookupResult = typename Impl::ConstLookupResult; Impl impls[NUM_BUCKETS]; + bool bucket_updated_flags[NUM_BUCKETS] = {false}; TwoLevelStringHashTable() {} @@ -53,24 +54,28 @@ class TwoLevelStringHashTable : private boost::noncopyable size_t hash_value = v.getHash(src.m1); size_t buck = getBucketFromHash(hash_value); impls[buck].m1.insertUniqueNonZero(&v, hash_value); + bucket_updated_flags[buck] = true; } for (auto & v : src.m2) { size_t hash_value = v.getHash(src.m2); size_t buck = getBucketFromHash(hash_value); impls[buck].m2.insertUniqueNonZero(&v, hash_value); + bucket_updated_flags[buck] = true; } for (auto & v : src.m3) { size_t hash_value = v.getHash(src.m3); size_t buck = getBucketFromHash(hash_value); impls[buck].m3.insertUniqueNonZero(&v, hash_value); + bucket_updated_flags[buck] = true; } for (auto & v : src.ms) { size_t hash_value = v.getHash(src.ms); size_t buck = getBucketFromHash(hash_value); impls[buck].ms.insertUniqueNonZero(&v, hash_value); + bucket_updated_flags[buck] = true; } } @@ -84,6 +89,9 @@ class TwoLevelStringHashTable : private boost::noncopyable const size_t sz = x.size; if (sz == 0) { + if constexpr (std::is_same_v) + self.bucket_updated_flags[0] = true; + keyHolderDiscardKey(key_holder); return func(self.impls[0].m0, VoidKey{}, 0); } @@ -94,6 +102,9 @@ class TwoLevelStringHashTable : private boost::noncopyable // string keys. Put them to the generic table. 
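For context, a minimal sketch of how the updated-bucket tracking added above is meant to be consumed (illustrative only; the map type, the summing logic and the include are assumptions, not part of this patch). The idea is to visit only the buckets whose flag was set by emplace()/mergeTo() since the last finalization, and to clear those flags afterwards so the next round revisits only freshly changed buckets.

#include <Common/HashTable/TwoLevelHashMap.h>

/// Sum the values of groups living in buckets that changed since the previous
/// call, then mark those buckets clean again.
UInt64 sumChangedGroups(TwoLevelHashMap<UInt64, UInt64> & map)
{
    UInt64 sum = 0;
    map.forEachValueOfUpdatedBuckets(
        [&](const auto & /*key*/, auto & value) { sum += value; },
        /*reset_updated=*/ true);
    return sum;
}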
auto res = hash(x); auto buck = getBucketFromHash(res); + if constexpr (std::is_same_v) + self.bucket_updated_flags[buck] = true; + return func(self.impls[buck].ms, std::forward(key_holder), res); } @@ -126,6 +137,9 @@ class TwoLevelStringHashTable : private boost::noncopyable } auto res = hash(k8); auto buck = getBucketFromHash(res); + if constexpr (std::is_same_v) + self.bucket_updated_flags[buck] = true; + keyHolderDiscardKey(key_holder); return func(self.impls[buck].m1, k8, res); } @@ -137,6 +151,9 @@ class TwoLevelStringHashTable : private boost::noncopyable n[1] >>= s; auto res = hash(k16); auto buck = getBucketFromHash(res); + if constexpr (std::is_same_v) + self.bucket_updated_flags[buck] = true; + keyHolderDiscardKey(key_holder); return func(self.impls[buck].m2, k16, res); } @@ -148,6 +165,9 @@ class TwoLevelStringHashTable : private boost::noncopyable n[2] >>= s; auto res = hash(k24); auto buck = getBucketFromHash(res); + if constexpr (std::is_same_v) + self.bucket_updated_flags[buck] = true; + keyHolderDiscardKey(key_holder); return func(self.impls[buck].m3, k24, res); } @@ -155,6 +175,9 @@ class TwoLevelStringHashTable : private boost::noncopyable { auto res = hash(x); auto buck = getBucketFromHash(res); + if constexpr (std::is_same_v) + self.bucket_updated_flags[buck] = true; + return func(self.impls[buck].ms, std::forward(key_holder), res); } } @@ -179,7 +202,10 @@ class TwoLevelStringHashTable : private boost::noncopyable void write(DB::WriteBuffer & wb) const { for (UInt32 i = 0; i < NUM_BUCKETS; ++i) + { impls[i].write(wb); + DB::writeBoolText(bucket_updated_flags[i], wb); + } } void writeText(DB::WriteBuffer & wb) const @@ -188,14 +214,22 @@ class TwoLevelStringHashTable : private boost::noncopyable { if (i != 0) DB::writeChar(',', wb); + /// + DB::writeChar('<', wb); impls[i].writeText(wb); + DB::writeChar(',', wb); + DB::writeBoolText(bucket_updated_flags[i], wb); + DB::writeChar('>', wb); } } void read(DB::ReadBuffer & rb) { for (UInt32 i = 0; i < NUM_BUCKETS; ++i) + { impls[i].read(rb); + DB::readBoolText(bucket_updated_flags[i], rb); + } } void readText(DB::ReadBuffer & rb) @@ -205,6 +239,12 @@ class TwoLevelStringHashTable : private boost::noncopyable if (i != 0) DB::assertChar(',', rb); impls[i].readText(rb); + /// + DB::assertChar('<', rb); + impls[i].readText(rb); + DB::assertChar(',', rb); + DB::readBoolText(bucket_updated_flags[i], rb); + DB::assertChar('>', rb); } } @@ -252,4 +292,30 @@ class TwoLevelStringHashTable : private boost::noncopyable std::iota(bucket_ids.begin(), bucket_ids.end(), 0); return bucket_ids; } + + bool isUpdatedBucket(Int64 bucket_) const + { + return bucket_updated_flags[bucket_]; + } + + void resetUpdated(Int64 bucket_) + { + bucket_updated_flags[bucket_] = false; + } + + void writeBucketUpdatedFlags(DB::WriteBuffer & wb) const + { + DB::writeVarUInt(NUM_BUCKETS, wb); + for (const auto & elem : bucket_updated_flags) + DB::writeBoolText(elem, wb); + } + + void readBucketUpdatedFlags(DB::ReadBuffer & rb) + { + size_t size = 0; + DB::readVarUInt(size, rb); + assert(size == NUM_BUCKETS); + for (auto & elem : bucket_updated_flags) + DB::readBoolText(elem, rb); + } }; From a298a8e166c201069fa9decd3d345606ae1dd3a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lisen=20=E6=9D=A8?= Date: Tue, 30 Jan 2024 14:44:53 +0800 Subject: [PATCH 2/5] add expanded data in aggregate state to track updated and retracted --- cmake/autogenerated_versions.txt | 2 +- src/Common/HashMapsTemplate.h | 26 +- src/Common/serde.h | 21 + 
src/Interpreters/InterpreterSelectQuery.cpp | 3 +- src/Interpreters/Streaming/AggregateDataEx.h | 124 + .../Streaming/AggregationUtils.cpp | 113 + src/Interpreters/Streaming/AggregationUtils.h | 27 + src/Interpreters/Streaming/Aggregator.cpp | 2525 ++++++++--------- src/Interpreters/Streaming/Aggregator.h | 236 +- .../Streaming/AggregatingHelper.cpp | 73 +- .../Transforms/Streaming/AggregatingHelper.h | 11 +- .../Streaming/AggregatingTransform.cpp | 4 +- .../AggregatingTransformWithSubstream.cpp | 8 +- .../Streaming/GlobalAggregatingTransform.cpp | 82 +- .../Streaming/GlobalAggregatingTransform.h | 2 + ...lobalAggregatingTransformWithSubstream.cpp | 64 +- .../GlobalAggregatingTransformWithSubstream.h | 2 + 17 files changed, 1798 insertions(+), 1525 deletions(-) create mode 100644 src/Interpreters/Streaming/AggregateDataEx.h create mode 100644 src/Interpreters/Streaming/AggregationUtils.cpp create mode 100644 src/Interpreters/Streaming/AggregationUtils.h diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 29ccf0cc41c..2f61abb85dc 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,7 +2,7 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 2) +SET(VERSION_REVISION 3) SET(VERSION_MAJOR 1) SET(VERSION_MINOR 4) SET(VERSION_PATCH 1) diff --git a/src/Common/HashMapsTemplate.h b/src/Common/HashMapsTemplate.h index 8eb33d1d1d4..53df5ecd69f 100644 --- a/src/Common/HashMapsTemplate.h +++ b/src/Common/HashMapsTemplate.h @@ -4,7 +4,8 @@ #include #include #include -#include +#include +#include namespace DB { @@ -24,9 +25,14 @@ void serializeHashMap(const Map & map, MappedSerializer && mapped_serializer, Wr }); } -template +template void deserializeHashMap(Map & map, MappedDeserializer && mapped_deserializer, Arena & pool, ReadBuffer & rb) { + using Mapped = std::decay_t::mapped_type; + + constexpr bool is_string_hash_map + = std::is_same_v, StringHashMap> || std::is_same_v, TwoLevelStringHashMap>; + /// For StringHashMap or TwoLevelStringHashMap, it requires StringRef key padded 8 keys(left and right). 
/// So far, the Arena's MemoryChunk is always padding right 15, so we just pad left 8 here if constexpr (is_string_hash_map) @@ -60,6 +66,20 @@ void deserializeHashMap(Map & map, MappedDeserializer && mapped_deserializer, Ar pool.setPaddingLeft(0); } +template +void serializeTwoLevelHashMap(const Map & map, MappedSerializer && mapped_serializer, WriteBuffer & wb) +{ + serializeHashMap(map, std::move(mapped_serializer), wb); + map.writeBucketUpdatedFlags(wb); +} + +template +void deserializeTwoLevelHashMap(Map & map, MappedDeserializer && mapped_deserializer, Arena & pool, ReadBuffer & rb) +{ + deserializeHashMap(map, std::move(mapped_deserializer), pool, rb); + map.readBucketUpdatedFlags(rb); /// recover buckets updated status +} + /// HashMapsTemplate is a taken from HashJoin class and make it standalone /// and could be shared among different components @@ -187,7 +207,7 @@ struct HashMapsTemplate #define M(NAME) \ case HashType::NAME: { \ assert(NAME); \ - deserializeHashMap(*NAME, mapped_deserializer, pool, rb); \ + deserializeHashMap(*NAME, mapped_deserializer, pool, rb); \ return; \ } APPLY_FOR_HASH_KEY_VARIANTS(M) diff --git a/src/Common/serde.h b/src/Common/serde.h index ce44f491337..d6e51e17dc0 100644 --- a/src/Common/serde.h +++ b/src/Common/serde.h @@ -27,6 +27,27 @@ void ALWAYS_INLINE deserialize(S & s, RB & rb, VersionType version, Args &&... a s.deserialize(rb, version, std::forward(args)...); } +/// With owned versions +template +concept Serializable + = requires(const S & s, WB & wb, Args &&... args) { s.serialize(wb, std::forward(args)...); }; + +template +concept Deserializable + = requires(S & s, RB & rb, Args &&... args) { s.deserialize(rb, std::forward(args)...); }; + +template S> +void ALWAYS_INLINE serialize(const S & s, WB & wb, Args &&... args) +{ + s.serialize(wb, std::forward(args)...); +} + +template S> +void ALWAYS_INLINE deserialize(S & s, RB & rb, Args &&... args) +{ + s.deserialize(rb, std::forward(args)...); +} + /// macro tag to indicate the data members or struct or class will /// be serialized / deserialized via network or file system IO. 
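As a quick illustration of the unversioned overloads introduced above (a sketch; `Point` is a made-up type and the snippet assumes it sits inside namespace DB with IO/WriteHelpers.h and IO/ReadHelpers.h included): any type exposing member serialize(WriteBuffer &) / deserialize(ReadBuffer &) satisfies the new Serializable / Deserializable concepts and can be passed straight to the free helpers, while state that still needs explicit versioning keeps using the pre-existing versioned overloads.

struct Point
{
    Int32 x = 0;
    Int32 y = 0;

    void serialize(WriteBuffer & wb) const
    {
        writeIntBinary(x, wb);
        writeIntBinary(y, wb);
    }

    void deserialize(ReadBuffer & rb)
    {
        readIntBinary(x, rb);
        readIntBinary(y, rb);
    }
};

/// serialize(point, wb) and deserialize(point, rb) now resolve to the
/// concept-constrained overloads, no VersionType argument required.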
/// Hence, data structure versioning / backward / forward compatibility diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index ae40014e4ba..66d99a2c0fb 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -3261,7 +3261,8 @@ void InterpreterSelectQuery::executeStreamingAggregation( streaming_group_by, delta_col_pos, window_keys_num, - query_info.streaming_window_params); + query_info.streaming_window_params, + data_stream_semantic_pair.isChangelogOutput()); auto merge_threads = max_streams; auto temporary_data_merge_threads = settings.aggregation_memory_efficient_merge_threads diff --git a/src/Interpreters/Streaming/AggregateDataEx.h b/src/Interpreters/Streaming/AggregateDataEx.h new file mode 100644 index 00000000000..2b969018a7d --- /dev/null +++ b/src/Interpreters/Streaming/AggregateDataEx.h @@ -0,0 +1,124 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ +using AggregateDataPtr = char *; +using ConstAggregateDataPtr = const char *; + +namespace Streaming +{ +SERDE struct UpdatedDataEx +{ + static ALWAYS_INLINE UpdatedDataEx & data(AggregateDataPtr __restrict place) { return *reinterpret_cast(place); } + static ALWAYS_INLINE const UpdatedDataEx & data(ConstAggregateDataPtr __restrict place) { return *reinterpret_cast(place); } + + static ALWAYS_INLINE bool isEmpty(ConstAggregateDataPtr __restrict place) { return data(place).final_count == 0; } + static ALWAYS_INLINE bool isUpdated(ConstAggregateDataPtr __restrict place) { return data(place).updated_since_last_finalization; } + static ALWAYS_INLINE void setUpdated(AggregateDataPtr __restrict place) { data(place).updated_since_last_finalization = true; } + static ALWAYS_INLINE void resetUpdated(AggregateDataPtr __restrict place) { data(place).updated_since_last_finalization = false; } + + static void addBatch(size_t row_begin, size_t row_end, AggregateDataPtr * places, const IColumn * delta_col) + { + if (delta_col == nullptr) + { + for (size_t i = row_begin; i < row_end; ++i) + if (places[i]) + data(places[i]).add(); + } + else + { + const auto & delta_flags = assert_cast(*delta_col).getData(); + for (size_t i = row_begin; i < row_end; ++i) + { + if (places[i]) + { + if (delta_flags[i] >= 0) + data(places[i]).add(); + else + data(places[i]).negate(); + } + } + } + } + + static void addBatchSinglePlace(size_t row_begin, size_t row_end, AggregateDataPtr __restrict place, const IColumn * delta_col) + { + if (!place) + return; + + auto & metadata = data(place); + if (delta_col == nullptr) + metadata.final_count += row_end - row_begin; + else + { + const auto & delta_flags = assert_cast(*delta_col).getData(); + metadata.final_count = std::accumulate(delta_flags.begin(), delta_flags.end(), metadata.final_count); + } + + metadata.updated_since_last_finalization = true; + } + + static void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & wb) + { + const auto & data_ex = data(place); + writeIntBinary(data_ex.final_count, wb); + writeBoolText(data_ex.updated_since_last_finalization, wb); + } + + static void deserialize(AggregateDataPtr __restrict place, ReadBuffer & rb) + { + auto & data_ex = data(place); + readIntBinary(data_ex.final_count, rb); + readBoolText(data_ex.updated_since_last_finalization, rb); + } + + ALWAYS_INLINE void add() + { + ++final_count; + updated_since_last_finalization = true; + } + + ALWAYS_INLINE void negate() + { + --final_count; + updated_since_last_finalization = true; + } + + 
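    /// Note on placement (see createAggregateStates() in Aggregator.cpp): when expanded-data
    /// tracking is enabled, the Aggregator placement-constructs this struct at the very head
    /// of every per-group aggregate state, i.e. the arena allocation is laid out as
    ///     | UpdatedDataEx (or RetractedDataEx) | state_1 | pad_1 | state_2 | ...
    /// so data(place) simply reinterprets the first bytes of the allocation, while the
    /// regular aggregate function states start at offsets_of_aggregate_states[*].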
/// Used for tracking the group is empty or not + UInt32 final_count = 0; + + /// Used for tracking the group is updated or not + bool updated_since_last_finalization = true; +}; + +SERDE struct RetractedDataEx : UpdatedDataEx +{ + static ALWAYS_INLINE AggregateDataPtr & getRetracted(AggregateDataPtr & place) { return reinterpret_cast(place)->retracted_data; } + static ALWAYS_INLINE bool hasRetracted(ConstAggregateDataPtr __restrict place) { return reinterpret_cast(place)->retracted_data; } + + template + static ALWAYS_INLINE AggregateDataPtr & getData(AggregateDataPtr & place) + { + if constexpr (use_retracted_data) + return getRetracted(place); + else + return place; + } + + /// Used for tracking group changes + AggregateDataPtr retracted_data = nullptr; +}; + +enum class ExpandedDataType : uint8_t +{ + None = 0, + Updated = 1, /// Allow tracking group is empty or updated + UpdatedWithRetracted = 2, /// Allow tracking group is empty or updated and changes +}; + +} +} diff --git a/src/Interpreters/Streaming/AggregationUtils.cpp b/src/Interpreters/Streaming/AggregationUtils.cpp new file mode 100644 index 00000000000..b40851b65e6 --- /dev/null +++ b/src/Interpreters/Streaming/AggregationUtils.cpp @@ -0,0 +1,113 @@ +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + +namespace Streaming +{ +OutputBlockColumns prepareOutputBlockColumns( + const Aggregator::Params & params, + const Aggregator::AggregateFunctionsPlainPtrs & aggregate_functions, + const Block & res_header, + Arenas & aggregates_pools, + bool final, + size_t rows) +{ + MutableColumns key_columns(params.keys_size); + MutableColumns aggregate_columns(params.aggregates_size); + MutableColumns final_aggregate_columns(params.aggregates_size); + Aggregator::AggregateColumnsData aggregate_columns_data(params.aggregates_size); + + for (size_t i = 0; i < params.keys_size; ++i) + { + key_columns[i] = res_header.safeGetByPosition(i).type->createColumn(); + key_columns[i]->reserve(rows); + } + + for (size_t i = 0; i < params.aggregates_size; ++i) + { + if (!final) + { + const auto & aggregate_column_name = params.aggregates[i].column_name; + aggregate_columns[i] = res_header.getByName(aggregate_column_name).type->createColumn(); + + /// The ColumnAggregateFunction column captures the shared ownership of the arena with the aggregate function states. + ColumnAggregateFunction & column_aggregate_func = assert_cast(*aggregate_columns[i]); + + for (auto & pool : aggregates_pools) + column_aggregate_func.addArena(pool); + + aggregate_columns_data[i] = &column_aggregate_func.getData(); + aggregate_columns_data[i]->reserve(rows); + } + else + { + final_aggregate_columns[i] = aggregate_functions[i]->getReturnType()->createColumn(); + final_aggregate_columns[i]->reserve(rows); + + if (aggregate_functions[i]->isState()) + { + auto callback = [&](IColumn & subcolumn) + { + /// The ColumnAggregateFunction column captures the shared ownership of the arena with aggregate function states. + if (auto * column_aggregate_func = typeid_cast(&subcolumn)) + for (auto & pool : aggregates_pools) + column_aggregate_func->addArena(pool); + }; + + callback(*final_aggregate_columns[i]); + final_aggregate_columns[i]->forEachSubcolumnRecursively(callback); + } + } + } + + if (key_columns.size() != params.keys_size) + throw Exception{"Aggregate. 
Unexpected key columns size.", ErrorCodes::LOGICAL_ERROR}; + + std::vector raw_key_columns; + raw_key_columns.reserve(key_columns.size()); + for (auto & column : key_columns) + raw_key_columns.push_back(column.get()); + + return { + .key_columns = std::move(key_columns), + .raw_key_columns = std::move(raw_key_columns), + .aggregate_columns = std::move(aggregate_columns), + .final_aggregate_columns = std::move(final_aggregate_columns), + .aggregate_columns_data = std::move(aggregate_columns_data), + }; +} + +Block finalizeBlock(const Aggregator::Params & params, const Block & res_header, OutputBlockColumns && out_cols, bool final, size_t rows) +{ + auto && [key_columns, raw_key_columns, aggregate_columns, final_aggregate_columns, aggregate_columns_data] = out_cols; + + Block res = res_header.cloneEmpty(); + + for (size_t i = 0; i < params.keys_size; ++i) + res.getByPosition(i).column = std::move(key_columns[i]); + + for (size_t i = 0; i < params.aggregates_size; ++i) + { + const auto & aggregate_column_name = params.aggregates[i].column_name; + if (final) + res.getByName(aggregate_column_name).column = std::move(final_aggregate_columns[i]); + else + res.getByName(aggregate_column_name).column = std::move(aggregate_columns[i]); + } + + /// Change the size of the columns-constants in the block. + size_t columns = res_header.columns(); + for (size_t i = 0; i < columns; ++i) + if (isColumnConst(*res.getByPosition(i).column)) + res.getByPosition(i).column = res.getByPosition(i).column->cut(0, rows); + + return res; +} +} +} diff --git a/src/Interpreters/Streaming/AggregationUtils.h b/src/Interpreters/Streaming/AggregationUtils.h new file mode 100644 index 00000000000..6f6875e72fc --- /dev/null +++ b/src/Interpreters/Streaming/AggregationUtils.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +namespace DB::Streaming +{ + +struct OutputBlockColumns +{ + MutableColumns key_columns; + std::vector raw_key_columns; + MutableColumns aggregate_columns; + MutableColumns final_aggregate_columns; + Aggregator::AggregateColumnsData aggregate_columns_data; +}; + + +OutputBlockColumns prepareOutputBlockColumns( + const Aggregator::Params & params, + const Aggregator::AggregateFunctionsPlainPtrs & aggregate_functions, + const Block & res_header, + Arenas & aggregates_pools, + bool final, + size_t rows); + +Block finalizeBlock(const Aggregator::Params & params, const Block & res_header, OutputBlockColumns && out_cols, bool final, size_t rows); +} diff --git a/src/Interpreters/Streaming/Aggregator.cpp b/src/Interpreters/Streaming/Aggregator.cpp index 7273c6ab81d..f1937f482d7 100644 --- a/src/Interpreters/Streaming/Aggregator.cpp +++ b/src/Interpreters/Streaming/Aggregator.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -79,6 +80,9 @@ inline void initDataVariants( result.keys_size = params.keys_size; result.key_sizes = key_sizes; result.init(method_chosen); + + if (params.tracking_changes) + result.resetRetractedPool(); } Columns materializeKeyColumns(Columns & columns, ColumnRawPtrs & key_columns, const Aggregator::Params & params, bool is_low_cardinality) @@ -103,6 +107,73 @@ Columns materializeKeyColumns(Columns & columns, ColumnRawPtrs & key_columns, co return materialized_columns; } + +Arena * getArena(AggregatedDataVariants & variants, AggregateStateType type) +{ + if (type == AggregateStateType::OnlyRetracted) + return variants.retracted_pool.get(); + else + return variants.aggregates_pool; +} + +template +BlocksList concurrentBucketConvert(ThreadPool * thread_pool, 
const std::vector & buckets, Arena * arena, Arenas & pools, BucketConverter && bucket_converter) +{ + std::atomic next_bucket_idx_to_merge = 0; + auto converter = [&](Arena * pool, ThreadGroupStatusPtr thread_group, const std::atomic_flag * cancelled) { + SCOPE_EXIT_SAFE( + if (thread_group) + CurrentThread::detachQueryIfNotDetached(); + ); + + if (thread_group) + CurrentThread::attachToIfDetached(thread_group); + + BlocksList blocks; + while (true) + { + if (cancelled && cancelled->test()) + break; + + UInt32 bucket_idx = next_bucket_idx_to_merge.fetch_add(1); + if (bucket_idx >= buckets.size()) + break; + + auto bucket = buckets[bucket_idx]; + blocks.splice(blocks.end(), bucket_converter(bucket, pool)); + } + return blocks; + }; + + size_t num_threads = thread_pool ? std::min(thread_pool->getMaxThreads(), buckets.size()) : 1; + if (num_threads <= 1) + return converter(arena, nullptr, nullptr); + + /// Process in parallel + for (size_t i = pools.size(); i < num_threads; ++i) + pools.push_back(std::make_shared()); + + auto results = std::make_shared>(); + results->resize(num_threads); + thread_pool->setMaxThreads(num_threads); + { + std::atomic_flag cancelled; + SCOPE_EXIT_SAFE(cancelled.test_and_set();); + + for (size_t thread_id = 0; thread_id < num_threads; ++thread_id) + thread_pool->scheduleOrThrowOnError([&pools, thread_id, group = CurrentThread::getGroup(), results, &converter, &cancelled] { + (*results)[thread_id] = converter(pools[thread_id].get(), group, &cancelled); + }); + + thread_pool->wait(); + } + + BlocksList blocks; + for (auto & result : *results) + blocks.splice(blocks.end(), std::move(result)); + + return blocks; +} } AggregatedDataVariants::~AggregatedDataVariants() @@ -120,6 +191,31 @@ AggregatedDataVariants::~AggregatedDataVariants() } } +void AggregatedDataVariants::reset() +{ + assert(aggregator); + /// Clear states + if (!aggregator->all_aggregates_has_trivial_destructor) + aggregator->destroyAllAggregateStates(*this); + + /// Clear hash map + switch (type) + { + case AggregatedDataVariants::Type::EMPTY: break; + case AggregatedDataVariants::Type::without_key: break; + + #define M(NAME, IS_TWO_LEVEL) \ + case AggregatedDataVariants::Type::NAME: NAME.reset(); break; + APPLY_FOR_AGGREGATED_VARIANTS_STREAMING(M) + #undef M + } + invalidate(); + + /// Reset pool + resetAggregatesPool(); + retracted_pool.reset(); +} + void AggregatedDataVariants::convertToTwoLevel() { if (aggregator) @@ -143,6 +239,17 @@ void AggregatedDataVariants::convertToTwoLevel() } } +void AggregatedDataVariants::serialize(WriteBuffer & wb, const Aggregator & aggregator_) const +{ + aggregator_.checkpoint(*this, wb); +} + +void AggregatedDataVariants::deserialize(ReadBuffer & rb, const Aggregator & aggregator_) +{ + aggregator = &aggregator_; + aggregator_.recover(*this, rb); +} + Block Aggregator::getHeader(bool final) const { return params.getHeader(final); @@ -282,8 +389,21 @@ Aggregator::Aggregator(const Params & params_) : params(params_), log(&Poco::Lo total_size_of_aggregate_states = 0; all_aggregates_has_trivial_destructor = true; + if (params.tracking_changes) + { + total_size_of_aggregate_states = sizeof(RetractedDataEx); + align_aggregate_states = alignof(RetractedDataEx); + expanded_data_type = ExpandedDataType::UpdatedWithRetracted; + } + else if (params.tracking_updated) + { + total_size_of_aggregate_states = sizeof(UpdatedDataEx); + align_aggregate_states = alignof(UpdatedDataEx); + expanded_data_type = ExpandedDataType::Updated; + } + // aggregate_states will be aligned as below: 
- // |<-- state_1 -->|<-- pad_1 -->|<-- state_2 -->|<-- pad_2 -->| ..... + // |<-- [ExpandedDataEx] -->||<-- state_1 -->|<-- pad_1 -->|<-- state_2 -->|<-- pad_2 -->| ..... // // pad_N will be used to match alignment requirement for each next state. // The address of state_1 is aligned based on maximum alignment requirements in states @@ -650,12 +770,43 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethodTimeBucketTwoLev } /// proton: ends -template +template void Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data) const { + /// Initialize reserved UpdatedDataEx + assert(aggregate_data); + if constexpr (!skip_expanded_data) + { + if (expanded_data_type == ExpandedDataType::UpdatedWithRetracted) + new (aggregate_data) RetractedDataEx(); + else if (expanded_data_type == ExpandedDataType::Updated) + new (aggregate_data) UpdatedDataEx(); + } + + if constexpr (use_compiled_functions) + { + assert(compiled_aggregate_functions_holder); + const auto & compiled_aggregate_functions = compiled_aggregate_functions_holder->compiled_aggregate_functions; + compiled_aggregate_functions.create_aggregate_states_function(aggregate_data); + +#if defined(MEMORY_SANITIZER) + + /// We compile only functions that do not allocate some data in Arena. Only store necessary state in AggregateData place. + for (size_t aggregate_function_index = 0; aggregate_function_index < aggregate_functions.size(); ++aggregate_function_index) + { + if (!is_aggregate_function_compiled[aggregate_function_index]) + continue; + + auto aggregate_data_with_offset = aggregate_data + offsets_of_aggregate_states[aggregate_function_index]; + auto data_size = params.aggregates[aggregate_function_index].function->sizeOfData(); + __msan_unpoison(aggregate_data_with_offset, data_size); + } +#endif + } + for (size_t j = 0; j < params.aggregates_size; ++j) { - if constexpr (skip_compiled_aggregate_functions) + if constexpr (use_compiled_functions) if (is_aggregate_function_compiled[j]) continue; @@ -671,7 +822,7 @@ void Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data) const { for (size_t rollback_j = 0; rollback_j < j; ++rollback_j) { - if constexpr (skip_compiled_aggregate_functions) + if constexpr (use_compiled_functions) if (is_aggregate_function_compiled[j]) continue; @@ -719,23 +870,18 @@ template AggregateDataPtr overflow_row) const { typename Method::State state(key_columns, key_sizes, aggregation_state_cache); + assert(!no_more_keys); - if (!no_more_keys) - { #if USE_EMBEDDED_COMPILER - if (compiled_aggregate_functions_holder) - { - return executeImplBatch(method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, overflow_row); - } - else -#endif - { - return executeImplBatch(method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, overflow_row); - } + /// TODO: So far not support compiled functions with expanded data + if (compiled_aggregate_functions_holder && !hasExpandedData()) + { + return executeImplBatch(method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, overflow_row); } else +#endif { - return executeImplBatch(method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, overflow_row); + return executeImplBatch(method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, overflow_row); } } @@ -750,7 +896,7 @@ template AggregateDataPtr overflow_row) const { /// Optimization for special case when there are no aggregate functions. 
- if (params.aggregates_size == 0) + if (params.aggregates_size == 0 && !hasExpandedData()) { if constexpr (no_more_keys) return false; @@ -778,7 +924,7 @@ template } } - if (!has_arrays) + if (!has_arrays && !hasExpandedData()) { for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst) { @@ -789,8 +935,9 @@ template inst->state_offset, [&](AggregateDataPtr & aggregate_data) { - aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(aggregate_data); + auto data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + createAggregateStates(data); + aggregate_data = data; }, state.getKeyData(), inst->batch_arguments, @@ -821,66 +968,24 @@ template { AggregateDataPtr aggregate_data = nullptr; - if constexpr (!no_more_keys) - { - auto emplace_result = state.emplaceKey(method.data, i, *aggregates_pool); - - /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key. - if (emplace_result.isInserted()) - { - /// exception-safety - if you can not allocate memory or create states, then destructors will not be called. - emplace_result.setMapped(nullptr); - - aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - -#if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) - { - const auto & compiled_aggregate_functions = compiled_aggregate_functions_holder->compiled_aggregate_functions; - compiled_aggregate_functions.create_aggregate_states_function(aggregate_data); - if (compiled_aggregate_functions.functions_count != aggregate_functions.size()) - { - static constexpr bool skip_compiled_aggregate_functions = true; - createAggregateStates(aggregate_data); - } - -#if defined(MEMORY_SANITIZER) - - /// We compile only functions that do not allocate some data in Arena. Only store necessary state in AggregateData place. - for (size_t aggregate_function_index = 0; aggregate_function_index < aggregate_functions.size(); ++aggregate_function_index) - { - if (!is_aggregate_function_compiled[aggregate_function_index]) - continue; + assert(!no_more_keys); + auto emplace_result = state.emplaceKey(method.data, i, *aggregates_pool); - auto aggregate_data_with_offset = aggregate_data + offsets_of_aggregate_states[aggregate_function_index]; - auto data_size = params.aggregates[aggregate_function_index].function->sizeOfData(); - __msan_unpoison(aggregate_data_with_offset, data_size); - } -#endif - } - else -#endif - { - createAggregateStates(aggregate_data); - } + /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key. + if (emplace_result.isInserted()) + { + /// exception-safety - if you can not allocate memory or create states, then destructors will not be called. + emplace_result.setMapped(nullptr); - emplace_result.setMapped(aggregate_data); - } - else - aggregate_data = emplace_result.getMapped(); + aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + createAggregateStates(aggregate_data); - assert(aggregate_data != nullptr); + emplace_result.setMapped(aggregate_data); } else - { - /// Add only if the key already exists. 
- auto find_result = state.findKey(method.data, i, *aggregates_pool); - if (find_result.isFound()) - aggregate_data = find_result.getMapped(); - else - aggregate_data = overflow_row; - } + aggregate_data = emplace_result.getMapped(); + assert(aggregate_data != nullptr); places[i] = aggregate_data; } @@ -938,6 +1043,9 @@ template } } + if (hasExpandedData()) + UpdatedDataEx::addBatch(row_begin, row_end, places.get(), aggregate_instructions ? aggregate_instructions->delta_column : nullptr); + return need_finalization; } @@ -1026,28 +1134,12 @@ template } } - return should_finalize; -} + if (hasExpandedData()) + UpdatedDataEx::addBatchSinglePlace(row_begin, row_end, res, aggregate_instructions ? aggregate_instructions->delta_column : nullptr); -void NO_INLINE Aggregator::executeOnIntervalWithoutKeyImpl( - AggregatedDataWithoutKey & res, - size_t row_begin, - size_t row_end, - AggregateFunctionInstruction * aggregate_instructions, - Arena * arena, - const IColumn * delta_col) -{ - /// Adding values - for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst) - { - if (inst->offsets) - inst->batch_that->addBatchSinglePlaceFromInterval(inst->offsets[row_begin], inst->offsets[row_end - 1], res + inst->state_offset, inst->batch_arguments, arena, -1, delta_col); - else - inst->batch_that->addBatchSinglePlaceFromInterval(row_begin, row_end, res + inst->state_offset, inst->batch_arguments, arena, -1, delta_col); - } + return should_finalize; } - void Aggregator::prepareAggregateInstructions(Columns columns, AggregateColumns & aggregate_columns, Columns & materialized_columns, AggregateFunctionInstructions & aggregate_functions_instructions, NestedColumnsHolder & nested_columns_holder) const { @@ -1170,14 +1262,14 @@ std::pair Aggregator::executeOnBlock( /// For the case when there are no keys (all aggregate into one row). 
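To make the delta handling above concrete, a small self-contained sketch of the bookkeeping done by UpdatedDataEx under changelog (+1 append / -1 retract) semantics; the function name and the placement-new on a local buffer are illustrative, the real header is constructed by createAggregateStates() inside the arena.

#include <Interpreters/Streaming/AggregateDataEx.h>
#include <cassert>
#include <new>

void changelogCountSketch()
{
    /// One group's expanded header, constructed the same way createAggregateStates() does it.
    alignas(DB::Streaming::UpdatedDataEx) char place[sizeof(DB::Streaming::UpdatedDataEx)];
    new (place) DB::Streaming::UpdatedDataEx();

    auto & group = DB::Streaming::UpdatedDataEx::data(place);
    group.add();     /// +1: a row joined the group
    group.add();     /// +1
    group.negate();  /// -1: one row was retracted
    group.negate();  /// -1: the last row was retracted

    assert(DB::Streaming::UpdatedDataEx::isEmpty(place));   /// net count is back to 0
    assert(DB::Streaming::UpdatedDataEx::isUpdated(place)); /// but the group did change
    DB::Streaming::UpdatedDataEx::resetUpdated(place);      /// cleared once the group is finalized
}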
if (result.type == AggregatedDataVariants::Type::without_key) { - /// TODO: Enable compilation after investigation -// #if USE_EMBEDDED_COMPILER -// if (compiled_aggregate_functions_holder) -// { -// executeWithoutKeyImpl(result.without_key, row_begin, row_end, aggregate_functions_instructions.data(), result.aggregates_pool); -// } -// else -// #endif + /// TODO: So far not support compiled functions with expanded data +#if USE_EMBEDDED_COMPILER + if (compiled_aggregate_functions_holder && !hasExpandedData()) + { + need_finalization = executeWithoutKeyImpl(result.without_key, row_begin, row_end, aggregate_functions_instructions.data(), result.aggregates_pool); + } + else +#endif { need_finalization = executeWithoutKeyImpl(result.without_key, row_begin, row_end, aggregate_functions_instructions.data(), result.aggregates_pool); } @@ -1274,283 +1366,70 @@ Block Aggregator::convertOneBucketToBlockImpl( Arena * arena, bool final, bool clear_states, - size_t bucket) const + Int64 bucket, + AggregateStateType type) const { - Block block = prepareBlockAndFill(data_variants, final, clear_states, method.data.impls[bucket].size(), - [bucket, &method, arena, this] ( - MutableColumns & key_columns, - AggregateColumnsData & aggregate_columns, - MutableColumns & final_aggregate_columns, - bool final_, - bool clear_states_) - { - convertToBlockImpl(method, method.data.impls[bucket], - key_columns, aggregate_columns, final_aggregate_columns, arena, final_, clear_states_); - }); - - block.info.bucket_num = static_cast(bucket); + Block block = convertToBlockImpl(method, method.data.impls[bucket], arena, data_variants.aggregates_pools, final, method.data.impls[bucket].size(), clear_states, type); + block.info.bucket_num = static_cast(bucket); + method.data.resetUpdated(bucket); /// finalized return block; } -Block Aggregator::convertOneBucketToBlock(AggregatedDataVariants & variants, bool final, ConvertAction action, size_t bucket) const +template +void Aggregator::writeToTemporaryFileImpl( + AggregatedDataVariants & data_variants, + Method & method, + NativeWriter & out) const { - auto method = variants.type; - Block block; - bool clear_states = shouldClearStates(action, final); - if (false) {} // NOLINT -#define M(NAME) \ - else if (method == AggregatedDataVariants::Type::NAME) \ - block = convertOneBucketToBlockImpl(variants, *variants.NAME, variants.aggregates_pool, final, clear_states, bucket); - - APPLY_FOR_VARIANTS_TIME_BUCKET_TWO_LEVEL(M) -#undef M - else - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + size_t max_temporary_block_size_rows = 0; + size_t max_temporary_block_size_bytes = 0; - return block; -} + auto update_max_sizes = [&](const Block & block) + { + size_t block_size_rows = block.rows(); + size_t block_size_bytes = block.bytes(); -Block Aggregator::mergeAndConvertOneBucketToBlock( - ManyAggregatedDataVariants & variants, bool final, ConvertAction action, size_t bucket) const -{ - auto prepared_data_ptr = prepareVariantsToMerge(variants); - if (prepared_data_ptr->empty()) - return {}; + if (block_size_rows > max_temporary_block_size_rows) + max_temporary_block_size_rows = block_size_rows; + if (block_size_bytes > max_temporary_block_size_bytes) + max_temporary_block_size_bytes = block_size_bytes; + }; - auto & merged_data = *prepared_data_ptr->at(0); - auto method = merged_data.type; - Arena * arena = merged_data.aggregates_pool; - bool clear_states = shouldClearStates(action, final); - Block block; + for (auto bucket : 
method.data.buckets()) + { + Block block = convertOneBucketToBlockImpl(data_variants, method, data_variants.aggregates_pool, false, false, bucket); + out.write(block); + update_max_sizes(block); + } - if (false) {} // NOLINT -#define M(NAME) \ - else if (method == AggregatedDataVariants::Type::NAME) \ - { \ - mergeBucketImpl(*prepared_data_ptr, final, clear_states, bucket, arena); \ - block = convertOneBucketToBlockImpl(merged_data, *merged_data.NAME, arena, final, clear_states, bucket); \ + if (params.overflow_row) + { + Block block = prepareBlockAndFillWithoutKey(data_variants, false, true, false); + out.write(block); + update_max_sizes(block); } - APPLY_FOR_VARIANTS_ALL_TWO_LEVEL(M) -#undef M + /// Pass ownership of the aggregate functions states: + /// `data_variants` will not destroy them in the destructor, they are now owned by ColumnAggregateFunction objects. + data_variants.aggregator = nullptr; - return block; + LOG_DEBUG(log, "Max size of temporary block: {} rows, {}.", max_temporary_block_size_rows, ReadableSize(max_temporary_block_size_bytes)); } -BlocksList -Aggregator::mergeAndConvertToBlocks(ManyAggregatedDataVariants & data_variants, bool final, ConvertAction action, size_t max_threads) const -{ - auto prepared_data_ptr = prepareVariantsToMerge(data_variants); - if (prepared_data_ptr->empty()) - return {}; - if (unlikely(params.overflow_row)) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Overflow row processing is not implemented in streaming aggregation"); +bool Aggregator::checkLimits(size_t result_size, bool & no_more_keys) const +{ + if (!no_more_keys && params.max_rows_to_group_by && result_size > params.max_rows_to_group_by) + { + switch (params.group_by_overflow_mode) + { + case OverflowMode::THROW: + throw Exception("Limit for rows to GROUP BY exceeded: has " + toString(result_size) + + " rows, maximum: " + toString(params.max_rows_to_group_by), + ErrorCodes::TOO_MANY_ROWS); - bool clear_states = shouldClearStates(action, final); - BlocksList blocks; - auto & first = *prepared_data_ptr->at(0); - if (first.type == AggregatedDataVariants::Type::without_key) - blocks.emplace_back(mergeAndConvertWithoutKeyToBlock(*prepared_data_ptr, final, clear_states)); - else if (first.isTwoLevel()) - blocks.splice(blocks.end(), mergeAndConvertTwoLevelToBlocks(*prepared_data_ptr, final, max_threads, clear_states)); - else - blocks.emplace_back(mergeAndConvertSingleLevelToBlock(*prepared_data_ptr, final, clear_states)); - - if (clear_states) - clearDataVariants(first); - - return blocks; -} - -Block Aggregator::mergeAndConvertWithoutKeyToBlock(ManyAggregatedDataVariants & non_empty_data, bool final, bool clear_states) const -{ - auto & first = *non_empty_data.at(0); - assert(first.type == AggregatedDataVariants::Type::without_key); - mergeWithoutKeyDataImpl(non_empty_data, clear_states); - return prepareBlockAndFillWithoutKey(first, final, false, clear_states); -} - -Block Aggregator::mergeAndConvertSingleLevelToBlock(ManyAggregatedDataVariants & non_empty_data, bool final, bool clear_states) const -{ - auto & first = *non_empty_data.at(0); - if (false) - { - } // NOLINT -#define M(NAME) \ - else if (first.type == AggregatedDataVariants::Type::NAME) \ - mergeSingleLevelDataImpl(non_empty_data, clear_states); - - APPLY_FOR_VARIANTS_SINGLE_LEVEL_STREAMING(M) -#undef M - else throw Exception("Unknown single level aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); - - return prepareBlockAndFillSingleLevel(first, final, clear_states); -} - -BlocksList 
Aggregator::mergeAndConvertTwoLevelToBlocks( - ManyAggregatedDataVariants & non_empty_data, bool final, size_t max_threads, bool clear_states) const -{ - auto & first = *non_empty_data.at(0); - assert(first.isTwoLevel()); -#define M(NAME) \ - else if (first.type == AggregatedDataVariants::Type::NAME) return mergeAndConvertTwoLevelToBlocksImpl< \ - decltype(first.NAME)::element_type>(non_empty_data, final, max_threads, clear_states); - - if (false) - { - } // NOLINT - APPLY_FOR_VARIANTS_ALL_TWO_LEVEL(M) -#undef M - else throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); -} - -template -BlocksList Aggregator::mergeAndConvertTwoLevelToBlocksImpl( - ManyAggregatedDataVariants & non_empty_data, bool final, size_t max_threads, bool clear_states) const -{ - auto & first = *non_empty_data.at(0); - - std::vector buckets; - if (first.isStaticBucketTwoLevel()) - buckets = getDataVariant(first).data.buckets(); - else - { - assert(first.isTimeBucketTwoLevel()); - std::set buckets_set; - for (auto & data_variants : non_empty_data) - { - auto tmp_buckets = getDataVariant(*data_variants).data.buckets(); - buckets_set.insert(tmp_buckets.begin(), tmp_buckets.end()); - } - buckets.assign(buckets_set.begin(), buckets_set.end()); - } - - std::atomic next_bucket_idx_to_merge = 0; - auto converter = [&](size_t thread_id, ThreadGroupStatusPtr thread_group, const std::atomic_flag * cancelled) { - SCOPE_EXIT_SAFE(if (thread_group) CurrentThread::detachQueryIfNotDetached();); - if (thread_group) - CurrentThread::attachToIfDetached(thread_group); - - BlocksList blocks; - while (true) - { - if (cancelled && cancelled->test()) - break; - - UInt32 bucket_idx = next_bucket_idx_to_merge.fetch_add(1); - if (bucket_idx >= buckets.size()) - break; - - auto bucket = buckets[bucket_idx]; - - /// Merge one bucket into first one - Arena * arena = first.aggregates_pools.at(thread_id).get(); - mergeBucketImpl(non_empty_data, final, clear_states, bucket, arena); - auto & method = getDataVariant(first); - if (method.data.impls[bucket].empty()) - continue; - - /// Convert one bucket of first one - blocks.emplace_back(convertOneBucketToBlockImpl(first, method, arena, final, clear_states, bucket)); - } - - if (clear_states) - { - for (size_t i = 1; i < non_empty_data.size(); ++i) - clearDataVariants(*non_empty_data[i]); - } - - return blocks; - }; - - auto num_threads = std::min(max_threads, buckets.size()); - if (num_threads <= 1) - return converter(0, nullptr, nullptr); - - /// Process in parallel - /// proton FIXME : separate final vs non-final converting. For non-final converting, we don't need - /// each arena for each thread. 
- for (size_t i = first.aggregates_pools.size(); i < num_threads; ++i) - first.aggregates_pools.push_back(std::make_shared()); - - auto results = std::make_shared>(); - results->resize(num_threads); - ThreadPool thread_pool(num_threads); - { - std::atomic_flag cancelled; - SCOPE_EXIT_SAFE(cancelled.test_and_set();); - - for (size_t thread_id = 0; thread_id < num_threads; ++thread_id) - thread_pool.scheduleOrThrowOnError([thread_id, group = CurrentThread::getGroup(), results, &converter, &cancelled] { - (*results)[thread_id] = converter(thread_id, group, &cancelled); - }); - - thread_pool.wait(); - } - - BlocksList blocks; - for (auto & result : *results) - blocks.splice(blocks.end(), std::move(result)); - - return blocks; -} - -template -void Aggregator::writeToTemporaryFileImpl( - AggregatedDataVariants & data_variants, - Method & method, - NativeWriter & out) const -{ - size_t max_temporary_block_size_rows = 0; - size_t max_temporary_block_size_bytes = 0; - - auto update_max_sizes = [&](const Block & block) - { - size_t block_size_rows = block.rows(); - size_t block_size_bytes = block.bytes(); - - if (block_size_rows > max_temporary_block_size_rows) - max_temporary_block_size_rows = block_size_rows; - if (block_size_bytes > max_temporary_block_size_bytes) - max_temporary_block_size_bytes = block_size_bytes; - }; - - for (auto bucket : method.data.buckets()) - { - Block block = convertOneBucketToBlockImpl(data_variants, method, data_variants.aggregates_pool, false, false, bucket); - out.write(block); - update_max_sizes(block); - } - - if (params.overflow_row) - { - Block block = prepareBlockAndFillWithoutKey(data_variants, false, true, false); - out.write(block); - update_max_sizes(block); - } - - /// Pass ownership of the aggregate functions states: - /// `data_variants` will not destroy them in the destructor, they are now owned by ColumnAggregateFunction objects. - data_variants.aggregator = nullptr; - - LOG_DEBUG(log, "Max size of temporary block: {} rows, {}.", max_temporary_block_size_rows, ReadableSize(max_temporary_block_size_bytes)); -} - - -bool Aggregator::checkLimits(size_t result_size, bool & no_more_keys) const -{ - if (!no_more_keys && params.max_rows_to_group_by && result_size > params.max_rows_to_group_by) - { - switch (params.group_by_overflow_mode) - { - case OverflowMode::THROW: - throw Exception("Limit for rows to GROUP BY exceeded: has " + toString(result_size) - + " rows, maximum: " + toString(params.max_rows_to_group_by), - ErrorCodes::TOO_MANY_ROWS); - - case OverflowMode::BREAK: - return false; + case OverflowMode::BREAK: + return false; case OverflowMode::ANY: no_more_keys = true; @@ -1566,51 +1445,51 @@ bool Aggregator::checkLimits(size_t result_size, bool & no_more_keys) const template -void Aggregator::convertToBlockImpl( - Method & method, - Table & data, - MutableColumns & key_columns, - AggregateColumnsData & aggregate_columns, - MutableColumns & final_aggregate_columns, - Arena * arena, - bool final, - bool clear_states) const +Block Aggregator::convertToBlockImpl( + Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, bool final, size_t rows, bool clear_states, AggregateStateType type) const { if (data.empty()) - return; - - if (key_columns.size() != params.keys_size) - throw Exception{"Aggregate. 
Unexpected key columns size.", ErrorCodes::LOGICAL_ERROR}; + { + auto && out_cols = prepareOutputBlockColumns(params, aggregate_functions, getHeader(final), aggregates_pools, final, rows); + return {finalizeBlock(params, getHeader(final), std::move(out_cols), final, rows)}; + } - std::vector raw_key_columns; - raw_key_columns.reserve(key_columns.size()); - for (auto & column : key_columns) - raw_key_columns.push_back(column.get()); + Block res; if (final) { #if USE_EMBEDDED_COMPILER - if (compiled_aggregate_functions_holder) + /// TODO: So far not support compiled functions with expanded data + if (compiled_aggregate_functions_holder && !hasExpandedData()) { static constexpr bool use_compiled_functions = !Method::low_cardinality_optimization; - convertToBlockImplFinal(method, data, std::move(raw_key_columns), final_aggregate_columns, arena, clear_states); + assert(type == AggregateStateType::Normal); + res = convertToBlockImplFinal(method, data, arena, aggregates_pools, rows, clear_states); } else #endif { - convertToBlockImplFinal(method, data, std::move(raw_key_columns), final_aggregate_columns, arena, clear_states); + if (type == AggregateStateType::OnlyUpdated) + res = convertToBlockImplFinal(method, data, arena, aggregates_pools, rows, clear_states); + else if (type == AggregateStateType::OnlyRetracted) + res = convertToBlockImplFinal(method, data, arena, aggregates_pools, rows, clear_states); + else + res = convertToBlockImplFinal(method, data, arena, aggregates_pools, rows, clear_states); } } else { - convertToBlockImplNotFinal(method, data, std::move(raw_key_columns), aggregate_columns); + assert(type == AggregateStateType::Normal); + res = convertToBlockImplNotFinal(method, data, aggregates_pools, rows); } /// In order to release memory early. /// proton: starts. For streaming aggr, we hold on to the states - if (clear_states) + if (clear_states && type == AggregateStateType::Normal) data.clearAndShrink(); /// proton: ends + + return res; } @@ -1618,7 +1497,8 @@ template inline void Aggregator::insertAggregatesIntoColumns( Mapped & mapped, MutableColumns & final_aggregate_columns, - Arena * arena) const + Arena * arena, + bool clear_states) const { /** Final values of aggregate functions are inserted to columns. * Then states of aggregate functions, that are not longer needed, are destroyed. @@ -1657,7 +1537,7 @@ inline void Aggregator::insertAggregatesIntoColumns( /// proton: starts /// For streaming aggregation, we hold up to the states - if (params.keep_state) + if (!clear_states) { if (exception) std::rethrow_exception(exception); @@ -1690,76 +1570,14 @@ inline void Aggregator::insertAggregatesIntoColumns( std::rethrow_exception(exception); } - -template -void NO_INLINE Aggregator::convertToBlockImplFinal( - Method & method, - Table & data, - std::vector key_columns, - MutableColumns & final_aggregate_columns, - Arena * arena, - bool clear_states) const +template +Block Aggregator::insertResultsIntoColumns(PaddedPODArray & places, OutputBlockColumns && out_cols, Arena * arena, bool clear_states) const { - if constexpr (Method::low_cardinality_optimization) - { - if (data.hasNullKeyData()) - { - key_columns[0]->insertDefault(); - insertAggregatesIntoColumns(data.getNullKeyData(), final_aggregate_columns, arena); - } - } - - auto shuffled_key_sizes = method.shuffleKeyColumns(key_columns, key_sizes); - const auto & key_sizes_ref = shuffled_key_sizes ? 
*shuffled_key_sizes : key_sizes; - - PaddedPODArray places; - places.reserve(data.size()); - - data.forEachValue([&](const auto & key, auto & mapped) - { - /// Ingore invalid mapped, there are two cases: - /// 1) mapped was destroyed (it's a bug) - /// 2) no mapped states for retracted data (means it's an new group key, but no retracted data) - if (!mapped) - return; - - /// For UDA with own emit strategy, there are two special cases to be handled: - /// 1. not all groups need to be emitted. therefore proton needs to pick groups - /// that should emits, and only emit those groups while keep other groups unchanged. - /// 2. a single block trigger multiple emits. In this case, proton need insert the - /// same key multiple times for each emit result of this group. - - /// for non-UDA or UDA without emit strategy, 'should_emit' is always true. - /// For UDA with emit strategy, it is true only if the group should emit. - size_t emit_times = 1; - if (params.group_by == Params::GroupBy::USER_DEFINED) - { - assert(aggregate_functions.size() == 1); - emit_times = aggregate_functions[0]->getEmitTimes(mapped + offsets_of_aggregate_states[0]); - } - - if (emit_times > 0) - { - /// duplicate key for each emit - for (size_t i = 0; i < emit_times; i++) - method.insertKeyIntoColumns(key, key_columns, key_sizes_ref); - - places.emplace_back(mapped); - - /// Mark the cell as destroyed so it will not be destroyed in destructor. - /// proton: starts. Here we push the `mapped` to `places`, for streaming - /// case, we don't want aggregate function to destroy the places - if (clear_states) - mapped = nullptr; - } - }); - std::exception_ptr exception; size_t aggregate_functions_destroy_index = 0; try { -#if USE_EMBEDDED_COMPILER if constexpr (use_compiled_functions) { /** For JIT compiled functions we need to resize columns before pass them into compiled code. 
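/// A minimal illustrative sketch, not part of this change set, of the per-group filtering that the
/// AggregateStateType values imply in the conversion dispatch above (it assumes only the
/// UpdatedDataEx / RetractedDataEx helpers named in this diff; the lambda itself is hypothetical):
///
///     auto group_selected = [](AggregateStateType type, AggregateDataPtr mapped) -> bool
///     {
///         switch (type)
///         {
///             case AggregateStateType::OnlyUpdated:   return UpdatedDataEx::isUpdated(mapped);
///             case AggregateStateType::OnlyRetracted: return RetractedDataEx::hasRetracted(mapped);
///             default:                                return mapped != nullptr; /// Normal: every live group
///         }
///     };
///
/// The OnlyUpdated conversion additionally resets the per-group updated flag once a group has been
/// emitted, so the next incremental conversion only sees groups touched after this one.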
@@ -1774,7 +1592,7 @@ void NO_INLINE Aggregator::convertToBlockImplFinal( if (!is_aggregate_function_compiled[i]) continue; - auto & final_aggregate_column = final_aggregate_columns[i]; + auto & final_aggregate_column = out_cols.final_aggregate_columns[i]; final_aggregate_column = final_aggregate_column->cloneResized(places.size()); columns_data.emplace_back(getColumnData(final_aggregate_column.get())); } @@ -1782,7 +1600,6 @@ void NO_INLINE Aggregator::convertToBlockImplFinal( auto insert_aggregates_into_columns_function = compiled_functions.insert_aggregates_into_columns_function; insert_aggregates_into_columns_function(0, places.size(), columns_data.data(), places.data()); } -#endif for (; aggregate_functions_destroy_index < params.aggregates_size;) { @@ -1795,7 +1612,7 @@ void NO_INLINE Aggregator::convertToBlockImplFinal( } } - auto & final_aggregate_column = final_aggregate_columns[aggregate_functions_destroy_index]; + auto & final_aggregate_column = out_cols.final_aggregate_columns[aggregate_functions_destroy_index]; size_t offset = offsets_of_aggregate_states[aggregate_functions_destroy_index]; /** We increase aggregate_functions_destroy_index because by function contract if insertResultIntoBatch @@ -1877,137 +1694,126 @@ void NO_INLINE Aggregator::convertToBlockImplFinal( if (exception) std::rethrow_exception(exception); + + return finalizeBlock(params, getHeader(/* final */ true), std::move(out_cols), /* final */ true, places.size()); } -template -void NO_INLINE Aggregator::convertToBlockImplNotFinal( - Method & method, - Table & data, - std::vector key_columns, - AggregateColumnsData & aggregate_columns) const +template +Block NO_INLINE Aggregator::convertToBlockImplFinal( + Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, size_t rows, bool clear_states) const { + constexpr bool final = true; + auto out_cols = prepareOutputBlockColumns(params, aggregate_functions, getHeader(final), aggregates_pools, final, rows); + if constexpr (Method::low_cardinality_optimization) { if (data.hasNullKeyData()) { - key_columns[0]->insertDefault(); - - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_columns[i]->push_back(data.getNullKeyData() + offsets_of_aggregate_states[i]); - - data.getNullKeyData() = nullptr; + assert(type == AggregateStateType::Normal); + out_cols.key_columns[0]->insertDefault(); + insertAggregatesIntoColumns(data.getNullKeyData(), out_cols.final_aggregate_columns, arena, clear_states); } } - auto shuffled_key_sizes = method.shuffleKeyColumns(key_columns, key_sizes); - const auto & key_sizes_ref = shuffled_key_sizes ? *shuffled_key_sizes : key_sizes; + auto shuffled_key_sizes = method.shuffleKeyColumns(out_cols.raw_key_columns, key_sizes); + const auto & key_sizes_ref = shuffled_key_sizes ? 
*shuffled_key_sizes : key_sizes; + + PaddedPODArray places; + places.reserve(rows); + + constexpr bool only_updated = (type == AggregateStateType::OnlyUpdated); + constexpr bool only_retracted = (type == AggregateStateType::OnlyRetracted); data.forEachValue([&](const auto & key, auto & mapped) { - method.insertKeyIntoColumns(key, key_columns, key_sizes_ref); + if constexpr (only_updated) + { + if (!UpdatedDataEx::isUpdated(mapped)) + return; - /// reserved, so push_back does not throw exceptions - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_columns[i]->push_back(mapped + offsets_of_aggregate_states[i]); + /// Finalized it for current coverting + UpdatedDataEx::resetUpdated(mapped); + } + else if constexpr (only_retracted) + { + if (!RetractedDataEx::hasRetracted(mapped)) + return; + } - /// proton: starts. For streaming aggr, we hold on to the states - /// Since it is not final, we shall never clear the state - /// if (!params.keep_state) - /// mapped = nullptr; - /// proton: ends. - }); -} + auto & place = RetractedDataEx::getData(mapped); + /// For UDA with own emit strategy, there are two special cases to be handled: + /// 1. not all groups need to be emitted. therefore proton needs to pick groups + /// that should emits, and only emit those groups while keep other groups unchanged. + /// 2. a single block trigger multiple emits. In this case, proton need insert the + /// same key multiple times for each emit result of this group. -template -Block Aggregator::prepareBlockAndFill( - AggregatedDataVariants & data_variants, - bool final, - bool clear_states, - size_t rows, - Filler && filler) const -{ - MutableColumns key_columns(params.keys_size); - MutableColumns aggregate_columns(params.aggregates_size); - MutableColumns final_aggregate_columns(params.aggregates_size); - AggregateColumnsData aggregate_columns_data(params.aggregates_size); - - Block header = getHeader(final); - - for (size_t i = 0; i < params.keys_size; ++i) - { - key_columns[i] = header.safeGetByPosition(i).type->createColumn(); - key_columns[i]->reserve(rows); - } + /// for non-UDA or UDA without emit strategy, 'should_emit' is always true. + /// For UDA with emit strategy, it is true only if the group should emit. + size_t emit_times = 1; + if (params.group_by == Params::GroupBy::USER_DEFINED) + { + assert(aggregate_functions.size() == 1); + emit_times = aggregate_functions[0]->getEmitTimes(place + offsets_of_aggregate_states[0]); + } - for (size_t i = 0; i < params.aggregates_size; ++i) - { - if (!final) + if (emit_times > 0) { - const auto & aggregate_column_name = params.aggregates[i].column_name; - aggregate_columns[i] = header.getByName(aggregate_column_name).type->createColumn(); + /// duplicate key for each emit + for (size_t i = 0; i < emit_times; i++) + method.insertKeyIntoColumns(key, out_cols.raw_key_columns, key_sizes_ref); - /// The ColumnAggregateFunction column captures the shared ownership of the arena with the aggregate function states. - ColumnAggregateFunction & column_aggregate_func = assert_cast(*aggregate_columns[i]); + places.emplace_back(place); - /// proton: starts - column_aggregate_func.setKeepState(params.keep_state); - /// proton: ends + /// Mark the cell as destroyed so it will not be destroyed in destructor. + /// proton: starts. 
Here we push the `place` to `places`, for streaming + /// case, we don't want aggregate function to destroy the places + if (clear_states) + place = nullptr; + } + }); - /// Add arenas to ColumnAggregateFunction, which can result in moving ownership to it if reference count - /// get dropped in other places - for (auto & pool : data_variants.aggregates_pools) - column_aggregate_func.addArena(pool); + return insertResultsIntoColumns(places, std::move(out_cols), arena, clear_states); +} - aggregate_columns_data[i] = &column_aggregate_func.getData(); - aggregate_columns_data[i]->reserve(rows); - } - else +template +Block NO_INLINE Aggregator::convertToBlockImplNotFinal(Method & method, Table & data, Arenas & aggregates_pools, size_t rows) const +{ + constexpr bool final = false; + auto out_cols = prepareOutputBlockColumns(params, aggregate_functions, getHeader(final), aggregates_pools, final, rows); + + if constexpr (Method::low_cardinality_optimization) + { + if (data.hasNullKeyData()) { - final_aggregate_columns[i] = aggregate_functions[i]->getReturnType()->createColumn(); - final_aggregate_columns[i]->reserve(rows); + out_cols.raw_key_columns[0]->insertDefault(); - if (aggregate_functions[i]->isState()) - { - /// The ColumnAggregateFunction column captures the shared ownership of the arena with aggregate function states. - if (auto * column_aggregate_func = typeid_cast(final_aggregate_columns[i].get())) - for (auto & pool : data_variants.aggregates_pools) - column_aggregate_func->addArena(pool); + for (size_t i = 0; i < params.aggregates_size; ++i) + out_cols.aggregate_columns_data[i]->push_back(data.getNullKeyData() + offsets_of_aggregate_states[i]); - /// Aggregate state can be wrapped into array if aggregate function ends with -Resample combinator. - final_aggregate_columns[i]->forEachSubcolumn([&data_variants](IColumn::WrappedPtr & subcolumn) - { - if (auto * column_aggregate_func = typeid_cast(subcolumn.get())) - for (auto & pool : data_variants.aggregates_pools) - column_aggregate_func->addArena(pool); - }); - } + data.getNullKeyData() = nullptr; } } - filler(key_columns, aggregate_columns_data, final_aggregate_columns, final, clear_states); - - Block res = header.cloneEmpty(); - - for (size_t i = 0; i < params.keys_size; ++i) - res.getByPosition(i).column = std::move(key_columns[i]); + auto shuffled_key_sizes = method.shuffleKeyColumns(out_cols.raw_key_columns, key_sizes); + const auto & key_sizes_ref = shuffled_key_sizes ? *shuffled_key_sizes : key_sizes; - for (size_t i = 0; i < params.aggregates_size; ++i) + data.forEachValue([&](const auto & key, auto & mapped) { - const auto & aggregate_column_name = params.aggregates[i].column_name; - if (final) - res.getByName(aggregate_column_name).column = std::move(final_aggregate_columns[i]); - else - res.getByName(aggregate_column_name).column = std::move(aggregate_columns[i]); - } + method.insertKeyIntoColumns(key, out_cols.raw_key_columns, key_sizes_ref); - /// Change the size of the columns-constants in the block. - size_t columns = header.columns(); - for (size_t i = 0; i < columns; ++i) - if (isColumnConst(*res.getByPosition(i).column)) - res.getByPosition(i).column = res.getByPosition(i).column->cut(0, rows); + /// reserved, so push_back does not throw exceptions + for (size_t i = 0; i < params.aggregates_size; ++i) + out_cols.aggregate_columns_data[i]->push_back(mapped + offsets_of_aggregate_states[i]); - return res; + /// proton: starts. 
For streaming aggr, we hold on to the states + /// Since it is not final, we shall never clear the state + /// if (!params.keep_state) + /// mapped = nullptr; + /// proton: ends. + }); + + return finalizeBlock(params, getHeader(final), std::move(out_cols), final, rows); } void Aggregator::addSingleKeyToAggregateColumns( @@ -2034,71 +1840,50 @@ void Aggregator::addArenasToAggregateColumns( } } -void Aggregator::createStatesAndFillKeyColumnsWithSingleKey( - AggregatedDataVariants & data_variants, - Columns & key_columns, - size_t key_row, - MutableColumns & final_key_columns) const -{ - AggregateDataPtr place = data_variants.aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(place); - data_variants.without_key = place; - - for (size_t i = 0; i < params.keys_size; ++i) - { - final_key_columns[i]->insertFrom(*key_columns[i].get(), key_row); - } -} - -Block Aggregator::prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows, bool clear_states) const +Block Aggregator::prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows, bool clear_states, AggregateStateType type) const { - /// proton: starts. - if (!data_variants.without_key) - { - data_variants.invalidate(); - return {}; - } - /// proton: ends. - + auto res_header = getHeader(final); size_t rows = 1; + auto && out_cols = prepareOutputBlockColumns(params, aggregate_functions, res_header, data_variants.aggregates_pools, final, rows); + auto && [key_columns, raw_key_columns, aggregate_columns, final_aggregate_columns, aggregate_columns_data] = out_cols; - auto filler = [&data_variants, this]( - MutableColumns & key_columns, - AggregateColumnsData & aggregate_columns, - MutableColumns & final_aggregate_columns, - bool final_, - bool clear_states_) - { - if (data_variants.type == AggregatedDataVariants::Type::without_key || params.overflow_row) - { - AggregatedDataWithoutKey & data = data_variants.without_key; + /// TODO: support overflow row ? + assert(!is_overflows); + assert(!params.overflow_row); + assert(data_variants.type == AggregatedDataVariants::Type::without_key); - if (!data) - throw Exception("Wrong data variant passed.", ErrorCodes::LOGICAL_ERROR); + if ((type == AggregateStateType::OnlyUpdated && !UpdatedDataEx::isUpdated(data_variants.without_key)) + || (type == AggregateStateType::OnlyRetracted && !RetractedDataEx::hasRetracted(data_variants.without_key))) + return res_header.cloneEmpty(); - if (!final_) - { - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_columns[i]->push_back(data + offsets_of_aggregate_states[i]); + AggregatedDataWithoutKey & data = [&]() -> AggregateDataPtr & { + if (type == AggregateStateType::OnlyUpdated) + { + UpdatedDataEx::resetUpdated( data_variants.without_key); + return data_variants.without_key; + } + else if (type == AggregateStateType::OnlyRetracted) + return RetractedDataEx::getRetracted(data_variants.without_key); + else + return data_variants.without_key; + }(); - /// proton: starts - if (clear_states_) - data = nullptr; - /// proton: ends - } - else - { - /// Always single-thread. It's safe to pass current arena from 'aggregates_pool'. 
- insertAggregatesIntoColumns(data, final_aggregate_columns, data_variants.aggregates_pool); - } + if (!data) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong data variant passed."); - if (params.overflow_row) - for (size_t i = 0; i < params.keys_size; ++i) - key_columns[i]->insertDefault(); - } - }; + if (!final) + { + for (size_t i = 0; i < params.aggregates_size; ++i) + aggregate_columns_data[i]->push_back(data + offsets_of_aggregate_states[i]); + data = nullptr; + } + else + { + /// Always single-thread. It's safe to pass current arena from 'aggregates_pool'. + insertAggregatesIntoColumns(data, final_aggregate_columns, getArena(data_variants, type), clear_states); + } - Block block = prepareBlockAndFill(data_variants, final, clear_states, rows, filler); + Block block = finalizeBlock(params, res_header, std::move(out_cols), final, rows); if (is_overflows) block.info.is_overflows = true; @@ -2106,143 +1891,65 @@ Block Aggregator::prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_va return block; } -Block Aggregator::prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final, bool clear_states) const +Block Aggregator::prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final, bool clear_states, AggregateStateType type) const { - size_t rows = data_variants.sizeWithoutOverflowRow(); - - auto filler = [&data_variants, this]( - MutableColumns & key_columns, - AggregateColumnsData & aggregate_columns, - MutableColumns & final_aggregate_columns, - bool final_, - bool clear_states_) - { - #define M(NAME) \ - else if (data_variants.type == AggregatedDataVariants::Type::NAME) \ - convertToBlockImpl(*data_variants.NAME, data_variants.NAME->data, \ - key_columns, aggregate_columns, final_aggregate_columns, data_variants.aggregates_pool, final_, clear_states_); - - if (false) {} // NOLINT - APPLY_FOR_VARIANTS_SINGLE_LEVEL_STREAMING(M) - #undef M - else - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); - }; + const size_t rows = data_variants.sizeWithoutOverflowRow(); +#define M(NAME) \ + else if (data_variants.type == AggregatedDataVariants::Type::NAME) \ + return convertToBlockImpl(*data_variants.NAME, data_variants.NAME->data, getArena(data_variants, type), data_variants.aggregates_pools, final, rows, clear_states, type); - return prepareBlockAndFill(data_variants, final, clear_states, rows, filler); + if (false) {} // NOLINT + APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) +#undef M + else throw Exception(ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT, "Unknown aggregated data variant."); } - -BlocksList Aggregator::prepareBlocksAndFillTwoLevel(AggregatedDataVariants & data_variants, bool final, size_t max_threads, bool clear_states) const +BlocksList Aggregator::prepareBlocksAndFillTwoLevel(AggregatedDataVariants & data_variants, bool final, bool clear_states, size_t max_threads, AggregateStateType type) const { + /// TODO Make a custom threshold. + /// TODO Use the shared thread pool with the `merge` function. std::unique_ptr thread_pool; - if (max_threads > 1 && data_variants.sizeWithoutOverflowRow() > 100000 /// TODO Make a custom threshold. - && data_variants.isStaticBucketTwoLevel()) /// TODO Use the shared thread pool with the `merge` function. 
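/// A hedged usage sketch of the AggregateStateType parameter on the conversion paths above (the
/// calls are illustrative, only the signatures come from this diff). Passing clear_states = false
/// keeps the streaming aggregation states alive for later emits:
///
///     /// full snapshot of all groups
///     Block all     = prepareBlockAndFillSingleLevel(variants, /*final*/ true, /*clear_states*/ false, AggregateStateType::Normal);
///     /// only groups whose states changed since the last finalization
///     Block changed = prepareBlockAndFillSingleLevel(variants, /*final*/ true, /*clear_states*/ false, AggregateStateType::OnlyUpdated);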
+ if (max_threads > 1 && data_variants.sizeWithoutOverflowRow() > 100000 + && final && type == AggregateStateType::Normal) /// use single thread for non-final or retracted data or updated data thread_pool = std::make_unique(max_threads); + if (false) {} // NOLINT #define M(NAME) \ else if (data_variants.type == AggregatedDataVariants::Type::NAME) \ - return prepareBlocksAndFillTwoLevelImpl(data_variants, *data_variants.NAME, final, clear_states, thread_pool.get()); + return prepareBlocksAndFillTwoLevelImpl(data_variants, *data_variants.NAME, final, clear_states, thread_pool.get(), type); - if (false) {} // NOLINT APPLY_FOR_VARIANTS_ALL_TWO_LEVEL(M) #undef M else throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); } - template BlocksList Aggregator::prepareBlocksAndFillTwoLevelImpl( AggregatedDataVariants & data_variants, Method & method, bool final, bool clear_states, - ThreadPool * thread_pool) const + ThreadPool * thread_pool, + AggregateStateType type) const { - size_t max_threads = thread_pool ? thread_pool->getMaxThreads() : 1; - /// proton FIXME : separate final vs non-final converting. For non-final converting, we don't need - /// each arena for each thread. - for (size_t i = data_variants.aggregates_pools.size(); i < max_threads; ++i) - data_variants.aggregates_pools.push_back(std::make_shared()); - - auto buckets = method.data.buckets(); - std::atomic next_bucket_idx_to_merge = 0; - - auto converter = [&](size_t thread_id, ThreadGroupStatusPtr thread_group) - { - SCOPE_EXIT_SAFE( - if (thread_group) - CurrentThread::detachQueryIfNotDetached(); - ); - if (thread_group) - CurrentThread::attachToIfDetached(thread_group); - - BlocksList blocks; - while (true) - { - UInt32 bucket_idx = next_bucket_idx_to_merge.fetch_add(1); - - if (bucket_idx >= buckets.size()) - break; - - auto bucket = buckets[bucket_idx]; - if (method.data.impls[bucket].empty()) - continue; - - /// Select Arena to avoid race conditions - Arena * arena = data_variants.aggregates_pools.at(thread_id).get(); - blocks.emplace_back(convertOneBucketToBlockImpl(data_variants, method, arena, final, clear_states, bucket)); - } - return blocks; - }; - - /// packaged_task is used to ensure that exceptions are automatically thrown into the main stream. - - std::vector> tasks(max_threads); - - try - { - for (size_t thread_id = 0; thread_id < max_threads; ++thread_id) - { - tasks[thread_id] = std::packaged_task( - [group = CurrentThread::getGroup(), thread_id, &converter] { return converter(thread_id, group); }); - - if (thread_pool) - thread_pool->scheduleOrThrowOnError([thread_id, &tasks] { tasks[thread_id](); }); - else - tasks[thread_id](); - } - } - catch (...) - { - /// If this is not done, then in case of an exception, tasks will be destroyed before the threads are completed, and it will be bad. 
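/// Sketch of the conversion threading policy introduced above (the helper name pick_threads is
/// hypothetical; the thresholds mirror the guard in this hunk):
///
///     size_t pick_threads(size_t max_threads, size_t result_rows, bool final, AggregateStateType type)
///     {
///         /// Parallel conversion only pays off for large, final, full (Normal) conversions;
///         /// non-final, OnlyUpdated and OnlyRetracted conversions stay single threaded.
///         if (max_threads > 1 && result_rows > 100000 && final && type == AggregateStateType::Normal)
///             return max_threads;
///         return 1;
///     }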
- if (thread_pool) - thread_pool->wait(); - - throw; - } - - if (thread_pool) - thread_pool->wait(); - - BlocksList blocks; - - for (auto & task : tasks) - { - if (!task.valid()) - continue; - - blocks.splice(blocks.end(), task.get_future().get()); - } - - return blocks; + return concurrentBucketConvert( + thread_pool, + method.data.buckets(), + getArena(data_variants, type), + data_variants.aggregates_pools, + [&](Int64 bucket, Arena * arena) -> BlocksList { + /// Skip no changed bucket if only updated is requested + if (type == AggregateStateType::OnlyUpdated && !method.data.isUpdatedBucket(bucket)) + return {}; + + return {convertOneBucketToBlockImpl(data_variants, method, arena, final, clear_states, bucket, type)}; + }); } - -BlocksList Aggregator::convertToBlocks(AggregatedDataVariants & data_variants, bool final, ConvertAction action, size_t max_threads) const +BlocksList Aggregator::convertToBlocks(AggregatedDataVariants & data_variants, bool final, bool clear_states, size_t max_threads) const { - LOG_TRACE(log, "Converting aggregated data to blocks"); + LOG_DEBUG(log, "Converting aggregated data to blocks"); Stopwatch watch; @@ -2252,29 +1959,19 @@ BlocksList Aggregator::convertToBlocks(AggregatedDataVariants & data_variants, b if (data_variants.empty()) return blocks; - bool clear_states = shouldClearStates(action, final); - - if (data_variants.without_key) - /// When without_key is setup, it doesn't necessary mean no GROUP BY keys, it may be overflow - blocks.emplace_back(prepareBlockAndFillWithoutKey( - data_variants, final, data_variants.type != AggregatedDataVariants::Type::without_key, clear_states)); + if (unlikely(params.overflow_row)) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Overflow row processing is not implemented in streaming aggregation"); - if (data_variants.type != AggregatedDataVariants::Type::without_key) - { - if (data_variants.isTwoLevel()) - blocks.splice(blocks.end(), prepareBlocksAndFillTwoLevel(data_variants, final, max_threads, clear_states)); - else - blocks.emplace_back(prepareBlockAndFillSingleLevel(data_variants, final, clear_states)); - } + if (data_variants.type == AggregatedDataVariants::Type::without_key) + blocks.emplace_back(prepareBlockAndFillWithoutKey(data_variants, final, false, clear_states)); + else if (!data_variants.isTwoLevel()) + blocks.emplace_back(prepareBlockAndFillSingleLevel(data_variants, final, clear_states)); + else + blocks.splice(blocks.end(), prepareBlocksAndFillTwoLevel(data_variants, final, clear_states, max_threads)); /// proton: starts. if (clear_states) - { - /// `data_variants` will not destroy the states of aggregate functions in the destructor, - /// since already cleared up in `prepareBlocksAndFill...()` - data_variants.aggregator = nullptr; - clearDataVariants(data_variants); - } + data_variants.reset(); /// proton: ends size_t rows = 0; @@ -2287,7 +1984,7 @@ BlocksList Aggregator::convertToBlocks(AggregatedDataVariants & data_variants, b } double elapsed_seconds = watch.elapsedSeconds(); - LOG_INFO(log, + LOG_DEBUG(log, "Converted aggregated data to blocks. {} rows, {} in {} sec. 
({:.3f} rows/sec., {}/sec.)", rows, ReadableSize(bytes), elapsed_seconds, rows / elapsed_seconds, @@ -2296,7 +1993,6 @@ BlocksList Aggregator::convertToBlocks(AggregatedDataVariants & data_variants, b return blocks; } - template void NO_INLINE Aggregator::mergeDataNullKey( Table & table_dst, @@ -2352,7 +2048,6 @@ void NO_INLINE Aggregator::mergeDataImpl( auto func = [&](AggregateDataPtr & __restrict dst, AggregateDataPtr & __restrict src, bool inserted) { - /// proton: starts if (inserted) { /// If there are multiple sources, there are more than one AggregatedDataVariant. Aggregator always creates a new AggregatedDataVariant and merge all other @@ -2360,72 +2055,12 @@ void NO_INLINE Aggregator::mergeDataImpl( /// If it does not alloc new memory for the 'dst' (i.e. aggregate state of the new AggregatedDataVariant which get destroyed after finalize()) but reuse /// that from the 'src' to store the final aggregated result, it will cause the data from other AggregatedDataVariant will be merged multiple times and /// generate incorrect aggregated result. - dst = arena->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); -#if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) - { - const auto & compiled_aggregate_functions = compiled_aggregate_functions_holder->compiled_aggregate_functions; - compiled_aggregate_functions.create_aggregate_states_function(dst); - if (compiled_aggregate_functions.functions_count != aggregate_functions.size()) - { - static constexpr bool skip_compiled_aggregate_functions = true; - createAggregateStates(dst); - } - -#if defined(MEMORY_SANITIZER) - - /// We compile only functions that do not allocate some data in Arena. Only store necessary state in AggregateData place. - for (size_t aggregate_function_index = 0; aggregate_function_index < aggregate_functions.size(); ++aggregate_function_index) - { - if (!is_aggregate_function_compiled[aggregate_function_index]) - continue; - - auto aggregate_data_with_offset = dst + offsets_of_aggregate_states[aggregate_function_index]; - auto data_size = params.aggregates[aggregate_function_index].function->sizeOfData(); - __msan_unpoison(aggregate_data_with_offset, data_size); - } -#endif - } - else -#endif - { - createAggregateStates(dst); - } - } - /// proton: ends - -#if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) - { - const auto & compiled_functions = compiled_aggregate_functions_holder->compiled_aggregate_functions; - compiled_functions.merge_aggregate_states_function(dst, src); - - if (compiled_aggregate_functions_holder->compiled_aggregate_functions.functions_count != params.aggregates_size) - { - for (size_t i = 0; i < params.aggregates_size; ++i) - { - if (!is_aggregate_function_compiled[i]) - aggregate_functions[i]->merge(dst + offsets_of_aggregate_states[i], src + offsets_of_aggregate_states[i], arena); - } - -// for (size_t i = 0; i < params.aggregates_size; ++i) -// { -// /// proton: starts -// if (!is_aggregate_function_compiled[i] && !params.streaming) -// aggregate_functions[i]->destroy(src + offsets_of_aggregate_states[i]); -// /// proton: ends -// } - } - } - else -#endif - { - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_functions[i]->merge(dst + offsets_of_aggregate_states[i], src + offsets_of_aggregate_states[i], arena); + auto aggregate_data = arena->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + createAggregateStates(aggregate_data); + dst = aggregate_data; } - if (clear_states) - destroyAggregateStates(src); + 
mergeAggregateStates(dst, src, arena, clear_states); }; if constexpr (std::is_same_v) @@ -2433,152 +2068,65 @@ void NO_INLINE Aggregator::mergeDataImpl( else table_src.mergeToViaEmplace(table_dst, func, std::move(key_handler)); + /// In order to release memory early. if (clear_states) table_src.clearAndShrink(); /// proton: ends } - -template -void NO_INLINE Aggregator::mergeDataNoMoreKeysImpl( - Table & table_dst, - AggregatedDataWithoutKey & overflows, - Table & table_src, - Arena * arena, - bool clear_states) const +void NO_INLINE Aggregator::mergeWithoutKeyDataImpl(ManyAggregatedDataVariants & non_empty_data, bool clear_states) const { - /// Note : will create data for NULL key if not exist - if constexpr (Method::low_cardinality_optimization) - mergeDataNullKey(table_dst, table_src, arena, clear_states); + AggregatedDataVariantsPtr & res = non_empty_data[0]; - table_src.mergeToViaFind(table_dst, [&](AggregateDataPtr dst, AggregateDataPtr & src, bool found) + /// We merge all aggregation results to the first. + for (size_t result_num = 1, size = non_empty_data.size(); result_num < size; ++result_num) { - AggregateDataPtr res_data = found ? dst : overflows; - - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_functions[i]->merge( - res_data + offsets_of_aggregate_states[i], - src + offsets_of_aggregate_states[i], - arena); + AggregatedDataVariants & current = *non_empty_data[result_num]; + mergeAggregateStates(res->without_key, current.without_key, res->aggregates_pool, clear_states); - /// proton : starts + /// In order to release memory early. if (clear_states) - destroyAggregateStates(src); - /// proton : ends - }); - - if (clear_states) - table_src.clearAndShrink(); + current.reset(); + } } -template -void NO_INLINE Aggregator::mergeDataOnlyExistingKeysImpl( - Table & table_dst, - Table & table_src, - Arena * arena, - bool clear_states) const +template +void NO_INLINE Aggregator::mergeSingleLevelDataImpl(ManyAggregatedDataVariants & non_empty_data, bool clear_states) const { - /// Note : will create data for NULL key if not exist - if constexpr (Method::low_cardinality_optimization) - mergeDataNullKey(table_dst, table_src, arena, clear_states); + AggregatedDataVariantsPtr & res = non_empty_data[0]; + bool no_more_keys = false; - table_src.mergeToViaFind(table_dst, - [&](AggregateDataPtr dst, AggregateDataPtr & src, bool found) + /// We merge all aggregation results to the first. + for (size_t result_num = 1, size = non_empty_data.size(); result_num < size; ++result_num) { - if (!found) - return; - - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_functions[i]->merge( - dst + offsets_of_aggregate_states[i], - src + offsets_of_aggregate_states[i], - arena); - - /// proton : starts - if (clear_states) - destroyAggregateStates(src); - }); - - if (clear_states) - table_src.clearAndShrink(); - /// proton : ends -} - - -void NO_INLINE Aggregator::mergeWithoutKeyDataImpl(ManyAggregatedDataVariants & non_empty_data, bool clear_states) const -{ - AggregatedDataVariantsPtr & res = non_empty_data[0]; - - /// We merge all aggregation results to the first. - for (size_t result_num = 1, size = non_empty_data.size(); result_num < size; ++result_num) - { - /// proton: starts. - mergeAggregateStates(res->without_key, non_empty_data[result_num]->without_key, res->aggregates_pool, clear_states); - /// proton: ends. 
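/// A hedged sketch of the bucket-level skip that the OnlyUpdated path enables for two-level
/// (bucketed) tables; isUpdatedBucket and convertOneBucketToBlockImpl are names used in this
/// diff, while the explicit loop is only an illustration of what the concurrentBucketConvert
/// callback does per bucket:
///
///     BlocksList delta;
///     for (auto bucket : method.data.buckets())
///     {
///         if (!method.data.isUpdatedBucket(bucket))
///             continue; /// no group in this bucket changed since the last emit
///
///         delta.emplace_back(convertOneBucketToBlockImpl(
///             data_variants, method, arena, /*final*/ true, /*clear_states*/ false, bucket,
///             AggregateStateType::OnlyUpdated));
///     }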
- } -} - - -template -void NO_INLINE Aggregator::mergeSingleLevelDataImpl(ManyAggregatedDataVariants & non_empty_data, bool clear_states) const -{ - AggregatedDataVariantsPtr & res = non_empty_data[0]; - bool no_more_keys = false; - - /// We merge all aggregation results to the first. - for (size_t result_num = 1, size = non_empty_data.size(); result_num < size; ++result_num) - { - if (!checkLimits(res->sizeWithoutOverflowRow(), no_more_keys)) - break; + if (!checkLimits(res->sizeWithoutOverflowRow(), no_more_keys)) + break; AggregatedDataVariants & current = *non_empty_data[result_num]; - if (!no_more_keys) - { + assert(!no_more_keys); #if USE_EMBEDDED_COMPILER - if (compiled_aggregate_functions_holder) - { - mergeDataImpl( - getDataVariant(*res).data, - getDataVariant(current).data, - res->aggregates_pool, - clear_states); - } - else -#endif - { - mergeDataImpl( - getDataVariant(*res).data, - getDataVariant(current).data, - res->aggregates_pool, - clear_states); - } - } - else if (res->without_key) + if (compiled_aggregate_functions_holder) { - mergeDataNoMoreKeysImpl( + mergeDataImpl( getDataVariant(*res).data, - res->without_key, getDataVariant(current).data, res->aggregates_pool, clear_states); } else + #endif { - mergeDataOnlyExistingKeysImpl( + mergeDataImpl( getDataVariant(*res).data, getDataVariant(current).data, res->aggregates_pool, clear_states); } + /// In order to release memory early. if (clear_states) - { - /// `current` will not destroy the states of aggregate functions in the destructor, - /// since already cleared up in `mergeData...Impl()` - current.aggregator = nullptr; - clearDataVariants(current); - } + current.reset(); } } @@ -2588,9 +2136,98 @@ void NO_INLINE Aggregator::mergeSingleLevelDataImpl(ManyAggregatedDataVariants & APPLY_FOR_VARIANTS_SINGLE_LEVEL_STREAMING(M) #undef M + +BlocksList +Aggregator::mergeAndConvertToBlocks(ManyAggregatedDataVariants & data_variants, bool final, bool clear_states, size_t max_threads) const +{ + auto prepared_data_ptr = prepareVariantsToMerge(data_variants); + if (prepared_data_ptr->empty()) + return {}; + + if (unlikely(params.overflow_row)) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Overflow row processing is not implemented in streaming aggregation"); + + BlocksList blocks; + auto & first = *prepared_data_ptr->at(0); + if (first.type == AggregatedDataVariants::Type::without_key) + { + mergeWithoutKeyDataImpl(*prepared_data_ptr, clear_states); + blocks.emplace_back(prepareBlockAndFillWithoutKey(first, final, false, clear_states)); + } + else if (!first.isTwoLevel()) + { + if (false) { } // NOLINT +#define M(NAME) \ + else if (first.type == AggregatedDataVariants::Type::NAME) \ + mergeSingleLevelDataImpl(*prepared_data_ptr, clear_states); + + APPLY_FOR_VARIANTS_SINGLE_LEVEL_STREAMING(M) +#undef M + else throw Exception("Unknown single level aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + + blocks.emplace_back(prepareBlockAndFillSingleLevel(first, final, clear_states)); + } + else + { + auto total_size = std::accumulate(prepared_data_ptr->begin(), prepared_data_ptr->end(), 0ull, [](size_t size, const auto & variants) { + return size + variants->sizeWithoutOverflowRow(); + }); + /// TODO Make a custom threshold. + /// TODO Use the shared thread pool with the `merge` function. 
+ std::unique_ptr thread_pool; + if (max_threads > 1 && total_size > 100000 && final) + thread_pool = std::make_unique(max_threads); + + if (false) { } // NOLINT +#define M(NAME) \ + else if (first.type == AggregatedDataVariants::Type::NAME) \ + blocks = mergeAndConvertTwoLevelToBlocksImpl(*prepared_data_ptr, final, clear_states, thread_pool.get()); + + APPLY_FOR_VARIANTS_ALL_TWO_LEVEL(M) +#undef M + else throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + } + + if (clear_states) + { + for (auto & variants : *prepared_data_ptr) + variants->reset(); + } + + return blocks; +} + +template +BlocksList Aggregator::mergeAndConvertTwoLevelToBlocksImpl( + ManyAggregatedDataVariants & non_empty_data, bool final, bool clear_states, ThreadPool * thread_pool) const +{ + auto & first = *non_empty_data.at(0); + + std::vector buckets; + if (first.isStaticBucketTwoLevel()) + buckets = getDataVariant(first).data.buckets(); + else + { + assert(first.isTimeBucketTwoLevel()); + std::unordered_set buckets_set; + for (auto & data_variants : non_empty_data) + { + auto tmp_buckets = getDataVariant(*data_variants).data.buckets(); + buckets_set.insert(tmp_buckets.begin(), tmp_buckets.end()); + } + buckets.assign(buckets_set.begin(), buckets_set.end()); + } + + return concurrentBucketConvert( + thread_pool, buckets, first.aggregates_pool, first.aggregates_pools, [&](Int64 bucket, Arena * arena) -> BlocksList { + mergeBucketImpl(non_empty_data, bucket, arena, clear_states); + return {convertOneBucketToBlockImpl(first, getDataVariant(first), arena, final, clear_states, bucket)}; + }); +} + template void NO_INLINE Aggregator::mergeBucketImpl( - ManyAggregatedDataVariants & data, bool final, bool clear_states, Int64 bucket, Arena * arena, std::atomic * is_cancelled) const + ManyAggregatedDataVariants & data, Int64 bucket, Arena * arena, bool clear_states, std::atomic * is_cancelled) const { /// We merge all aggregation results to the first. AggregatedDataVariantsPtr & res = data[0]; @@ -2618,6 +2255,9 @@ void NO_INLINE Aggregator::mergeBucketImpl( arena, clear_states); } + + /// Assume the current bucket has been finalized. + getDataVariant(current).data.resetUpdated(bucket); } } @@ -2731,6 +2371,7 @@ void NO_INLINE Aggregator::mergeStreamsImplCase( auto emplace_result = state.emplaceKey(data, i, *aggregates_pool); if (emplace_result.isInserted()) { + /// exception-safety - if you can not allocate memory or create states, then destructors will not be called. 
emplace_result.setMapped(nullptr); aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); @@ -2974,7 +2615,7 @@ void Aggregator::mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVari } -Block Aggregator::mergeBlocks(BlocksList & blocks, bool final, ConvertAction action) +Block Aggregator::mergeBlocks(BlocksList & blocks, bool final, bool clear_states, bool only_updated) { if (blocks.empty()) return {}; @@ -3036,7 +2677,6 @@ Block Aggregator::mergeBlocks(BlocksList & blocks, bool final, ConvertAction act throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); } - bool clear_states = shouldClearStates(action, final); Block block; if (result.type == AggregatedDataVariants::Type::without_key || is_overflows) block = prepareBlockAndFillWithoutKey(result, final, is_overflows, clear_states); @@ -3321,33 +2961,40 @@ std::vector Aggregator::bucketsBefore(const AggregatedDataVariants & resu /// 1) The keys can reside in hashmap or in arena /// 2) The state can reside in arena or in the aggregation function /// And there is a special one which is group without key -void Aggregator::checkpoint(const AggregatedDataVariants & data_variants, WriteBuffer & wb) +void Aggregator::checkpoint(const AggregatedDataVariants & data_variants, WriteBuffer & wb) const { + auto version = getVersion(); /// Serialization layout /// [version] + [states layout] - VersionType version = getVersion(); writeIntBinary(version, wb); if (version <= 1) - return doCheckpointLegacy(data_variants, wb); + return const_cast(this)->doCheckpointLegacy(data_variants, wb); - return doCheckpoint(data_variants, wb); + if (version <= 2) + return doCheckpointV2(data_variants, wb); + else + return doCheckpointV3(data_variants, wb); } -void Aggregator::recover(AggregatedDataVariants & data_variants, ReadBuffer & rb) +void Aggregator::recover(AggregatedDataVariants & data_variants, ReadBuffer & rb) const { /// Serialization layout /// [version] + [states layout] VersionType recovered_version = 0; readIntBinary(recovered_version, rb); + assert(recovered_version <= getVersion()); + /// So far, no broken changes from `recovered_version` to `version`. 
/// FIXME: Legacy layout needs to be cleaned after no use if (recovered_version <= 1) - return doRecoverLegacy(data_variants, rb); + return const_cast(this)->doRecoverLegacy(data_variants, rb); - /// Recover STATE V2 - return doRecover(data_variants, rb); + if (recovered_version <= 2) + return doRecoverV2(data_variants, rb); + else + return doRecoverV3(data_variants, rb); } void Aggregator::doCheckpointLegacy(const AggregatedDataVariants & data_variants, WriteBuffer & wb) @@ -3370,7 +3017,7 @@ void Aggregator::doCheckpointLegacy(const AggregatedDataVariants & data_variants /// FIXME, set a good max_threads /// For ConvertAction::Checkpoint, don't clear state `data_variants` - auto blocks = convertToBlocks(const_cast(data_variants), false, ConvertAction::Checkpoint, 8); + auto blocks = convertToBlocks(const_cast(data_variants), false, false, 8); /// assert(!blocks.empty()); @@ -3614,7 +3261,7 @@ void Aggregator::recoverStatesTwoLevel(AggregatedDataVariants & data_variants, B /// The complexity of checkpoint the state of Aggregator is a combination of the following 2 cases /// 1) without key states (without_key or overflow rows) /// 2) hash table states -void Aggregator::doCheckpoint(const AggregatedDataVariants & data_variants, WriteBuffer & wb) +void Aggregator::doCheckpointV2(const AggregatedDataVariants & data_variants, WriteBuffer & wb) const { /// Serialization layout, there are 2 cases: /// 1) Without key: [uint8][uint16][aggr-func-state-without-key] @@ -3652,7 +3299,7 @@ void Aggregator::doCheckpoint(const AggregatedDataVariants & data_variants, Writ else throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); } -void Aggregator::doRecover(AggregatedDataVariants & data_variants, ReadBuffer & rb) +void Aggregator::doRecoverV2(AggregatedDataVariants & data_variants, ReadBuffer & rb) const { UInt8 inited = 0; readIntBinary(inited, rb); @@ -3687,61 +3334,24 @@ void Aggregator::doRecover(AggregatedDataVariants & data_variants, ReadBuffer & if (is_two_level && !data_variants.isTwoLevel()) data_variants.convertToTwoLevel(); - bool use_string_hash_map = data_variants.type == AggregatedDataVariants::Type::key_string - || data_variants.type == AggregatedDataVariants::Type::key_string_two_level - || data_variants.type == AggregatedDataVariants::Type::key_fixed_string - || data_variants.type == AggregatedDataVariants::Type::key_fixed_string_two_level; - /// [aggr-func-state-in-hash-map] if (false) { } // NOLINT #define M(NAME, IS_TWO_LEVEL) \ - else if (data_variants.type == AggregatedDataVariants::Type::NAME) { \ - if (use_string_hash_map) \ - DB::deserializeHashMap(data_variants.NAME->data, [this](auto & mapped, Arena & pool, ReadBuffer & rb_) { deserializeAggregateStates(mapped, rb_, &pool); }, *data_variants.aggregates_pool, rb); \ - else \ - DB::deserializeHashMap(data_variants.NAME->data, [this](auto & mapped, Arena & pool, ReadBuffer & rb_) { deserializeAggregateStates(mapped, rb_, &pool); }, *data_variants.aggregates_pool, rb); \ - } + else if (data_variants.type == AggregatedDataVariants::Type::NAME) \ + DB::deserializeHashMap(data_variants.NAME->data, [this](auto & mapped, Arena & pool, ReadBuffer & rb_) { deserializeAggregateStates(mapped, rb_, &pool); }, *data_variants.aggregates_pool, rb); APPLY_FOR_AGGREGATED_VARIANTS_STREAMING(M) #undef M else throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); } -bool Aggregator::shouldClearStates(ConvertAction action, bool final_) const -{ - /// For streaming 
processing, data_variants.aggregator will never be nullptr once set - /// and we will never move the ownership of the states to `ColumnAggregateFunction` - /// unless we don't need keep the states - - switch (action) - { - case ConvertAction::DistributedMerge: - /// Distributed processing case. Only clear states on initiator - return final_; - case ConvertAction::WriteToTmpFS: - /// We are dumping all states to file system in case of memory is not efficient - /// In this case, we should not keep the states - return true; - case ConvertAction::Checkpoint: - /// Checkpoint is snapshot of in-memory states, we shall not clear the states - return false; - case ConvertAction::InternalMerge: - return false; - case ConvertAction::RetractedEmit: - return true; - case ConvertAction::StreamingEmit: - [[fallthrough]]; - default: - /// By default, streaming processing needs hold on to the states - return !params.keep_state; - } -} - VersionType Aggregator::getVersionFromRevision(UInt64 revision) const { - if (revision >= STATE_V2_MIN_REVISION) + if (revision >= STATE_V3_MIN_REVISION) + return static_cast(3); + else if (revision >= STATE_V2_MIN_REVISION) return static_cast(2); else throw Exception( @@ -3757,10 +3367,9 @@ template void NO_INLINE Aggregator::spliceBucketsImpl( AggregatedDataVariants & data_dest, AggregatedDataVariants & data_src, - bool final, - bool clear_states, const std::vector & gcd_buckets, - Arena * arena) const + Arena * arena, + bool clear_states) const { /// In order to merge state with same other keys of different gcd buckets, reset the window group keys to zero /// create a new key, where the window key part is 0, and the other key parts are the same as the original value. @@ -3809,22 +3418,24 @@ void NO_INLINE Aggregator::spliceBucketsImpl( } Block Aggregator::spliceAndConvertBucketsToBlock( - AggregatedDataVariants & variants, bool final, ConvertAction action, const std::vector & gcd_buckets) const + AggregatedDataVariants & variants, bool final, bool clear_states, const std::vector & gcd_buckets) const { - AggregatedDataVariants result_variants; - result_variants.aggregator = this; - initDataVariants(result_variants, method_chosen, key_sizes, params); - initStatesForWithoutKeyOrOverflow(result_variants); - - auto method = result_variants.type; - Arena * arena = result_variants.aggregates_pool; - bool clear_states = shouldClearStates(action, final); + assert(variants.isTimeBucketTwoLevel()); + if (false) {} // NOLINT #define M(NAME) \ - else if (method == AggregatedDataVariants::Type::NAME) \ + else if (variants.type == AggregatedDataVariants::Type::NAME) \ { \ - spliceBucketsImpl(result_variants, variants, final, clear_states, gcd_buckets, arena); \ - return convertOneBucketToBlockImpl(result_variants, *result_variants.NAME, arena, final, clear_states, 0); \ + if (gcd_buckets.size() > 1) \ + { \ + AggregatedDataVariants result_variants; \ + result_variants.aggregator = this; \ + initDataVariants(result_variants, method_chosen, key_sizes, params); \ + spliceBucketsImpl(result_variants, variants, gcd_buckets, result_variants.aggregates_pool, clear_states); \ + return convertOneBucketToBlockImpl(result_variants, *result_variants.NAME, result_variants.aggregates_pool, final, /*clear_states*/ true, 0); \ + } \ + else \ + return convertOneBucketToBlockImpl(variants, *variants.NAME, variants.aggregates_pool, final, clear_states, gcd_buckets[0]); \ } APPLY_FOR_VARIANTS_TIME_BUCKET_TWO_LEVEL(M) @@ -3836,30 +3447,31 @@ Block Aggregator::spliceAndConvertBucketsToBlock( } Block 
Aggregator::mergeAndSpliceAndConvertBucketsToBlock( - ManyAggregatedDataVariants & variants, bool final, ConvertAction action, const std::vector & gcd_buckets) const + ManyAggregatedDataVariants & variants, bool final, bool clear_states, const std::vector & gcd_buckets) const { - auto prepared_data = prepareVariantsToMerge(variants); + bool need_splice = gcd_buckets.size() > 1; + auto prepared_data = prepareVariantsToMerge(variants, /*always_merge_into_empty*/ need_splice); if (prepared_data->empty()) return {}; - AggregatedDataVariants result_variants; - result_variants.aggregator = this; - initDataVariants(result_variants, method_chosen, key_sizes, params); - initStatesForWithoutKeyOrOverflow(result_variants); - - auto method = result_variants.type; - Arena * arena = result_variants.aggregates_pool; - bool clear_states = shouldClearStates(action, final); + auto & first = *prepared_data->at(0); + assert(first.isTimeBucketTwoLevel()); + Arena * arena = first.aggregates_pool; if (false) {} // NOLINT #define M(NAME) \ - else if (method == AggregatedDataVariants::Type::NAME) \ + else if (first.type == AggregatedDataVariants::Type::NAME) \ { \ - using Method = decltype(result_variants.NAME)::element_type; \ + using Method = decltype(first.NAME)::element_type; \ for (auto bucket : gcd_buckets) \ - mergeBucketImpl(*prepared_data, final, clear_states, bucket, arena); \ - spliceBucketsImpl(result_variants, *prepared_data->at(0), final, clear_states, gcd_buckets, arena); \ - return convertOneBucketToBlockImpl(result_variants, *result_variants.NAME, arena, final, clear_states, 0); \ + mergeBucketImpl(*prepared_data, bucket, arena, clear_states); \ + if (need_splice) \ + { \ + spliceBucketsImpl(first, first, gcd_buckets, arena, /*clear_states*/ true); \ + return convertOneBucketToBlockImpl(first, *first.NAME, arena, final, /*clear_states*/ true, 0); \ + } \ + else \ + return convertOneBucketToBlockImpl(first, *first.NAME, arena, final, clear_states, gcd_buckets[0]); \ } APPLY_FOR_VARIANTS_TIME_BUCKET_TWO_LEVEL(M) @@ -3870,105 +3482,320 @@ Block Aggregator::mergeAndSpliceAndConvertBucketsToBlock( UNREACHABLE(); } -template -bool Aggregator::executeAndRetractImpl( - Method & method, - Arena * aggregates_pool, - Method & retracted_method, - Arena * retracted_pool, - size_t row_begin, - size_t row_end, - ColumnRawPtrs & key_columns, - AggregateFunctionInstruction * aggregate_instructions) const +void Aggregator::mergeAggregateStates(AggregateDataPtr & dst, AggregateDataPtr & src, Arena * arena, bool clear_states) const { - typename Method::State state(key_columns, key_sizes, aggregation_state_cache); - typename Method::State retracted_state(key_columns, key_sizes, nullptr); + assert(src); + assert(dst); - /// Optimization for special case when there are no aggregate functions. - if (params.aggregates_size == 0) + for (size_t i = 0; i < params.aggregates_size; ++i) + aggregate_functions[i]->merge(dst + offsets_of_aggregate_states[i], src + offsets_of_aggregate_states[i], arena); + + if (clear_states) + destroyAggregateStates(src); +} + +void Aggregator::destroyAggregateStates(AggregateDataPtr & place) const +{ + if (place) { - if (params.delta_col_pos >= 0) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Changelog aggregating must have aggregate functions"); + for (size_t i = 0; i < params.aggregates_size; ++i) + aggregate_functions[i]->destroy(place + offsets_of_aggregate_states[i]); - /// For all rows. 
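/// A hedged usage sketch of the two state helpers introduced above (dst_place / src_place are
/// illustrative variables; the helper signatures are the ones in this diff):
///
///     /// Fold src into dst; when clear_states is true the src states are destroyed
///     /// and src is reset to nullptr so a destructor will not touch them again.
///     mergeAggregateStates(dst_place, src_place, aggregates_pool, /*clear_states*/ true);
///
///     /// Explicit teardown of a state that will not be reused any more.
///     destroyAggregateStates(dst_place);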
- AggregateDataPtr place = aggregates_pool->alloc(0); - for (size_t i = row_begin; i < row_end; ++i) - { - auto emplace_result = state.emplaceKey(method.data, i, *aggregates_pool); - if (emplace_result.isInserted()) - { - emplace_result.setMapped(place); - /// Only add new key - retracted_state.emplaceKey(retracted_method.data, i, *retracted_pool).setMapped(place); - } - } - return false; + place = nullptr; } +} - bool need_finalization = false; - - /// NOTE: only row_end-row_start is required, but: - /// - this affects only optimize_aggregation_in_order, - /// - this is just a pointer, so it should not be significant, - /// - and plus this will require other changes in the interface. - std::unique_ptr places(new AggregateDataPtr[row_end]); +void Aggregator::serializeAggregateStates(const AggregateDataPtr & place, WriteBuffer & wb) const +{ + UInt8 has_states = place ? 1 : 0; + writeIntBinary(has_states, wb); + if (has_states) + { + for (size_t i = 0; i < params.aggregates_size; ++i) + aggregate_functions[i]->serialize(place + offsets_of_aggregate_states[i], wb); + } +} - /// For all rows. - for (size_t i = row_begin; i < row_end; ++i) +void Aggregator::deserializeAggregateStates(AggregateDataPtr & place, ReadBuffer & rb, Arena * arena) const +{ + UInt8 has_states; + readIntBinary(has_states, rb); + if (has_states) { - AggregateDataPtr aggregate_data = nullptr; + if (!place) + { + /// Allocate states for all aggregate functions + AggregateDataPtr aggregate_data = arena->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + createAggregateStates(aggregate_data); + place = aggregate_data; + } - auto emplace_result = state.emplaceKey(method.data, i, *aggregates_pool); + for (size_t i = 0; i < params.aggregates_size; ++i) + aggregate_functions[i]->deserialize(place + offsets_of_aggregate_states[i], rb, std::nullopt, arena); + } +} - /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key. - if (emplace_result.isInserted()) - { - /// exception-safety - if you can not allocate memory or create states, then destructors will not be called. - emplace_result.setMapped(nullptr); +void Aggregator::doCheckpointV3(const AggregatedDataVariants & data_variants, WriteBuffer & wb) const +{ + /// Serialization layout, there are 2 cases: + /// 1) Without key: [uint8][uint16][aggr-func-state-without-key] + /// 2) Otherwise: [uint8][uint16][aggr-func-state-for-overflow-row][is_two_level][aggr-func-state-in-hash-map] + bool inited = !data_variants.empty(); + writeBoolText(inited, wb); + if (!inited) + return; /// No aggregated data yet - aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - /// TODO: support use_compiled_functions - createAggregateStates(aggregate_data); - emplace_result.setMapped(aggregate_data); + writeIntBinary(static_cast(data_variants.type), wb); - /// Save new group without retracted state (used for emit new key group) - /// FIXME: There is a bug when use hash table (key8 or key16), it use a optimzed FixedImplicitZeroHashMap that the empty mapped directly means zero (i.e. invalid insertion). - /// But in retract group scenario, we need to use an empty mapped to represent no ratracted value for new group - /// Use a non-optimized FixedHashMap ? or revisit retract implementation ? 
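/// A minimal round-trip sketch for the serialize/deserialize helpers above;
/// WriteBufferFromOwnString and ReadBufferFromString are the usual in-memory buffers in this
/// code base, everything else here is illustrative:
///
///     DB::WriteBufferFromOwnString out;
///     serializeAggregateStates(place, out);            /// [has_states flag] + per-function state
///
///     AggregateDataPtr restored = nullptr;
///     DB::ReadBufferFromString in(out.str());
///     deserializeAggregateStates(restored, in, arena); /// allocates + createAggregateStates on demand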
- retracted_state.emplaceKey(retracted_method.data, i, *retracted_pool).setMapped(nullptr); - } - else + writeIntBinary(static_cast(expanded_data_type), wb); + + auto state_serializer = [this](auto place, auto & wb_) { + assert(place); + if (expanded_data_type == ExpandedDataType::UpdatedWithRetracted) { - aggregate_data = emplace_result.getMapped(); + UpdatedDataEx::serialize(place, wb_); - /// Save changed group with retracted state (used for emit changed group) - auto retracted_result = retracted_state.emplaceKey(retracted_method.data, i, *retracted_pool); - if (retracted_result.isInserted()) - { - retracted_result.setMapped(nullptr); - auto retracted_data = retracted_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(retracted_data); - /// Copy aggregate data to retracted data before changed - mergeAggregateStates(retracted_data, aggregate_data, retracted_pool, /*clear_states*/ false); - retracted_result.setMapped(retracted_data); - } + auto & retracted_place = RetractedDataEx::getRetracted(place); + bool has_retracted = retracted_place != nullptr; + writeBoolText(has_retracted, wb_); + if (has_retracted) + for (size_t i = 0; i < params.aggregates_size; ++i) + aggregate_functions[i]->serialize(retracted_place + offsets_of_aggregate_states[i], wb_); } + else if (expanded_data_type == ExpandedDataType::Updated) + UpdatedDataEx::serialize(place, wb_); - assert(aggregate_data != nullptr); - places[i] = aggregate_data; - } + for (size_t i = 0; i < params.aggregates_size; ++i) + aggregate_functions[i]->serialize(place + offsets_of_aggregate_states[i], wb_); + }; - /// Add values to the aggregate functions. - for (size_t i = 0; i < aggregate_functions.size(); ++i) - { - AggregateFunctionInstruction * inst = aggregate_instructions + i; + /// [aggr-func-state-without-key] + assert(!params.overflow_row); + if (data_variants.type == AggregatedDataVariants::Type::without_key) + state_serializer(data_variants.without_key, wb); - if (inst->offsets) - inst->batch_that->addBatchArray(row_begin, row_end, places.get(), inst->state_offset, inst->batch_arguments, inst->offsets, aggregates_pool); - else - inst->batch_that->addBatch(row_begin, row_end, places.get(), inst->state_offset, inst->batch_arguments, aggregates_pool, -1, inst->delta_column); + /// [aggr-func-state-in-hash-map] +#define M(NAME, IS_TWO_LEVEL) \ + else if (data_variants.type == AggregatedDataVariants::Type::NAME) \ + { \ + if constexpr (IS_TWO_LEVEL) \ + DB::serializeTwoLevelHashMap(data_variants.NAME->data, [&](const auto & mapped, WriteBuffer & wb_) { state_serializer(mapped, wb_); }, wb); \ + else \ + DB::serializeHashMap(data_variants.NAME->data, [&](const auto & mapped, WriteBuffer & wb_) { state_serializer(mapped, wb_); }, wb); \ + } - if (inst->batch_that->isUserDefined()) + APPLY_FOR_AGGREGATED_VARIANTS_STREAMING(M) +#undef M + else throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); +} + +void Aggregator::doRecoverV3(AggregatedDataVariants & data_variants, ReadBuffer & rb) const +{ + bool inited = !data_variants.empty(); + readBoolText(inited, rb); + if (!inited) + return; + + UInt8 recovered_data_variants_type_uint8; + readIntBinary(recovered_data_variants_type_uint8, rb); + AggregatedDataVariants::Type recovered_data_variants_type = static_cast(recovered_data_variants_type_uint8); + + data_variants.aggregator = this; + initDataVariants(data_variants, method_chosen, key_sizes, params); + /// Data variants is inited with single level 
hashmap, however the checkpoint states are 2 levels + /// which means data variants was converted to two level + if (data_variants.type != recovered_data_variants_type) + if (data_variants.isConvertibleToTwoLevel()) + data_variants.convertToTwoLevel(); + + if (data_variants.type != recovered_data_variants_type) + throw Exception( + ErrorCodes::RECOVER_CHECKPOINT_FAILED, + "Failed to recover aggregation checkpoint. Aggregated data variant type is not compatible, checkpointed={}, current={}", + magic_enum::enum_name(recovered_data_variants_type), + magic_enum::enum_name(method_chosen)); + + UInt8 recovered_expanded_data_type_uint8; + readIntBinary(recovered_expanded_data_type_uint8, rb); + ExpandedDataType recovered_expanded_data_type = static_cast(recovered_expanded_data_type_uint8); + if (recovered_expanded_data_type != expanded_data_type) + throw Exception( + ErrorCodes::RECOVER_CHECKPOINT_FAILED, + "Failed to recover aggregation checkpoint. Expanded data type is not the same, checkpointed={}, current={}", + magic_enum::enum_name(recovered_expanded_data_type), + magic_enum::enum_name(expanded_data_type)); + + auto state_deserializer = [this](auto & place, auto & rb_, Arena * arena) { + place = nullptr; /// exception-safety - if you can not allocate memory or create states, then destructors will not be called. + auto aggregate_data = arena->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + createAggregateStates(aggregate_data); + place = aggregate_data; + + if (expanded_data_type == ExpandedDataType::UpdatedWithRetracted) + { + UpdatedDataEx::deserialize(place, rb_); + + auto & retracted = RetractedDataEx::getRetracted(place); + bool has_retracted = false; + readBoolText(has_retracted, rb_); + if (has_retracted) + { + auto tmp_retracted = arena->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + createAggregateStates(tmp_retracted); + retracted = tmp_retracted; + for (size_t i = 0; i < params.aggregates_size; ++i) + aggregate_functions[i]->deserialize(retracted + offsets_of_aggregate_states[i], rb_, std::nullopt, arena); + } + } + else if (expanded_data_type == ExpandedDataType::Updated) + UpdatedDataEx::deserialize(place, rb_); + + for (size_t i = 0; i < params.aggregates_size; ++i) + aggregate_functions[i]->deserialize(place + offsets_of_aggregate_states[i], rb_, std::nullopt, arena); + }; + + /// [aggr-func-state-without-key] + assert(!params.overflow_row); + if (data_variants.type == AggregatedDataVariants::Type::without_key) + state_deserializer(data_variants.without_key, rb, data_variants.aggregates_pool); + + /// [aggr-func-state-in-hash-map] +#define M(NAME, IS_TWO_LEVEL) \ + else if (data_variants.type == AggregatedDataVariants::Type::NAME) \ + { \ + if constexpr (IS_TWO_LEVEL) \ + DB::deserializeTwoLevelHashMap(data_variants.NAME->data, [&](auto & mapped, Arena & pool, ReadBuffer & rb_) { state_deserializer(mapped, rb_, &pool); }, *data_variants.aggregates_pool, rb); \ + else \ + DB::deserializeHashMap(data_variants.NAME->data, [&](auto & mapped, Arena & pool, ReadBuffer & rb_) { state_deserializer(mapped, rb_, &pool); }, *data_variants.aggregates_pool, rb); \ + } + + APPLY_FOR_AGGREGATED_VARIANTS_STREAMING(M) +#undef M + else throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); +} + +bool Aggregator::checkAndProcessResult(AggregatedDataVariants & result, bool & no_more_keys) const +{ + size_t result_size = result.sizeWithoutOverflowRow(); + Int64 current_memory_usage = 0; + if (auto * 
memory_tracker_child = CurrentThread::getMemoryTracker()) + if (auto * memory_tracker = memory_tracker_child->getParent()) + current_memory_usage = memory_tracker->get(); + + /// Here all the results in the sum are taken into account, from different threads. + Int64 result_size_bytes = current_memory_usage - memory_usage_before_aggregation; + + bool worth_convert_to_two_level = worthConvertToTwoLevel( + params.group_by_two_level_threshold, result_size, params.group_by_two_level_threshold_bytes, result_size_bytes); + + /** Converting to a two-level data structure. + * It allows you to make, in the subsequent, an effective merge - either economical from memory or parallel. + */ + if (result.isConvertibleToTwoLevel() && worth_convert_to_two_level) + result.convertToTwoLevel(); + + /// Checking the constraints. + if (!checkLimits(result_size, no_more_keys)) + return true; + + /** Flush data to disk if too much RAM is consumed. + * Data can only be flushed to disk if a two-level aggregation structure is used. + */ + if (params.max_bytes_before_external_group_by + && result.isTwoLevel() + && current_memory_usage > static_cast(params.max_bytes_before_external_group_by) + && worth_convert_to_two_level) + { + size_t size = current_memory_usage + params.min_free_disk_space; + + std::string tmp_path = params.tmp_volume->getDisk()->getPath(); + + // enoughSpaceInDirectory() is not enough to make it right, since + // another process (or another thread of aggregator) can consume all + // space. + // + // But true reservation (IVolume::reserve()) cannot be used here since + // current_memory_usage does not take compression into account and + // will reserve way more that actually will be used. + // + // Hence, let's do a simple check. + if (!enoughSpaceInDirectory(tmp_path, size)) + throw Exception("Not enough space for external aggregation in " + tmp_path, ErrorCodes::NOT_ENOUGH_SPACE); + + writeToTemporaryFile(result, tmp_path); + } + + return false; +} + +template +bool Aggregator::executeAndRetractImpl( + Method & method, + Arena * aggregates_pool, + Arena * retracted_pool, + size_t row_begin, + size_t row_end, + ColumnRawPtrs & key_columns, + AggregateFunctionInstruction * aggregate_instructions) const +{ + typename Method::State state(key_columns, key_sizes, aggregation_state_cache); + bool need_finalization = false; + + /// NOTE: only row_end-row_start is required, but: + /// - this affects only optimize_aggregation_in_order, + /// - this is just a pointer, so it should not be significant, + /// - and plus this will require other changes in the interface. + std::unique_ptr places(new AggregateDataPtr[row_end]); + + /// For all rows. + for (size_t i = row_begin; i < row_end; ++i) + { + AggregateDataPtr aggregate_data = nullptr; + + auto emplace_result = state.emplaceKey(method.data, i, *aggregates_pool); + + /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key. + if (emplace_result.isInserted()) + { + /// exception-safety - if you can not allocate memory or create states, then destructors will not be called. 
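The `setMapped(nullptr)` / `createAggregateStates` sequence that follows relies on the exception-safety idiom named in the comment above: the slot is published with a well-defined null value before state creation, so cleanup after a failed allocation never touches a half-initialized entry. A minimal stand-alone illustration of the idea, using a plain `std::map` and a toy `State` type rather than the real Arena/aggregate-function machinery:

// Toy model: a map from key to "aggregate state" pointer, where state creation may throw.
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>

struct State { int value = 0; };

State * createStateMayThrow(bool fail)
{
    if (fail)
        throw std::runtime_error("allocation failed");
    return new State{};
}

int main()
{
    std::map<std::string, State *> table;

    try
    {
        auto & slot = table["key"];
        slot = nullptr;                       /// publish a well-defined value first
        slot = createStateMayThrow(/*fail*/ true);
    }
    catch (const std::exception & e)
    {
        std::cout << "creation failed: " << e.what() << '\n';
    }

    /// Cleanup can now safely skip entries whose state was never created.
    for (auto & [key, state] : table)
    {
        if (state)
            delete state;
        else
            std::cout << key << " has no state, nothing to destroy\n";
    }
}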
+ emplace_result.setMapped(nullptr); + + aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + /// TODO: support use_compiled_functions + createAggregateStates(aggregate_data); + emplace_result.setMapped(aggregate_data); + } + else + { + aggregate_data = emplace_result.getMapped(); + + /// Save changed group with retracted state (used for emit changed group) + /// If there are aggregate data and no retracted data, copy aggregate data to retracted data before changed + if (!UpdatedDataEx::isEmpty(aggregate_data) && !RetractedDataEx::hasRetracted(aggregate_data)) + { + auto & retracted = RetractedDataEx::getRetracted(aggregate_data); + auto tmp_retracted = retracted_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + createAggregateStates(tmp_retracted); + retracted = tmp_retracted; + mergeAggregateStates(retracted, aggregate_data, retracted_pool, /*clear_states*/ false); + } + } + + assert(aggregate_data != nullptr); + places[i] = aggregate_data; + } + + /// Add values to the aggregate functions. + for (size_t i = 0; i < aggregate_functions.size(); ++i) + { + AggregateFunctionInstruction * inst = aggregate_instructions + i; + + if (inst->offsets) + inst->batch_that->addBatchArray(row_begin, row_end, places.get(), inst->state_offset, inst->batch_arguments, inst->offsets, aggregates_pool); + else + inst->batch_that->addBatch(row_begin, row_end, places.get(), inst->state_offset, inst->batch_arguments, aggregates_pool, -1, inst->delta_column); + + if (inst->batch_that->isUserDefined()) { AggregateDataPtr * places_ptr = places.get(); /// It is ok to re-flush if it is flush already, then we don't need maintain a map to check if it is ready flushed @@ -3984,6 +3811,9 @@ bool Aggregator::executeAndRetractImpl( } } + if (hasExpandedData()) + UpdatedDataEx::addBatch(row_begin, row_end, places.get(), aggregate_instructions ? aggregate_instructions->delta_column : nullptr); + return need_finalization; } @@ -3992,7 +3822,6 @@ std::pair Aggregator::executeAndRetractOnBlock( size_t row_begin, size_t row_end, AggregatedDataVariants & result, - AggregatedDataVariants & retracted_result, ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, bool & no_more_keys) const @@ -4008,6 +3837,7 @@ std::pair Aggregator::executeAndRetractOnBlock( if (result.empty()) { initDataVariants(result, method_chosen, key_sizes, params); + initStatesForWithoutKeyOrOverflow(result); LOG_TRACE(log, "Aggregation method: {}", result.getMethodName()); } @@ -4020,324 +3850,389 @@ std::pair Aggregator::executeAndRetractOnBlock( prepareAggregateInstructions(columns, aggregate_columns, materialized_columns, aggregate_functions_instructions, nested_columns_holder); assert(!params.overflow_row && !no_more_keys); - - retracted_result.aggregator = this; + assert(expanded_data_type == ExpandedDataType::UpdatedWithRetracted); if (result.type == AggregatedDataVariants::Type::without_key) { /// Save last finalization state into `retracted_result` before processing new data. 
/// We shall clear and reset it after finalization - if (retracted_result.empty()) + if (!UpdatedDataEx::isEmpty(result.without_key) && !RetractedDataEx::hasRetracted(result.without_key)) { - initDataVariants(retracted_result, method_chosen, key_sizes, params); - - if (result.without_key) - { - initStatesForWithoutKeyOrOverflow(retracted_result); - mergeAggregateStates(retracted_result.without_key, result.without_key, retracted_result.aggregates_pool, false); - } + auto & retracted = RetractedDataEx::getRetracted(result.without_key); + auto tmp_retracted = result.retracted_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + createAggregateStates(tmp_retracted); + retracted = tmp_retracted; + mergeAggregateStates(retracted, result.without_key, result.retracted_pool.get(), /*clear_states*/ false); } - initStatesForWithoutKeyOrOverflow(result); - need_finalization = executeWithoutKeyImpl(result.without_key, row_begin, row_end, aggregate_functions_instructions.data(), result.aggregates_pool); + need_finalization = executeWithoutKeyImpl( + result.without_key, row_begin, row_end, aggregate_functions_instructions.data(), result.aggregates_pool); } - else - { - if (retracted_result.empty()) - initDataVariants(retracted_result, method_chosen, key_sizes, params); - if (result.isTwoLevel() && !retracted_result.isTwoLevel()) - retracted_result.convertToTwoLevel(); - - #define M(NAME, IS_TWO_LEVEL) \ - else if (result.type == AggregatedDataVariants::Type::NAME) \ - need_finalization = executeAndRetractImpl(*result.NAME, result.aggregates_pool, *retracted_result.NAME, retracted_result.aggregates_pool, row_begin, row_end, key_columns, aggregate_functions_instructions.data()); +#define M(NAME, IS_TWO_LEVEL) \ + else if (result.type == AggregatedDataVariants::Type::NAME) \ + need_finalization = executeAndRetractImpl(*result.NAME, result.aggregates_pool, result.retracted_pool.get(), row_begin, row_end, key_columns, aggregate_functions_instructions.data()); - if (false) {} // NOLINT - APPLY_FOR_AGGREGATED_VARIANTS_STREAMING(M) - #undef M - } + APPLY_FOR_AGGREGATED_VARIANTS_STREAMING(M) +#undef M need_abort = checkAndProcessResult(result, no_more_keys); - /// it's possible for gloabl single level hash table was converted to two level table after `checkAndProcessResult`, - /// so we also convert retarcted data to two level. - if (result.isTwoLevel() && !retracted_result.isTwoLevel()) - retracted_result.convertToTwoLevel(); - return return_result; } -std::pair -Aggregator::mergeRetractedGroups(ManyAggregatedDataVariants & aggregated_data, ManyAggregatedDataVariants & retracted_data) const +BlocksList Aggregator::convertUpdatedToBlocks(AggregatedDataVariants & data_variants) const { - auto prepared_data = prepareVariantsToMerge(aggregated_data, /*always_merge_into_empty*/ true); - if (prepared_data->empty()) - return {}; + LOG_DEBUG(log, "Converting updated aggregated data to blocks"); - auto first = prepared_data->at(0); + Stopwatch watch; - auto prepared_retracted_data = prepareVariantsToMerge(retracted_data, first->type != AggregatedDataVariants::Type::without_key); - assert(!prepared_retracted_data->empty()); + BlocksList blocks; - /// So far, only global aggregation support emit changelog, so time bucket two level is not possible + /// In what data structure is the data aggregated? + if (data_variants.empty()) + return blocks; -#define M(NAME, ...) 
\ - else if (first->type == AggregatedDataVariants::Type::NAME) \ - mergeRetractedGroupsImplNAME)::element_type>(*prepared_data, *prepared_retracted_data); + if (unlikely(params.overflow_row)) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Overflow row processing is not implemented in streaming aggregation"); - if (first->type == AggregatedDataVariants::Type::without_key) - { - mergeWithoutKeyDataImpl(*prepared_retracted_data, true); - mergeWithoutKeyDataImpl(*prepared_data, false); - } - APPLY_FOR_VARIANTS_SINGLE_LEVEL_STREAMING(M) - APPLY_FOR_VARIANTS_STATIC_BUCKET_TWO_LEVEL(M) -#undef M + constexpr bool final = true; + constexpr bool clear_states = false; + if (data_variants.type == AggregatedDataVariants::Type::without_key) + blocks.emplace_back(prepareBlockAndFillWithoutKey(data_variants, final, false, clear_states, AggregateStateType::OnlyUpdated)); + else if (!data_variants.isTwoLevel()) + blocks.emplace_back(prepareBlockAndFillSingleLevel(data_variants, final, clear_states, AggregateStateType::OnlyUpdated)); else - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + blocks.splice(blocks.end(), prepareBlocksAndFillTwoLevel(data_variants, final, clear_states, /*max_threads*/ 1, AggregateStateType::OnlyUpdated)); - return {prepared_data->at(0), prepared_retracted_data->at(0)}; -} + size_t rows = 0; + size_t bytes = 0; -template -void Aggregator::mergeRetractedGroupsImpl( - ManyAggregatedDataVariants & aggregated_data, ManyAggregatedDataVariants & retracted_data) const -{ - AggregatedDataVariantsPtr & res = aggregated_data[0]; - AggregatedDataVariantsPtr & retracted_res = retracted_data[0]; + for (const auto & block : blocks) + { + rows += block.rows(); + bytes += block.bytes(); + } - bool no_more_keys = false; + double elapsed_seconds = watch.elapsedSeconds(); + LOG_DEBUG(log, + "Converted updated aggregated data to blocks. {} rows, {} in {} sec. ({:.3f} rows/sec., {}/sec.)", + rows, ReadableSize(bytes), + elapsed_seconds, rows / elapsed_seconds, + ReadableSize(bytes / elapsed_seconds)); + + return blocks; +} - using Table = typename Method::Data; - Table & dst_table = getDataVariant(*res).data; - Table & dst_retracted_table = getDataVariant(*retracted_res).data; - /// First data variants always is empty. - assert(dst_table.empty() && dst_retracted_table.empty()); +template +void NO_INLINE Aggregator::mergeUpdatedGroupsImpl(ManyAggregatedDataVariants & non_empty_data, Arena * arena) const +{ + AggregatedDataVariantsPtr & res = non_empty_data[0]; + auto & dst_table = getDataVariant(*res).data; + /// Always merge updated data into empty first. 
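`mergeUpdatedGroupsImpl`, whose body continues below, leans on the two-level table's per-bucket `updated` flags (via `forEachValueOfUpdatedBuckets`) so that only buckets touched since the last emit are scanned. A simplified, self-contained sketch of that bookkeeping; the `BucketedMap` class and its fixed bucket count are stand-ins, not the real `TwoLevelHashMap`:

#include <cstdint>
#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>

/// Toy stand-in for a bucketed hash table that remembers which buckets were modified.
class BucketedMap
{
public:
    void insert(const std::string & key, int64_t value)
    {
        size_t bucket = std::hash<std::string>{}(key) % NUM_BUCKETS;
        impls[bucket][key] = value;
        updated[bucket] = true;              /// mark the bucket dirty on every write
    }

    /// Visit only the buckets that changed since the last emit, optionally clearing the flags.
    void forEachValueOfUpdatedBuckets(const std::function<void(const std::string &, int64_t)> & func, bool reset_updated = false)
    {
        for (size_t b = 0; b < NUM_BUCKETS; ++b)
        {
            if (!updated[b])
                continue;
            for (const auto & [k, v] : impls[b])
                func(k, v);
            if (reset_updated)
                updated[b] = false;
        }
    }

private:
    static constexpr size_t NUM_BUCKETS = 8;
    std::unordered_map<std::string, int64_t> impls[NUM_BUCKETS];
    bool updated[NUM_BUCKETS] = {};
};

int main()
{
    BucketedMap map;
    map.insert("device-1", 10);
    map.insert("device-2", 20);
    map.forEachValueOfUpdatedBuckets([](const auto & k, auto v) { std::cout << k << " = " << v << '\n'; }, /*reset_updated*/ true);
    map.forEachValueOfUpdatedBuckets([](const auto & k, auto v) { std::cout << "second pass: " << k << '\n'; }); /// prints nothing, flags were reset
}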
+    assert(dst_table.empty());
 
     /// For example:
     ///             thread-1        thread-2
-    /// group-1     changed         non-changed
-    /// group-2     non-changed     changed
-    /// group-3     non-changed     non-changed
-
-    /// Collect all changed groups, then merge retracted/updated data
-    /// 1) Collect changed groups:
-    /// `dst_retracted` <= (thread-1: group-1) + (thread-2: group-2)
-    for (size_t result_num = 1, size = retracted_data.size(); result_num < size; ++result_num)
+    /// group-1     updated         non-updated
+    /// group-2     non-updated     updated
+    /// group-3     non-updated     non-updated
+    ///
+    /// 1) Collect all updated groups
+    /// `dst` <= (group-1, group-2)
+    bool no_more_keys = false;
+    using Table = typename Method::Data;
+    for (size_t result_num = 1, size = non_empty_data.size(); result_num < size; ++result_num)
     {
-        if (!checkLimits(retracted_res->sizeWithoutOverflowRow(), no_more_keys))
+        if (!checkLimits(res->sizeWithoutOverflowRow(), no_more_keys))
             break;
 
         assert(!no_more_keys);
 
-        auto & src_retracted_table = getDataVariant<Method>(*retracted_data[result_num]).data;
-        src_retracted_table.mergeToViaEmplace(dst_retracted_table, [&](AggregateDataPtr & __restrict dst, AggregateDataPtr & __restrict src, bool inserted) {
+        auto & src_table = getDataVariant<Method>(*non_empty_data[result_num]).data;
+        auto merge_updated_func = [&](const auto & key, auto & mapped) {
+            /// Skip groups that were not updated
+            if (!UpdatedDataEx::isUpdated(mapped))
+                return;
+
+            typename Table::LookupResult dst_it;
+            bool inserted;
+            /// For StringRef `key`, it is safe to store into `dst_table`
+            /// since the `dst_table` is temporary and the `src_table` will not be cleaned in the meantime
+            dst_table.emplace(key, dst_it, inserted);
             if (inserted)
-                dst = nullptr;
+            {
+                auto & dst = dst_it->getMapped();
+                dst = nullptr; /// exception-safety - if you can not allocate memory or create states, then destructors will not be called.
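The two passes sketched in the comments above (first collect the union of updated keys across threads, then merge every thread's partial state for exactly those keys and clear the flags) can be modelled with ordinary containers. This is a simplified illustration with made-up `Group`/`Table` types, not the real Method/Arena code:

#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Group { long sum = 0; bool updated = false; };
using Table = std::map<std::string, Group>;

/// Pass 1: collect keys that any thread marked as updated; pass 2: merge all threads' values for those keys.
Table mergeUpdatedGroups(std::vector<Table> & per_thread)
{
    Table dst;
    for (auto & table : per_thread)
        for (auto & [key, group] : table)
            if (group.updated)
                dst[key];                       /// just reserve the key

    for (auto & table : per_thread)
        for (auto & [key, group] : dst)
        {
            auto it = table.find(key);
            if (it != table.end())
            {
                group.sum += it->second.sum;    /// merge this thread's partial state
                it->second.updated = false;     /// reset the flag once consumed
            }
        }
    return dst;
}

int main()
{
    std::vector<Table> threads(2);
    threads[0] = {{"group-1", {5, true}}, {"group-2", {7, false}}, {"group-3", {1, false}}};
    threads[1] = {{"group-1", {2, false}}, {"group-2", {3, true}}};

    for (const auto & [key, group] : mergeUpdatedGroups(threads))
        std::cout << key << " -> " << group.sum << '\n';   /// group-1 -> 7, group-2 -> 10; group-3 is skipped
}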
+ auto aggregate_data = arena->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + createAggregateStates(aggregate_data); + dst = aggregate_data; + } + }; - mergeAggregateStates(dst, src, retracted_res->aggregates_pool, true); - }); + if constexpr (is_two_level) + src_table.forEachValueOfUpdatedBuckets(std::move(merge_updated_func), /*reset_updated*/ true); + else + src_table.forEachValue(std::move(merge_updated_func)); } - /// 2) Merge retracted groups non-changed thread parts (based on all changed groups) - /// `dst_retracted` <= (thread-1: group-2) + (thread-2: group-1) - for (size_t result_num = 1, size = retracted_data.size(); result_num < size; ++result_num) + /// 2) Merge all updated groups parts for each thread (based on `1)` ) + /// `dst` <= (thread-1: group-1 group-2) + (thread-2: group-1 group-2) + for (size_t result_num = 1, size = non_empty_data.size(); result_num < size; ++result_num) { - if (!checkLimits(retracted_res->sizeWithoutOverflowRow(), no_more_keys)) - break; - - assert(!no_more_keys); - - auto & current_retracted = *retracted_data[result_num]; - Table & src_retracted_table = getDataVariant(current_retracted).data; - Table & src_aggregated_table = getDataVariant(*aggregated_data[result_num]).data; - dst_retracted_table.forEachValue([&](const auto & key, auto & mapped) { - /// Merge retracted groups non-changed thread parts - if (!src_retracted_table.find(key)) + auto & src_table = getDataVariant(*non_empty_data[result_num]).data; + dst_table.forEachValue([&](const auto & key, auto & mapped) { + if (auto find_it = src_table.find(key)) { - auto find_it = src_aggregated_table.find(key); - if (find_it) - mergeAggregateStates( - mapped, - find_it->getMapped(), - retracted_res->aggregates_pool, - /*clear_states*/ false); - }}); - - /// Reset retracted data after finalization - clearDataVariants(current_retracted); + mergeAggregateStates(mapped, find_it->getMapped(), arena, /*clear_states*/ false); + /// NOTE: We always reset the updated flag after merged + UpdatedDataEx::resetUpdated(find_it->getMapped()); + } + }); } +} - /// 3) Merge new/updated groups (based on all changed groups) - /// `dst` <= (thread-1: group-1 group-2) + (thread-2: group-1 group-2) - for (size_t result_num = 1, size = aggregated_data.size(); result_num < size; ++result_num) - { - if (!checkLimits(res->sizeWithoutOverflowRow(), no_more_keys)) - break; +AggregatedDataVariantsPtr Aggregator::mergeUpdatedGroups(ManyAggregatedDataVariants & data_variants) const +{ + auto prepared_data_ptr = prepareVariantsToMerge(data_variants, /*always_merge_into_empty*/ true); + if (prepared_data_ptr->empty()) + return {}; - assert(!no_more_keys); - Table & src_aggregated_table = getDataVariant(*aggregated_data[result_num]).data; - dst_retracted_table.forEachValue([&](const auto & key, auto & mapped) { - /// Merge new/updated groups - typename Table::LookupResult dst_it; - bool inserted; + if (unlikely(params.overflow_row)) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Overflow row processing is not implemented in streaming aggregation"); - /// NOTE: For StringRef `key`, its memory was allocated in `retracted_res->aggregates_pool`, - /// we shall save this key in itself pool (i.e. 
res->aggregates_pool) if inserted - using KeyType = std::decay_t; - if constexpr (std::is_same_v) - dst_table.emplace(ArenaKeyHolder{key, *res->aggregates_pool}, dst_it, inserted); - else - dst_table.emplace(key, dst_it, inserted); + BlocksList blocks; + auto & first = *prepared_data_ptr->at(0); + if (first.type == AggregatedDataVariants::Type::without_key) + { + if (std::ranges::none_of(*prepared_data_ptr, [](auto & variants) { + return variants->without_key && UpdatedDataEx::isUpdated(variants->without_key); + })) + return {}; - if (inserted) - dst_it->getMapped() = nullptr; - - auto find_it = src_aggregated_table.find(key); - if (find_it) - mergeAggregateStates( - dst_it->getMapped(), - find_it->getMapped(), - res->aggregates_pool, - /*clear_states*/ false); - }); + mergeWithoutKeyDataImpl(*prepared_data_ptr, /*clear_states*/ false); } + +#define M(NAME, IS_TWO_LEVEL) \ + else if (first.type == AggregatedDataVariants::Type::NAME) \ + mergeUpdatedGroupsImpl(*prepared_data_ptr, first.aggregates_pool); + + APPLY_FOR_AGGREGATED_VARIANTS_STREAMING(M) +#undef M + else throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + + return prepared_data_ptr->at(0); } -void Aggregator::mergeAggregateStates(AggregateDataPtr & dst, AggregateDataPtr & src, Arena * arena, bool clear_states) const +BlocksList Aggregator::convertRetractedToBlocks(AggregatedDataVariants & data_variants) const { - if (!src) - return; + LOG_DEBUG(log, "Converting retracted aggregated data to blocks"); + + Stopwatch watch; + + BlocksList blocks; + + /// In what data structure is the data aggregated? + if (data_variants.empty()) + return blocks; + + if (unlikely(params.overflow_row)) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Overflow row processing is not implemented in streaming aggregation"); + + constexpr bool final = true; + constexpr bool clear_states = true; + if (data_variants.type == AggregatedDataVariants::Type::without_key) + blocks.emplace_back(prepareBlockAndFillWithoutKey(data_variants, final, false, clear_states, AggregateStateType::OnlyRetracted)); + else if (!data_variants.isTwoLevel()) + blocks.emplace_back(prepareBlockAndFillSingleLevel(data_variants, final, clear_states, AggregateStateType::OnlyRetracted)); + else + blocks.splice(blocks.end(), prepareBlocksAndFillTwoLevel(data_variants, final, clear_states, /*max_threads*/ 1, AggregateStateType::OnlyRetracted)); + + size_t rows = 0; + size_t bytes = 0; - if (!dst) + for (const auto & block : blocks) { - dst = arena->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(dst); + rows += block.rows(); + bytes += block.bytes(); } - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_functions[i]->merge(dst + offsets_of_aggregate_states[i], src + offsets_of_aggregate_states[i], arena); + double elapsed_seconds = watch.elapsedSeconds(); + LOG_DEBUG(log, + "Converted retracted aggregated data to blocks. {} rows, {} in {} sec. 
({:.3f} rows/sec., {}/sec.)",
+        rows, ReadableSize(bytes),
+        elapsed_seconds, rows / elapsed_seconds,
+        ReadableSize(bytes / elapsed_seconds));
 
-    if (clear_states)
-        destroyAggregateStates(src);
+    return blocks;
 }
 
-void Aggregator::destroyAggregateStates(AggregateDataPtr & place) const
+template <typename Method>
+void Aggregator::mergeRetractedGroupsImpl(ManyAggregatedDataVariants & non_empty_data, Arena * arena) const
 {
-    if (place)
+    AggregatedDataVariantsPtr & res = non_empty_data[0];
+    auto & dst_table = getDataVariant<Method>(*res).data;
+    /// Always merge retracted data into empty first.
+    assert(dst_table.empty());
+
+    /// For example:
+    ///             thread-1        thread-2
+    /// group-1     retracted       non-retracted
+    /// group-2     non-retracted   retracted
+    /// group-3     non-retracted   non-retracted
+    ///
+    /// 1) Collect all retracted groups
+    /// `dst` <= (group-1, group-2)
+    bool no_more_keys = false;
+    using Table = typename Method::Data;
+    for (size_t result_num = 1, size = non_empty_data.size(); result_num < size; ++result_num)
     {
-        for (size_t i = 0; i < params.aggregates_size; ++i)
-            aggregate_functions[i]->destroy(place + offsets_of_aggregate_states[i]);
+        if (!checkLimits(res->sizeWithoutOverflowRow(), no_more_keys))
+            break;
 
-        place = nullptr;
-    }
-}
+        assert(!no_more_keys);
 
-void Aggregator::serializeAggregateStates(const AggregateDataPtr & place, WriteBuffer & wb) const
-{
-    UInt8 has_states = place ? 1 : 0;
-    writeIntBinary(has_states, wb);
-    if (has_states)
-    {
-        for (size_t i = 0; i < params.aggregates_size; ++i)
-            aggregate_functions[i]->serialize(place + offsets_of_aggregate_states[i], wb);
+        auto & src_table = getDataVariant<Method>(*non_empty_data[result_num]).data;
+        src_table.forEachValue([&](const auto & key, auto & mapped) {
+            /// Skip groups that have no retracted state
+            if (!RetractedDataEx::hasRetracted(mapped))
+                return;
+
+            typename Table::LookupResult dst_it;
+            bool inserted;
+            /// For StringRef `key`, it is safe to store into `dst_table`
+            /// since the `dst_table` is temporary and the `src_table` will not be cleaned in the meantime
+            dst_table.emplace(key, dst_it, inserted);
+            if (inserted)
+            {
+                auto & dst = dst_it->getMapped();
+                dst = nullptr; /// exception-safety - if you can not allocate memory or create states, then destructors will not be called.
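The retracted state gathered here is the value a group had at the previous emit; downstream, it is paired with the freshly updated value to form the changelog (old value with delta -1, new value with delta +1). A toy end-to-end illustration of that snapshot-then-emit flow, using plain standard-library types rather than the real block/chunk machinery:

#include <iostream>
#include <map>
#include <optional>
#include <string>
#include <tuple>
#include <vector>

struct Group
{
    long sum = 0;
    std::optional<long> retracted;   /// value at the previous emit, captured before applying new rows
};

int main()
{
    std::map<std::string, Group> groups = {{"device-1", {10, {}}}};

    /// New rows arrive for device-1: snapshot the old value once, then update in place.
    auto & group = groups["device-1"];
    if (!group.retracted)
        group.retracted = group.sum;
    group.sum += 5;

    /// Emit changelog: old value with delta -1, new value with delta +1.
    std::vector<std::tuple<std::string, long, int>> changelog;
    for (auto & [key, g] : groups)
    {
        if (!g.retracted)
            continue;                      /// unchanged groups are not emitted
        changelog.emplace_back(key, *g.retracted, -1);
        changelog.emplace_back(key, g.sum, +1);
        g.retracted.reset();               /// the snapshot is discarded after the emit
    }

    for (const auto & [key, value, delta] : changelog)
        std::cout << key << " sum=" << value << " delta=" << delta << '\n';
}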
+ auto aggregate_data = arena->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + createAggregateStates(aggregate_data); + dst = aggregate_data; + } + }); } -} -void Aggregator::deserializeAggregateStates(AggregateDataPtr & place, ReadBuffer & rb, Arena * arena) const -{ - UInt8 has_states; - readIntBinary(has_states, rb); - if (has_states) + /// 2) Merge all retracted groups parts for each thread (based on `1)` ) + /// `dst` <= (thread-1: group-1 group-2) + (thread-2: group-1 group-2) + for (size_t result_num = 1, size = non_empty_data.size(); result_num < size; ++result_num) { - if (!place) - { - /// Allocate states for all aggregate functions - AggregateDataPtr aggregate_data = arena->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(aggregate_data); - place = aggregate_data; - } + auto & current = *non_empty_data[result_num]; + auto & src_table = getDataVariant(current).data; + dst_table.forEachValue([&](const auto & key, auto & mapped) { + if (auto find_it = src_table.find(key)) + { + auto & src_mapped = find_it->getMapped(); + if (RetractedDataEx::hasRetracted(src_mapped)) + mergeAggregateStates(mapped, RetractedDataEx::getRetracted(src_mapped), arena, /*clear_states*/ true); + else + /// If retracted data not exist, assume it does't be changed, we should use original data + mergeAggregateStates(mapped, src_mapped, arena, /*clear_states*/ false); + } + }); - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_functions[i]->deserialize(place + offsets_of_aggregate_states[i], rb, std::nullopt, arena); + current.resetRetractedPool(); } } -void Aggregator::clearDataVariants(AggregatedDataVariants & data_variants) const +AggregatedDataVariantsPtr Aggregator::mergeRetractedGroups(ManyAggregatedDataVariants & data_variants) const { - /// Clear states - destroyAllAggregateStates(data_variants); + auto prepared_data_ptr = prepareVariantsToMerge(data_variants, /*always_merge_into_empty*/ true); + if (prepared_data_ptr->empty()) + return {}; - /// Clear hash map - switch (data_variants.type) + if (unlikely(params.overflow_row)) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Overflow row processing is not implemented in streaming aggregation"); + + auto & first = *prepared_data_ptr->at(0); + if (first.type == AggregatedDataVariants::Type::without_key) { - case AggregatedDataVariants::Type::EMPTY: break; - case AggregatedDataVariants::Type::without_key: break; + if (std::ranges::none_of(*prepared_data_ptr, [](auto & variants) { return RetractedDataEx::hasRetracted(variants->without_key); })) + return {}; /// Skip if no retracted - #define M(NAME, IS_TWO_LEVEL) \ - case AggregatedDataVariants::Type::NAME: data_variants.NAME.reset(); break; - APPLY_FOR_AGGREGATED_VARIANTS_STREAMING(M) - #undef M + for (size_t result_num = 1, size = prepared_data_ptr->size(); result_num < size; ++result_num) + { + auto & src_without_key = (*prepared_data_ptr)[result_num]->without_key; + if (RetractedDataEx::hasRetracted(src_without_key)) + mergeAggregateStates(first.without_key, RetractedDataEx::getRetracted(src_without_key), first.aggregates_pool, /*clear_states*/ true); + else + /// If retracted data not exist, assume it does't be changed, we should use original data + mergeAggregateStates(first.without_key, src_without_key, first.aggregates_pool, /*clear_states*/ false); + } } - data_variants.invalidate(); - /// Reset pool - data_variants.aggregates_pools = Arenas(1, std::make_shared()); - data_variants.aggregates_pool = 
data_variants.aggregates_pools.back().get(); +#define M(NAME) \ + else if (first.type == AggregatedDataVariants::Type::NAME) \ + mergeRetractedGroupsImpl(*prepared_data_ptr, first.aggregates_pool); + + APPLY_FOR_VARIANTS_SINGLE_LEVEL_STREAMING(M) + APPLY_FOR_VARIANTS_ALL_TWO_LEVEL(M) +#undef M + else throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + + return prepared_data_ptr->at(0); } -bool Aggregator::checkAndProcessResult(AggregatedDataVariants & result, bool & no_more_keys) const +template +void Aggregator::mergeRetractedIntoImpl(Method & method, Method & retracted_method, Arena * arena) const { - size_t result_size = result.sizeWithoutOverflowRow(); - Int64 current_memory_usage = 0; - if (auto * memory_tracker_child = CurrentThread::getMemoryTracker()) - if (auto * memory_tracker = memory_tracker_child->getParent()) - current_memory_usage = memory_tracker->get(); + using Table = typename Method::Data; + Table & table = method.data; + Table & retracted_table = retracted_method.data; - /// Here all the results in the sum are taken into account, from different threads. - Int64 result_size_bytes = current_memory_usage - memory_usage_before_aggregation; + retracted_table.forEachValue([&](const auto & key, auto & retracted_mapped) { - bool worth_convert_to_two_level = worthConvertToTwoLevel( - params.group_by_two_level_threshold, result_size, params.group_by_two_level_threshold_bytes, result_size_bytes); + auto find_it = table.find(key); + assert(find_it); - /** Converting to a two-level data structure. - * It allows you to make, in the subsequent, an effective merge - either economical from memory or parallel. - */ - if (result.isConvertibleToTwoLevel() && worth_convert_to_two_level) - result.convertToTwoLevel(); + auto & mapped = find_it->getMapped(); + assert(!RetractedDataEx::hasRetracted(mapped)); + UpdatedDataEx::setUpdated(mapped); - /// Checking the constraints. - if (!checkLimits(result_size, no_more_keys)) - return true; - - /** Flush data to disk if too much RAM is consumed. - * Data can only be flushed to disk if a two-level aggregation structure is used. - */ - if (params.max_bytes_before_external_group_by - && result.isTwoLevel() - && current_memory_usage > static_cast(params.max_bytes_before_external_group_by) - && worth_convert_to_two_level) - { - size_t size = current_memory_usage + params.min_free_disk_space; + /// For old impl, no retracted data for new group + if (!retracted_mapped) + return; - std::string tmp_path = params.tmp_volume->getDisk()->getPath(); + auto & retracted = RetractedDataEx::getRetracted(mapped); + auto tmp_retracted = arena->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + createAggregateStates(tmp_retracted); + retracted = tmp_retracted; + mergeAggregateStates(retracted, retracted_mapped, arena, /*clear_states*/ true); + }); +} - // enoughSpaceInDirectory() is not enough to make it right, since - // another process (or another thread of aggregator) can consume all - // space. - // - // But true reservation (IVolume::reserve()) cannot be used here since - // current_memory_usage does not take compression into account and - // will reserve way more that actually will be used. - // - // Hence, let's do a simple check. 
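Both the removed code above and the relocated `checkAndProcessResult` guard the external-aggregation spill with a best-effort free-space probe, for exactly the reasons spelled out in the comment. A self-contained sketch of such a check using `std::filesystem`; `enoughSpaceForSpill` is a hypothetical helper, not the project's `enoughSpaceInDirectory`:

#include <cstdint>
#include <filesystem>
#include <iostream>

/// Returns true if `path` appears to have at least `required_bytes` available.
/// This mirrors the "simple check" idea: it is only a snapshot and can still race
/// with other writers, which is exactly the caveat spelled out in the comment above.
bool enoughSpaceForSpill(const std::filesystem::path & path, std::uintmax_t required_bytes)
{
    std::error_code ec;
    auto info = std::filesystem::space(path, ec);
    if (ec)
        return false;   /// be conservative if the filesystem cannot be queried
    return info.available >= required_bytes;
}

int main()
{
    const std::filesystem::path tmp_path = "/tmp";            /// stand-in for the configured tmp volume
    const std::uintmax_t required = 512ull * 1024 * 1024;     /// e.g. current memory usage + minimum free disk space

    if (enoughSpaceForSpill(tmp_path, required))
        std::cout << "enough space, external aggregation could spill to " << tmp_path << '\n';
    else
        std::cout << "not enough space for external aggregation in " << tmp_path << '\n';
}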
- if (!enoughSpaceInDirectory(tmp_path, size)) - throw Exception("Not enough space for external aggregation in " + tmp_path, ErrorCodes::NOT_ENOUGH_SPACE); +void Aggregator::mergeRetractedInto(AggregatedDataVariants & result, AggregatedDataVariants && retracted_result) const +{ + assert(expanded_data_type == ExpandedDataType::UpdatedWithRetracted); + if (result.type != retracted_result.type) [[unlikely]] + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Don't merge retracted aggregation result, the current data variants type is {}, but retracted data variants type is {}", + magic_enum::enum_name(result.type), + magic_enum::enum_name(retracted_result.type)); - writeToTemporaryFile(result, tmp_path); + Arena * arena = result.retracted_pool.get(); + if (result.type == AggregatedDataVariants::Type::without_key) + { + if (retracted_result.without_key) + { + assert(!RetractedDataEx::hasRetracted(result.without_key)); + auto & retracted = RetractedDataEx::getRetracted(result.without_key); + auto tmp_retracted = arena->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + createAggregateStates(tmp_retracted); + retracted = tmp_retracted; + mergeAggregateStates(retracted, retracted_result.without_key, arena, /*clear_states*/ true); + } } - return false; +#define M(NAME, IS_TWO_LEVEL) \ + else if (result.type == AggregatedDataVariants::Type::NAME) \ + mergeRetractedIntoImpl(*result.NAME, *retracted_result.NAME, arena); + + APPLY_FOR_AGGREGATED_VARIANTS_STREAMING(M) +#undef M + + retracted_result.reset(); } void Aggregator::updateMetrics(const AggregatedDataVariants & variants, AggregatedDataMetrics & metrics) const diff --git a/src/Interpreters/Streaming/Aggregator.h b/src/Interpreters/Streaming/Aggregator.h index e90535dea75..945260170a0 100644 --- a/src/Interpreters/Streaming/Aggregator.h +++ b/src/Interpreters/Streaming/Aggregator.h @@ -37,11 +37,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include /// proton: ends @@ -74,15 +76,11 @@ namespace Streaming * best suited for different cases, and this approach is just one of them, chosen for a combination of reasons. */ -enum class ConvertAction : uint8_t +enum class AggregateStateType { - Unkonwn = 0, - DistributedMerge, - WriteToTmpFS, - Checkpoint, - StreamingEmit, - InternalMerge, - RetractedEmit + Normal, + OnlyUpdated, + OnlyRetracted, }; /// using TimeBucketAggregatedDataWithUInt16Key = TimeBucketHashMap>; @@ -103,7 +101,8 @@ using TimeBucketAggregatedDataWithKeys256TwoLevel = TimeBucketHashMap retracted_pool; /// Use an independent pool to manage retracted data, which will be cleared after each finalization /** Specialization for the case when there are no keys, and for keys not fitted into max_rows_to_group_by. */ @@ -371,6 +371,17 @@ struct AggregatedDataVariants : private boost::noncopyable /// proton: ends; } + void reset(); + + void resetAggregatesPool() + { + aggregates_pools = Arenas(1, std::make_shared()); + aggregates_pool = aggregates_pools.back().get(); + aggregates_pool->enableRecycle(true); + } + + void resetRetractedPool() { retracted_pool = std::make_unique(); } + /// Number of rows (different keys). 
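The `retracted_pool` introduced above keeps pre-change states in their own arena so they can be dropped wholesale by `resetRetractedPool()` after every changelog emit, instead of accumulating in the main aggregates pool. A toy illustration of that lifetime split; the vector-backed `Pool` is a stand-in for the real `Arena`:

#include <iostream>
#include <memory>
#include <string>
#include <vector>

/// Toy arena: owns a batch of allocations that all disappear together when the pool is reset.
struct Pool
{
    std::vector<std::unique_ptr<std::string>> allocations;

    std::string * alloc(std::string value)
    {
        allocations.push_back(std::make_unique<std::string>(std::move(value)));
        return allocations.back().get();
    }
};

struct Variants
{
    Pool aggregates_pool;                       /// long-lived states
    std::unique_ptr<Pool> retracted_pool = std::make_unique<Pool>();

    void resetRetractedPool() { retracted_pool = std::make_unique<Pool>(); }
};

int main()
{
    Variants variants;
    variants.aggregates_pool.alloc("current state of group-1");
    variants.retracted_pool->alloc("previous state of group-1");   /// only needed until the next emit

    std::cout << "before emit: " << variants.retracted_pool->allocations.size() << " retracted allocation(s)\n";

    /// After the changelog has been emitted, the pre-change states are no longer needed.
    variants.resetRetractedPool();

    std::cout << "after emit: " << variants.retracted_pool->allocations.size() << " retracted allocation(s), "
              << variants.aggregates_pool.allocations.size() << " live state(s)\n";
}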
size_t size() const { @@ -558,12 +569,17 @@ struct AggregatedDataVariants : private boost::noncopyable throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); } } + + void serialize(WriteBuffer & wb, const Aggregator & aggregator_) const; + void deserialize(ReadBuffer & rb, const Aggregator & aggregator_); }; using AggregatedDataVariantsPtr = std::shared_ptr; using ManyAggregatedDataVariants = std::vector; using ManyAggregatedDataVariantsPtr = std::shared_ptr; +struct OutputBlockColumns; + /** How are "total" values calculated with WITH TOTALS? * (For more details, see TotalsHavingTransform.) * @@ -650,6 +666,9 @@ class Aggregator final size_t window_keys_num; WindowParamsPtr window_params; + + bool tracking_changes = false; + bool tracking_updated = false; /// proton: ends /// proton: starts @@ -670,7 +689,9 @@ class Aggregator final GroupBy streaming_group_by_ = GroupBy::OTHER, ssize_t delta_col_pos_ = -1, size_t window_keys_num_ = 0, - WindowParamsPtr window_params_ = nullptr) + WindowParamsPtr window_params_ = nullptr, + bool tracking_changes_ = false, + bool tracking_updated_ = false) : src_header(src_header_), intermediate_header(intermediate_header_), keys(keys_), aggregates(aggregates_), keys_size(keys.size()), aggregates_size(aggregates.size()), @@ -687,7 +708,9 @@ class Aggregator final group_by(streaming_group_by_), delta_col_pos(delta_col_pos_), window_keys_num(window_keys_num_), - window_params(window_params_) + window_params(window_params_), + tracking_changes(tracking_changes_), + tracking_updated(tracking_updated_) { } /// proton: ends @@ -757,7 +780,6 @@ class Aggregator final size_t row_begin, size_t row_end, AggregatedDataVariants & result, - AggregatedDataVariants & retracted_result, ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, /// Passed to not create them anew for each block bool & no_more_keys) const; @@ -787,25 +809,39 @@ class Aggregator final * SELECT avg(i) AS i, sum(k) AS k FROM my_stream GROUP BY device_id <-- first level global aggr, don't prune states * ); */ - BlocksList convertToBlocks(AggregatedDataVariants & data_variants, bool final, ConvertAction action, size_t max_threads) const; - BlocksList mergeAndConvertToBlocks(ManyAggregatedDataVariants & data_variants, bool final, ConvertAction action, size_t max_threads) const; + BlocksList convertToBlocks(AggregatedDataVariants & data_variants, bool final, bool clear_states, size_t max_threads) const; + BlocksList mergeAndConvertToBlocks(ManyAggregatedDataVariants & data_variants, bool final, bool clear_states, size_t max_threads) const; - Block convertOneBucketToBlock(AggregatedDataVariants & data_variants, bool final, ConvertAction action, size_t bucket) const; - Block mergeAndConvertOneBucketToBlock(ManyAggregatedDataVariants & variants, bool final, ConvertAction action, size_t bucket) const; - - /// Used by hop window function, merge multiple gcd windows (buckets) to a hop window + /// For Tumble/Session window function, there is only one bucket + /// For Hop window function, merge multiple gcd windows (buckets) to a hop window /// For examples: /// gcd_bucket1 - [00:00, 00:02) /// => result block - [00:00, 00:04) /// gcd_bucket2 - [00:02, 00:04) Block spliceAndConvertBucketsToBlock( - AggregatedDataVariants & variants, bool final, ConvertAction action, const std::vector & gcd_buckets) const; + AggregatedDataVariants & variants, bool final, bool clear_states, const std::vector & gcd_buckets) const; Block mergeAndSpliceAndConvertBucketsToBlock( 
-        ManyAggregatedDataVariants & variants, bool final, ConvertAction action, const std::vector<Int64> & gcd_buckets) const;
+        ManyAggregatedDataVariants & variants, bool final, bool clear_states, const std::vector<Int64> & gcd_buckets) const;
+
+    /// Convert the `updated data` (different from the `normal data`)
+    BlocksList convertUpdatedToBlocks(AggregatedDataVariants & data_variants) const;
+
+    /// \return: merged updated data if it exists
+    /// NOTE: The merged data is `normal data` and should be converted with `convertToBlocks`
+    AggregatedDataVariantsPtr mergeUpdatedGroups(ManyAggregatedDataVariants & data_variants) const;
 
-    /// Used for merge changed groups and return the of changed groups
-    std::pair
-    mergeRetractedGroups(ManyAggregatedDataVariants & aggregated_data, ManyAggregatedDataVariants & retracted_data) const;
+    /// Convert the `retracted data` (different from the `normal data`)
+    BlocksList convertRetractedToBlocks(AggregatedDataVariants & data_variants) const;
+
+    /// \return: merged retracted data if it exists
+    /// NOTE: The merged data is `normal data` and should be converted with `convertToBlocks`
+    AggregatedDataVariantsPtr mergeRetractedGroups(ManyAggregatedDataVariants & data_variants) const;
+
+    /// Used to merge legacy retracted data into the result
+    void mergeRetractedInto(AggregatedDataVariants & result, AggregatedDataVariants && retracted_result) const;
+
+    bool hasExpandedData() const { return expanded_data_type != ExpandedDataType::None; }
+    ExpandedDataType expandedDataType() const { return expanded_data_type; }
 
     std::vector<Int64> bucketsBefore(const AggregatedDataVariants & result, Int64 max_bucket) const;
     void removeBucketsBefore(AggregatedDataVariants & result, Int64 max_bucket) const;
@@ -821,7 +857,7 @@ class Aggregator final
     /// Precondition: for all blocks block.info.is_overflows flag must be the same.
     /// (either all blocks are from overflow data or none blocks are).
     /// The resulting block has the same value of is_overflows flag.
-    Block mergeBlocks(BlocksList & blocks, bool final, ConvertAction action);
+    Block mergeBlocks(BlocksList & blocks, bool final, bool clear_states, bool only_updated);
 
     /** Split block with partially-aggregated data to many blocks, as if two-level method of aggregation was used.
       * This is needed to simplify merging of that data with other results, that are already two-level.
@@ -904,6 +940,8 @@ class Aggregator final
 
     bool all_aggregates_has_trivial_destructor = false;
 
+    ExpandedDataType expanded_data_type = ExpandedDataType::None;
+
     /// How many RAM were used to process the query before processing the first block.
     Int64 memory_usage_before_aggregation = 0;
 
@@ -933,7 +971,7 @@ class Aggregator final
 
     /** Create states of aggregate functions for one key.
       */
-    template
+    template
     void createAggregateStates(AggregateDataPtr & aggregate_data) const;
 
     /** Call `destroy` methods for states of aggregate functions.
@@ -982,6 +1020,7 @@ class Aggregator final
         AggregateFunctionInstruction * aggregate_instructions,
         Arena * arena) const;
 
+#if 0 /// Unused for now
     static void executeOnIntervalWithoutKeyImpl(
         AggregatedDataWithoutKey & res,
         size_t row_begin,
@@ -989,6 +1028,7 @@ class Aggregator final
         AggregateFunctionInstruction * aggregate_instructions,
         Arena * arena,
         const IColumn * delta_col);
+#endif
 
     template
     void writeToTemporaryFileImpl(
@@ -1014,68 +1054,32 @@ class Aggregator final
         bool clear_states,
         KeyHandler && key_handler = nullptr) const;
 
-    /// Merge data from hash table `src` into `dst`, but only for keys that already exist in dst. 
In other cases, merge the data into `overflows`. - template - void mergeDataNoMoreKeysImpl( - Table & table_dst, - AggregatedDataWithoutKey & overflows, - Table & table_src, - Arena * arena, - bool clear_states) const; - - /// Same, but ignores the rest of the keys. - template - void mergeDataOnlyExistingKeysImpl( - Table & table_dst, - Table & table_src, - Arena * arena, - bool clear_states) const; - void mergeWithoutKeyDataImpl(ManyAggregatedDataVariants & non_empty_data, bool clear_states) const; template void mergeSingleLevelDataImpl(ManyAggregatedDataVariants & non_empty_data, bool clear_states) const; template - void convertToBlockImpl( - Method & method, - Table & data, - MutableColumns & key_columns, - AggregateColumnsData & aggregate_columns, - MutableColumns & final_aggregate_columns, - Arena * arena, - bool final, - bool clear_states) const; + Block convertToBlockImpl( + Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, bool final, size_t rows, bool clear_states, AggregateStateType type) const; template void insertAggregatesIntoColumns( Mapped & mapped, MutableColumns & final_aggregate_columns, - Arena * arena) const; - - template - void convertToBlockImplFinal( - Method & method, - Table & data, - std::vector key_columns, - MutableColumns & final_aggregate_columns, Arena * arena, bool clear_states) const; - template - void convertToBlockImplNotFinal( - Method & method, - Table & data, - std::vector key_columns, - AggregateColumnsData & aggregate_columns) const; + template + Block insertResultsIntoColumns( + PaddedPODArray & places, OutputBlockColumns && out_cols, Arena * arena, bool clear_states) const; - template - Block prepareBlockAndFill( - AggregatedDataVariants & data_variants, - bool final, - bool clear_states, - size_t rows, - Filler && filler) const; + template + Block convertToBlockImplFinal( + Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, size_t rows, bool clear_states) const; + + template + Block convertToBlockImplNotFinal(Method & method, Table & data, Arenas & aggregates_pools, size_t rows) const; template Block convertOneBucketToBlockImpl( @@ -1084,65 +1088,60 @@ class Aggregator final Arena * arena, bool final, bool clear_states, - size_t bucket) const; + Int64 bucket, + AggregateStateType type = AggregateStateType::Normal) const; /// proton: starts. 
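The `AggregateStateType` argument threaded through `convertToBlockImpl` and the other conversion helpers above selects which per-group value ends up in the produced block: `Normal` emits everything, `OnlyUpdated` only groups flagged as updated, and `OnlyRetracted` only the saved pre-change states (the changelog emit pairs the latter two as -1/+1 rows). A compact stand-alone model of that filtering with simplified types, not the real column code:

#include <iostream>
#include <map>
#include <optional>
#include <string>
#include <utility>
#include <vector>

enum class AggregateStateType { Normal, OnlyUpdated, OnlyRetracted };

struct Group
{
    long current = 0;
    bool updated = false;
    std::optional<long> retracted;   /// pre-change value, present only for changed groups
};

/// Select which value (if any) each group contributes to the output block.
std::vector<std::pair<std::string, long>> convertToRows(const std::map<std::string, Group> & data, AggregateStateType type)
{
    std::vector<std::pair<std::string, long>> rows;
    for (const auto & [key, group] : data)
    {
        if (type == AggregateStateType::Normal)
            rows.emplace_back(key, group.current);
        else if (type == AggregateStateType::OnlyUpdated && group.updated)
            rows.emplace_back(key, group.current);
        else if (type == AggregateStateType::OnlyRetracted && group.retracted)
            rows.emplace_back(key, *group.retracted);
    }
    return rows;
}

int main()
{
    std::map<std::string, Group> data = {
        {"group-1", {10, true, 7}},      /// changed in this round: old value 7, new value 10
        {"group-2", {5, false, {}}},     /// untouched
    };

    for (auto type : {AggregateStateType::Normal, AggregateStateType::OnlyUpdated, AggregateStateType::OnlyRetracted})
        for (const auto & [key, value] : convertToRows(data, type))
            std::cout << static_cast<int>(type) << ": " << key << " = " << value << '\n';
}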
template void spliceBucketsImpl( AggregatedDataVariants & data_dest, AggregatedDataVariants & data_src, - bool final, - bool clear_states, const std::vector & gcd_buckets, - Arena * arena) const; + Arena * arena, + bool clear_states) const; template BlocksList mergeAndConvertTwoLevelToBlocksImpl( - ManyAggregatedDataVariants & non_empty_data, bool final, size_t max_threads, bool clear_states) const; + ManyAggregatedDataVariants & non_empty_data, bool final, bool clear_states, ThreadPool * thread_pool) const; - Block mergeAndConvertWithoutKeyToBlock(ManyAggregatedDataVariants & non_empty_data, bool final, bool clear_states) const; - Block mergeAndConvertSingleLevelToBlock(ManyAggregatedDataVariants & non_empty_data, bool final, bool clear_states) const; - BlocksList - mergeAndConvertTwoLevelToBlocks(ManyAggregatedDataVariants & non_empty_data, bool final, size_t max_threads, bool clear_states) const; + void mergeAggregateStates(AggregateDataPtr & dst, AggregateDataPtr & src, Arena * arena, bool clear_states) const; + + void destroyAggregateStates(AggregateDataPtr & place) const; + + void serializeAggregateStates(const AggregateDataPtr & place, WriteBuffer & wb) const; + void deserializeAggregateStates(AggregateDataPtr & place, ReadBuffer & rb, Arena * arena) const; + + void clearDataVariants(AggregatedDataVariants & data_variants) const; + + /// @return does need abort ? + bool checkAndProcessResult(AggregatedDataVariants & result, bool & no_more_keys) const; template bool executeAndRetractImpl( Method & method, Arena * aggregates_pool, - Method & retracted_method, Arena * retracted_pool, size_t row_begin, size_t row_end, ColumnRawPtrs & key_columns, AggregateFunctionInstruction * aggregate_instructions) const; + template + void mergeUpdatedGroupsImpl(ManyAggregatedDataVariants & non_empty_data, Arena * arena) const; template - void mergeRetractedGroupsImpl(ManyAggregatedDataVariants & aggregated_data, ManyAggregatedDataVariants & retracted_data) const; + void mergeRetractedGroupsImpl(ManyAggregatedDataVariants & non_empty_data, Arena * arena) const; - void mergeAggregateStates(AggregateDataPtr & dst, AggregateDataPtr & src, Arena * arena, bool clear_states) const; - - void destroyAggregateStates(AggregateDataPtr & place) const; - - void serializeAggregateStates(const AggregateDataPtr & place, WriteBuffer & wb) const; - void deserializeAggregateStates(AggregateDataPtr & place, ReadBuffer & rb, Arena * arena) const; - - void clearDataVariants(AggregatedDataVariants & data_variants) const; - - /// @return does need abort ? - bool checkAndProcessResult(AggregatedDataVariants & result, bool & no_more_keys) const; + template + void mergeRetractedIntoImpl(Method & method, Method & retracted_method, Arena * arena) const; /// proton: ends. 
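`serializeAggregateStates` / `deserializeAggregateStates`, declared above, and the retracted-state checkpointing both use the same presence-flag framing: a boolean first, then the payload only if the state exists. A minimal sketch of that framing over `std::stringstream` stand-ins instead of the real `WriteBuffer`/`ReadBuffer`:

#include <iostream>
#include <optional>
#include <sstream>
#include <string>

/// Write a presence flag first, then the payload only if the state exists.
void writeOptionalState(std::ostream & wb, const std::optional<long> & state)
{
    wb << state.has_value() << ' ';
    if (state)
        wb << *state << ' ';
}

std::optional<long> readOptionalState(std::istream & rb)
{
    bool has_state = false;
    rb >> has_state;
    if (!has_state)
        return std::nullopt;
    long value = 0;
    rb >> value;
    return value;
}

int main()
{
    std::stringstream buf;
    writeOptionalState(buf, 42);              /// group with a state
    writeOptionalState(buf, std::nullopt);    /// group without one (e.g. no retracted copy yet)

    auto first = readOptionalState(buf);
    auto second = readOptionalState(buf);
    std::cout << "first: " << (first ? std::to_string(*first) : "none")
              << ", second: " << (second ? std::to_string(*second) : "none") << '\n';
}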
- Block prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows, bool clear_states) const; - Block prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final, bool clear_states) const; - BlocksList prepareBlocksAndFillTwoLevel(AggregatedDataVariants & data_variants, bool final, size_t max_threads, bool clear_states) const; + Block prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows, bool clear_states, AggregateStateType type = AggregateStateType::Normal) const; + Block prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final, bool clear_states, AggregateStateType type = AggregateStateType::Normal) const; + BlocksList prepareBlocksAndFillTwoLevel(AggregatedDataVariants & data_variants, bool final, bool clear_states, size_t max_threads, AggregateStateType type = AggregateStateType::Normal) const; template BlocksList prepareBlocksAndFillTwoLevelImpl( - AggregatedDataVariants & data_variants, - Method & method, - bool final, - bool clear_states, - ThreadPool * thread_pool) const; + AggregatedDataVariants & data_variants, Method & method, bool final, bool clear_states, ThreadPool * thread_pool, AggregateStateType type) const; template void mergeStreamsImplCase( @@ -1167,7 +1166,7 @@ class Aggregator final template void mergeBucketImpl( - ManyAggregatedDataVariants & data, bool final, bool clear_states, Int64 bucket, Arena * arena, std::atomic * is_cancelled = nullptr) const; + ManyAggregatedDataVariants & data, Int64 bucket, Arena * arena, bool clear_states, std::atomic * is_cancelled = nullptr) const; template void convertBlockToTwoLevelImpl( @@ -1207,30 +1206,31 @@ class Aggregator final const AggregatedDataVariants & data_variants, MutableColumns & aggregate_columns) const; - void createStatesAndFillKeyColumnsWithSingleKey( - AggregatedDataVariants & data_variants, - Columns & key_columns, size_t key_row, - MutableColumns & final_key_columns) const; - /// proton: starts void setupAggregatesPoolTimestamps(size_t row_begin, size_t row_end, const ColumnRawPtrs & key_columns, Arena * aggregates_pool) const; - inline bool shouldClearStates(ConvertAction action, bool final_) const; +public: + /// Existed versions: + /// STATE V1 - Legacy version (REVISION 1) + /// STATE V2 - REVISION 1 (Enable revision) + /// STATE V3 - REVISION 3 (Add expanded data) + static constexpr UInt64 STATE_V2_MIN_REVISION = 1; + static constexpr UInt64 STATE_V3_MIN_REVISION = 3; VersionType getVersionFromRevision(UInt64 revision) const; VersionType getVersion() const; -public: - /// Existed versions: - /// STATE VERSION 1 - Legacy version - /// STATE VERSION 2 - REVISION 1 (Enable revision) - static constexpr UInt64 STATE_V2_MIN_REVISION = 1; + void checkpoint(const AggregatedDataVariants & data_variants, WriteBuffer & wb) const; + void recover(AggregatedDataVariants & data_variants, ReadBuffer & rb) const; - void checkpoint(const AggregatedDataVariants & data_variants, WriteBuffer & wb); - void recover(AggregatedDataVariants & data_variants, ReadBuffer & rb); +private: + /// [Version-3] + void doCheckpointV3(const AggregatedDataVariants & data_variants, WriteBuffer & wb) const; + void doRecoverV3(AggregatedDataVariants & data_variants, ReadBuffer & rb) const; - void doCheckpoint(const AggregatedDataVariants & data_variants, WriteBuffer & wb); - void doRecover(AggregatedDataVariants & data_variants, ReadBuffer & rb); + /// [Version-2] + void doCheckpointV2(const AggregatedDataVariants 
& data_variants, WriteBuffer & wb) const; + void doRecoverV2(AggregatedDataVariants & data_variants, ReadBuffer & rb) const; /// [Legacy] void doCheckpointLegacy(const AggregatedDataVariants & data_variants, WriteBuffer & wb); diff --git a/src/Processors/Transforms/Streaming/AggregatingHelper.cpp b/src/Processors/Transforms/Streaming/AggregatingHelper.cpp index 6fa6139d38a..849b82b802c 100644 --- a/src/Processors/Transforms/Streaming/AggregatingHelper.cpp +++ b/src/Processors/Transforms/Streaming/AggregatingHelper.cpp @@ -30,12 +30,19 @@ Chunk mergeBlocksToChunk(BlocksList && blocks) return merged_chunk; } -Chunk convertToChunkImpl(AggregatedDataVariants & data, const AggregatingTransformParams & params, ConvertAction action) +Chunk convertToChunkImpl(AggregatedDataVariants & data, const AggregatingTransformParams & params, AggregateStateType type) { if (data.empty()) return {}; - auto blocks = params.aggregator.convertToBlocks(data, params.final, action, params.params.max_threads); + BlocksList blocks; + if (type == AggregateStateType::OnlyUpdated) + blocks = params.aggregator.convertUpdatedToBlocks(data); + else if (type == AggregateStateType::OnlyRetracted) + blocks = params.aggregator.convertRetractedToBlocks(data); + else + blocks = params.aggregator.convertToBlocks(data, params.final, !params.params.keep_state, params.params.max_threads); + /// FIXME: When global aggr states was converted two level hash table, the merged chunk may be too large return mergeBlocksToChunk(std::move(blocks)); } @@ -45,12 +52,12 @@ namespace AggregatingHelper { Chunk convertToChunk(AggregatedDataVariants & data, const AggregatingTransformParams & params) { - return convertToChunkImpl(data, params, ConvertAction::StreamingEmit); + return convertToChunkImpl(data, params, AggregateStateType::Normal); } Chunk mergeAndConvertToChunk(ManyAggregatedDataVariants & data, const AggregatingTransformParams & params) { - auto blocks = params.aggregator.mergeAndConvertToBlocks(data, params.final, ConvertAction::StreamingEmit, params.params.max_threads); + auto blocks = params.aggregator.mergeAndConvertToBlocks(data, params.final, !params.params.keep_state, params.params.max_threads); /// FIXME: When global aggr states was converted two level hash table, the merged chunk may be too large return mergeBlocksToChunk(std::move(blocks)); } @@ -58,32 +65,21 @@ Chunk mergeAndConvertToChunk(ManyAggregatedDataVariants & data, const Aggregatin Chunk spliceAndConvertBucketsToChunk( AggregatedDataVariants & data, const AggregatingTransformParams & params, const std::vector & buckets) { - if (buckets.size() == 1) - return convertToChunk(params.aggregator.convertOneBucketToBlock(data, params.final, ConvertAction::StreamingEmit, buckets[0])); - else - return convertToChunk(params.aggregator.spliceAndConvertBucketsToBlock(data, params.final, ConvertAction::InternalMerge, buckets)); + return convertToChunk(params.aggregator.spliceAndConvertBucketsToBlock(data, params.final, /*clear_states*/ false, buckets)); } Chunk mergeAndSpliceAndConvertBucketsToChunk( ManyAggregatedDataVariants & data, const AggregatingTransformParams & params, const std::vector & buckets) { - if (buckets.size() == 1) - return convertToChunk( - params.aggregator.mergeAndConvertOneBucketToBlock(data, params.final, ConvertAction::StreamingEmit, buckets[0])); - else - return convertToChunk( - params.aggregator.mergeAndSpliceAndConvertBucketsToBlock(data, params.final, ConvertAction::InternalMerge, buckets)); + return 
convertToChunk(params.aggregator.mergeAndSpliceAndConvertBucketsToBlock(data, params.final, /*clear_states*/ false, buckets)); } -ChunkPair -convertToChangelogChunk(AggregatedDataVariants & data, RetractedDataVariants & retracted_data, const AggregatingTransformParams & params) +ChunkPair convertToChangelogChunk(AggregatedDataVariants & data, const AggregatingTransformParams & params) { if (data.empty()) return {}; - assert(!retracted_data.empty()); - - auto retracted_chunk = convertToChunkImpl(retracted_data, params, ConvertAction::RetractedEmit); + auto retracted_chunk = convertToChunkImpl(data, params, AggregateStateType::OnlyRetracted); if (retracted_chunk) { auto retracted_delta_col = ColumnInt8::create(retracted_chunk.rows(), Int8(-1)); @@ -91,25 +87,46 @@ convertToChangelogChunk(AggregatedDataVariants & data, RetractedDataVariants & r retracted_chunk.setConsecutiveDataFlag(); } - auto chunk = convertToChunkImpl(data, params, ConvertAction::StreamingEmit); + auto chunk = convertToChunkImpl(data, params, AggregateStateType::OnlyUpdated); if (chunk) { auto delta_col = ColumnInt8::create(chunk.rows(), Int8(1)); chunk.addColumn(std::move(delta_col)); } - return {std::move(retracted_chunk), std::move(chunk)}; } -ChunkPair mergeAndConvertToChangelogChunk( - ManyAggregatedDataVariants & data, ManyRetractedDataVariants & retracted_data, const AggregatingTransformParams & params) +ChunkPair mergeAndConvertToChangelogChunk(ManyAggregatedDataVariants & data, const AggregatingTransformParams & params) { - auto [merged_data, merged_retracted_data] = params.aggregator.mergeRetractedGroups(data, retracted_data); - if (!merged_data) - return {}; + if (data.size() == 1) + return convertToChangelogChunk(*data[0], params); - assert(merged_retracted_data); - return convertToChangelogChunk(*merged_data, *merged_retracted_data, params); + ChunkPair results; + auto & [retracted_chunk, chunk] = results; + + auto merged_retracted_data = params.aggregator.mergeRetractedGroups(data); + if (merged_retracted_data) + { + retracted_chunk = convertToChunk(*merged_retracted_data, params); + if (retracted_chunk) + { + auto retracted_delta_col = ColumnInt8::create(retracted_chunk.rows(), Int8(-1)); + retracted_chunk.addColumn(std::move(retracted_delta_col)); + retracted_chunk.setConsecutiveDataFlag(); + } + } + + auto merged_updated_data = params.aggregator.mergeUpdatedGroups(data); + if (merged_updated_data) + { + chunk = convertToChunk(*merged_updated_data, params); + if (chunk) + { + auto delta_col = ColumnInt8::create(chunk.rows(), Int8(1)); + chunk.addColumn(std::move(delta_col)); + } + } + return results; } } } diff --git a/src/Processors/Transforms/Streaming/AggregatingHelper.h b/src/Processors/Transforms/Streaming/AggregatingHelper.h index 5ca32f6fc00..85b177b5b51 100644 --- a/src/Processors/Transforms/Streaming/AggregatingHelper.h +++ b/src/Processors/Transforms/Streaming/AggregatingHelper.h @@ -38,16 +38,13 @@ Chunk mergeAndSpliceAndConvertBucketsToChunk( ManyAggregatedDataVariants & data, const AggregatingTransformParams & params, const std::vector & buckets); /// Only used for emit changelog -/// @brief Based on new/updated groups @p retracted_data , only convert the state of changed groups (retracted: last state, aggregated: current state) -/// \data: current aggregated state of all groups -/// \retracted_data: only have last state of changed groups (i.e. 
new/updated/deleted)
+/// @brief Only convert the state of changed groups (retracted: last state, aggregated: current state)
+/// \data: current aggregated state of all groups (contains both retracted and updated states)
 /// @returns
 /// retracted_chunk: just contains retracted data of changed groups
 /// aggregated_chunk: just contains aggregated data of changed groups
-ChunkPair
-convertToChangelogChunk(AggregatedDataVariants & data, RetractedDataVariants & retracted_data, const AggregatingTransformParams & params);
-ChunkPair mergeAndConvertToChangelogChunk(
-    ManyAggregatedDataVariants & data, ManyRetractedDataVariants & retracted_data, const AggregatingTransformParams & params);
+ChunkPair convertToChangelogChunk(AggregatedDataVariants & data, const AggregatingTransformParams & params);
+ChunkPair mergeAndConvertToChangelogChunk(ManyAggregatedDataVariants & data, const AggregatingTransformParams & params);
 }
 }
diff --git a/src/Processors/Transforms/Streaming/AggregatingTransform.cpp b/src/Processors/Transforms/Streaming/AggregatingTransform.cpp
index 629a047c1f3..b9fa8205e75 100644
--- a/src/Processors/Transforms/Streaming/AggregatingTransform.cpp
+++ b/src/Processors/Transforms/Streaming/AggregatingTransform.cpp
@@ -498,7 +498,7 @@ void AggregatingTransform::checkpoint(CheckpointContextPtr ckpt_ctx)
         }
 
         /// Serializing no shared data
-        params->aggregator.checkpoint(variants, wb);
+        DB::serialize(variants, wb, params->aggregator);
 
         DB::writeIntBinary(watermark, wb);
 
@@ -554,7 +554,7 @@ void AggregatingTransform::recover(CheckpointContextPtr ckpt_ctx)
         }
 
         /// Serializing local or stable data during checkpointing
-        params->aggregator.recover(variants, rb);
+        DB::deserialize(variants, rb, params->aggregator);
 
         DB::readIntBinary(watermark, rb);
 
diff --git a/src/Processors/Transforms/Streaming/AggregatingTransformWithSubstream.cpp b/src/Processors/Transforms/Streaming/AggregatingTransformWithSubstream.cpp
index 918d1337658..fd1cda27554 100644
--- a/src/Processors/Transforms/Streaming/AggregatingTransformWithSubstream.cpp
+++ b/src/Processors/Transforms/Streaming/AggregatingTransformWithSubstream.cpp
@@ -270,7 +270,7 @@ void AggregatingTransformWithSubstream::checkpoint(CheckpointContextPtr ckpt_ctx
         for (const auto & [id, substream_ctx] : substream_contexts)
         {
             assert(id == substream_ctx->id);
-            substream_ctx->serialize(wb, getVersion());
+            serialize(*substream_ctx, wb, getVersion());
         }
     });
 }
@@ -284,7 +284,7 @@ void AggregatingTransformWithSubstream::recover(CheckpointContextPtr ckpt_ctx)
         for (size_t i = 0; i < num_substreams; ++i)
         {
             auto substream_ctx = std::make_shared<SubstreamContext>(this);
-            substream_ctx->deserialize(rb, version_);
+            deserialize(*substream_ctx, rb, version_);
             substream_contexts.emplace(substream_ctx->id, std::move(substream_ctx));
         }
     });
@@ -294,7 +294,7 @@ void SubstreamContext::serialize(WriteBuffer & wb, VersionType version) const
 {
     DB::Streaming::serialize(id, wb);
 
-    aggregating_transform->params->aggregator.checkpoint(variants, wb);
+    DB::serialize(variants, wb, aggregating_transform->params->aggregator);
 
     DB::writeIntBinary(finalized_watermark, wb);
 
@@ -312,7 +312,7 @@ void SubstreamContext::deserialize(ReadBuffer & rb, VersionType version)
 {
     DB::Streaming::deserialize(id, rb);
 
-    aggregating_transform->params->aggregator.recover(variants, rb);
+    DB::deserialize(variants, rb, aggregating_transform->params->aggregator);
 
     DB::readIntBinary(finalized_watermark, rb);
 
diff --git a/src/Processors/Transforms/Streaming/GlobalAggregatingTransform.cpp b/src/Processors/Transforms/Streaming/GlobalAggregatingTransform.cpp
index 6d19e51fcc0..3049e4bebce 100644
--- a/src/Processors/Transforms/Streaming/GlobalAggregatingTransform.cpp
+++ b/src/Processors/Transforms/Streaming/GlobalAggregatingTransform.cpp
@@ -9,6 +9,7 @@ namespace ErrorCodes
 {
 extern const int NOT_IMPLEMENTED;
 extern const int UNSUPPORTED;
+extern const int RECOVER_CHECKPOINT_FAILED;
 }
 
 namespace Streaming
@@ -40,35 +41,58 @@ GlobalAggregatingTransform::GlobalAggregatingTransform(
     if (unlikely(params->params.overflow_row))
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Overflow row processing is not implemented in global aggregation");
 
-    /// Need extra retracted data
     if (params->emit_changelog)
     {
         if (params->emit_version)
             throw Exception(ErrorCodes::UNSUPPORTED, "'emit_version()' is not supported in global aggregation emit changelog");
 
-        ManyRetractedDataVariants retracted_data(many_data->variants.size());
-        for (auto & elem : retracted_data)
-            elem = std::make_shared<RetractedDataVariants>();
-
+        bool retract_enabled = false;
         many_data->setField(
-            {std::move(retracted_data),
+            {retract_enabled,
             /// Field serializer
-            [this](const std::any & field, WriteBuffer & wb, VersionType) {
-                const auto & data = std::any_cast<const ManyRetractedDataVariants &>(field);
-                DB::writeIntBinary(data.size(), wb);
-                for (const auto & elem : data)
-                    params->aggregator.checkpoint(*elem, wb);
+            [](const std::any & field, WriteBuffer & wb, [[maybe_unused]] VersionType version) {
+                assert(version >= IMPL_V2_MIN_VERSION);
+                DB::writeBoolText(std::any_cast<bool>(field), wb);
             },
             /// Field deserializer
-            [this](std::any & field, ReadBuffer & rb, VersionType) {
-                auto & data = std::any_cast<ManyRetractedDataVariants &>(field);
-                size_t num;
-                DB::readIntBinary(num, rb);
-                data.resize(num);
-                for (auto & elem : data)
+            [this](std::any & field, ReadBuffer & rb, VersionType version) {
+                if (version >= IMPL_V2_MIN_VERSION)
+                {
+                    DB::readBoolText(std::any_cast<bool &>(field), rb);
+                }
+                else
                 {
-                    elem = std::make_shared<RetractedDataVariants>();
-                    params->aggregator.recover(*elem, rb);
+                    /// Convert old impl to new impl V2
+                    if (params->aggregator.expandedDataType() != ExpandedDataType::UpdatedWithRetracted)
+                        throw Exception(
+                            ErrorCodes::RECOVER_CHECKPOINT_FAILED,
+                            "Failed to recover aggregation checkpoint. The old version '{}' checkpoint contains retracted data, "
+                            "but the current aggregator does not expect it",
+                            version);
+
+                    size_t retracted_num;
+                    DB::readIntBinary(retracted_num, rb);
+                    if (retracted_num != many_data->variants.size())
+                        throw Exception(
+                            ErrorCodes::RECOVER_CHECKPOINT_FAILED,
+                            "Failed to recover aggregation checkpoint. 
Recover old version '{}' checkpoint but the scale of the pipeline " + "is " + "inconsistent, checkpointed={}, current={}", + version, + retracted_num, + many_data->variants.size()); + + bool has_retracted = false; + for (auto & current : many_data->variants) + { + AggregatedDataVariants retracted; + DB::deserialize(retracted, rb, params->aggregator); + has_retracted |= retracted.size() > 0; + params->aggregator.mergeRetractedInto(*current, std::move(retracted)); + } + + std::any_cast(field) = many_data->emited_version > 0 || has_retracted; /// retracted enabled } }}); } @@ -103,14 +127,17 @@ std::pair GlobalAggregatingTransform::executeOrMergeColumns(Chunk & if (params->emit_changelog) { assert(!params->only_merge); - - auto & retracted_variants = many_data->getField()[current_variant]; - auto & aggregated_variants = many_data->variants[current_variant]; - /// Blocking finalization during execution on current variant std::lock_guard lock(variants_mutex); - return params->aggregator.executeAndRetractOnBlock( - chunk.detachColumns(), 0, num_rows, *aggregated_variants, *retracted_variants, key_columns, aggregate_columns, no_more_keys); + + /// Enable retract after first finalization + auto retract_enabled = many_data->getField(); + if (retract_enabled) [[likely]] + return params->aggregator.executeAndRetractOnBlock( + chunk.detachColumns(), 0, num_rows, variants, key_columns, aggregate_columns, no_more_keys); + else + return params->aggregator.executeOnBlock( + chunk.detachColumns(), 0, num_rows, variants, key_columns, aggregate_columns, no_more_keys); } else return AggregatingTransform::executeOrMergeColumns(chunk, num_rows); @@ -127,8 +154,9 @@ void GlobalAggregatingTransform::finalize(const ChunkContextPtr & chunk_ctx) if (params->emit_changelog) { - auto [retracted_chunk, chunk] = AggregatingHelper::mergeAndConvertToChangelogChunk( - many_data->variants, many_data->getField(), *params); + auto [retracted_chunk, chunk] = AggregatingHelper::mergeAndConvertToChangelogChunk(many_data->variants, *params); + /// Enable retract after first finalization + many_data->getField() |= chunk.rows(); chunk.setChunkContext(chunk_ctx); setCurrentChunk(std::move(chunk), std::move(retracted_chunk)); diff --git a/src/Processors/Transforms/Streaming/GlobalAggregatingTransform.h b/src/Processors/Transforms/Streaming/GlobalAggregatingTransform.h index 474824e1977..975fe4e115f 100644 --- a/src/Processors/Transforms/Streaming/GlobalAggregatingTransform.h +++ b/src/Processors/Transforms/Streaming/GlobalAggregatingTransform.h @@ -28,6 +28,8 @@ class GlobalAggregatingTransform final : public AggregatingTransform bool prepareFinalization(Int64 min_watermark) override; void finalize(const ChunkContextPtr & chunk_ctx) override; + + static constexpr VersionType IMPL_V2_MIN_VERSION = 3; }; } diff --git a/src/Processors/Transforms/Streaming/GlobalAggregatingTransformWithSubstream.cpp b/src/Processors/Transforms/Streaming/GlobalAggregatingTransformWithSubstream.cpp index e223ee5b623..d59f40c2199 100644 --- a/src/Processors/Transforms/Streaming/GlobalAggregatingTransformWithSubstream.cpp +++ b/src/Processors/Transforms/Streaming/GlobalAggregatingTransformWithSubstream.cpp @@ -9,6 +9,7 @@ namespace ErrorCodes { extern const int NOT_IMPLEMENTED; extern const int UNSUPPORTED; +extern const int RECOVER_CHECKPOINT_FAILED; } namespace Streaming @@ -28,20 +29,42 @@ GlobalAggregatingTransformWithSubstream::GlobalAggregatingTransformWithSubstream SubstreamContextPtr 
GlobalAggregatingTransformWithSubstream::getOrCreateSubstreamContext(const SubstreamID & id) { auto substream_ctx = AggregatingTransformWithSubstream::getOrCreateSubstreamContext(id); + /// Need extra retracted data for old version impl if (params->emit_changelog && !substream_ctx->hasField()) { + bool retract_enabled = false; substream_ctx->setField( - {std::make_shared(), - /// Field serializer - [this](const std::any & field, WriteBuffer & wb, VersionType) { - const auto & data = std::any_cast(field); - params->aggregator.checkpoint(*data, wb); - }, - /// Field deserializer - [this](std::any & field, ReadBuffer & rb, VersionType) { - auto & data = std::any_cast(field); - params->aggregator.recover(*data, rb); - }}); + {retract_enabled, + /// Field serializer + [](const std::any & field, WriteBuffer & wb, VersionType version) { + assert(version >= IMPL_V2_MIN_VERSION); + DB::writeBoolText(std::any_cast(field), wb); + }, + /// Field deserializer + [substream_ctx, this](std::any & field, ReadBuffer & rb, VersionType version) { + if (version >= IMPL_V2_MIN_VERSION) + { + DB::readBoolText(std::any_cast(field), rb); + } + else + { + /// Convert old impl to new impl V2 + if (params->aggregator.expandedDataType() != ExpandedDataType::UpdatedWithRetracted) + throw Exception( + ErrorCodes::RECOVER_CHECKPOINT_FAILED, + "Failed to recover aggregation checkpoint. Recover old version '{}' checkpoint, checkpointed need retracted, " + "but " + "current not need", + version); + + AggregatedDataVariants retracted; + DB::deserialize(retracted, rb, params->aggregator); + bool has_retracted = retracted.size() > 0; + params->aggregator.mergeRetractedInto(substream_ctx->variants, std::move(retracted)); + + std::any_cast(field) = substream_ctx->emited_version > 0 || has_retracted; /// retracted enabled + } + }}); } return substream_ctx; } @@ -52,13 +75,14 @@ GlobalAggregatingTransformWithSubstream::executeOrMergeColumns(Chunk & chunk, co if (params->emit_changelog) { assert(!params->only_merge); - auto num_rows = chunk.getNumRows(); - auto & retracted_variants = substream_ctx->getField(); - auto & aggregated_variants = substream_ctx->variants; - - return params->aggregator.executeAndRetractOnBlock( - chunk.detachColumns(), 0, num_rows, aggregated_variants, *retracted_variants, key_columns, aggregate_columns, no_more_keys); + auto retract_enabled = substream_ctx->getField(); + if (retract_enabled) [[likely]] + return params->aggregator.executeAndRetractOnBlock( + chunk.detachColumns(), 0, num_rows, substream_ctx->variants, key_columns, aggregate_columns, no_more_keys); + else + return params->aggregator.executeOnBlock( + chunk.detachColumns(), 0, num_rows, substream_ctx->variants, key_columns, aggregate_columns, no_more_keys); } else return AggregatingTransformWithSubstream::executeOrMergeColumns(chunk, substream_ctx); @@ -87,8 +111,10 @@ void GlobalAggregatingTransformWithSubstream::finalize(const SubstreamContextPtr auto start = MonotonicMilliseconds::now(); if (params->emit_changelog) { - auto [retracted_chunk, chunk] - = AggregatingHelper::convertToChangelogChunk(variants, *substream_ctx->getField(), *params); + auto [retracted_chunk, chunk] = AggregatingHelper::convertToChangelogChunk(variants, *params); + /// Enable retract after first finalization + substream_ctx->getField() |= chunk.rows(); + chunk.setChunkContext(chunk_ctx); setCurrentChunk(std::move(chunk), std::move(retracted_chunk)); } diff --git a/src/Processors/Transforms/Streaming/GlobalAggregatingTransformWithSubstream.h 
b/src/Processors/Transforms/Streaming/GlobalAggregatingTransformWithSubstream.h index 27c69ba6ac5..72bc161bf7c 100644 --- a/src/Processors/Transforms/Streaming/GlobalAggregatingTransformWithSubstream.h +++ b/src/Processors/Transforms/Streaming/GlobalAggregatingTransformWithSubstream.h @@ -21,6 +21,8 @@ class GlobalAggregatingTransformWithSubstream final : public AggregatingTransfor private: void finalize(const SubstreamContextPtr & substream_ctx, const ChunkContextPtr & chunk_ctx) override; + + static constexpr VersionType IMPL_V2_MIN_VERSION = 3; }; } From 9a93a4a02d94b772116a00a9be65ec813ccddddf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lisen=20=E6=9D=A8?= Date: Tue, 30 Jan 2024 14:45:21 +0800 Subject: [PATCH 3/5] fix unstable smoke test --- .../test_stream_smoke/0001_view_case.json | 7 +- .../0013_changelog_stream13.yaml | 4 +- .../0013_changelog_stream14.yaml | 24 ++-- .../0013_changelog_stream2.json | 114 ++++++++---------- .../0018_query_state7_view.json | 6 +- .../0030_two_level_global_aggr.yaml | 4 +- .../test_stream_smoke/0099_fixed_issues.json | 2 +- 7 files changed, 75 insertions(+), 86 deletions(-) diff --git a/tests/stream/test_stream_smoke/0001_view_case.json b/tests/stream/test_stream_smoke/0001_view_case.json index cc73303442b..b5eae39feb0 100644 --- a/tests/stream/test_stream_smoke/0001_view_case.json +++ b/tests/stream/test_stream_smoke/0001_view_case.json @@ -482,8 +482,8 @@ "steps":[ {"statements": [ {"client":"python", "query_type": "table", "query":"drop view if exists test1_mv_union"}, - {"client":"python","query_id":"300", "wait":3, "query_type": "table", "query":"create materialized view test1_mv_union as (select id, sum(value) as sum_value from test1_mv group by id limit 4 union select id, sum(value) as sum_value from test1_mv group by id limit 4)"}, - {"client":"python", "query_id":"301", "query_type": "stream","depends_on_stream":"test1_mv_union","wait":2,"query_end_timer":5,"drop_view":"test1_mv_union", "drop_view_wait":1, "query":"select id, sum_value from test1_mv_union settings seek_to='earliest'"} + {"client":"python", "wait":3, "query_type": "table", "query":"create materialized view test1_mv_union as (select id, sum(value) as sum_value from test1_mv group by id union select id, sum(value) as sum_value from test1_mv group by id)"}, + {"client":"python", "query_id":"301", "query_type": "stream","depends_on_stream":"test1_mv_union","wait":2, "query":"select id, sum_value from test1_mv_union settings seek_to='earliest'"} ]}, {"inputs": [ @@ -498,7 +498,8 @@ ["dev2", "ca", 76, "\"create_time\":\"2021-11-02 20:00:10\"", "2020-02-02 20:01:05"], ["dev2", "ca", 80, "\"create_time\":\"2021-11-02 20:00:01\"", "2020-02-02 20:01:03"], ["dev8", "ca", 67, "\"create_time\":\"2021-11-02 20:00:01\"", "2020-02-02 20:01:02"], - ["dev8", "ca", 77, "\"create_time\":\"2021-11-02 20:00:10\"", "2020-02-02 20:01:08"]]} + ["dev8", "ca", 77, "\"create_time\":\"2021-11-02 20:00:10\"", "2020-02-02 20:01:08"]], + "kill":301, "kill_wait":3, "drop_view":"test1_mv_union", "drop_view_wait":2} ]} ], diff --git a/tests/stream/test_stream_smoke/0013_changelog_stream13.yaml b/tests/stream/test_stream_smoke/0013_changelog_stream13.yaml index 12c2345964a..73af7a1dce0 100644 --- a/tests/stream/test_stream_smoke/0013_changelog_stream13.yaml +++ b/tests/stream/test_stream_smoke/0013_changelog_stream13.yaml @@ -368,12 +368,12 @@ tests: wait: 2 depends_on_stream: changelog_kv_13 query_id: '13108' - query: select count_distinct(val), sum_distinct(val) from changelog_kv_13; + query: select 
count_distinct(val), sum_distinct(val) from changelog_kv_13 emit periodic 1s; - client: python query_type: table depends_on: '13108' - wait: 3 + wait: 2 kill: '13108' kill_wait: 2 query: insert into changelog_kv_13(id, val, _tp_delta) values(2, 1, 1)(2, 1, -1)(3, 2, 1)(3, 2, -1); diff --git a/tests/stream/test_stream_smoke/0013_changelog_stream14.yaml b/tests/stream/test_stream_smoke/0013_changelog_stream14.yaml index 09912690e77..056513b5aea 100644 --- a/tests/stream/test_stream_smoke/0013_changelog_stream14.yaml +++ b/tests/stream/test_stream_smoke/0013_changelog_stream14.yaml @@ -207,34 +207,34 @@ tests: wait: 2 depends_on_stream: test_changelog_14 query_id: '15112' - query: select group_uniq_array(val), _tp_delta from test_changelog_14 emit changelog; + query: select group_uniq_array(val), _tp_delta from test_changelog_14 emit changelog periodic 1s; - client: python query_type: table depends_on: '15112' - wait: 3 + wait: 2 query: insert into test_changelog_14(id, val) values(1, 1); - client: python query_type: table - wait: 2 + wait: 1 query: insert into test_changelog_14(id, val) values(1, 2); - client: python query_type: table - wait: 2 + wait: 1 query: insert into test_changelog_14(id, val) values(2, 3); - client: python query_type: table - wait: 2 + wait: 1 query: insert into test_changelog_14(id, val) values(3, 3); - client: python query_type: table kill : '15112' kill_wait: 2 - wait: 2 + wait: 1 query: insert into test_changelog_14(id, val) values(3, 4); expected_results: - query_id: '15112' @@ -278,27 +278,27 @@ tests: - client: python query_type: table depends_on: '15113' - wait: 3 + wait: 2 query: insert into test_changelog_14(id, val) values(1, 1), (2,2); - client: python query_type: table kill: '15113' - kill_wait: 2 - wait: 3 + kill_wait: 3 + wait: 2 query: insert into test_changelog_14(id, val, _tp_delta) values(3, 3, +1), (2, 2, -1); - statements: - client: python query_type: stream query_id: 15113-1 - wait: 2 + wait: 1 terminate: manual query: recover from '15113' - client: python query_type: table - depends_on_stream: test_changelog_14 + depends_on: '15113' wait: 2 query: insert into test_changelog_14(id, val) values(4, 4), (5,5); @@ -314,7 +314,7 @@ tests: - client: python query_type: table - wait: 2 + wait: 3 query: kill query where query_id='15113-1' sync - client: python diff --git a/tests/stream/test_stream_smoke/0013_changelog_stream2.json b/tests/stream/test_stream_smoke/0013_changelog_stream2.json index 42b8e4d7a38..42c6b1ff542 100644 --- a/tests/stream/test_stream_smoke/0013_changelog_stream2.json +++ b/tests/stream/test_stream_smoke/0013_changelog_stream2.json @@ -191,7 +191,7 @@ {"client":"python", "query_type": "table", "exist":"test14_append_stream1_2", "exist_wait":2, "wait":1, "query":"create stream if not exists test14_append_stream1_2 (i int, k1 int, k2 string)"}, {"client":"python", "query_type": "table", "exist":"test14_append_stream2_2", "exist_wait":2, "wait":1, "query":"create stream if not exists test14_append_stream2_2 (j int, kk1 int, kk2 string) primary key(kk1, kk2) settings mode='versioned_kv'"}, {"client":"python", "query_type": "table", "exist":"test14_append_stream3_2", "exist_wait":2, "wait":1, "query":"create stream if not exists test14_append_stream3_2 (k int, kkk1 int, kkk2 string) primary key (kkk1, kkk2) settings mode='versioned_kv'"}, - {"client":"python", "query_type": "stream", "query_id":"1444", "wait":1, "terminate":"manual", "query":"select a.i, a.k1, a.k2, b.j, b.kk1, b.kk2, c.k, c.kkk1, c.kkk2, _tp_delta from 
test14_append_stream1_2 as a inner all join test14_append_stream2_2 as b on a.i = b.j inner all join test14_append_stream3_2 as c on b.kk2 = c.kkk2"}, + {"client":"python", "query_type": "stream", "depends_on_stream":"test14_append_stream3_2", "query_id":"1444", "wait":1, "terminate":"manual", "query":"select a.i, a.k1, a.k2, b.j, b.kk1, b.kk2, c.k, c.kkk1, c.kkk2, _tp_delta from test14_append_stream1_2 as a inner all join test14_append_stream2_2 as b on a.i = b.j inner all join test14_append_stream3_2 as c on b.kk2 = c.kkk2"}, {"client":"python", "query_type": "table", "depends_on":"1444", "wait":1, "query": "insert into test14_append_stream3_2(k, kkk1, kkk2) values (3, 2, 'k2')"}, {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream2_2(j, kk1, kk2) values (1, 1, 'k2')"}, {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream1_2(i, k1, k2) values (1, 1, 'k')"}, @@ -221,13 +221,10 @@ "statements": [ {"client":"python", "query_type":"table", "query":"drop stream if exists test14_append_stream_2"}, {"client":"python", "query_type": "table", "exist":"test14_append_stream_2", "exist_wait":2, "wait":1, "query":"create stream if not exists test14_append_stream_2 (id int, name string, val float, ts datetime)"}, - {"client":"python", "query_type": "stream", "query_id":"1445", "wait":1, "terminate":"manual", "query":"select count(), min(val), max(val), sum(val), avg(val) from changelog(test14_append_stream_2, id, name)"}, - {"client":"python", "query_type": "table", "depends_on":"1445", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'a', 11.1 ,'2020-02-02 20:00:00')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (2, 'a', 22.2 ,'2020-02-02 20:00:01')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'a', 33.3 ,'2020-02-02 20:00:02')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (2, 'a', 44.4 ,'2020-02-02 20:00:03')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'b', 22.2 ,'2020-02-02 20:00:04')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (2, 'b', 44.4 ,'2020-02-02 20:00:05')"}, + {"client":"python", "query_type": "stream", "depends_on_stream":"test14_append_stream_2", "query_id":"1445", "wait":1, "terminate":"manual", "query":"select count(), min(val), max(val), sum(val), avg(val) from changelog(test14_append_stream_2, id, name) emit periodic 1s"}, + {"client":"python", "query_type": "table", "depends_on":"1445", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'a', 11.1 ,'2020-02-02 20:00:00') (2, 'a', 22.2 ,'2020-02-02 20:00:01')"}, + {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'a', 33.3 ,'2020-02-02 20:00:02') (2, 'a', 44.4 ,'2020-02-02 20:00:03')"}, + {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'b', 22.2 ,'2020-02-02 20:00:04') (2, 'b', 44.4 ,'2020-02-02 20:00:05')"}, {"client":"python", "query_type": "table", "kill":"1445", "kill_wait":3, "wait":1, 
"query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'b', 33.3 ,'2020-02-02 20:00:05')"} ] } @@ -236,10 +233,10 @@ { "query_id":"1445", "expected_results":[ - [2, 11.100000381469727, 22.200000762939453, 33.30000114440918, 16.65000057220459], - [2, 33.29999923706055, 44.400001525878906, 77.70000076293945, 38.85000038146973], - [4, 22.200000762939453, 44.400001525878906, 144.3000030517578, 36.07500076293945], - [4, 33.29999923706055, 44.400001525878906, 155.4000015258789, 38.85000038146973] + [2, 11.1, 22.2, 33.3, 16.65], + [2, 33.3, 44.4, 77.7, 38.85], + [4, 22.2, 44.4, 144.3, 36.075], + [4, 33.3, 44.4, 155.4, 38.85] ] } ] @@ -254,15 +251,11 @@ "statements": [ {"client":"python", "query_type":"table", "query":"drop stream if exists test14_append_stream_2"}, {"client":"python", "query_type": "table", "exist":"test14_append_stream_2", "exist_wait":2, "wait":1, "query":"create stream if not exists test14_append_stream_2 (id int, name string, val float, ts datetime)"}, - {"client":"python", "query_type": "stream", "query_id":"1446", "wait":1, "terminate":"manual", "query":"select count(), min(val), max(val), sum(val), avg(val) from changelog(test14_append_stream_2, id, name) group by id"}, - {"client":"python", "query_type": "table", "depends_on":"1446", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'a', 11.1 ,'2020-02-02 20:00:00')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (2, 'a', 33.3 ,'2020-02-02 20:00:01')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'b', 22.2 ,'2020-02-02 20:00:02')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (2, 'b', 22.2 ,'2020-02-02 20:00:03')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'c', 33.3 ,'2020-02-02 20:00:04')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (2, 'c', 11.1 ,'2020-02-02 20:00:05')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'b', 44.4 ,'2020-02-02 20:00:06')"}, - {"client":"python", "query_type": "table", "kill":"1446", "kill_wait":3, "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (2, 'b', 44.4 ,'2020-02-02 20:00:07')"} + {"client":"python", "query_type": "stream", "depends_on_stream":"test14_append_stream_2", "query_id":"1446", "wait":1, "terminate":"manual", "query":"select count(), min(val), max(val), sum(val), avg(val) from changelog(test14_append_stream_2, id, name) group by id emit periodic 1s"}, + {"client":"python", "query_type": "table", "depends_on":"1446", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'a', 11.1 ,'2020-02-02 20:00:00') (2, 'a', 33.3 ,'2020-02-02 20:00:01')"}, + {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'b', 22.2 ,'2020-02-02 20:00:02') (2, 'b', 22.2 ,'2020-02-02 20:00:03')"}, + {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'c', 33.3 ,'2020-02-02 20:00:04') (2, 'c', 11.1 ,'2020-02-02 20:00:05')"}, + 
{"client":"python", "query_type": "table", "kill":"1446", "kill_wait":3, "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'b', 44.4 ,'2020-02-02 20:00:06') (2, 'b', 44.4 ,'2020-02-02 20:00:07')"} ] } ], @@ -270,14 +263,14 @@ { "query_id":"1446", "expected_results":[ - [1, 33.29999923706055, 33.29999923706055, 33.29999923706055, 33.29999923706055], - [1, 11.100000381469727, 11.100000381469727, 11.100000381469727, 11.100000381469727], - [2, 22.200000762939453, 33.29999923706055, 55.5, 27.75], - [2, 11.100000381469727, 22.200000762939453, 33.30000114440918, 16.65000057220459], - [3, 11.100000381469727, 33.29999923706055, 66.60000038146973, 22.200000127156574], - [3, 11.100000381469727, 33.29999923706055, 66.60000038146973, 22.200000127156574], - [3, 11.100000381469727, 44.400001525878906, 88.80000114440918, 29.600000381469727], - [3, 11.100000381469727, 44.400001525878906, 88.80000114440918, 29.600000381469727] + [1, 33.3, 33.3, 33.3, 33.3], + [1, 11.1, 11.1, 11.1, 11.1], + [2, 22.2, 33.3, 55.5, 27.75], + [2, 11.1, 22.2, 33.3, 16.65], + [3, 11.1, 33.3, 66.6, 22.2], + [3, 11.1, 33.3, 66.6, 22.2], + [3, 11.1, 44.4, 88.8, 29.6], + [3, 11.1, 44.4, 88.8, 29.6] ] } ] @@ -292,15 +285,11 @@ "statements": [ {"client":"python", "query_type":"table", "query":"drop stream if exists test14_append_stream_2"}, {"client":"python", "query_type": "table", "exist":"test14_append_stream_2", "exist_wait":2, "wait":1, "query":"create stream if not exists test14_append_stream_2 (id int, name string, val float, ts datetime)"}, - {"client":"python", "query_type": "stream", "query_id":"1447", "wait":1, "terminate":"manual", "query":"select count(), min(val), max(val), sum(val), avg(val) from changelog(test14_append_stream_2, id, name) group by id"}, - {"client":"python", "query_type": "table", "depends_on":"1447", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'a', 11.1 ,'2020-02-02 20:00:00')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (2, 'a', 33.3 ,'2020-02-02 20:00:01')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'b', 22.2 ,'2020-02-02 20:00:02')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (2, 'b', 22.2 ,'2020-02-02 20:00:03')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'c', 33.3 ,'2020-02-02 20:00:04')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (2, 'c', 11.1 ,'2020-02-02 20:00:05')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'b', 44.4 ,'2020-02-02 20:00:06')"}, - {"client":"python", "query_type": "table", "kill":"1447", "kill_wait":3, "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (2, 'b', 44.4 ,'2020-02-02 20:00:07')"} + {"client":"python", "query_type": "stream", "query_id":"1447", "wait":1, "terminate":"manual", "query":"select count(), min(val), max(val), sum(val), avg(val) from changelog(test14_append_stream_2, id, name) group by id emit periodic 1s"}, + {"client":"python", "query_type": "table", "depends_on":"1447", "wait":1, "query": "insert into test14_append_stream_2 (id, 
name, val, ts) values (1, 'a', 11.1 ,'2020-02-02 20:00:00') (2, 'a', 33.3 ,'2020-02-02 20:00:01')"}, + {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'b', 22.2 ,'2020-02-02 20:00:02') (2, 'b', 22.2 ,'2020-02-02 20:00:03')"}, + {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'c', 33.3 ,'2020-02-02 20:00:04') (2, 'c', 11.1 ,'2020-02-02 20:00:05')"}, + {"client":"python", "query_type": "table", "kill":"1447", "kill_wait":3, "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'b', 44.4 ,'2020-02-02 20:00:06') (2, 'b', 44.4 ,'2020-02-02 20:00:07')"} ] } ], @@ -308,14 +297,14 @@ { "query_id":"1447", "expected_results":[ - [1, 33.29999923706055, 33.29999923706055, 33.29999923706055, 33.29999923706055], - [1, 11.100000381469727, 11.100000381469727, 11.100000381469727, 11.100000381469727], - [2, 22.200000762939453, 33.29999923706055, 55.5, 27.75], - [2, 11.100000381469727, 22.200000762939453, 33.30000114440918, 16.65000057220459], - [3, 11.100000381469727, 33.29999923706055, 66.60000038146973, 22.200000127156574], - [3, 11.100000381469727, 33.29999923706055, 66.60000038146973, 22.200000127156574], - [3, 11.100000381469727, 44.400001525878906, 88.80000114440918, 29.600000381469727], - [3, 11.100000381469727, 44.400001525878906, 88.80000114440918, 29.600000381469727] + [1, 33.3, 33.3, 33.3, 33.3], + [1, 11.1, 11.1, 11.1, 11.1], + [2, 22.2, 33.3, 55.5, 27.75], + [2, 11.1, 22.2, 33.3, 16.65], + [3, 11.1, 33.3, 66.6, 22.2], + [3, 11.1, 33.3, 66.6, 22.2], + [3, 11.1, 44.4, 88.8, 29.6], + [3, 11.1, 44.4, 88.8, 29.6] ] } ] @@ -344,8 +333,8 @@ { "query_id":"1448", "expected_results":[ - [1, 3, 11.100000381469727, 44.400001525878906, 88.80000114440918, 29.600000381469727, "2020-02-02 20:00:00", "2020-02-02 20:00:05"], - [2, 2, 22.200000762939453, 44.400001525878906, 66.60000228881836, 33.30000114440918, "2020-02-02 20:00:00", "2020-02-02 20:00:05"] + [1, 3, 11.1, 44.4, 88.8, 29.6, "2020-02-02 20:00:00", "2020-02-02 20:00:05"], + [2, 2, 22.2, 44.4, 66.6, 33.3, "2020-02-02 20:00:00", "2020-02-02 20:00:05"] ] } ] @@ -360,7 +349,7 @@ "statements": [ {"client":"python", "query_type":"table", "query":"drop stream if exists test14_append_stream_2"}, {"client":"python", "query_type": "table", "exist":"test14_append_stream_2", "exist_wait":2, "wait":1, "query":"create stream if not exists test14_append_stream_2 (id int, name string, val float, ts datetime)"}, - {"client":"python", "query_type": "stream", "query_id":"1449", "wait":1, "terminate":"manual", "query":"with subquery as (select * from changelog(test14_append_stream_2, id, name))select id, count(*), min(val), max(val) from subquery group by id"}, + {"client":"python", "query_type": "stream", "query_id":"1449", "wait":1, "terminate":"manual", "query":"with subquery as (select * from changelog(test14_append_stream_2, id, name))select id, count(*), min(val), max(val) from subquery group by id emit periodic 1s"}, {"client":"python", "query_type": "table", "depends_on":"1449", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'a', 11.1 ,'2020-02-02 20:00:00')"}, {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'b', 22.2 ,'2020-02-02 20:00:02')"}, {"client":"python", "query_type": "table", "kill":"1449", "kill_wait":3, "wait":1, "query": "insert into 
test14_append_stream_2 (id, name, val, ts) values (1, 'a', 33.3 ,'2020-02-02 20:00:04')"} @@ -371,8 +360,9 @@ { "query_id":"1449", "expected_results":[ - [1, 2, 11.100000381469727, 22.200000762939453], - [1, 2, 22.200000762939453, 33.29999923706055] + [1, 1, 11.1, 11.1], + [1, 2, 11.1, 22.2], + [1, 2, 22.2, 33.3] ] } ] @@ -387,15 +377,11 @@ "statements": [ {"client":"python", "query_type":"table", "query":"drop stream if exists test14_append_stream_2"}, {"client":"python", "query_type": "table", "exist":"test14_append_stream_2", "exist_wait":2, "wait":1, "query":"create stream if not exists test14_append_stream_2 (id int, name string, val float, ts datetime)"}, - {"client":"python", "query_type": "stream", "query_id":"1450", "wait":1, "terminate":"manual", "query":"with subquery as (select id, count() as cnt, min(val) as min_val, max(val) as max_val from changelog(test14_append_stream_2, id, name) group by id)select count(*), sum(cnt), min(min_val), max(max_val) from subquery"}, - {"client":"python", "query_type": "table", "depends_on":"1450", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'a', 11.1 ,'2020-02-02 20:00:00')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (2, 'a', 33.3 ,'2020-02-02 20:00:01')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'b', 22.2 ,'2020-02-02 20:00:02')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (2, 'b', 22.2 ,'2020-02-02 20:00:03')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'c', 33.3 ,'2020-02-02 20:00:04')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (2, 'c', 11.1 ,'2020-02-02 20:00:05')"}, - {"client":"python", "query_type": "table", "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'b', 44.4 ,'2020-02-02 20:00:06')"}, - {"client":"python", "query_type": "table", "kill":"1450", "kill_wait":3, "wait":1, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (2, 'b', 44.4 ,'2020-02-02 20:00:07')"} + {"client":"python", "query_type": "stream", "depends_on_stream":"test14_append_stream_2", "query_id":"1450", "wait":1, "terminate":"manual", "query":"with subquery as (select id, count() as cnt, min(val) as min_val, max(val) as max_val from changelog(test14_append_stream_2, id, name) group by id)select count(*), sum(cnt), min(min_val), max(max_val) from subquery"}, + {"client":"python", "query_type": "table", "depends_on":"1450", "wait":2, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'a', 11.1 ,'2020-02-02 20:00:00') (2, 'a', 33.3 ,'2020-02-02 20:00:01')"}, + {"client":"python", "query_type": "table", "wait":2, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'b', 22.2 ,'2020-02-02 20:00:02') (2, 'b', 22.2 ,'2020-02-02 20:00:03')"}, + {"client":"python", "query_type": "table", "wait":2, "query": "insert into test14_append_stream_2 (id, name, val, ts) values (1, 'c', 33.3 ,'2020-02-02 20:00:04') (2, 'c', 11.1 ,'2020-02-02 20:00:05')"}, + {"client":"python", "query_type": "table", "kill":"1450", "kill_wait":3, "wait":2, "query": "insert into test14_append_stream_2 (id, name, val, ts) values 
(1, 'b', 44.4 ,'2020-02-02 20:00:06') (2, 'b', 44.4 ,'2020-02-02 20:00:07')"} ] } ], @@ -403,10 +389,10 @@ { "query_id":"1450", "expected_results":[ - [2, 2, 11.100000381469727, 33.29999923706055], - [2, 4, 11.100000381469727, 33.29999923706055], - [2, 6, 11.100000381469727, 33.29999923706055], - [2, 6, 11.100000381469727, 44.400001525878906] + [2, 2, 11.1, 33.3], + [2, 4, 11.1, 33.3], + [2, 6, 11.1, 33.3], + [2, 6, 11.1, 44.4] ] } ] diff --git a/tests/stream/test_stream_smoke/0018_query_state7_view.json b/tests/stream/test_stream_smoke/0018_query_state7_view.json index 2803f5c8d77..79a53ddd044 100644 --- a/tests/stream/test_stream_smoke/0018_query_state7_view.json +++ b/tests/stream/test_stream_smoke/0018_query_state7_view.json @@ -548,7 +548,7 @@ {"client":"python", "query_type": "table", "wait":1, "query":"drop view if exists test19_state_mv7"}, {"client":"python", "query_type": "table", "wait":1, "query":"drop stream if exists test19_state_stream7"}, {"client":"python", "query_type": "table", "exist":"test19_state_stream7", "exist_wait":2, "wait":1, "query":"create stream test19_state_stream7 (id string, location string, value float, timestamp datetime64(3) default now64(3))"}, - {"client":"python", "query_type": "table", "exist":"test19_state_mv7", "exist_wait":2, "wait":1, "query":"create materialized view test19_state_mv7 as (select id, sum(value) as sum_value from test19_state_stream7 group by id limit 4 union select id, sum(value) as sum_value from test19_state_stream7 group by id limit 4)"}, + {"client":"python", "query_type": "table", "exist":"test19_state_mv7", "exist_wait":2, "wait":1, "query":"create materialized view test19_state_mv7 as (select id, sum(value) as sum_value from test19_state_stream7 group by id union select id, sum(value) as sum_value from test19_state_stream7 group by id)"}, {"client":"python", "query_type": "stream", "query_id":"19177", "wait":1, "terminate":"manual", "query":"subscribe to select id, sum_value from test19_state_mv7 settings checkpoint_interval=1"}, {"client":"python", "query_type": "table", "depends_on":"19177", "kill":"19177", "kill_wait":3, "wait":1, "query": "insert into test19_state_stream7(id, location, value, timestamp) values ('dev1', 'ca', 57.3, '2020-02-02 20:00:00')('dev2', 'ca', 58.3, '2020-02-02 20:00:03')"} ] @@ -579,8 +579,10 @@ "expected_results":[ ["dev1", "57.3"], ["dev2", "127.3"], + ["dev4", "67"], ["dev1", "57.3"], - ["dev2", "127.3"] + ["dev2", "127.3"], + ["dev4", "67"] ] } ] diff --git a/tests/stream/test_stream_smoke/0030_two_level_global_aggr.yaml b/tests/stream/test_stream_smoke/0030_two_level_global_aggr.yaml index 09a1c4d9240..9c5657d1111 100644 --- a/tests/stream/test_stream_smoke/0030_two_level_global_aggr.yaml +++ b/tests/stream/test_stream_smoke/0030_two_level_global_aggr.yaml @@ -131,7 +131,7 @@ tests: query_id: 3100 depends_on_stream: test_31_multishards_stream query: | - subscribe to with cte as (select i as key, count() from test_31_multishards_stream where _tp_time > earliest_ts() group by key settings group_by_two_level_threshold=50) select count() from cte settings checkpoint_interval=2, emit_during_backfill=false; + subscribe to with cte as (select i as key, count() from test_31_multishards_stream where _tp_time > earliest_ts() group by key settings group_by_two_level_threshold=50) select count() from cte settings checkpoint_interval=2; - client: python query_type: table @@ -207,7 +207,7 @@ tests: depends_on_stream: test_31_multishards_stream wait: 1 query: | - subscribe to with cte as (select i as 
key, count() from changelog(test_31_multishards_stream, i) where _tp_time > earliest_ts() group by key emit changelog settings group_by_two_level_threshold=50) select count() from cte settings checkpoint_interval=2, emit_during_backfill=false; + subscribe to with cte as (select i as key, count() from changelog(test_31_multishards_stream, i) where _tp_time > earliest_ts() group by key emit changelog settings group_by_two_level_threshold=50) select count() from cte settings checkpoint_interval=2; - client: python query_type: table diff --git a/tests/stream/test_stream_smoke/0099_fixed_issues.json b/tests/stream/test_stream_smoke/0099_fixed_issues.json index 1422c6d81a7..3f298ec5413 100644 --- a/tests/stream/test_stream_smoke/0099_fixed_issues.json +++ b/tests/stream/test_stream_smoke/0099_fixed_issues.json @@ -640,7 +640,7 @@ {"client":"python", "query_type": "table", "query": "drop stream if exists v_12183487"}, {"client":"python", "query_type": "table", "wait":2, "query": "create stream v_12183487(id int, val int) primary key id settings shards=3;"}, {"client":"python", "query_type": "stream", "wait":2, "depends_on_stream":"v_12183487", "query_id":"12183487213", "query":"subscribe to select sum_distinct_streaming(val), sum(val), count_distinct(val), count(val) from v_12183487 settings checkpoint_interval=1;"}, - {"client":"python", "query_type": "table", "depends_on_stream": "v_12183487", "kill":"12183487213", "kill_wait":2, "wait": 3, "query": "insert into v_12183487(id, val) values(3, 30);"} + {"client":"python", "query_type": "table", "depends_on": "12183487213", "kill":"12183487213", "kill_wait":3, "wait": 2, "query": "insert into v_12183487(id, val) values(3, 30);"} ] }, { From 22a14c5608330ef0bcb02e9b058328216f3b1815 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lisen=20=E6=9D=A8?= Date: Sat, 3 Feb 2024 01:58:12 +0800 Subject: [PATCH 4/5] fix comments * remove compile aggregate functions for streaming query now * remove no_more_keys * remove overflow_rows * move out refactor code for retract impl --- cmake/autogenerated_versions.txt | 2 +- src/Common/HashMapsTemplate.h | 4 +- src/Common/HashTable/TimeBucketHashMap.h | 4 +- src/Common/HashTable/TimeBucketHashTable.h | 42 +- src/Common/HashTable/TwoLevelHashMap.h | 4 +- src/Common/HashTable/TwoLevelHashTable.h | 37 +- src/Common/HashTable/TwoLevelStringHashMap.h | 4 +- .../HashTable/TwoLevelStringHashTable.h | 50 +- src/Interpreters/InterpreterSelectQuery.cpp | 3 +- src/Interpreters/Streaming/AggregateDataEx.h | 124 -- .../Streaming/AggregationUtils.cpp | 10 +- src/Interpreters/Streaming/Aggregator.cpp | 1848 +++++------------ src/Interpreters/Streaming/Aggregator.h | 202 +- .../Streaming/UpdatesTrackingData.h | 105 + .../Streaming/AggregatingHelper.cpp | 107 +- .../Transforms/Streaming/AggregatingHelper.h | 11 +- .../Streaming/AggregatingTransform.cpp | 21 +- .../AggregatingTransformWithSubstream.cpp | 12 +- .../Streaming/GlobalAggregatingTransform.cpp | 84 +- .../Streaming/GlobalAggregatingTransform.h | 2 - ...lobalAggregatingTransformWithSubstream.cpp | 58 +- .../GlobalAggregatingTransformWithSubstream.h | 2 - 22 files changed, 917 insertions(+), 1819 deletions(-) delete mode 100644 src/Interpreters/Streaming/AggregateDataEx.h create mode 100644 src/Interpreters/Streaming/UpdatesTrackingData.h diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 2f61abb85dc..29ccf0cc41c 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,7 +2,7 @@ # NOTE: has nothing common with 
DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 3) +SET(VERSION_REVISION 2) SET(VERSION_MAJOR 1) SET(VERSION_MINOR 4) SET(VERSION_PATCH 1) diff --git a/src/Common/HashMapsTemplate.h b/src/Common/HashMapsTemplate.h index 53df5ecd69f..09e1a031935 100644 --- a/src/Common/HashMapsTemplate.h +++ b/src/Common/HashMapsTemplate.h @@ -70,14 +70,14 @@ template void serializeTwoLevelHashMap(const Map & map, MappedSerializer && mapped_serializer, WriteBuffer & wb) { serializeHashMap(map, std::move(mapped_serializer), wb); - map.writeBucketUpdatedFlags(wb); + map.writeUpdatedBuckets(wb); } template void deserializeTwoLevelHashMap(Map & map, MappedDeserializer && mapped_deserializer, Arena & pool, ReadBuffer & rb) { deserializeHashMap(map, std::move(mapped_deserializer), pool, rb); - map.readBucketUpdatedFlags(rb); /// recover buckets updated status + map.readUpdatedBuckets(rb); /// recover buckets updated status } /// HashMapsTemplate is a taken from HashJoin class and make it standalone diff --git a/src/Common/HashTable/TimeBucketHashMap.h b/src/Common/HashTable/TimeBucketHashMap.h index 685ede30af4..827c396f8ef 100644 --- a/src/Common/HashTable/TimeBucketHashMap.h +++ b/src/Common/HashTable/TimeBucketHashMap.h @@ -38,11 +38,11 @@ class TimeBucketHashMapTable { for (auto & p : this->impls) { - if (this->isUpdatedBucket(p.first)) + if (this->isBucketUpdated(p.first)) { p.second.forEachValue(func); if (reset_updated) - this->resetUpdated(p.first); + this->resetUpdatedBucket(p.first); } } } diff --git a/src/Common/HashTable/TimeBucketHashTable.h b/src/Common/HashTable/TimeBucketHashTable.h index 9bff2271aa3..023a10ba9de 100644 --- a/src/Common/HashTable/TimeBucketHashTable.h +++ b/src/Common/HashTable/TimeBucketHashTable.h @@ -110,7 +110,7 @@ class TimeBucketHashTable : private boost::noncopyable, protected Hash /// empty /// FIXME, choose a better perf data structure /// Usually we don't have too many time buckets std::map impls; - std::unordered_map bucket_updated_flags; + std::unordered_map updated_buckets; Impl sentinel; TimeBucketHashTable() { } @@ -265,7 +265,7 @@ class TimeBucketHashTable : private boost::noncopyable, protected Hash /// empty { auto window = windowKey(key_holder); impls[window].emplace(key_holder, it, inserted, hash_value); - bucket_updated_flags[window] = true; /// updated + updated_buckets[window] = true; /// updated } LookupResult ALWAYS_INLINE find(Key x, size_t hash_value) @@ -292,7 +292,7 @@ class TimeBucketHashTable : private boost::noncopyable, protected Hash /// empty { DB::writeIntBinary(p.first); p.second.write(wb); - DB::writeBoolText(bucket_updated_flags[p.first], wb); + DB::writeBinary(updated_buckets[p.first], wb); } } @@ -317,7 +317,7 @@ class TimeBucketHashTable : private boost::noncopyable, protected Hash /// empty DB::writeChar('<', wb); p.second.writeText(wb); DB::writeChar(',', wb); - DB::writeBoolText(bucket_updated_flags[p.first], wb); + DB::writeBoolText(updated_buckets[p.first], wb); DB::writeChar('>', wb); } DB::writeChar(END_BUCKET_MARKER, wb); @@ -336,7 +336,7 @@ class TimeBucketHashTable : private boost::noncopyable, protected Hash /// empty assert(key != 0); assert(!impls.contains(key)); impls[key].read(rb); - DB::readBoolText(bucket_updated_flags[key], rb); + DB::readBinary(updated_buckets[key], rb); } } @@ -363,7 +363,7 @@ class TimeBucketHashTable : private boost::noncopyable, protected Hash /// empty DB::assertChar('<', rb); impls[key].readText(rb); DB::assertChar(',', 
rb); - DB::readBoolText(bucket_updated_flags[key], rb); + DB::readBoolText(updated_buckets[key], rb); DB::assertChar('>', rb); } DB::assertChar(END_BUCKET_MARKER, rb); @@ -417,7 +417,7 @@ class TimeBucketHashTable : private boost::noncopyable, protected Hash /// empty last_removed_watermark = it->first; ++removed; - bucket_updated_flags.erase(it->first); + updated_buckets.erase(it->first); it = impls.erase(it); } else @@ -455,44 +455,44 @@ class TimeBucketHashTable : private boost::noncopyable, protected Hash /// empty return buckets; } - bool isUpdatedBucket(Int64 bucket_) const + bool isBucketUpdated(Int64 bucket_) const { - auto it = bucket_updated_flags.find(bucket_); - if (it != bucket_updated_flags.end()) + auto it = updated_buckets.find(bucket_); + if (it != updated_buckets.end()) return it->second; return false; } - void resetUpdated(Int64 bucket_) + void resetUpdatedBucket(Int64 bucket_) { - auto it = bucket_updated_flags.find(bucket_); - if (it != bucket_updated_flags.end()) + auto it = updated_buckets.find(bucket_); + if (it != updated_buckets.end()) it->second = false; } - void writeBucketUpdatedFlags(DB::WriteBuffer & wb) const + void writeUpdatedBuckets(DB::WriteBuffer & wb) const { - DB::writeVarUInt(bucket_updated_flags.size(), wb); - for (const auto & [bucket, updated] : bucket_updated_flags) + DB::writeVarUInt(updated_buckets.size(), wb); + for (const auto & [bucket, updated] : updated_buckets) { DB::writeIntBinary(bucket, wb); - DB::writeBoolText(updated, wb); + DB::writeBinary(updated, wb); } } - void readBucketUpdatedFlags(DB::ReadBuffer & rb) + void readUpdatedBuckets(DB::ReadBuffer & rb) { size_t size = 0; DB::readVarUInt(size, rb); - bucket_updated_flags.clear(); + updated_buckets.clear(); Int64 bucket = 0; bool updated = false; for (size_t i = 0; i < size; ++i) { DB::readIntBinary(bucket, rb); - DB::readBoolText(updated, rb); - bucket_updated_flags.emplace(bucket, updated); + DB::readBinary(updated, rb); + updated_buckets.emplace(bucket, updated); } } }; diff --git a/src/Common/HashTable/TwoLevelHashMap.h b/src/Common/HashTable/TwoLevelHashMap.h index 5c87d5e6eb0..26008468974 100644 --- a/src/Common/HashTable/TwoLevelHashMap.h +++ b/src/Common/HashTable/TwoLevelHashMap.h @@ -43,11 +43,11 @@ class TwoLevelHashMapTable : public TwoLevelHashTableNUM_BUCKETS; ++i) { - if (this->isUpdatedBucket(i)) + if (this->isBucketUpdated(i)) { this->impls[i].forEachValue(func); if (reset_updated) - this->resetUpdated(i); + this->resetUpdatedBucket(i); } } } diff --git a/src/Common/HashTable/TwoLevelHashTable.h b/src/Common/HashTable/TwoLevelHashTable.h index 4dd13e6e7e4..46d9e3ad637 100644 --- a/src/Common/HashTable/TwoLevelHashTable.h +++ b/src/Common/HashTable/TwoLevelHashTable.h @@ -90,7 +90,7 @@ class TwoLevelHashTable : using ConstLookupResult = typename Impl::ConstLookupResult; Impl impls[NUM_BUCKETS]; - bool bucket_updated_flags[NUM_BUCKETS] = {false}; + bool updated_buckets[NUM_BUCKETS] = {false}; TwoLevelHashTable() = default; @@ -120,7 +120,7 @@ class TwoLevelHashTable : size_t hash_value = cell->getHash(src); size_t buck = getBucketFromHash(hash_value); impls[buck].insertUniqueNonZero(cell, hash_value); - bucket_updated_flags[buck] = true; + updated_buckets[buck] = true; } } @@ -273,7 +273,7 @@ class TwoLevelHashTable : { size_t buck = getBucketFromHash(hash_value); impls[buck].emplace(key_holder, it, inserted, hash_value); - bucket_updated_flags[buck] = true; + updated_buckets[buck] = true; } LookupResult ALWAYS_INLINE find(Key x, size_t hash_value) @@ -297,7 +297,7 @@ 
class TwoLevelHashTable : for (UInt32 i = 0; i < NUM_BUCKETS; ++i) { impls[i].write(wb); - DB::writeBoolText(bucket_updated_flags[i], wb); + DB::writeBinary(updated_buckets[i], wb); } } @@ -307,11 +307,12 @@ class TwoLevelHashTable : { if (i != 0) DB::writeChar(',', wb); + /// DB::writeChar('<', wb); impls[i].writeText(wb); DB::writeChar(',', wb); - DB::writeBoolText(bucket_updated_flags[i], wb); + DB::writeBoolText(updated_buckets[i], wb); DB::writeChar('>', wb); } } @@ -321,7 +322,7 @@ class TwoLevelHashTable : for (UInt32 i = 0; i < NUM_BUCKETS; ++i) { impls[i].read(rb); - DB::readBoolText(bucket_updated_flags[i], rb); + DB::readBinary(updated_buckets[i], rb); } } @@ -331,12 +332,12 @@ class TwoLevelHashTable : { if (i != 0) DB::assertChar(',', rb); - + /// DB::assertChar('<', rb); impls[i].readText(rb); DB::assertChar(',', rb); - DB::readBoolText(bucket_updated_flags[i], rb); + DB::readBoolText(updated_buckets[i], rb); DB::assertChar('>', rb); } } @@ -386,30 +387,30 @@ class TwoLevelHashTable : return bucket_ids; } - bool isUpdatedBucket(Int64 bucket_) const + bool isBucketUpdated(Int64 bucket_) const { - return bucket_updated_flags[bucket_]; + return updated_buckets[bucket_]; } - void resetUpdated(Int64 bucket_) + void resetUpdatedBucket(Int64 bucket_) { - bucket_updated_flags[bucket_] = false; + updated_buckets[bucket_] = false; } - void writeBucketUpdatedFlags(DB::WriteBuffer & wb) const + void writeUpdatedBuckets(DB::WriteBuffer & wb) const { DB::writeVarUInt(NUM_BUCKETS, wb); - for (const auto & elem : bucket_updated_flags) - DB::writeBoolText(elem, wb); + for (const auto & elem : updated_buckets) + DB::writeBinary(elem, wb); } - void readBucketUpdatedFlags(DB::ReadBuffer & rb) + void readUpdatedBuckets(DB::ReadBuffer & rb) { size_t size = 0; DB::readVarUInt(size, rb); assert(size == NUM_BUCKETS); - for (auto & elem : bucket_updated_flags) - DB::readBoolText(elem, rb); + for (auto & elem : updated_buckets) + DB::readBinary(elem, rb); } /// proton : ends }; diff --git a/src/Common/HashTable/TwoLevelStringHashMap.h b/src/Common/HashTable/TwoLevelStringHashMap.h index 9f2c5ba00d3..3501861a3ee 100644 --- a/src/Common/HashTable/TwoLevelStringHashMap.h +++ b/src/Common/HashTable/TwoLevelStringHashMap.h @@ -34,11 +34,11 @@ class TwoLevelStringHashMap : public TwoLevelStringHashTableNUM_BUCKETS; ++i) { - if (this->isUpdatedBucket(i)) + if (this->isBucketUpdated(i)) { this->impls[i].forEachValue(func); if (reset_updated) - this->resetUpdated(i); + this->resetUpdatedBucket(i); } } } diff --git a/src/Common/HashTable/TwoLevelStringHashTable.h b/src/Common/HashTable/TwoLevelStringHashTable.h index e74ae676143..e1a3910ecf4 100644 --- a/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/src/Common/HashTable/TwoLevelStringHashTable.h @@ -39,7 +39,7 @@ class TwoLevelStringHashTable : private boost::noncopyable using ConstLookupResult = typename Impl::ConstLookupResult; Impl impls[NUM_BUCKETS]; - bool bucket_updated_flags[NUM_BUCKETS] = {false}; + bool updated_buckets[NUM_BUCKETS] = {false}; TwoLevelStringHashTable() {} @@ -54,28 +54,28 @@ class TwoLevelStringHashTable : private boost::noncopyable size_t hash_value = v.getHash(src.m1); size_t buck = getBucketFromHash(hash_value); impls[buck].m1.insertUniqueNonZero(&v, hash_value); - bucket_updated_flags[buck] = true; + updated_buckets[buck] = true; } for (auto & v : src.m2) { size_t hash_value = v.getHash(src.m2); size_t buck = getBucketFromHash(hash_value); impls[buck].m2.insertUniqueNonZero(&v, hash_value); - bucket_updated_flags[buck] = true; 
+ updated_buckets[buck] = true; } for (auto & v : src.m3) { size_t hash_value = v.getHash(src.m3); size_t buck = getBucketFromHash(hash_value); impls[buck].m3.insertUniqueNonZero(&v, hash_value); - bucket_updated_flags[buck] = true; + updated_buckets[buck] = true; } for (auto & v : src.ms) { size_t hash_value = v.getHash(src.ms); size_t buck = getBucketFromHash(hash_value); impls[buck].ms.insertUniqueNonZero(&v, hash_value); - bucket_updated_flags[buck] = true; + updated_buckets[buck] = true; } } @@ -90,7 +90,7 @@ class TwoLevelStringHashTable : private boost::noncopyable if (sz == 0) { if constexpr (std::is_same_v) - self.bucket_updated_flags[0] = true; + self.updated_buckets[0] = true; keyHolderDiscardKey(key_holder); return func(self.impls[0].m0, VoidKey{}, 0); @@ -103,7 +103,7 @@ class TwoLevelStringHashTable : private boost::noncopyable auto res = hash(x); auto buck = getBucketFromHash(res); if constexpr (std::is_same_v) - self.bucket_updated_flags[buck] = true; + self.updated_buckets[buck] = true; return func(self.impls[buck].ms, std::forward(key_holder), res); @@ -138,7 +138,7 @@ class TwoLevelStringHashTable : private boost::noncopyable auto res = hash(k8); auto buck = getBucketFromHash(res); if constexpr (std::is_same_v) - self.bucket_updated_flags[buck] = true; + self.updated_buckets[buck] = true; keyHolderDiscardKey(key_holder); return func(self.impls[buck].m1, k8, res); @@ -152,7 +152,7 @@ class TwoLevelStringHashTable : private boost::noncopyable auto res = hash(k16); auto buck = getBucketFromHash(res); if constexpr (std::is_same_v) - self.bucket_updated_flags[buck] = true; + self.updated_buckets[buck] = true; keyHolderDiscardKey(key_holder); return func(self.impls[buck].m2, k16, res); @@ -166,7 +166,7 @@ class TwoLevelStringHashTable : private boost::noncopyable auto res = hash(k24); auto buck = getBucketFromHash(res); if constexpr (std::is_same_v) - self.bucket_updated_flags[buck] = true; + self.updated_buckets[buck] = true; keyHolderDiscardKey(key_holder); return func(self.impls[buck].m3, k24, res); @@ -176,7 +176,7 @@ class TwoLevelStringHashTable : private boost::noncopyable auto res = hash(x); auto buck = getBucketFromHash(res); if constexpr (std::is_same_v) - self.bucket_updated_flags[buck] = true; + self.updated_buckets[buck] = true; return func(self.impls[buck].ms, std::forward(key_holder), res); } @@ -204,7 +204,7 @@ class TwoLevelStringHashTable : private boost::noncopyable for (UInt32 i = 0; i < NUM_BUCKETS; ++i) { impls[i].write(wb); - DB::writeBoolText(bucket_updated_flags[i], wb); + DB::writeBinary(updated_buckets[i], wb); } } @@ -218,7 +218,7 @@ class TwoLevelStringHashTable : private boost::noncopyable DB::writeChar('<', wb); impls[i].writeText(wb); DB::writeChar(',', wb); - DB::writeBoolText(bucket_updated_flags[i], wb); + DB::writeBoolText(updated_buckets[i], wb); DB::writeChar('>', wb); } } @@ -228,7 +228,7 @@ class TwoLevelStringHashTable : private boost::noncopyable for (UInt32 i = 0; i < NUM_BUCKETS; ++i) { impls[i].read(rb); - DB::readBoolText(bucket_updated_flags[i], rb); + DB::readBinary(updated_buckets[i], rb); } } @@ -243,7 +243,7 @@ class TwoLevelStringHashTable : private boost::noncopyable DB::assertChar('<', rb); impls[i].readText(rb); DB::assertChar(',', rb); - DB::readBoolText(bucket_updated_flags[i], rb); + DB::readBoolText(updated_buckets[i], rb); DB::assertChar('>', rb); } } @@ -293,29 +293,29 @@ class TwoLevelStringHashTable : private boost::noncopyable return bucket_ids; } - bool isUpdatedBucket(Int64 bucket_) const + bool 
isBucketUpdated(Int64 bucket_) const { - return bucket_updated_flags[bucket_]; + return updated_buckets[bucket_]; } - void resetUpdated(Int64 bucket_) + void resetUpdatedBucket(Int64 bucket_) { - bucket_updated_flags[bucket_] = false; + updated_buckets[bucket_] = false; } - void writeBucketUpdatedFlags(DB::WriteBuffer & wb) const + void writeUpdatedBuckets(DB::WriteBuffer & wb) const { DB::writeVarUInt(NUM_BUCKETS, wb); - for (const auto & elem : bucket_updated_flags) - DB::writeBoolText(elem, wb); + for (const auto & elem : updated_buckets) + DB::writeBinary(elem, wb); } - void readBucketUpdatedFlags(DB::ReadBuffer & rb) + void readUpdatedBuckets(DB::ReadBuffer & rb) { size_t size = 0; DB::readVarUInt(size, rb); assert(size == NUM_BUCKETS); - for (auto & elem : bucket_updated_flags) - DB::readBoolText(elem, rb); + for (auto & elem : updated_buckets) + DB::readBinary(elem, rb); } }; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 66d99a2c0fb..ae40014e4ba 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -3261,8 +3261,7 @@ void InterpreterSelectQuery::executeStreamingAggregation( streaming_group_by, delta_col_pos, window_keys_num, - query_info.streaming_window_params, - data_stream_semantic_pair.isChangelogOutput()); + query_info.streaming_window_params); auto merge_threads = max_streams; auto temporary_data_merge_threads = settings.aggregation_memory_efficient_merge_threads diff --git a/src/Interpreters/Streaming/AggregateDataEx.h b/src/Interpreters/Streaming/AggregateDataEx.h deleted file mode 100644 index 2b969018a7d..00000000000 --- a/src/Interpreters/Streaming/AggregateDataEx.h +++ /dev/null @@ -1,124 +0,0 @@ -#pragma once - -#include -#include -#include - -namespace DB -{ -using AggregateDataPtr = char *; -using ConstAggregateDataPtr = const char *; - -namespace Streaming -{ -SERDE struct UpdatedDataEx -{ - static ALWAYS_INLINE UpdatedDataEx & data(AggregateDataPtr __restrict place) { return *reinterpret_cast(place); } - static ALWAYS_INLINE const UpdatedDataEx & data(ConstAggregateDataPtr __restrict place) { return *reinterpret_cast(place); } - - static ALWAYS_INLINE bool isEmpty(ConstAggregateDataPtr __restrict place) { return data(place).final_count == 0; } - static ALWAYS_INLINE bool isUpdated(ConstAggregateDataPtr __restrict place) { return data(place).updated_since_last_finalization; } - static ALWAYS_INLINE void setUpdated(AggregateDataPtr __restrict place) { data(place).updated_since_last_finalization = true; } - static ALWAYS_INLINE void resetUpdated(AggregateDataPtr __restrict place) { data(place).updated_since_last_finalization = false; } - - static void addBatch(size_t row_begin, size_t row_end, AggregateDataPtr * places, const IColumn * delta_col) - { - if (delta_col == nullptr) - { - for (size_t i = row_begin; i < row_end; ++i) - if (places[i]) - data(places[i]).add(); - } - else - { - const auto & delta_flags = assert_cast(*delta_col).getData(); - for (size_t i = row_begin; i < row_end; ++i) - { - if (places[i]) - { - if (delta_flags[i] >= 0) - data(places[i]).add(); - else - data(places[i]).negate(); - } - } - } - } - - static void addBatchSinglePlace(size_t row_begin, size_t row_end, AggregateDataPtr __restrict place, const IColumn * delta_col) - { - if (!place) - return; - - auto & metadata = data(place); - if (delta_col == nullptr) - metadata.final_count += row_end - row_begin; - else - { - const auto & delta_flags = 
assert_cast(*delta_col).getData(); - metadata.final_count = std::accumulate(delta_flags.begin(), delta_flags.end(), metadata.final_count); - } - - metadata.updated_since_last_finalization = true; - } - - static void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & wb) - { - const auto & data_ex = data(place); - writeIntBinary(data_ex.final_count, wb); - writeBoolText(data_ex.updated_since_last_finalization, wb); - } - - static void deserialize(AggregateDataPtr __restrict place, ReadBuffer & rb) - { - auto & data_ex = data(place); - readIntBinary(data_ex.final_count, rb); - readBoolText(data_ex.updated_since_last_finalization, rb); - } - - ALWAYS_INLINE void add() - { - ++final_count; - updated_since_last_finalization = true; - } - - ALWAYS_INLINE void negate() - { - --final_count; - updated_since_last_finalization = true; - } - - /// Used for tracking the group is empty or not - UInt32 final_count = 0; - - /// Used for tracking the group is updated or not - bool updated_since_last_finalization = true; -}; - -SERDE struct RetractedDataEx : UpdatedDataEx -{ - static ALWAYS_INLINE AggregateDataPtr & getRetracted(AggregateDataPtr & place) { return reinterpret_cast(place)->retracted_data; } - static ALWAYS_INLINE bool hasRetracted(ConstAggregateDataPtr __restrict place) { return reinterpret_cast(place)->retracted_data; } - - template - static ALWAYS_INLINE AggregateDataPtr & getData(AggregateDataPtr & place) - { - if constexpr (use_retracted_data) - return getRetracted(place); - else - return place; - } - - /// Used for tracking group changes - AggregateDataPtr retracted_data = nullptr; -}; - -enum class ExpandedDataType : uint8_t -{ - None = 0, - Updated = 1, /// Allow tracking group is empty or updated - UpdatedWithRetracted = 2, /// Allow tracking group is empty or updated and changes -}; - -} -} diff --git a/src/Interpreters/Streaming/AggregationUtils.cpp b/src/Interpreters/Streaming/AggregationUtils.cpp index b40851b65e6..6f79b641e01 100644 --- a/src/Interpreters/Streaming/AggregationUtils.cpp +++ b/src/Interpreters/Streaming/AggregationUtils.cpp @@ -39,6 +39,12 @@ OutputBlockColumns prepareOutputBlockColumns( /// The ColumnAggregateFunction column captures the shared ownership of the arena with the aggregate function states. ColumnAggregateFunction & column_aggregate_func = assert_cast(*aggregate_columns[i]); + /// proton: starts + column_aggregate_func.setKeepState(params.keep_state); + /// proton: ends + + /// Add arenas to ColumnAggregateFunction, which can result in moving ownership to it if reference count + /// get dropped in other places for (auto & pool : aggregates_pools) column_aggregate_func.addArena(pool); @@ -52,10 +58,10 @@ OutputBlockColumns prepareOutputBlockColumns( if (aggregate_functions[i]->isState()) { - auto callback = [&](IColumn & subcolumn) + auto callback = [&](IColumn & column) { /// The ColumnAggregateFunction column captures the shared ownership of the arena with aggregate function states. 
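The UpdatedDataEx/RetractedDataEx structs deleted above are superseded elsewhere in this patch by TrackingUpdates, whose definition is not shown in these hunks. As a rough, illustrative sketch only (names and field widths are made up), the idea is a small header placed in front of the aggregate function states: a live-row counter that a negative delta column can decrement, plus a flag recording whether the group changed since it was last finalized.

#include <cstdint>

/// Hypothetical, simplified stand-in for the per-group tracking header that is
/// prefixed before the aggregate function states; the real type is TrackingUpdates.
struct TrackingHeaderSketch
{
    /// Number of live rows in the group; a retraction (delta < 0) decrements it.
    std::uint32_t count = 0;
    /// Set on every add/negate, cleared when the group is finalized.
    bool updated = true;

    void add()    { ++count; updated = true; }
    void negate() { --count; updated = true; }
    bool empty() const { return count == 0; }
};

With such a header, "is the group empty" and "did the group change since the last emit" can be answered without touching any aggregate function state.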
- if (auto * column_aggregate_func = typeid_cast(&subcolumn)) + if (auto * column_aggregate_func = typeid_cast(&column)) for (auto & pool : aggregates_pools) column_aggregate_func->addArena(pool); }; diff --git a/src/Interpreters/Streaming/Aggregator.cpp b/src/Interpreters/Streaming/Aggregator.cpp index f1937f482d7..d82dc8b1f8e 100644 --- a/src/Interpreters/Streaming/Aggregator.cpp +++ b/src/Interpreters/Streaming/Aggregator.cpp @@ -80,9 +80,6 @@ inline void initDataVariants( result.keys_size = params.keys_size; result.key_sizes = key_sizes; result.init(method_chosen); - - if (params.tracking_changes) - result.resetRetractedPool(); } Columns materializeKeyColumns(Columns & columns, ColumnRawPtrs & key_columns, const Aggregator::Params & params, bool is_low_cardinality) @@ -108,27 +105,11 @@ Columns materializeKeyColumns(Columns & columns, ColumnRawPtrs & key_columns, co return materialized_columns; } -Arena * getArena(AggregatedDataVariants & variants, AggregateStateType type) -{ - if (type == AggregateStateType::OnlyRetracted) - return variants.retracted_pool.get(); - else - return variants.aggregates_pool; -} - template -BlocksList concurrentBucketConvert(ThreadPool * thread_pool, const std::vector & buckets, Arena * arena, Arenas & pools, BucketConverter && bucket_converter) +BlocksList convertBucketsInParallel(ThreadPool * thread_pool, const std::vector & buckets, BucketConverter && bucket_converter) { std::atomic next_bucket_idx_to_merge = 0; - auto converter = [&](Arena * pool, ThreadGroupStatusPtr thread_group, const std::atomic_flag * cancelled) { - SCOPE_EXIT_SAFE( - if (thread_group) - CurrentThread::detachQueryIfNotDetached(); - ); - - if (thread_group) - CurrentThread::attachToIfDetached(thread_group); - + auto converter = [&](Arena * pool, const std::atomic_flag * cancelled) { BlocksList blocks; while (true) { @@ -147,9 +128,14 @@ BlocksList concurrentBucketConvert(ThreadPool * thread_pool, const std::vectorgetMaxThreads(), buckets.size()) : 1; if (num_threads <= 1) - return converter(arena, nullptr, nullptr); + { + auto arena = std::make_shared(); + return converter(arena.get(), nullptr); + } /// Process in parallel + Arenas pools; + pools.reserve(num_threads); for (size_t i = pools.size(); i < num_threads; ++i) pools.push_back(std::make_shared()); @@ -161,9 +147,13 @@ BlocksList concurrentBucketConvert(ThreadPool * thread_pool, const std::vectorscheduleOrThrowOnError([&pools, thread_id, group = CurrentThread::getGroup(), results, &converter, &cancelled] { - (*results)[thread_id] = converter(pools[thread_id].get(), group, &cancelled); + CurrentThread::attachToIfDetached(group); + SCOPE_EXIT_SAFE( CurrentThread::detachQueryIfNotDetached() ); + (*results)[thread_id] = converter(pools[thread_id].get(), &cancelled); }); + } thread_pool->wait(); } @@ -212,8 +202,7 @@ void AggregatedDataVariants::reset() invalidate(); /// Reset pool - resetAggregatesPool(); - retracted_pool.reset(); + resetAndCreateAggregatesPools(); } void AggregatedDataVariants::convertToTwoLevel() @@ -241,12 +230,12 @@ void AggregatedDataVariants::convertToTwoLevel() void AggregatedDataVariants::serialize(WriteBuffer & wb, const Aggregator & aggregator_) const { + /// We cannot use itself `aggregator` since if there is no data, it is nullptr. 
aggregator_.checkpoint(*this, wb); } void AggregatedDataVariants::deserialize(ReadBuffer & rb, const Aggregator & aggregator_) { - aggregator = &aggregator_; aggregator_.recover(*this, rb); } @@ -375,6 +364,9 @@ void Aggregator::Params::explain(JSONBuilder::JSONMap & map) const Aggregator::Aggregator(const Params & params_) : params(params_), log(&Poco::Logger::get("StreamingAggregator")) { + if (params.overflow_row) [[unlikely]] + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Overflow row processing is not implemented in streaming aggregation"); + /// Use query-level memory tracker if (auto * memory_tracker_child = CurrentThread::getMemoryTracker()) if (auto * memory_tracker = memory_tracker_child->getParent()) @@ -389,21 +381,14 @@ Aggregator::Aggregator(const Params & params_) : params(params_), log(&Poco::Lo total_size_of_aggregate_states = 0; all_aggregates_has_trivial_destructor = true; - if (params.tracking_changes) - { - total_size_of_aggregate_states = sizeof(RetractedDataEx); - align_aggregate_states = alignof(RetractedDataEx); - expanded_data_type = ExpandedDataType::UpdatedWithRetracted; - } - else if (params.tracking_updated) + if (trackingUpdatesType() == TrackingUpdatesType::Updates) { - total_size_of_aggregate_states = sizeof(UpdatedDataEx); - align_aggregate_states = alignof(UpdatedDataEx); - expanded_data_type = ExpandedDataType::Updated; + total_size_of_aggregate_states = sizeof(TrackingUpdates); + align_aggregate_states = alignof(TrackingUpdates); } // aggregate_states will be aligned as below: - // |<-- [ExpandedDataEx] -->||<-- state_1 -->|<-- pad_1 -->|<-- state_2 -->|<-- pad_2 -->| ..... + // |<-- [UpdatesTrackingData] -->||<-- state_1 -->|<-- pad_1 -->|<-- state_2 -->|<-- pad_2 -->| ..... // // pad_N will be used to match alignment requirement for each next state. 
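// A worked example of this layout with made-up sizes (the real values come from each
// function's sizeOfData()/alignOfData()): a tracking header of 8 bytes, state_1 with
// sizeOfData() = 12 and alignOfData() = 8, and state_2 with sizeOfData() = 4 and
// alignOfData() = 4 give
//   offset(state_1) = 8                        (right after the header, already 8-aligned)
//   offset(state_2) = align(8 + 12, 4) = 20    (pad_1 = 0)
//   total_size_of_aggregate_states = 20 + 4 = 24
// where align(x, a) = (x + a - 1) / a * a rounds x up to the next multiple of a.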
// The address of state_1 is aligned based on maximum alignment requirements in states @@ -438,12 +423,13 @@ Aggregator::Aggregator(const Params & params_) : params(params_), log(&Poco::Lo aggregation_state_cache = AggregatedDataVariants::createCache(method_chosen, cache_settings); #if USE_EMBEDDED_COMPILER - compileAggregateFunctionsIfNeeded(); + /// TODO: Support compile aggregate functions + // compileAggregateFunctionsIfNeeded(); #endif } #if USE_EMBEDDED_COMPILER - +/* void Aggregator::compileAggregateFunctionsIfNeeded() { static std::unordered_map aggregate_functions_description_to_count; @@ -518,7 +504,7 @@ void Aggregator::compileAggregateFunctionsIfNeeded() } } } - +*/ #endif AggregatedDataVariants::Type Aggregator::chooseAggregationMethod() @@ -770,46 +756,18 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethodTimeBucketTwoLev } /// proton: ends -template -void Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data) const +void Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data, bool prefix_with_updates_tracking_state) const { - /// Initialize reserved UpdatedDataEx + /// Initialize reserved TrackingUpdates assert(aggregate_data); - if constexpr (!skip_expanded_data) - { - if (expanded_data_type == ExpandedDataType::UpdatedWithRetracted) - new (aggregate_data) RetractedDataEx(); - else if (expanded_data_type == ExpandedDataType::Updated) - new (aggregate_data) UpdatedDataEx(); - } - - if constexpr (use_compiled_functions) + if (prefix_with_updates_tracking_state) { - assert(compiled_aggregate_functions_holder); - const auto & compiled_aggregate_functions = compiled_aggregate_functions_holder->compiled_aggregate_functions; - compiled_aggregate_functions.create_aggregate_states_function(aggregate_data); - -#if defined(MEMORY_SANITIZER) - - /// We compile only functions that do not allocate some data in Arena. Only store necessary state in AggregateData place. - for (size_t aggregate_function_index = 0; aggregate_function_index < aggregate_functions.size(); ++aggregate_function_index) - { - if (!is_aggregate_function_compiled[aggregate_function_index]) - continue; - - auto aggregate_data_with_offset = aggregate_data + offsets_of_aggregate_states[aggregate_function_index]; - auto data_size = params.aggregates[aggregate_function_index].function->sizeOfData(); - __msan_unpoison(aggregate_data_with_offset, data_size); - } -#endif + if (trackingUpdatesType() == TrackingUpdatesType::Updates) + new (aggregate_data) TrackingUpdates(); } for (size_t j = 0; j < params.aggregates_size; ++j) { - if constexpr (use_compiled_functions) - if (is_aggregate_function_compiled[j]) - continue; - try { /** An exception may occur if there is a shortage of memory. @@ -821,13 +779,7 @@ void Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data) const catch (...) 
{ for (size_t rollback_j = 0; rollback_j < j; ++rollback_j) - { - if constexpr (use_compiled_functions) - if (is_aggregate_function_compiled[j]) - continue; - aggregate_functions[rollback_j]->destroy(aggregate_data + offsets_of_aggregate_states[rollback_j]); - } throw; } @@ -839,13 +791,11 @@ void Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data) const size_t row_begin, size_t row_end, ColumnRawPtrs & key_columns, - AggregateFunctionInstruction * aggregate_instructions, - bool no_more_keys, - AggregateDataPtr overflow_row) const + AggregateFunctionInstruction * aggregate_instructions) const { #define M(NAME, IS_TWO_LEVEL) \ else if (result.type == AggregatedDataVariants::Type::NAME) \ - return executeImpl(*result.NAME, result.aggregates_pool, row_begin, row_end, key_columns, aggregate_instructions, no_more_keys, overflow_row); + return executeImplBatch(*result.NAME, result.aggregates_pool, row_begin, row_end, key_columns, aggregate_instructions); if (false) {} // NOLINT APPLY_FOR_AGGREGATED_VARIANTS_STREAMING(M) @@ -859,48 +809,19 @@ void Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data) const * Inline does not make sense, since the inner loop is entirely inside this function. */ template -[[nodiscard]] bool NO_INLINE Aggregator::executeImpl( +[[nodiscard]] bool NO_INLINE Aggregator::executeImplBatch( Method & method, Arena * aggregates_pool, size_t row_begin, size_t row_end, ColumnRawPtrs & key_columns, - AggregateFunctionInstruction * aggregate_instructions, - bool no_more_keys, - AggregateDataPtr overflow_row) const + AggregateFunctionInstruction * aggregate_instructions) const { typename Method::State state(key_columns, key_sizes, aggregation_state_cache); - assert(!no_more_keys); - -#if USE_EMBEDDED_COMPILER - /// TODO: So far not support compiled functions with expanded data - if (compiled_aggregate_functions_holder && !hasExpandedData()) - { - return executeImplBatch(method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, overflow_row); - } - else -#endif - { - return executeImplBatch(method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, overflow_row); - } -} -template -[[nodiscard]] bool NO_INLINE Aggregator::executeImplBatch( - Method & method, - typename Method::State & state, - Arena * aggregates_pool, - size_t row_begin, - size_t row_end, - AggregateFunctionInstruction * aggregate_instructions, - AggregateDataPtr overflow_row) const -{ /// Optimization for special case when there are no aggregate functions. - if (params.aggregates_size == 0 && !hasExpandedData()) + if (params.aggregates_size == 0 && !needTrackUpdates()) { - if constexpr (no_more_keys) - return false; - /// For all rows. AggregateDataPtr place = aggregates_pool->alloc(0); for (size_t i = row_begin; i < row_end; ++i) @@ -911,7 +832,7 @@ template bool need_finalization = false; /// Optimization for special case when aggregating by 8bit key. - if constexpr (!no_more_keys && std::is_same_v) + if constexpr (std::is_same_v) { /// We use another method if there are aggregate functions with -Array combinator. 
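The generic path of executeImplBatch that follows resolves one aggregate state per input row: the key is emplaced into the hash table, states are created lazily the first time a key is seen, and the resulting place is remembered in `places` so each aggregate function can then be applied batch-wise. A minimal sketch of that emplace-or-reuse pattern, with the hash table and arena simplified to a std::unordered_map and plain new (stand-ins only, not the real types):

#include <cstddef>
#include <string>
#include <unordered_map>
#include <vector>

using AggregateDataPtr = char *;

/// Simplified sketch of the per-row state resolution in executeImplBatch.
std::vector<AggregateDataPtr> resolvePlaces(
    const std::vector<std::string> & keys,
    std::unordered_map<std::string, AggregateDataPtr> & table,
    std::size_t state_size)
{
    std::vector<AggregateDataPtr> places;
    places.reserve(keys.size());

    for (const auto & key : keys)
    {
        auto [it, inserted] = table.try_emplace(key, nullptr);
        if (inserted)
            it->second = new char[state_size]();   /// createAggregateStates() on an arena in the real code

        places.push_back(it->second);              /// one place per row, reused for repeated keys
    }
    return places;
}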
bool has_arrays = false; @@ -924,7 +845,7 @@ template } } - if (!has_arrays && !hasExpandedData()) + if (!has_arrays && !needTrackUpdates()) { for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst) { @@ -936,7 +857,7 @@ template [&](AggregateDataPtr & aggregate_data) { auto data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(data); + createAggregateStates(data, /*prefix_with_updates_tracking_state*/ false); aggregate_data = data; }, state.getKeyData(), @@ -968,7 +889,6 @@ template { AggregateDataPtr aggregate_data = nullptr; - assert(!no_more_keys); auto emplace_result = state.emplaceKey(method.data, i, *aggregates_pool); /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key. @@ -978,7 +898,7 @@ template emplace_result.setMapped(nullptr); aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(aggregate_data); + createAggregateStates(aggregate_data); emplace_result.setMapped(aggregate_data); } @@ -989,37 +909,9 @@ template places[i] = aggregate_data; } -#if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) - { - std::vector columns_data; - - for (size_t i = 0; i < aggregate_functions.size(); ++i) - { - if (!is_aggregate_function_compiled[i]) - continue; - - AggregateFunctionInstruction * inst = aggregate_instructions + i; - size_t arguments_size = inst->that->getArgumentTypes().size(); - - for (size_t argument_index = 0; argument_index < arguments_size; ++argument_index) - columns_data.emplace_back(getColumnData(inst->batch_arguments[argument_index])); - } - - auto add_into_aggregate_states_function = compiled_aggregate_functions_holder->compiled_aggregate_functions.add_into_aggregate_states_function; - add_into_aggregate_states_function(row_begin, row_end, columns_data.data(), places.get()); - } -#endif - /// Add values to the aggregate functions. for (size_t i = 0; i < aggregate_functions.size(); ++i) { -#if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) - if (is_aggregate_function_compiled[i]) - continue; -#endif - AggregateFunctionInstruction * inst = aggregate_instructions + i; if (inst->offsets) @@ -1043,13 +935,12 @@ template } } - if (hasExpandedData()) - UpdatedDataEx::addBatch(row_begin, row_end, places.get(), aggregate_instructions ? aggregate_instructions->delta_column : nullptr); + if (needTrackUpdates()) + TrackingUpdates::addBatch(row_begin, row_end, places.get(), aggregate_instructions ? 
aggregate_instructions->delta_column : nullptr); return need_finalization; } -template [[nodiscard]] bool NO_INLINE Aggregator::executeWithoutKeyImpl( AggregatedDataWithoutKey & res, size_t row_begin, @@ -1057,55 +948,12 @@ template AggregateFunctionInstruction * aggregate_instructions, Arena * arena) const { -#if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) - { - std::vector columns_data; - - for (size_t i = 0; i < aggregate_functions.size(); ++i) - { - if (!is_aggregate_function_compiled[i]) - continue; - - AggregateFunctionInstruction * inst = aggregate_instructions + i; - size_t arguments_size = inst->that->getArgumentTypes().size(); - - for (size_t argument_index = 0; argument_index < arguments_size; ++argument_index) - { - columns_data.emplace_back(getColumnData(inst->batch_arguments[argument_index])); - } - } - - auto add_into_aggregate_states_function_single_place = compiled_aggregate_functions_holder->compiled_aggregate_functions.add_into_aggregate_states_function_single_place; - add_into_aggregate_states_function_single_place(row_begin, row_end, columns_data.data(), res); - -#if defined(MEMORY_SANITIZER) - - /// We compile only functions that do not allocate some data in Arena. Only store necessary state in AggregateData place. - for (size_t aggregate_function_index = 0; aggregate_function_index < aggregate_functions.size(); ++aggregate_function_index) - { - if (!is_aggregate_function_compiled[aggregate_function_index]) - continue; - - auto aggregate_data_with_offset = res + offsets_of_aggregate_states[aggregate_function_index]; - auto data_size = params.aggregates[aggregate_function_index].function->sizeOfData(); - __msan_unpoison(aggregate_data_with_offset, data_size); - } -#endif - } -#endif - /// Adding values bool should_finalize = false; for (size_t i = 0; i < aggregate_functions.size(); ++i) { AggregateFunctionInstruction * inst = aggregate_instructions + i; -#if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) - if (is_aggregate_function_compiled[i]) - continue; -#endif if (inst->offsets) inst->batch_that->addBatchSinglePlace( inst->offsets[static_cast(row_begin) - 1], @@ -1134,8 +982,8 @@ template } } - if (hasExpandedData()) - UpdatedDataEx::addBatchSinglePlace(row_begin, row_end, res, aggregate_instructions ? aggregate_instructions->delta_column : nullptr); + if (needTrackUpdates()) + TrackingUpdates::addBatchSinglePlace(row_begin, row_end, res, aggregate_instructions ? 
aggregate_instructions->delta_column : nullptr); return should_finalize; } @@ -1201,8 +1049,7 @@ std::pair Aggregator::executeOnBlock( const Block & block, AggregatedDataVariants & result, ColumnRawPtrs & key_columns, - AggregateColumns & aggregate_columns, - bool & no_more_keys) const + AggregateColumns & aggregate_columns) const { return executeOnBlock( block.getColumns(), @@ -1210,8 +1057,7 @@ std::pair Aggregator::executeOnBlock( block.rows(), result, key_columns, - aggregate_columns, - no_more_keys); + aggregate_columns); } /// return {should_abort, need_finalization} @@ -1221,8 +1067,7 @@ std::pair Aggregator::executeOnBlock( size_t row_end, AggregatedDataVariants & result, ColumnRawPtrs & key_columns, - AggregateColumns & aggregate_columns, - bool & no_more_keys) const + AggregateColumns & aggregate_columns) const { std::pair return_result = {false, false}; auto & need_abort = return_result.first; @@ -1255,33 +1100,17 @@ std::pair Aggregator::executeOnBlock( AggregateFunctionInstructions aggregate_functions_instructions; prepareAggregateInstructions(columns, aggregate_columns, materialized_columns, aggregate_functions_instructions, nested_columns_holder); - initStatesForWithoutKeyOrOverflow(result); + initStatesForWithoutKey(result); /// We select one of the aggregation methods and call it. /// For the case when there are no keys (all aggregate into one row). if (result.type == AggregatedDataVariants::Type::without_key) - { - /// TODO: So far not support compiled functions with expanded data -#if USE_EMBEDDED_COMPILER - if (compiled_aggregate_functions_holder && !hasExpandedData()) - { - need_finalization = executeWithoutKeyImpl(result.without_key, row_begin, row_end, aggregate_functions_instructions.data(), result.aggregates_pool); - } - else -#endif - { - need_finalization = executeWithoutKeyImpl(result.without_key, row_begin, row_end, aggregate_functions_instructions.data(), result.aggregates_pool); - } - } + need_finalization = executeWithoutKeyImpl(result.without_key, row_begin, row_end, aggregate_functions_instructions.data(), result.aggregates_pool); else - { - /// This is where data is written that does not fit in `max_rows_to_group_by` with `group_by_overflow_mode = any`. - AggregateDataPtr overflow_row_ptr = params.overflow_row ? 
result.without_key : nullptr; - need_finalization = executeImpl(result, row_begin, row_end, key_columns, aggregate_functions_instructions.data(), no_more_keys, overflow_row_ptr); - } + need_finalization = executeImpl(result, row_begin, row_end, key_columns, aggregate_functions_instructions.data()); - need_abort = checkAndProcessResult(result, no_more_keys); + need_abort = checkAndProcessResult(result); return return_result; } @@ -1315,7 +1144,7 @@ void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants, co data_variants.init(data_variants.type); data_variants.aggregates_pools = Arenas(1, std::make_shared()); data_variants.aggregates_pool = data_variants.aggregates_pools.back().get(); - initStatesForWithoutKeyOrOverflow(data_variants); + initStatesForWithoutKey(data_variants); block_out.flush(); compressed_buf.next(); @@ -1367,11 +1196,11 @@ Block Aggregator::convertOneBucketToBlockImpl( bool final, bool clear_states, Int64 bucket, - AggregateStateType type) const + ConvertType type) const { Block block = convertToBlockImpl(method, method.data.impls[bucket], arena, data_variants.aggregates_pools, final, method.data.impls[bucket].size(), clear_states, type); block.info.bucket_num = static_cast(bucket); - method.data.resetUpdated(bucket); /// finalized + method.data.resetUpdatedBucket(bucket); /// finalized return block; } @@ -1402,13 +1231,6 @@ void Aggregator::writeToTemporaryFileImpl( update_max_sizes(block); } - if (params.overflow_row) - { - Block block = prepareBlockAndFillWithoutKey(data_variants, false, true, false); - out.write(block); - update_max_sizes(block); - } - /// Pass ownership of the aggregate functions states: /// `data_variants` will not destroy them in the destructor, they are now owned by ColumnAggregateFunction objects. 
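convertOneBucketToBlockImpl above clears a bucket's updated flag once the bucket has been finalized; together with ConvertType::OnlyUpdates further down, this is what lets a periodic emit touch only buckets that actually received new data. A minimal sketch of that emit loop, assuming a simplified table that exposes buckets(), isBucketUpdated() and resetUpdatedBucket() like the two-level tables changed by this patch:

#include <cstddef>
#include <cstdint>
#include <functional>
#include <vector>

/// Minimal per-bucket interface assumed from the two-level hash tables in this patch.
struct BucketTableSketch
{
    std::vector<std::int64_t> bucket_ids;
    std::vector<bool> updated;

    const std::vector<std::int64_t> & buckets() const { return bucket_ids; }
    bool isBucketUpdated(std::int64_t b) const { return updated[static_cast<std::size_t>(b)]; }
    void resetUpdatedBucket(std::int64_t b) { updated[static_cast<std::size_t>(b)] = false; }
};

/// Emit only buckets that changed since the previous emit, then clear their flags.
void emitUpdatedBuckets(BucketTableSketch & table, const std::function<void(std::int64_t)> & emit_bucket)
{
    for (auto bucket : table.buckets())
    {
        if (!table.isBucketUpdated(bucket))
            continue;                      /// unchanged bucket: nothing new to emit

        emit_bucket(bucket);               /// convertOneBucketToBlockImpl(...) in the real code
        table.resetUpdatedBucket(bucket);  /// finalized; start tracking the next round
    }
}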
data_variants.aggregator = nullptr; @@ -1417,9 +1239,9 @@ void Aggregator::writeToTemporaryFileImpl( } -bool Aggregator::checkLimits(size_t result_size, bool & no_more_keys) const +bool Aggregator::checkLimits(size_t result_size) const { - if (!no_more_keys && params.max_rows_to_group_by && result_size > params.max_rows_to_group_by) + if (params.max_rows_to_group_by && result_size > params.max_rows_to_group_by) { switch (params.group_by_overflow_mode) { @@ -1432,8 +1254,7 @@ bool Aggregator::checkLimits(size_t result_size, bool & no_more_keys) const return false; case OverflowMode::ANY: - no_more_keys = true; - break; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Streaming aggregation doesn't support 'OverflowMode::ANY'"); } } @@ -1446,7 +1267,7 @@ bool Aggregator::checkLimits(size_t result_size, bool & no_more_keys) const template Block Aggregator::convertToBlockImpl( - Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, bool final, size_t rows, bool clear_states, AggregateStateType type) const + Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, bool final, size_t rows, bool clear_states, ConvertType type) const { if (data.empty()) { @@ -1458,34 +1279,17 @@ Block Aggregator::convertToBlockImpl( if (final) { -#if USE_EMBEDDED_COMPILER - /// TODO: So far not support compiled functions with expanded data - if (compiled_aggregate_functions_holder && !hasExpandedData()) - { - static constexpr bool use_compiled_functions = !Method::low_cardinality_optimization; - assert(type == AggregateStateType::Normal); - res = convertToBlockImplFinal(method, data, arena, aggregates_pools, rows, clear_states); - } - else -#endif - { - if (type == AggregateStateType::OnlyUpdated) - res = convertToBlockImplFinal(method, data, arena, aggregates_pools, rows, clear_states); - else if (type == AggregateStateType::OnlyRetracted) - res = convertToBlockImplFinal(method, data, arena, aggregates_pools, rows, clear_states); - else - res = convertToBlockImplFinal(method, data, arena, aggregates_pools, rows, clear_states); - } + res = convertToBlockImplFinal(method, data, arena, aggregates_pools, rows, clear_states, type); } else { - assert(type == AggregateStateType::Normal); + assert(type == ConvertType::Normal); res = convertToBlockImplNotFinal(method, data, aggregates_pools, rows); } /// In order to release memory early. /// proton: starts. For streaming aggr, we hold on to the states - if (clear_states && type == AggregateStateType::Normal) + if (clear_states) data.clearAndShrink(); /// proton: ends @@ -1570,7 +1374,6 @@ inline void Aggregator::insertAggregatesIntoColumns( std::rethrow_exception(exception); } -template Block Aggregator::insertResultsIntoColumns(PaddedPODArray & places, OutputBlockColumns && out_cols, Arena * arena, bool clear_states) const { std::exception_ptr exception; @@ -1578,40 +1381,8 @@ Block Aggregator::insertResultsIntoColumns(PaddedPODArray & pl try { - if constexpr (use_compiled_functions) - { - /** For JIT compiled functions we need to resize columns before pass them into compiled code. - * insert_aggregates_into_columns_function function does not throw exception. 
- */ - std::vector columns_data; - - auto compiled_functions = compiled_aggregate_functions_holder->compiled_aggregate_functions; - - for (size_t i = 0; i < params.aggregates_size; ++i) - { - if (!is_aggregate_function_compiled[i]) - continue; - - auto & final_aggregate_column = out_cols.final_aggregate_columns[i]; - final_aggregate_column = final_aggregate_column->cloneResized(places.size()); - columns_data.emplace_back(getColumnData(final_aggregate_column.get())); - } - - auto insert_aggregates_into_columns_function = compiled_functions.insert_aggregates_into_columns_function; - insert_aggregates_into_columns_function(0, places.size(), columns_data.data(), places.data()); - } - for (; aggregate_functions_destroy_index < params.aggregates_size;) { - if constexpr (use_compiled_functions) - { - if (is_aggregate_function_compiled[aggregate_functions_destroy_index]) - { - ++aggregate_functions_destroy_index; - continue; - } - } - auto & final_aggregate_column = out_cols.final_aggregate_columns[aggregate_functions_destroy_index]; size_t offset = offsets_of_aggregate_states[aggregate_functions_destroy_index]; @@ -1659,15 +1430,6 @@ Block Aggregator::insertResultsIntoColumns(PaddedPODArray & pl for (; aggregate_functions_destroy_index < params.aggregates_size; ++aggregate_functions_destroy_index) { - if constexpr (use_compiled_functions) - { - if (is_aggregate_function_compiled[aggregate_functions_destroy_index]) - { - ++aggregate_functions_destroy_index; - continue; - } - } - bool is_state = aggregate_functions[aggregate_functions_destroy_index]->isState(); bool destroy_place_after_insert = !is_state && clear_states; if (destroy_place_after_insert) @@ -1698,9 +1460,9 @@ Block Aggregator::insertResultsIntoColumns(PaddedPODArray & pl return finalizeBlock(params, getHeader(/* final */ true), std::move(out_cols), /* final */ true, places.size()); } -template +template Block NO_INLINE Aggregator::convertToBlockImplFinal( - Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, size_t rows, bool clear_states) const + Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, size_t rows, bool clear_states, ConvertType type) const { constexpr bool final = true; auto out_cols = prepareOutputBlockColumns(params, aggregate_functions, getHeader(final), aggregates_pools, final, rows); @@ -1709,7 +1471,7 @@ Block NO_INLINE Aggregator::convertToBlockImplFinal( { if (data.hasNullKeyData()) { - assert(type == AggregateStateType::Normal); + assert(type == ConvertType::Normal); out_cols.key_columns[0]->insertDefault(); insertAggregatesIntoColumns(data.getNullKeyData(), out_cols.final_aggregate_columns, arena, clear_states); } @@ -1721,27 +1483,25 @@ Block NO_INLINE Aggregator::convertToBlockImplFinal( PaddedPODArray places; places.reserve(rows); - constexpr bool only_updated = (type == AggregateStateType::OnlyUpdated); - constexpr bool only_retracted = (type == AggregateStateType::OnlyRetracted); + bool only_updates = (type == ConvertType::OnlyUpdates); data.forEachValue([&](const auto & key, auto & mapped) { - if constexpr (only_updated) + /// Ingore invalid mapped, there are two cases: + /// 1) mapped was destroyed (it's a bug) + /// 2) no mapped states for retracted data (means it's an new group key, but no retracted data) + if (!mapped) + return; + + if (only_updates) { - if (!UpdatedDataEx::isUpdated(mapped)) + if (!TrackingUpdates::updated(mapped)) return; /// Finalized it for current coverting - UpdatedDataEx::resetUpdated(mapped); - } - else if constexpr (only_retracted) - 
{ - if (!RetractedDataEx::hasRetracted(mapped)) - return; + TrackingUpdates::resetUpdated(mapped); } - auto & place = RetractedDataEx::getData(mapped); - /// For UDA with own emit strategy, there are two special cases to be handled: /// 1. not all groups need to be emitted. therefore proton needs to pick groups /// that should emits, and only emit those groups while keep other groups unchanged. @@ -1754,7 +1514,7 @@ Block NO_INLINE Aggregator::convertToBlockImplFinal( if (params.group_by == Params::GroupBy::USER_DEFINED) { assert(aggregate_functions.size() == 1); - emit_times = aggregate_functions[0]->getEmitTimes(place + offsets_of_aggregate_states[0]); + emit_times = aggregate_functions[0]->getEmitTimes(mapped + offsets_of_aggregate_states[0]); } if (emit_times > 0) @@ -1763,17 +1523,17 @@ Block NO_INLINE Aggregator::convertToBlockImplFinal( for (size_t i = 0; i < emit_times; i++) method.insertKeyIntoColumns(key, out_cols.raw_key_columns, key_sizes_ref); - places.emplace_back(place); + places.emplace_back(mapped); /// Mark the cell as destroyed so it will not be destroyed in destructor. /// proton: starts. Here we push the `place` to `places`, for streaming /// case, we don't want aggregate function to destroy the places if (clear_states) - place = nullptr; + mapped = nullptr; } }); - return insertResultsIntoColumns(places, std::move(out_cols), arena, clear_states); + return insertResultsIntoColumns(places, std::move(out_cols), arena, clear_states); } template @@ -1840,30 +1600,32 @@ void Aggregator::addArenasToAggregateColumns( } } -Block Aggregator::prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows, bool clear_states, AggregateStateType type) const +Block Aggregator::prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool clear_states, ConvertType type) const { + /// proton: starts. + if (!data_variants.without_key) + { + data_variants.invalidate(); + return {}; + } + /// proton: ends. + auto res_header = getHeader(final); size_t rows = 1; auto && out_cols = prepareOutputBlockColumns(params, aggregate_functions, res_header, data_variants.aggregates_pools, final, rows); auto && [key_columns, raw_key_columns, aggregate_columns, final_aggregate_columns, aggregate_columns_data] = out_cols; - /// TODO: support overflow row ? - assert(!is_overflows); - assert(!params.overflow_row); assert(data_variants.type == AggregatedDataVariants::Type::without_key); - if ((type == AggregateStateType::OnlyUpdated && !UpdatedDataEx::isUpdated(data_variants.without_key)) - || (type == AggregateStateType::OnlyRetracted && !RetractedDataEx::hasRetracted(data_variants.without_key))) + if (type == ConvertType::OnlyUpdates && !TrackingUpdates::updated(data_variants.without_key)) return res_header.cloneEmpty(); AggregatedDataWithoutKey & data = [&]() -> AggregateDataPtr & { - if (type == AggregateStateType::OnlyUpdated) + if (type == ConvertType::OnlyUpdates) { - UpdatedDataEx::resetUpdated( data_variants.without_key); + TrackingUpdates::resetUpdated(data_variants.without_key); return data_variants.without_key; } - else if (type == AggregateStateType::OnlyRetracted) - return RetractedDataEx::getRetracted(data_variants.without_key); else return data_variants.without_key; }(); @@ -1880,23 +1642,18 @@ Block Aggregator::prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_va else { /// Always single-thread. It's safe to pass current arena from 'aggregates_pool'. 
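The same filtering happens per group during final conversion: with ConvertType::OnlyUpdates, groups whose tracking state reports no change since the last finalization are skipped, and every emitted group has its flag reset (the TrackingUpdates::updated / resetUpdated calls above). A condensed sketch, with a hypothetical one-flag stand-in for the tracking header:

#include <vector>

using AggregateDataPtr = char *;

/// Hypothetical stand-in for the tracking header consulted by TrackingUpdates::updated().
struct TrackedStateSketch
{
    bool updated = true;
};

/// Collect the places to finalize; with only_updates set, untouched groups are skipped
/// and emitted groups have their flag reset, mirroring convertToBlockImplFinal.
std::vector<AggregateDataPtr> collectPlaces(std::vector<TrackedStateSketch> & groups, bool only_updates)
{
    std::vector<AggregateDataPtr> places;
    for (auto & group : groups)
    {
        if (only_updates)
        {
            if (!group.updated)
                continue;           /// no change since the last emit
            group.updated = false;  /// finalized by this conversion
        }
        places.push_back(reinterpret_cast<AggregateDataPtr>(&group));
    }
    return places;
}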
- insertAggregatesIntoColumns(data, final_aggregate_columns, getArena(data_variants, type), clear_states); + insertAggregatesIntoColumns(data, final_aggregate_columns, data_variants.aggregates_pool, clear_states); } - Block block = finalizeBlock(params, res_header, std::move(out_cols), final, rows); - - if (is_overflows) - block.info.is_overflows = true; - - return block; + return finalizeBlock(params, res_header, std::move(out_cols), final, rows); } -Block Aggregator::prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final, bool clear_states, AggregateStateType type) const +Block Aggregator::prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final, bool clear_states, ConvertType type) const { const size_t rows = data_variants.sizeWithoutOverflowRow(); #define M(NAME) \ else if (data_variants.type == AggregatedDataVariants::Type::NAME) \ - return convertToBlockImpl(*data_variants.NAME, data_variants.NAME->data, getArena(data_variants, type), data_variants.aggregates_pools, final, rows, clear_states, type); + return convertToBlockImpl(*data_variants.NAME, data_variants.NAME->data, data_variants.aggregates_pool, data_variants.aggregates_pools, final, rows, clear_states, type); if (false) {} // NOLINT APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) @@ -1904,13 +1661,13 @@ Block Aggregator::prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_v else throw Exception(ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT, "Unknown aggregated data variant."); } -BlocksList Aggregator::prepareBlocksAndFillTwoLevel(AggregatedDataVariants & data_variants, bool final, bool clear_states, size_t max_threads, AggregateStateType type) const +BlocksList Aggregator::prepareBlocksAndFillTwoLevel(AggregatedDataVariants & data_variants, bool final, bool clear_states, size_t max_threads, ConvertType type) const { /// TODO Make a custom threshold. /// TODO Use the shared thread pool with the `merge` function. 
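This conversion is only parallelized for large final results; the fan-out itself is done by convertBucketsInParallel introduced earlier in this file, where workers pull the next bucket index from a shared atomic counter and each worker converts into its own arena. A stripped-down sketch of that scheme, using std::thread in place of the ThreadPool and omitting the per-worker arenas:

#include <atomic>
#include <cstddef>
#include <functional>
#include <thread>
#include <vector>

/// Stripped-down sketch of convertBucketsInParallel: workers keep claiming the next
/// bucket index from a shared atomic counter until all buckets are converted.
void convertBucketsSketch(std::size_t num_buckets, std::size_t num_threads, const std::function<void(std::size_t)> & convert_one)
{
    std::atomic<std::size_t> next_bucket{0};

    auto worker = [&]
    {
        while (true)
        {
            std::size_t bucket = next_bucket.fetch_add(1);
            if (bucket >= num_buckets)
                return;
            convert_one(bucket);   /// e.g. convertOneBucketToBlockImpl with a per-worker arena
        }
    };

    std::vector<std::thread> threads;
    for (std::size_t i = 0; i < num_threads; ++i)
        threads.emplace_back(worker);
    for (auto & t : threads)
        t.join();
}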
std::unique_ptr thread_pool; if (max_threads > 1 && data_variants.sizeWithoutOverflowRow() > 100000 - && final && type == AggregateStateType::Normal) /// use single thread for non-final or retracted data or updated data + && final && type == ConvertType::Normal) /// use single thread for non-final or retracted data or updated data thread_pool = std::make_unique(max_threads); if (false) {} // NOLINT @@ -1931,23 +1688,18 @@ BlocksList Aggregator::prepareBlocksAndFillTwoLevelImpl( bool final, bool clear_states, ThreadPool * thread_pool, - AggregateStateType type) const + ConvertType type) const { - return concurrentBucketConvert( - thread_pool, - method.data.buckets(), - getArena(data_variants, type), - data_variants.aggregates_pools, - [&](Int64 bucket, Arena * arena) -> BlocksList { - /// Skip no changed bucket if only updated is requested - if (type == AggregateStateType::OnlyUpdated && !method.data.isUpdatedBucket(bucket)) - return {}; - - return {convertOneBucketToBlockImpl(data_variants, method, arena, final, clear_states, bucket, type)}; - }); + return convertBucketsInParallel(thread_pool, method.data.buckets(), [&](Int64 bucket, Arena * arena) -> BlocksList { + /// Skip no changed bucket if only updated is requested + if (type == ConvertType::OnlyUpdates && !method.data.isBucketUpdated(bucket)) + return {}; + + return {convertOneBucketToBlockImpl(data_variants, method, arena, final, clear_states, bucket, type)}; + }); } -BlocksList Aggregator::convertToBlocks(AggregatedDataVariants & data_variants, bool final, bool clear_states, size_t max_threads) const +BlocksList Aggregator::convertToBlocks(AggregatedDataVariants & data_variants, bool final, size_t max_threads) const { LOG_DEBUG(log, "Converting aggregated data to blocks"); @@ -1959,11 +1711,10 @@ BlocksList Aggregator::convertToBlocks(AggregatedDataVariants & data_variants, b if (data_variants.empty()) return blocks; - if (unlikely(params.overflow_row)) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Overflow row processing is not implemented in streaming aggregation"); + bool clear_states = final && !params.keep_state; if (data_variants.type == AggregatedDataVariants::Type::without_key) - blocks.emplace_back(prepareBlockAndFillWithoutKey(data_variants, final, false, clear_states)); + blocks.emplace_back(prepareBlockAndFillWithoutKey(data_variants, final, clear_states)); else if (!data_variants.isTwoLevel()) blocks.emplace_back(prepareBlockAndFillSingleLevel(data_variants, final, clear_states)); else @@ -2035,7 +1786,7 @@ void NO_INLINE Aggregator::mergeDataNullKey( } -template +template void NO_INLINE Aggregator::mergeDataImpl( Table & table_dst, Table & table_src, @@ -2056,7 +1807,7 @@ void NO_INLINE Aggregator::mergeDataImpl( /// that from the 'src' to store the final aggregated result, it will cause the data from other AggregatedDataVariant will be merged multiple times and /// generate incorrect aggregated result. auto aggregate_data = arena->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(aggregate_data); + createAggregateStates(aggregate_data); dst = aggregate_data; } @@ -2094,35 +1845,20 @@ template void NO_INLINE Aggregator::mergeSingleLevelDataImpl(ManyAggregatedDataVariants & non_empty_data, bool clear_states) const { AggregatedDataVariantsPtr & res = non_empty_data[0]; - bool no_more_keys = false; /// We merge all aggregation results to the first. 
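The loop that follows folds every non-empty variant into the first one; for each key, mergeDataImpl (above) creates a fresh destination state when the key is new, rather than adopting the source state, for the reason given in its comment, and then merges the source state into it. A reduced sketch of that per-key merge, with the hash table replaced by a std::unordered_map and callbacks standing in for createAggregateStates and the per-aggregate-function merge loop:

#include <functional>
#include <string>
#include <unordered_map>

using AggregateDataPtr = char *;

/// Reduced sketch of the mergeDataImpl pattern: fold table_src into table_dst.
void mergeTables(
    std::unordered_map<std::string, AggregateDataPtr> & table_dst,
    std::unordered_map<std::string, AggregateDataPtr> & table_src,
    const std::function<AggregateDataPtr()> & create_state,
    const std::function<void(AggregateDataPtr, AggregateDataPtr)> & merge_state)
{
    for (auto & [key, src_state] : table_src)
    {
        auto [it, inserted] = table_dst.try_emplace(key, nullptr);
        if (inserted)
            it->second = create_state();    /// fresh destination state for a key only seen in src

        merge_state(it->second, src_state); /// combine src into dst for every aggregate function
    }
}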
for (size_t result_num = 1, size = non_empty_data.size(); result_num < size; ++result_num) { - if (!checkLimits(res->sizeWithoutOverflowRow(), no_more_keys)) + if (!checkLimits(res->sizeWithoutOverflowRow())) break; AggregatedDataVariants & current = *non_empty_data[result_num]; - assert(!no_more_keys); -#if USE_EMBEDDED_COMPILER - if (compiled_aggregate_functions_holder) - { - mergeDataImpl( - getDataVariant(*res).data, - getDataVariant(current).data, - res->aggregates_pool, - clear_states); - } - else - #endif - { - mergeDataImpl( - getDataVariant(*res).data, - getDataVariant(current).data, - res->aggregates_pool, - clear_states); - } + mergeDataImpl( + getDataVariant(*res).data, + getDataVariant(current).data, + res->aggregates_pool, + clear_states); /// In order to release memory early. if (clear_states) @@ -2138,21 +1874,19 @@ void NO_INLINE Aggregator::mergeSingleLevelDataImpl(ManyAggregatedDataVariants & BlocksList -Aggregator::mergeAndConvertToBlocks(ManyAggregatedDataVariants & data_variants, bool final, bool clear_states, size_t max_threads) const +Aggregator::mergeAndConvertToBlocks(ManyAggregatedDataVariants & data_variants, bool final, size_t max_threads) const { auto prepared_data_ptr = prepareVariantsToMerge(data_variants); if (prepared_data_ptr->empty()) return {}; - if (unlikely(params.overflow_row)) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Overflow row processing is not implemented in streaming aggregation"); - + bool clear_states = final && !params.keep_state; BlocksList blocks; auto & first = *prepared_data_ptr->at(0); if (first.type == AggregatedDataVariants::Type::without_key) { mergeWithoutKeyDataImpl(*prepared_data_ptr, clear_states); - blocks.emplace_back(prepareBlockAndFillWithoutKey(first, final, false, clear_states)); + blocks.emplace_back(prepareBlockAndFillWithoutKey(first, final, clear_states)); } else if (!first.isTwoLevel()) { @@ -2205,7 +1939,9 @@ BlocksList Aggregator::mergeAndConvertTwoLevelToBlocksImpl( std::vector buckets; if (first.isStaticBucketTwoLevel()) + { buckets = getDataVariant(first).data.buckets(); + } else { assert(first.isTimeBucketTwoLevel()); @@ -2218,11 +1954,10 @@ BlocksList Aggregator::mergeAndConvertTwoLevelToBlocksImpl( buckets.assign(buckets_set.begin(), buckets_set.end()); } - return concurrentBucketConvert( - thread_pool, buckets, first.aggregates_pool, first.aggregates_pools, [&](Int64 bucket, Arena * arena) -> BlocksList { - mergeBucketImpl(non_empty_data, bucket, arena, clear_states); - return {convertOneBucketToBlockImpl(first, getDataVariant(first), arena, final, clear_states, bucket)}; - }); + return convertBucketsInParallel(thread_pool, buckets, [&](Int64 bucket, Arena * arena) -> BlocksList { + mergeBucketImpl(non_empty_data, bucket, arena, clear_states); + return {convertOneBucketToBlockImpl(first, getDataVariant(first), arena, final, clear_states, bucket)}; + }); } template @@ -2237,27 +1972,14 @@ void NO_INLINE Aggregator::mergeBucketImpl( return; AggregatedDataVariants & current = *data[result_num]; -#if USE_EMBEDDED_COMPILER - if (compiled_aggregate_functions_holder) - { - mergeDataImpl( - getDataVariant(*res).data.impls[bucket], - getDataVariant(current).data.impls[bucket], - arena, - clear_states); - } - else -#endif - { - mergeDataImpl( - getDataVariant(*res).data.impls[bucket], - getDataVariant(current).data.impls[bucket], - arena, - clear_states); - } + mergeDataImpl( + getDataVariant(*res).data.impls[bucket], + getDataVariant(current).data.impls[bucket], + arena, + clear_states); /// Assume the current 
bucket has been finalized. - getDataVariant(current).data.resetUpdated(bucket); + getDataVariant(current).data.resetUpdatedBucket(bucket); } } @@ -2286,7 +2008,7 @@ ManyAggregatedDataVariantsPtr Aggregator::prepareVariantsToMerge(ManyAggregatedD auto result_variants = std::make_shared(false); result_variants->aggregator = this; initDataVariants(*result_variants, method_chosen, key_sizes, params); - initStatesForWithoutKeyOrOverflow(*result_variants); + initStatesForWithoutKey(*result_variants); non_empty_data->insert(non_empty_data->begin(), result_variants); } @@ -2335,427 +2057,66 @@ ManyAggregatedDataVariantsPtr Aggregator::prepareVariantsToMerge(ManyAggregatedD return non_empty_data; } -template -void NO_INLINE Aggregator::mergeStreamsImplCase( - Block & block, - Arena * aggregates_pool, - Method & method [[maybe_unused]], - Table & data, - AggregateDataPtr overflow_row) const +template +void NO_INLINE Aggregator::convertBlockToTwoLevelImpl( + Method & method, + Arena * pool, + ColumnRawPtrs & key_columns, + const Block & source, + std::vector & destinations) const { - ColumnRawPtrs key_columns(params.keys_size); - AggregateColumnsConstData aggregate_columns(params.aggregates_size); - - /// Remember the columns we will work with - for (size_t i = 0; i < params.keys_size; ++i) - key_columns[i] = block.safeGetByPosition(i).column.get(); - - for (size_t i = 0; i < params.aggregates_size; ++i) - { - const auto & aggregate_column_name = params.aggregates[i].column_name; - aggregate_columns[i] = &typeid_cast(*block.getByName(aggregate_column_name).column).getData(); - } - typename Method::State state(key_columns, key_sizes, aggregation_state_cache); - /// For all rows. - size_t rows = block.rows(); - std::unique_ptr places(new AggregateDataPtr[rows]); + size_t rows = source.rows(); + size_t columns = source.columns(); + + /// Create a 'selector' that will contain bucket index for every row. It will be used to scatter rows to buckets. + IColumn::Selector selector(rows); + /// For every row. for (size_t i = 0; i < rows; ++i) { - AggregateDataPtr aggregate_data = nullptr; - - if (!no_more_keys) + if constexpr (Method::low_cardinality_optimization) { - auto emplace_result = state.emplaceKey(data, i, *aggregates_pool); - if (emplace_result.isInserted()) + if (state.isNullAt(i)) { - /// exception-safety - if you can not allocate memory or create states, then destructors will not be called. - emplace_result.setMapped(nullptr); - - aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(aggregate_data); - - emplace_result.setMapped(aggregate_data); + selector[i] = 0; + continue; } - else - aggregate_data = emplace_result.getMapped(); - } - else - { - auto find_result = state.findKey(data, i, *aggregates_pool); - if (find_result.isFound()) - aggregate_data = find_result.getMapped(); } - /// aggregate_date == nullptr means that the new key did not fit in the hash table because of no_more_keys. + /// Calculate bucket number from row hash. + auto hash = state.getHash(method.data, i, *pool); + auto bucket = method.data.getBucketFromHash(hash); - AggregateDataPtr value = aggregate_data ? aggregate_data : overflow_row; - places[i] = value; + selector[i] = bucket; } - for (size_t j = 0; j < params.aggregates_size; ++j) + size_t num_buckets = destinations.size(); + + for (size_t column_idx = 0; column_idx < columns; ++column_idx) { - /// Merge state of aggregate functions. 
- aggregate_functions[j]->mergeBatch( - 0, rows, - places.get(), offsets_of_aggregate_states[j], - aggregate_columns[j]->data(), - aggregates_pool); - } + const ColumnWithTypeAndName & src_col = source.getByPosition(column_idx); + MutableColumns scattered_columns = src_col.column->scatter(num_buckets, selector); - /// Early release memory. - block.clear(); -} + for (size_t bucket = 0, size = num_buckets; bucket < size; ++bucket) + { + if (!scattered_columns[bucket]->empty()) + { + Block & dst = destinations[bucket]; + dst.info.bucket_num = static_cast(bucket); + dst.insert({std::move(scattered_columns[bucket]), src_col.type, src_col.name}); + } -template -void NO_INLINE Aggregator::mergeStreamsImpl( - Block & block, - Arena * aggregates_pool, - Method & method, - Table & data, - AggregateDataPtr overflow_row, - bool no_more_keys) const -{ - if (!no_more_keys) - mergeStreamsImplCase(block, aggregates_pool, method, data, overflow_row); - else - mergeStreamsImplCase(block, aggregates_pool, method, data, overflow_row); + /** Inserted columns of type ColumnAggregateFunction will own states of aggregate functions + * by holding shared_ptr to source column. See ColumnAggregateFunction.h + */ + } + } } -void NO_INLINE Aggregator::mergeWithoutKeyStreamsImpl( - Block & block, - AggregatedDataVariants & result) const -{ - AggregateColumnsConstData aggregate_columns(params.aggregates_size); - - /// Remember the columns we will work with - for (size_t i = 0; i < params.aggregates_size; ++i) - { - const auto & aggregate_column_name = params.aggregates[i].column_name; - aggregate_columns[i] = &typeid_cast(*block.getByName(aggregate_column_name).column).getData(); - } - - AggregatedDataWithoutKey & res = result.without_key; - if (!res) - { - AggregateDataPtr place = result.aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(place); - res = place; - } - - for (size_t row = 0, rows = block.rows(); row < rows; ++row) - { - /// Adding Values - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_functions[i]->merge(res + offsets_of_aggregate_states[i], (*aggregate_columns[i])[row], result.aggregates_pool); - } - - /// Early release memory. - block.clear(); -} - -bool Aggregator::mergeOnBlock(Block block, AggregatedDataVariants & result, bool & no_more_keys) const -{ - /// `result` will destroy the states of aggregate functions in the destructor - result.aggregator = this; - - /// How to perform the aggregation? 
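convertBlockToTwoLevelImpl, relocated above, splits a single-level block into one block per bucket: it derives a bucket index for every row from the key hash and then lets each column scatter itself according to that selector. A self-contained sketch of the same selector/scatter idea on plain vectors, with std::hash standing in for the real key hashing and getBucketFromHash:

#include <cstddef>
#include <cstdint>
#include <functional>
#include <vector>

/// Sketch of the selector/scatter split: assign each row a bucket from its key hash,
/// then scatter rows into per-bucket vectors (the real code scatters whole IColumns).
std::vector<std::vector<std::uint64_t>> scatterToBuckets(const std::vector<std::uint64_t> & keys, std::size_t num_buckets)
{
    std::vector<std::size_t> selector(keys.size());
    for (std::size_t i = 0; i < keys.size(); ++i)
        selector[i] = std::hash<std::uint64_t>{}(keys[i]) % num_buckets;  /// getBucketFromHash in the real code

    std::vector<std::vector<std::uint64_t>> buckets(num_buckets);
    for (std::size_t i = 0; i < keys.size(); ++i)
        buckets[selector[i]].push_back(keys[i]);

    return buckets;
}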
- if (result.empty()) - { - result.init(method_chosen); - result.keys_size = params.keys_size; - result.key_sizes = key_sizes; - LOG_TRACE(log, "Aggregation method: {}", result.getMethodName()); - } - - if (result.type == AggregatedDataVariants::Type::without_key || block.info.is_overflows) - mergeWithoutKeyStreamsImpl(block, result); - -#define M(NAME, IS_TWO_LEVEL) \ - else if (result.type == AggregatedDataVariants::Type::NAME) \ - mergeStreamsImpl(block, result.aggregates_pool, *result.NAME, result.NAME->data, result.without_key, no_more_keys); - - APPLY_FOR_AGGREGATED_VARIANTS_STREAMING(M) -#undef M - else if (result.type != AggregatedDataVariants::Type::without_key) - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); - - return checkAndProcessResult(result, no_more_keys); -} - - -void Aggregator::mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVariants & result, size_t max_threads) -{ - if (bucket_to_blocks.empty()) - return; - - UInt64 total_input_rows = 0; - for (auto & bucket : bucket_to_blocks) - for (auto & block : bucket.second) - total_input_rows += block.rows(); - - /** `minus one` means the absence of information about the bucket - * - in the case of single-level aggregation, as well as for blocks with "overflowing" values. - * If there is at least one block with a bucket number greater or equal than zero, then there was a two-level aggregation. - */ - auto max_bucket = bucket_to_blocks.rbegin()->first; - bool has_two_level = max_bucket >= 0; - - if (has_two_level) - { - #define M(NAME) \ - if (method_chosen == AggregatedDataVariants::Type::NAME) \ - method_chosen = AggregatedDataVariants::Type::NAME ## _two_level; - - APPLY_FOR_VARIANTS_CONVERTIBLE_TO_STATIC_BUCKET_TWO_LEVEL(M) - - #undef M - } - - /// result will destroy the states of aggregate functions in the destructor - result.aggregator = this; - - result.init(method_chosen); - result.keys_size = params.keys_size; - result.key_sizes = key_sizes; - - bool has_blocks_with_unknown_bucket = bucket_to_blocks.contains(-1); - - /// First, parallel the merge for the individual buckets. Then we continue merge the data not allocated to the buckets. - if (has_two_level) - { - /** In this case, no_more_keys is not supported due to the fact that - * from different threads it is difficult to update the general state for "other" keys (overflows). - * That is, the keys in the end can be significantly larger than max_rows_to_group_by. - */ - - LOG_TRACE(log, "Merging partially aggregated two-level data."); - - auto merge_bucket = [&bucket_to_blocks, &result, this](size_t bucket, Arena * aggregates_pool, ThreadGroupStatusPtr thread_group) - { - SCOPE_EXIT_SAFE( - if (thread_group) - CurrentThread::detachQueryIfNotDetached(); - ); - if (thread_group) - CurrentThread::attachToIfDetached(thread_group); - - for (Block & block : bucket_to_blocks[static_cast(bucket)]) - { - #define M(NAME) \ - else if (result.type == AggregatedDataVariants::Type::NAME) \ - mergeStreamsImpl(block, aggregates_pool, *result.NAME, result.NAME->data.impls[bucket], nullptr, false); - - if (false) {} // NOLINT - APPLY_FOR_VARIANTS_ALL_TWO_LEVEL(M) - #undef M - else - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); - } - }; - - std::unique_ptr thread_pool; - if (max_threads > 1 && total_input_rows > 100000) /// TODO Make a custom threshold. 
- thread_pool = std::make_unique(max_threads); - - for (const auto & bucket_blocks : bucket_to_blocks) - { - const auto bucket = bucket_blocks.first; - - if (bucket == -1) - continue; - - result.aggregates_pools.push_back(std::make_shared()); - Arena * aggregates_pool = result.aggregates_pools.back().get(); - - auto task = [group = CurrentThread::getGroup(), bucket, &merge_bucket, aggregates_pool]{ return merge_bucket(bucket, aggregates_pool, group); }; - - if (thread_pool) - thread_pool->scheduleOrThrowOnError(task); - else - task(); - } - - if (thread_pool) - thread_pool->wait(); - - LOG_TRACE(log, "Merged partially aggregated two-level data."); - } - - if (has_blocks_with_unknown_bucket) - { - LOG_TRACE(log, "Merging partially aggregated single-level data."); - - bool no_more_keys = false; - - BlocksList & blocks = bucket_to_blocks[-1]; - for (Block & block : blocks) - { - if (!checkLimits(result.sizeWithoutOverflowRow(), no_more_keys)) - break; - - if (result.type == AggregatedDataVariants::Type::without_key || block.info.is_overflows) - mergeWithoutKeyStreamsImpl(block, result); - - #define M(NAME, IS_TWO_LEVEL) \ - else if (result.type == AggregatedDataVariants::Type::NAME) \ - mergeStreamsImpl(block, result.aggregates_pool, *result.NAME, result.NAME->data, result.without_key, no_more_keys); - - APPLY_FOR_AGGREGATED_VARIANTS_STREAMING(M) - #undef M - else if (result.type != AggregatedDataVariants::Type::without_key) - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); - } - - LOG_TRACE(log, "Merged partially aggregated single-level data."); - } -} - - -Block Aggregator::mergeBlocks(BlocksList & blocks, bool final, bool clear_states, bool only_updated) -{ - if (blocks.empty()) - return {}; - - auto bucket_num = blocks.front().info.bucket_num; - bool is_overflows = blocks.front().info.is_overflows; - - LOG_TRACE(log, "Merging partially aggregated blocks (bucket = {}).", bucket_num); - Stopwatch watch; - - /** If possible, change 'method' to some_hash64. Otherwise, leave as is. - * Better hash function is needed because during external aggregation, - * we may merge partitions of data with total number of keys far greater than 4 billion. - */ - auto merge_method = method_chosen; - -#define APPLY_FOR_VARIANTS_THAT_MAY_USE_BETTER_HASH_FUNCTION(M) \ - M(key64) \ - M(key_string) \ - M(key_fixed_string) \ - M(keys128) \ - M(keys256) \ - M(serialized) \ - -#define M(NAME) \ - if (merge_method == AggregatedDataVariants::Type::NAME) \ - merge_method = AggregatedDataVariants::Type::NAME ## _hash64; \ - - APPLY_FOR_VARIANTS_THAT_MAY_USE_BETTER_HASH_FUNCTION(M) -#undef M - -#undef APPLY_FOR_VARIANTS_THAT_MAY_USE_BETTER_HASH_FUNCTION - - /// Temporary data for aggregation. 
- AggregatedDataVariants result; - - /// result will destroy the states of aggregate functions in the destructor - result.aggregator = this; - - /// proton: starts - initDataVariants(result, method_chosen, key_sizes, params); - /// proton: ends - - for (Block & block : blocks) - { - if (bucket_num >= 0 && block.info.bucket_num != bucket_num) - bucket_num = -1; - - if (result.type == AggregatedDataVariants::Type::without_key || is_overflows) - mergeWithoutKeyStreamsImpl(block, result); - - #define M(NAME, IS_TWO_LEVEL) \ - else if (result.type == AggregatedDataVariants::Type::NAME) \ - mergeStreamsImpl(block, result.aggregates_pool, *result.NAME, result.NAME->data, nullptr, false); - - APPLY_FOR_AGGREGATED_VARIANTS_STREAMING(M) - #undef M - else if (result.type != AggregatedDataVariants::Type::without_key) - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); - } - - Block block; - if (result.type == AggregatedDataVariants::Type::without_key || is_overflows) - block = prepareBlockAndFillWithoutKey(result, final, is_overflows, clear_states); - else - block = prepareBlockAndFillSingleLevel(result, final, clear_states); - /// NOTE: two-level data is not possible here - chooseAggregationMethod chooses only among single-level methods. - - size_t rows = block.rows(); - size_t bytes = block.bytes(); - double elapsed_seconds = watch.elapsedSeconds(); - LOG_DEBUG(log, "Merged partially aggregated blocks. {} rows, {}. in {} sec. ({:.3f} rows/sec., {}/sec.)", - rows, ReadableSize(bytes), - elapsed_seconds, rows / elapsed_seconds, - ReadableSize(bytes / elapsed_seconds)); - - block.info.bucket_num = bucket_num; - return block; -} - -template -void NO_INLINE Aggregator::convertBlockToTwoLevelImpl( - Method & method, - Arena * pool, - ColumnRawPtrs & key_columns, - const Block & source, - std::vector & destinations) const -{ - typename Method::State state(key_columns, key_sizes, aggregation_state_cache); - - size_t rows = source.rows(); - size_t columns = source.columns(); - - /// Create a 'selector' that will contain bucket index for every row. It will be used to scatter rows to buckets. - IColumn::Selector selector(rows); - - /// For every row. - for (size_t i = 0; i < rows; ++i) - { - if constexpr (Method::low_cardinality_optimization) - { - if (state.isNullAt(i)) - { - selector[i] = 0; - continue; - } - } - - /// Calculate bucket number from row hash. - auto hash = state.getHash(method.data, i, *pool); - auto bucket = method.data.getBucketFromHash(hash); - - selector[i] = bucket; - } - - size_t num_buckets = destinations.size(); - - for (size_t column_idx = 0; column_idx < columns; ++column_idx) - { - const ColumnWithTypeAndName & src_col = source.getByPosition(column_idx); - MutableColumns scattered_columns = src_col.column->scatter(num_buckets, selector); - - for (size_t bucket = 0, size = num_buckets; bucket < size; ++bucket) - { - if (!scattered_columns[bucket]->empty()) - { - Block & dst = destinations[bucket]; - dst.info.bucket_num = static_cast(bucket); - dst.insert({std::move(scattered_columns[bucket]), src_col.type, src_col.name}); - } - - /** Inserted columns of type ColumnAggregateFunction will own states of aggregate functions - * by holding shared_ptr to source column. 
See ColumnAggregateFunction.h - */ - } - } -} - - -std::vector Aggregator::convertBlockToTwoLevel(const Block & block) const +std::vector Aggregator::convertBlockToTwoLevel(const Block & block) const { if (!block) return {}; @@ -2842,7 +2203,7 @@ void Aggregator::destroyAllAggregateStates(AggregatedDataVariants & result) cons LOG_TRACE(log, "Destroying aggregate states"); /// In what data structure is the data aggregated? - if (result.type == AggregatedDataVariants::Type::without_key || params.overflow_row) + if (result.type == AggregatedDataVariants::Type::without_key) destroyWithoutKey(result); #define M(NAME, IS_TWO_LEVEL) \ @@ -2857,9 +2218,9 @@ void Aggregator::destroyAllAggregateStates(AggregatedDataVariants & result) cons } /// proton: starts. for streaming processing -void Aggregator::initStatesForWithoutKeyOrOverflow(AggregatedDataVariants & data_variants) const +void Aggregator::initStatesForWithoutKey(AggregatedDataVariants & data_variants) const { - if (!data_variants.without_key && (params.overflow_row || data_variants.type == AggregatedDataVariants::Type::without_key)) + if (!data_variants.without_key && data_variants.type == AggregatedDataVariants::Type::without_key) { AggregateDataPtr place = data_variants.aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); createAggregateStates(place); @@ -3017,7 +2378,7 @@ void Aggregator::doCheckpointLegacy(const AggregatedDataVariants & data_variants /// FIXME, set a good max_threads /// For ConvertAction::Checkpoint, don't clear state `data_variants` - auto blocks = convertToBlocks(const_cast(data_variants), false, false, 8); + auto blocks = convertToBlocks(const_cast(data_variants), false, 8); /// assert(!blocks.empty()); @@ -3120,7 +2481,7 @@ void Aggregator::recoverStatesWithoutKey(AggregatedDataVariants & data_variants, /// may have internal states as well assert(!data_variants.without_key); - initStatesForWithoutKeyOrOverflow(data_variants); + initStatesForWithoutKey(data_variants); AggregatedDataWithoutKey & data = data_variants.without_key; AggregateColumnsData aggregate_columns(params.aggregates_size); @@ -3349,9 +2710,7 @@ void Aggregator::doRecoverV2(AggregatedDataVariants & data_variants, ReadBuffer VersionType Aggregator::getVersionFromRevision(UInt64 revision) const { - if (revision >= STATE_V3_MIN_REVISION) - return static_cast(3); - else if (revision >= STATE_V2_MIN_REVISION) + if (revision >= STATE_V2_MIN_REVISION) return static_cast(2); else throw Exception( @@ -3403,22 +2762,11 @@ void NO_INLINE Aggregator::spliceBucketsImpl( auto & table_dest = getDataVariant(data_dest).data.impls; auto & table_src = getDataVariant(data_src).data.impls; -#if USE_EMBEDDED_COMPILER - if (compiled_aggregate_functions_holder) - { - for (auto bucket : gcd_buckets) - mergeDataImpl(table_dest[0], table_src[bucket], arena, clear_states, zero_out_window_keys_func); - } - else -#endif - { - for (auto bucket : gcd_buckets) - mergeDataImpl(table_dest[0], table_src[bucket], arena, clear_states, zero_out_window_keys_func); - } + for (auto bucket : gcd_buckets) + mergeDataImpl(table_dest[0], table_src[bucket], arena, clear_states, zero_out_window_keys_func); } -Block Aggregator::spliceAndConvertBucketsToBlock( - AggregatedDataVariants & variants, bool final, bool clear_states, const std::vector & gcd_buckets) const +Block Aggregator::spliceAndConvertBucketsToBlock(AggregatedDataVariants & variants, bool final, const std::vector & gcd_buckets) const { assert(variants.isTimeBucketTwoLevel()); @@ -3431,11 +2779,12 @@ 
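// A minimal, self-contained sketch (illustrative names, not the proton API) of what "splicing"
// GCD buckets means here: for a hop window, several fine-grained time buckets are merged into a
// single window result before conversion, e.g. window [00:00, 00:04) spliced from gcd buckets
// [00:00, 00:02) and [00:02, 00:04). Plain integers stand in for aggregate function states.
#include <cstdint>
#include <map>
#include <string>
#include <unordered_map>
#include <vector>

using GroupCounts = std::unordered_map<std::string, int64_t>;

GroupCounts spliceBuckets(const std::map<int64_t, GroupCounts> & buckets, const std::vector<int64_t> & gcd_buckets)
{
    GroupCounts window_result;
    for (auto bucket : gcd_buckets)
    {
        auto it = buckets.find(bucket);
        if (it == buckets.end())
            continue;
        for (const auto & [key, partial] : it->second)
            window_result[key] += partial; /// stands in for IAggregateFunction::merge on real states
    }
    return window_result;
}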
Block Aggregator::spliceAndConvertBucketsToBlock( AggregatedDataVariants result_variants; \ result_variants.aggregator = this; \ initDataVariants(result_variants, method_chosen, key_sizes, params); \ - spliceBucketsImpl(result_variants, variants, gcd_buckets, result_variants.aggregates_pool, clear_states); \ + initStatesForWithoutKey(result_variants); \ + spliceBucketsImpl(result_variants, variants, gcd_buckets, result_variants.aggregates_pool, /*clear_states*/ false); \ return convertOneBucketToBlockImpl(result_variants, *result_variants.NAME, result_variants.aggregates_pool, final, /*clear_states*/ true, 0); \ } \ else \ - return convertOneBucketToBlockImpl(variants, *variants.NAME, variants.aggregates_pool, final, clear_states, gcd_buckets[0]); \ + return convertOneBucketToBlockImpl(variants, *variants.NAME, variants.aggregates_pool, final, /*clear_states*/ false, gcd_buckets[0]); \ } APPLY_FOR_VARIANTS_TIME_BUCKET_TWO_LEVEL(M) @@ -3446,8 +2795,7 @@ Block Aggregator::spliceAndConvertBucketsToBlock( UNREACHABLE(); } -Block Aggregator::mergeAndSpliceAndConvertBucketsToBlock( - ManyAggregatedDataVariants & variants, bool final, bool clear_states, const std::vector & gcd_buckets) const +Block Aggregator::mergeAndSpliceAndConvertBucketsToBlock(ManyAggregatedDataVariants & variants, bool final, const std::vector & gcd_buckets) const { bool need_splice = gcd_buckets.size() > 1; auto prepared_data = prepareVariantsToMerge(variants, /*always_merge_into_empty*/ need_splice); @@ -3464,14 +2812,14 @@ Block Aggregator::mergeAndSpliceAndConvertBucketsToBlock( { \ using Method = decltype(first.NAME)::element_type; \ for (auto bucket : gcd_buckets) \ - mergeBucketImpl(*prepared_data, bucket, arena, clear_states); \ + mergeBucketImpl(*prepared_data, bucket, arena, /*clear_states*/ false); \ if (need_splice) \ { \ spliceBucketsImpl(first, first, gcd_buckets, arena, /*clear_states*/ true); \ return convertOneBucketToBlockImpl(first, *first.NAME, arena, final, /*clear_states*/ true, 0); \ } \ else \ - return convertOneBucketToBlockImpl(first, *first.NAME, arena, final, clear_states, gcd_buckets[0]); \ + return convertOneBucketToBlockImpl(first, *first.NAME, arena, final, /*clear_states*/ false, gcd_buckets[0]); \ } APPLY_FOR_VARIANTS_TIME_BUCKET_TWO_LEVEL(M) @@ -3482,36 +2830,367 @@ Block Aggregator::mergeAndSpliceAndConvertBucketsToBlock( UNREACHABLE(); } -void Aggregator::mergeAggregateStates(AggregateDataPtr & dst, AggregateDataPtr & src, Arena * arena, bool clear_states) const +template +bool Aggregator::executeAndRetractImpl( + Method & method, + Arena * aggregates_pool, + Method & retracted_method, + Arena * retracted_pool, + size_t row_begin, + size_t row_end, + ColumnRawPtrs & key_columns, + AggregateFunctionInstruction * aggregate_instructions) const { - assert(src); - assert(dst); - - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_functions[i]->merge(dst + offsets_of_aggregate_states[i], src + offsets_of_aggregate_states[i], arena); - - if (clear_states) - destroyAggregateStates(src); -} + typename Method::State state(key_columns, key_sizes, aggregation_state_cache); + typename Method::State retracted_state(key_columns, key_sizes, nullptr); -void Aggregator::destroyAggregateStates(AggregateDataPtr & place) const -{ - if (place) + /// Optimization for special case when there are no aggregate functions. 
+    if (params.aggregates_size == 0)
     {
-        for (size_t i = 0; i < params.aggregates_size; ++i)
-            aggregate_functions[i]->destroy(place + offsets_of_aggregate_states[i]);
+        if (params.delta_col_pos >= 0)
+            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Changelog aggregating must have aggregate functions");

-        place = nullptr;
+        /// For all rows.
+        AggregateDataPtr place = aggregates_pool->alloc(0);
+        for (size_t i = row_begin; i < row_end; ++i)
+        {
+            auto emplace_result = state.emplaceKey(method.data, i, *aggregates_pool);
+            if (emplace_result.isInserted())
+            {
+                emplace_result.setMapped(place);
+                /// Only add new key
+                retracted_state.emplaceKey(retracted_method.data, i, *retracted_pool).setMapped(place);
+            }
+        }
+        return false;
     }
-}
-void Aggregator::serializeAggregateStates(const AggregateDataPtr & place, WriteBuffer & wb) const
-{
-    UInt8 has_states = place ? 1 : 0;
-    writeIntBinary(has_states, wb);
-    if (has_states)
-    {
-        for (size_t i = 0; i < params.aggregates_size; ++i)
+    bool need_finalization = false;
+
+    /// NOTE: only row_end-row_start is required, but:
+    /// - this affects only optimize_aggregation_in_order,
+    /// - this is just a pointer, so it should not be significant,
+    /// - and plus this will require other changes in the interface.
+    std::unique_ptr<AggregateDataPtr[]> places(new AggregateDataPtr[row_end]);
+
+    /// For all rows.
+    for (size_t i = row_begin; i < row_end; ++i)
+    {
+        AggregateDataPtr aggregate_data = nullptr;
+
+        auto emplace_result = state.emplaceKey(method.data, i, *aggregates_pool);
+
+        /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key.
+        if (emplace_result.isInserted())
+        {
+            /// exception-safety - if you can not allocate memory or create states, then destructors will not be called.
+            emplace_result.setMapped(nullptr);
+
+            aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states);
+            /// TODO: support use_compiled_functions
+            createAggregateStates(aggregate_data);
+            emplace_result.setMapped(aggregate_data);
+
+            /// Save new group without retracted state (used for emitting new-key groups)
+            /// FIXME: There is a bug when using the key8 or key16 hash tables: they use an optimized FixedImplicitZeroHashMap where an empty mapped value directly means zero (i.e. an invalid insertion).
+            /// But in the retract-group scenario, we need an empty mapped value to represent "no retracted value" for a new group.
+            /// Use a non-optimized FixedHashMap? Or revisit the retract implementation?
+            retracted_state.emplaceKey(retracted_method.data, i, *retracted_pool).setMapped(nullptr);
+        }
+        else
+        {
+            aggregate_data = emplace_result.getMapped();
+
+            /// Save changed group with retracted state (used for emitting changed groups)
+            auto retracted_result = retracted_state.emplaceKey(retracted_method.data, i, *retracted_pool);
+            if (retracted_result.isInserted())
+            {
+                retracted_result.setMapped(nullptr);
+                auto retracted_data = retracted_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states);
+                createAggregateStates(retracted_data);
+                /// Copy aggregate data to retracted data before it is changed
+                mergeAggregateStates(retracted_data, aggregate_data, retracted_pool, /*clear_states*/ false);
+                retracted_result.setMapped(retracted_data);
+            }
+        }
+
+        assert(aggregate_data != nullptr);
+        places[i] = aggregate_data;
+    }
+
+    /// Add values to the aggregate functions.
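// The loop above snapshots each changed group's previous state into the per-block "retracted"
// table exactly once, and records brand-new groups with an empty snapshot. A hedged sketch of
// that same idea with plain maps instead of arena-allocated aggregate states (illustrative
// types only, not the real proton structures):
#include <cstdint>
#include <optional>
#include <string>
#include <unordered_map>

struct RetractDemo
{
    std::unordered_map<std::string, int64_t> current;                   /// group -> running sum
    std::unordered_map<std::string, std::optional<int64_t>> retracted;  /// group -> state before this block

    void add(const std::string & key, int64_t value)
    {
        auto [it, inserted] = current.try_emplace(key, 0);
        if (inserted)
            retracted.try_emplace(key, std::nullopt); /// new group: no previous state to retract
        else
            retracted.try_emplace(key, it->second);   /// changed group: snapshot the old state once
        it->second += value;                          /// then apply the update
    }
};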
+ for (size_t i = 0; i < aggregate_functions.size(); ++i) + { + AggregateFunctionInstruction * inst = aggregate_instructions + i; + + if (inst->offsets) + inst->batch_that->addBatchArray(row_begin, row_end, places.get(), inst->state_offset, inst->batch_arguments, inst->offsets, aggregates_pool); + else + inst->batch_that->addBatch(row_begin, row_end, places.get(), inst->state_offset, inst->batch_arguments, aggregates_pool, -1, inst->delta_column); + + if (inst->batch_that->isUserDefined()) + { + AggregateDataPtr * places_ptr = places.get(); + /// It is ok to re-flush if it is flush already, then we don't need maintain a map to check if it is ready flushed + for (size_t j = row_begin; j < row_end; ++j) + { + if (places_ptr[j]) + { + inst->batch_that->flush(places_ptr[j] + inst->state_offset); + if (!need_finalization) + need_finalization = (inst->batch_that->getEmitTimes(places_ptr[j] + inst->state_offset) > 0); + } + } + } + } + + return need_finalization; +} + +std::pair Aggregator::executeAndRetractOnBlock( + Columns columns, + size_t row_begin, + size_t row_end, + AggregatedDataVariants & result, + AggregatedDataVariants & retracted_result, + ColumnRawPtrs & key_columns, + AggregateColumns & aggregate_columns) const +{ + std::pair return_result = {false, false}; + auto & need_abort = return_result.first; + auto & need_finalization = return_result.second; + + if (unlikely(row_end <= row_begin)) + return return_result; + + result.aggregator = this; + if (result.empty()) + { + initDataVariants(result, method_chosen, key_sizes, params); + LOG_TRACE(log, "Aggregation method: {}", result.getMethodName()); + } + + Columns materialized_columns = materializeKeyColumns(columns, key_columns, params, result.isLowCardinality()); + + setupAggregatesPoolTimestamps(row_begin, row_end, key_columns, result.aggregates_pool); + + NestedColumnsHolder nested_columns_holder; + AggregateFunctionInstructions aggregate_functions_instructions; + prepareAggregateInstructions(columns, aggregate_columns, materialized_columns, aggregate_functions_instructions, nested_columns_holder); + + retracted_result.aggregator = this; + if (result.type == AggregatedDataVariants::Type::without_key) + { + /// Save last finalization state into `retracted_result` before processing new data. 
+        /// We shall clear and reset it after finalization
+        if (retracted_result.empty())
+        {
+            initDataVariants(retracted_result, method_chosen, key_sizes, params);
+
+            if (result.without_key)
+            {
+                initStatesForWithoutKey(retracted_result);
+                mergeAggregateStates(retracted_result.without_key, result.without_key, retracted_result.aggregates_pool, false);
+            }
+        }
+
+        initStatesForWithoutKey(result);
+        need_finalization = executeWithoutKeyImpl(result.without_key, row_begin, row_end, aggregate_functions_instructions.data(), result.aggregates_pool);
+    }
+    else
+    {
+        if (retracted_result.empty())
+            initDataVariants(retracted_result, method_chosen, key_sizes, params);
+
+        if (result.isTwoLevel() && !retracted_result.isTwoLevel())
+            retracted_result.convertToTwoLevel();
+
+    #define M(NAME, IS_TWO_LEVEL) \
+        else if (result.type == AggregatedDataVariants::Type::NAME) \
+            need_finalization = executeAndRetractImpl(*result.NAME, result.aggregates_pool, *retracted_result.NAME, retracted_result.aggregates_pool, row_begin, row_end, key_columns, aggregate_functions_instructions.data());
+
+        if (false) {} // NOLINT
+        APPLY_FOR_AGGREGATED_VARIANTS_STREAMING(M)
+    #undef M
+    }
+
+    need_abort = checkAndProcessResult(result);
+    /// It's possible the global single-level hash table was converted to a two-level table after `checkAndProcessResult`,
+    /// so we also convert the retracted data to two level.
+    if (result.isTwoLevel() && !retracted_result.isTwoLevel())
+        retracted_result.convertToTwoLevel();
+
+    return return_result;
+}
+
+std::pair<AggregatedDataVariantsPtr, AggregatedDataVariantsPtr>
+Aggregator::mergeRetractedGroups(ManyAggregatedDataVariants & aggregated_data, ManyAggregatedDataVariants & retracted_data) const
+{
+    auto prepared_data = prepareVariantsToMerge(aggregated_data, /*always_merge_into_empty*/ true);
+    if (prepared_data->empty())
+        return {};
+
+    auto first = prepared_data->at(0);
+
+    auto prepared_retracted_data = prepareVariantsToMerge(retracted_data, first->type != AggregatedDataVariants::Type::without_key);
+    assert(!prepared_retracted_data->empty());
+
+    /// So far, only global aggregation supports emit changelog, so time bucket two level is not possible
+
+#define M(NAME, ...) \
+    else if (first->type == AggregatedDataVariants::Type::NAME) \
+        mergeRetractedGroupsImpl<decltype(first->NAME)::element_type>(*prepared_data, *prepared_retracted_data);
+
+    if (first->type == AggregatedDataVariants::Type::without_key)
+    {
+        mergeWithoutKeyDataImpl(*prepared_retracted_data, true);
+        mergeWithoutKeyDataImpl(*prepared_data, false);
+    }
+    APPLY_FOR_VARIANTS_SINGLE_LEVEL_STREAMING(M)
+    APPLY_FOR_VARIANTS_STATIC_BUCKET_TWO_LEVEL(M)
+#undef M
+    else
+        throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
+
+    return {prepared_data->at(0), prepared_retracted_data->at(0)};
+}
+
+template <typename Method>
+void Aggregator::mergeRetractedGroupsImpl(
+    ManyAggregatedDataVariants & aggregated_data, ManyAggregatedDataVariants & retracted_data) const
+{
+    AggregatedDataVariantsPtr & res = aggregated_data[0];
+    AggregatedDataVariantsPtr & retracted_res = retracted_data[0];
+
+    using Table = typename Method::Data;
+    Table & dst_table = getDataVariant<Method>(*res).data;
+    Table & dst_retracted_table = getDataVariant<Method>(*retracted_res).data;
+
+    /// The first data variants entry is always empty.
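// mergeRetractedGroups returns the merged current data plus the merged pre-update data; a caller
// emitting a changelog can then, per changed group, retract the old value and append the new one.
// A hedged, std-only sketch of that final step (hypothetical driver code, not taken from this PR;
// the real code converts aggregate states into Blocks instead of plain integers):
#include <cstdint>
#include <optional>
#include <string>
#include <tuple>
#include <unordered_map>
#include <vector>

/// One changelog row: group key, finalized value, delta (+1 append, -1 retract).
using ChangelogRows = std::vector<std::tuple<std::string, int64_t, int8_t>>;

ChangelogRows emitChangelog(
    const std::unordered_map<std::string, int64_t> & current,                   /// merged aggregated data
    const std::unordered_map<std::string, std::optional<int64_t>> & retracted)  /// merged pre-update snapshots
{
    ChangelogRows out;
    for (const auto & [key, previous] : retracted)
    {
        if (previous)
            out.emplace_back(key, *previous, -1);   /// retract the value emitted last time
        out.emplace_back(key, current.at(key), +1); /// append the new value for the changed group
    }
    return out;
}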
+ assert(dst_table.empty() && dst_retracted_table.empty()); + + /// For example: + /// thread-1 thread-2 + /// group-1 changed non-changed + /// group-2 non-changed changed + /// group-3 non-changed non-changed + + /// Collect all changed groups, then merge retracted/updated data + /// 1) Collect changed groups: + /// `dst_retracted` <= (thread-1: group-1) + (thread-2: group-2) + for (size_t result_num = 1, size = retracted_data.size(); result_num < size; ++result_num) + { + if (!checkLimits(retracted_res->sizeWithoutOverflowRow())) + break; + + auto & src_retracted_table = getDataVariant(*retracted_data[result_num]).data; + src_retracted_table.mergeToViaEmplace(dst_retracted_table, [&](AggregateDataPtr & __restrict dst, AggregateDataPtr & __restrict src, bool inserted) { + if (inserted) + dst = nullptr; + + mergeAggregateStates(dst, src, retracted_res->aggregates_pool, true); + }); + } + + /// 2) Merge retracted groups non-changed thread parts (based on all changed groups) + /// `dst_retracted` <= (thread-1: group-2) + (thread-2: group-1) + for (size_t result_num = 1, size = retracted_data.size(); result_num < size; ++result_num) + { + if (!checkLimits(retracted_res->sizeWithoutOverflowRow())) + break; + + auto & current_retracted = *retracted_data[result_num]; + Table & src_retracted_table = getDataVariant(current_retracted).data; + Table & src_aggregated_table = getDataVariant(*aggregated_data[result_num]).data; + dst_retracted_table.forEachValue([&](const auto & key, auto & mapped) { + /// Merge retracted groups non-changed thread parts + if (!src_retracted_table.find(key)) + { + auto find_it = src_aggregated_table.find(key); + if (find_it) + mergeAggregateStates( + mapped, + find_it->getMapped(), + retracted_res->aggregates_pool, + /*clear_states*/ false); + }}); + + /// Reset retracted data after finalization + current_retracted.reset(); + } + + /// 3) Merge new/updated groups (based on all changed groups) + /// `dst` <= (thread-1: group-1 group-2) + (thread-2: group-1 group-2) + for (size_t result_num = 1, size = aggregated_data.size(); result_num < size; ++result_num) + { + if (!checkLimits(res->sizeWithoutOverflowRow())) + break; + + Table & src_aggregated_table = getDataVariant(*aggregated_data[result_num]).data; + dst_retracted_table.forEachValue([&](const auto & key, auto & mapped) { + /// Merge new/updated groups + typename Table::LookupResult dst_it; + bool inserted; + + /// NOTE: For StringRef `key`, its memory was allocated in `retracted_res->aggregates_pool`, + /// we shall save this key in itself pool (i.e. 
res->aggregates_pool) if inserted + using KeyType = std::decay_t; + if constexpr (std::is_same_v) + dst_table.emplace(ArenaKeyHolder{key, *res->aggregates_pool}, dst_it, inserted); + else + dst_table.emplace(key, dst_it, inserted); + + if (inserted) + dst_it->getMapped() = nullptr; + + auto find_it = src_aggregated_table.find(key); + if (find_it) + mergeAggregateStates( + dst_it->getMapped(), + find_it->getMapped(), + res->aggregates_pool, + /*clear_states*/ false); + }); + } +} + +void Aggregator::mergeAggregateStates(AggregateDataPtr & dst, AggregateDataPtr & src, Arena * arena, bool clear_states) const +{ + if (!src) + return; + + if (!dst) + { + auto aggregate_data = arena->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + createAggregateStates(aggregate_data); + dst = aggregate_data; + } + + for (size_t i = 0; i < params.aggregates_size; ++i) + aggregate_functions[i]->merge(dst + offsets_of_aggregate_states[i], src + offsets_of_aggregate_states[i], arena); + + if (clear_states) + destroyAggregateStates(src); +} + +void Aggregator::destroyAggregateStates(AggregateDataPtr & place) const +{ + if (place) + { + for (size_t i = 0; i < params.aggregates_size; ++i) + aggregate_functions[i]->destroy(place + offsets_of_aggregate_states[i]); + + place = nullptr; + } +} + +void Aggregator::serializeAggregateStates(const AggregateDataPtr & place, WriteBuffer & wb) const +{ + UInt8 has_states = place ? 1 : 0; + writeIntBinary(has_states, wb); + if (has_states) + { + for (size_t i = 0; i < params.aggregates_size; ++i) aggregate_functions[i]->serialize(place + offsets_of_aggregate_states[i], wb); } } @@ -3547,30 +3226,18 @@ void Aggregator::doCheckpointV3(const AggregatedDataVariants & data_variants, Wr writeIntBinary(static_cast(data_variants.type), wb); - writeIntBinary(static_cast(expanded_data_type), wb); + writeIntBinary(static_cast(trackingUpdatesType()), wb); auto state_serializer = [this](auto place, auto & wb_) { assert(place); - if (expanded_data_type == ExpandedDataType::UpdatedWithRetracted) - { - UpdatedDataEx::serialize(place, wb_); - - auto & retracted_place = RetractedDataEx::getRetracted(place); - bool has_retracted = retracted_place != nullptr; - writeBoolText(has_retracted, wb_); - if (has_retracted) - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_functions[i]->serialize(retracted_place + offsets_of_aggregate_states[i], wb_); - } - else if (expanded_data_type == ExpandedDataType::Updated) - UpdatedDataEx::serialize(place, wb_); + if (trackingUpdatesType() == TrackingUpdatesType::Updates) + TrackingUpdates::serialize(place, wb_); for (size_t i = 0; i < params.aggregates_size; ++i) aggregate_functions[i]->serialize(place + offsets_of_aggregate_states[i], wb_); }; /// [aggr-func-state-without-key] - assert(!params.overflow_row); if (data_variants.type == AggregatedDataVariants::Type::without_key) state_serializer(data_variants.without_key, wb); @@ -3617,13 +3284,13 @@ void Aggregator::doRecoverV3(AggregatedDataVariants & data_variants, ReadBuffer UInt8 recovered_expanded_data_type_uint8; readIntBinary(recovered_expanded_data_type_uint8, rb); - ExpandedDataType recovered_expanded_data_type = static_cast(recovered_expanded_data_type_uint8); - if (recovered_expanded_data_type != expanded_data_type) + TrackingUpdatesType recovered_expanded_data_type = static_cast(recovered_expanded_data_type_uint8); + if (recovered_expanded_data_type != trackingUpdatesType()) throw Exception( ErrorCodes::RECOVER_CHECKPOINT_FAILED, "Failed to recover aggregation 
checkpoint. Expanded data type is not the same, checkpointed={}, current={}", magic_enum::enum_name(recovered_expanded_data_type), - magic_enum::enum_name(expanded_data_type)); + magic_enum::enum_name(trackingUpdatesType())); auto state_deserializer = [this](auto & place, auto & rb_, Arena * arena) { place = nullptr; /// exception-safety - if you can not allocate memory or create states, then destructors will not be called. @@ -3631,31 +3298,14 @@ void Aggregator::doRecoverV3(AggregatedDataVariants & data_variants, ReadBuffer createAggregateStates(aggregate_data); place = aggregate_data; - if (expanded_data_type == ExpandedDataType::UpdatedWithRetracted) - { - UpdatedDataEx::deserialize(place, rb_); - - auto & retracted = RetractedDataEx::getRetracted(place); - bool has_retracted = false; - readBoolText(has_retracted, rb_); - if (has_retracted) - { - auto tmp_retracted = arena->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(tmp_retracted); - retracted = tmp_retracted; - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_functions[i]->deserialize(retracted + offsets_of_aggregate_states[i], rb_, std::nullopt, arena); - } - } - else if (expanded_data_type == ExpandedDataType::Updated) - UpdatedDataEx::deserialize(place, rb_); + if (trackingUpdatesType() == TrackingUpdatesType::Updates) + TrackingUpdates::deserialize(place, rb_); for (size_t i = 0; i < params.aggregates_size; ++i) aggregate_functions[i]->deserialize(place + offsets_of_aggregate_states[i], rb_, std::nullopt, arena); }; /// [aggr-func-state-without-key] - assert(!params.overflow_row); if (data_variants.type == AggregatedDataVariants::Type::without_key) state_deserializer(data_variants.without_key, rb, data_variants.aggregates_pool); @@ -3674,7 +3324,7 @@ void Aggregator::doRecoverV3(AggregatedDataVariants & data_variants, ReadBuffer else throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); } -bool Aggregator::checkAndProcessResult(AggregatedDataVariants & result, bool & no_more_keys) const +bool Aggregator::checkAndProcessResult(AggregatedDataVariants & result) const { size_t result_size = result.sizeWithoutOverflowRow(); Int64 current_memory_usage = 0; @@ -3695,7 +3345,7 @@ bool Aggregator::checkAndProcessResult(AggregatedDataVariants & result, bool & n result.convertToTwoLevel(); /// Checking the constraints. - if (!checkLimits(result_size, no_more_keys)) + if (!checkLimits(result_size)) return true; /** Flush data to disk if too much RAM is consumed. @@ -3728,158 +3378,7 @@ bool Aggregator::checkAndProcessResult(AggregatedDataVariants & result, bool & n return false; } -template -bool Aggregator::executeAndRetractImpl( - Method & method, - Arena * aggregates_pool, - Arena * retracted_pool, - size_t row_begin, - size_t row_end, - ColumnRawPtrs & key_columns, - AggregateFunctionInstruction * aggregate_instructions) const -{ - typename Method::State state(key_columns, key_sizes, aggregation_state_cache); - bool need_finalization = false; - - /// NOTE: only row_end-row_start is required, but: - /// - this affects only optimize_aggregation_in_order, - /// - this is just a pointer, so it should not be significant, - /// - and plus this will require other changes in the interface. - std::unique_ptr places(new AggregateDataPtr[row_end]); - - /// For all rows. 
- for (size_t i = row_begin; i < row_end; ++i) - { - AggregateDataPtr aggregate_data = nullptr; - - auto emplace_result = state.emplaceKey(method.data, i, *aggregates_pool); - - /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key. - if (emplace_result.isInserted()) - { - /// exception-safety - if you can not allocate memory or create states, then destructors will not be called. - emplace_result.setMapped(nullptr); - - aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - /// TODO: support use_compiled_functions - createAggregateStates(aggregate_data); - emplace_result.setMapped(aggregate_data); - } - else - { - aggregate_data = emplace_result.getMapped(); - - /// Save changed group with retracted state (used for emit changed group) - /// If there are aggregate data and no retracted data, copy aggregate data to retracted data before changed - if (!UpdatedDataEx::isEmpty(aggregate_data) && !RetractedDataEx::hasRetracted(aggregate_data)) - { - auto & retracted = RetractedDataEx::getRetracted(aggregate_data); - auto tmp_retracted = retracted_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(tmp_retracted); - retracted = tmp_retracted; - mergeAggregateStates(retracted, aggregate_data, retracted_pool, /*clear_states*/ false); - } - } - - assert(aggregate_data != nullptr); - places[i] = aggregate_data; - } - - /// Add values to the aggregate functions. - for (size_t i = 0; i < aggregate_functions.size(); ++i) - { - AggregateFunctionInstruction * inst = aggregate_instructions + i; - - if (inst->offsets) - inst->batch_that->addBatchArray(row_begin, row_end, places.get(), inst->state_offset, inst->batch_arguments, inst->offsets, aggregates_pool); - else - inst->batch_that->addBatch(row_begin, row_end, places.get(), inst->state_offset, inst->batch_arguments, aggregates_pool, -1, inst->delta_column); - - if (inst->batch_that->isUserDefined()) - { - AggregateDataPtr * places_ptr = places.get(); - /// It is ok to re-flush if it is flush already, then we don't need maintain a map to check if it is ready flushed - for (size_t j = row_begin; j < row_end; ++j) - { - if (places_ptr[j]) - { - inst->batch_that->flush(places_ptr[j] + inst->state_offset); - if (!need_finalization) - need_finalization = (inst->batch_that->getEmitTimes(places_ptr[j] + inst->state_offset) > 0); - } - } - } - } - - if (hasExpandedData()) - UpdatedDataEx::addBatch(row_begin, row_end, places.get(), aggregate_instructions ? 
aggregate_instructions->delta_column : nullptr); - - return need_finalization; -} - -std::pair Aggregator::executeAndRetractOnBlock( - Columns columns, - size_t row_begin, - size_t row_end, - AggregatedDataVariants & result, - ColumnRawPtrs & key_columns, - AggregateColumns & aggregate_columns, - bool & no_more_keys) const -{ - std::pair return_result = {false, false}; - auto & need_abort = return_result.first; - auto & need_finalization = return_result.second; - - if (unlikely(row_end <= row_begin)) - return return_result; - - result.aggregator = this; - if (result.empty()) - { - initDataVariants(result, method_chosen, key_sizes, params); - initStatesForWithoutKeyOrOverflow(result); - LOG_TRACE(log, "Aggregation method: {}", result.getMethodName()); - } - - Columns materialized_columns = materializeKeyColumns(columns, key_columns, params, result.isLowCardinality()); - - setupAggregatesPoolTimestamps(row_begin, row_end, key_columns, result.aggregates_pool); - - NestedColumnsHolder nested_columns_holder; - AggregateFunctionInstructions aggregate_functions_instructions; - prepareAggregateInstructions(columns, aggregate_columns, materialized_columns, aggregate_functions_instructions, nested_columns_holder); - - assert(!params.overflow_row && !no_more_keys); - assert(expanded_data_type == ExpandedDataType::UpdatedWithRetracted); - if (result.type == AggregatedDataVariants::Type::without_key) - { - /// Save last finalization state into `retracted_result` before processing new data. - /// We shall clear and reset it after finalization - if (!UpdatedDataEx::isEmpty(result.without_key) && !RetractedDataEx::hasRetracted(result.without_key)) - { - auto & retracted = RetractedDataEx::getRetracted(result.without_key); - auto tmp_retracted = result.retracted_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(tmp_retracted); - retracted = tmp_retracted; - mergeAggregateStates(retracted, result.without_key, result.retracted_pool.get(), /*clear_states*/ false); - } - - need_finalization = executeWithoutKeyImpl( - result.without_key, row_begin, row_end, aggregate_functions_instructions.data(), result.aggregates_pool); - } - -#define M(NAME, IS_TWO_LEVEL) \ - else if (result.type == AggregatedDataVariants::Type::NAME) \ - need_finalization = executeAndRetractImpl(*result.NAME, result.aggregates_pool, result.retracted_pool.get(), row_begin, row_end, key_columns, aggregate_functions_instructions.data()); - - APPLY_FOR_AGGREGATED_VARIANTS_STREAMING(M) -#undef M - - need_abort = checkAndProcessResult(result, no_more_keys); - return return_result; -} - -BlocksList Aggregator::convertUpdatedToBlocks(AggregatedDataVariants & data_variants) const +BlocksList Aggregator::convertUpdatesToBlocks(AggregatedDataVariants & data_variants) const { LOG_DEBUG(log, "Converting updated aggregated data to blocks"); @@ -3891,17 +3390,14 @@ BlocksList Aggregator::convertUpdatedToBlocks(AggregatedDataVariants & data_vari if (data_variants.empty()) return blocks; - if (unlikely(params.overflow_row)) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Overflow row processing is not implemented in streaming aggregation"); - constexpr bool final = true; constexpr bool clear_states = false; if (data_variants.type == AggregatedDataVariants::Type::without_key) - blocks.emplace_back(prepareBlockAndFillWithoutKey(data_variants, final, false, clear_states, AggregateStateType::OnlyUpdated)); + blocks.emplace_back(prepareBlockAndFillWithoutKey(data_variants, final, clear_states, 
ConvertType::OnlyUpdates)); else if (!data_variants.isTwoLevel()) - blocks.emplace_back(prepareBlockAndFillSingleLevel(data_variants, final, clear_states, AggregateStateType::OnlyUpdated)); + blocks.emplace_back(prepareBlockAndFillSingleLevel(data_variants, final, clear_states, ConvertType::OnlyUpdates)); else - blocks.splice(blocks.end(), prepareBlocksAndFillTwoLevel(data_variants, final, clear_states, /*max_threads*/ 1, AggregateStateType::OnlyUpdated)); + blocks.splice(blocks.end(), prepareBlocksAndFillTwoLevel(data_variants, final, clear_states, /*max_threads*/ 1, ConvertType::OnlyUpdates)); size_t rows = 0; size_t bytes = 0; @@ -3924,7 +3420,7 @@ BlocksList Aggregator::convertUpdatedToBlocks(AggregatedDataVariants & data_vari template -void NO_INLINE Aggregator::mergeUpdatedGroupsImpl(ManyAggregatedDataVariants & non_empty_data, Arena * arena) const +void NO_INLINE Aggregator::mergeUpdateGroupsImpl(ManyAggregatedDataVariants & non_empty_data, Arena * arena) const { AggregatedDataVariantsPtr & res = non_empty_data[0]; auto & dst_table = getDataVariant(*res).data; @@ -3939,19 +3435,16 @@ void NO_INLINE Aggregator::mergeUpdatedGroupsImpl(ManyAggregatedDataVariants & n /// /// 1) Collect all updated groups /// `dst` <= (group-1, group-2) - bool no_more_keys = false; using Table = typename Method::Data; for (size_t result_num = 1, size = non_empty_data.size(); result_num < size; ++result_num) { - if (!checkLimits(res->sizeWithoutOverflowRow(), no_more_keys)) + if (!checkLimits(res->sizeWithoutOverflowRow())) break; - assert(!no_more_keys); - auto & src_table = getDataVariant(*non_empty_data[result_num]).data; auto merge_updated_func = [&](const auto & key, auto & mapped) { /// Skip no updated group - if (!UpdatedDataEx::isUpdated(mapped)) + if (!TrackingUpdates::updated(mapped)) return; typename Table::LookupResult dst_it; @@ -3964,7 +3457,7 @@ void NO_INLINE Aggregator::mergeUpdatedGroupsImpl(ManyAggregatedDataVariants & n auto & dst = dst_it->getMapped(); dst = nullptr; /// exception-safety - if you can not allocate memory or create states, then destructors will not be called. 
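// A compact model of the "merge only updated groups" path shown here: every per-thread entry
// carries an updated-since-last-finalization flag; groups flagged in any thread are collected,
// then merged from all threads (their unchanged parts included) and the flags are reset.
// Types are illustrative only; the real code keeps the flag in the TrackingUpdates prefix of
// each aggregate state.
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

struct Entry
{
    int64_t sum = 0;
    bool updated = false; /// per-group "updated since last finalization" flag
};

std::unordered_map<std::string, int64_t> mergeUpdatedGroups(std::vector<std::unordered_map<std::string, Entry>> & per_thread)
{
    std::unordered_map<std::string, int64_t> merged;

    /// 1) Collect every group updated in at least one thread
    for (const auto & table : per_thread)
        for (const auto & [key, entry] : table)
            if (entry.updated)
                merged.try_emplace(key, 0);

    /// 2) Merge those groups from all threads and reset the flags
    for (auto & table : per_thread)
    {
        for (auto & [key, sum] : merged)
        {
            auto it = table.find(key);
            if (it == table.end())
                continue;
            sum += it->second.sum;
            it->second.updated = false;
        }
    }
    return merged;
}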
auto aggregate_data = arena->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(aggregate_data); + createAggregateStates(aggregate_data, /*prefix_with_updates_tracking_state*/ false); dst = aggregate_data; } }; @@ -3985,27 +3478,24 @@ void NO_INLINE Aggregator::mergeUpdatedGroupsImpl(ManyAggregatedDataVariants & n { mergeAggregateStates(mapped, find_it->getMapped(), arena, /*clear_states*/ false); /// NOTE: We always reset the updated flag after merged - UpdatedDataEx::resetUpdated(find_it->getMapped()); + TrackingUpdates::resetUpdated(find_it->getMapped()); } }); } } -AggregatedDataVariantsPtr Aggregator::mergeUpdatedGroups(ManyAggregatedDataVariants & data_variants) const +AggregatedDataVariantsPtr Aggregator::mergeUpdateGroups(ManyAggregatedDataVariants & data_variants) const { auto prepared_data_ptr = prepareVariantsToMerge(data_variants, /*always_merge_into_empty*/ true); if (prepared_data_ptr->empty()) return {}; - if (unlikely(params.overflow_row)) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Overflow row processing is not implemented in streaming aggregation"); - BlocksList blocks; auto & first = *prepared_data_ptr->at(0); if (first.type == AggregatedDataVariants::Type::without_key) { if (std::ranges::none_of(*prepared_data_ptr, [](auto & variants) { - return variants->without_key && UpdatedDataEx::isUpdated(variants->without_key); + return variants->without_key && TrackingUpdates::updated(variants->without_key); })) return {}; @@ -4014,7 +3504,7 @@ AggregatedDataVariantsPtr Aggregator::mergeUpdatedGroups(ManyAggregatedDataVaria #define M(NAME, IS_TWO_LEVEL) \ else if (first.type == AggregatedDataVariants::Type::NAME) \ - mergeUpdatedGroupsImpl(*prepared_data_ptr, first.aggregates_pool); + mergeUpdateGroupsImpl(*prepared_data_ptr, first.aggregates_pool); APPLY_FOR_AGGREGATED_VARIANTS_STREAMING(M) #undef M @@ -4023,218 +3513,6 @@ AggregatedDataVariantsPtr Aggregator::mergeUpdatedGroups(ManyAggregatedDataVaria return prepared_data_ptr->at(0); } -BlocksList Aggregator::convertRetractedToBlocks(AggregatedDataVariants & data_variants) const -{ - LOG_DEBUG(log, "Converting retracted aggregated data to blocks"); - - Stopwatch watch; - - BlocksList blocks; - - /// In what data structure is the data aggregated? - if (data_variants.empty()) - return blocks; - - if (unlikely(params.overflow_row)) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Overflow row processing is not implemented in streaming aggregation"); - - constexpr bool final = true; - constexpr bool clear_states = true; - if (data_variants.type == AggregatedDataVariants::Type::without_key) - blocks.emplace_back(prepareBlockAndFillWithoutKey(data_variants, final, false, clear_states, AggregateStateType::OnlyRetracted)); - else if (!data_variants.isTwoLevel()) - blocks.emplace_back(prepareBlockAndFillSingleLevel(data_variants, final, clear_states, AggregateStateType::OnlyRetracted)); - else - blocks.splice(blocks.end(), prepareBlocksAndFillTwoLevel(data_variants, final, clear_states, /*max_threads*/ 1, AggregateStateType::OnlyRetracted)); - - size_t rows = 0; - size_t bytes = 0; - - for (const auto & block : blocks) - { - rows += block.rows(); - bytes += block.bytes(); - } - - double elapsed_seconds = watch.elapsedSeconds(); - LOG_DEBUG(log, - "Converted retracted aggregated data to blocks. {} rows, {} in {} sec. 
({:.3f} rows/sec., {}/sec.)", - rows, ReadableSize(bytes), - elapsed_seconds, rows / elapsed_seconds, - ReadableSize(bytes / elapsed_seconds)); - - return blocks; -} - -template -void Aggregator::mergeRetractedGroupsImpl(ManyAggregatedDataVariants & non_empty_data, Arena * arena) const -{ - AggregatedDataVariantsPtr & res = non_empty_data[0]; - auto & dst_table = getDataVariant(*res).data; - /// Always merge retracted data into empty first. - assert(dst_table.empty()); - - /// For example: - /// thread-1 thread-2 - /// group-1 retracted non-retracted - /// group-2 non-retracted retracted - /// group-3 non-retracted non-retracted - /// - /// 1) Collect all retracted groups - /// `dst` <= (group-1, group-2) - bool no_more_keys = false; - using Table = typename Method::Data; - for (size_t result_num = 1, size = non_empty_data.size(); result_num < size; ++result_num) - { - if (!checkLimits(res->sizeWithoutOverflowRow(), no_more_keys)) - break; - - assert(!no_more_keys); - - auto & src_table = getDataVariant(*non_empty_data[result_num]).data; - src_table.forEachValue([&](const auto & key, auto & mapped) { - /// Skip no retracted group - if (!RetractedDataEx::hasRetracted(mapped)) - return; - - typename Table::LookupResult dst_it; - bool inserted; - /// For StringRef `key`, it is safe to store to `dst_table` - /// since the `dst_table` is temporary and the `src_table` will not be cleaned in the meantime - dst_table.emplace(key, dst_it, inserted); - if (inserted) - { - auto & dst = dst_it->getMapped(); - dst = nullptr; /// exception-safety - if you can not allocate memory or create states, then destructors will not be called. - auto aggregate_data = arena->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(aggregate_data); - dst = aggregate_data; - } - }); - } - - /// 2) Merge all retracted groups parts for each thread (based on `1)` ) - /// `dst` <= (thread-1: group-1 group-2) + (thread-2: group-1 group-2) - for (size_t result_num = 1, size = non_empty_data.size(); result_num < size; ++result_num) - { - auto & current = *non_empty_data[result_num]; - auto & src_table = getDataVariant(current).data; - dst_table.forEachValue([&](const auto & key, auto & mapped) { - if (auto find_it = src_table.find(key)) - { - auto & src_mapped = find_it->getMapped(); - if (RetractedDataEx::hasRetracted(src_mapped)) - mergeAggregateStates(mapped, RetractedDataEx::getRetracted(src_mapped), arena, /*clear_states*/ true); - else - /// If retracted data not exist, assume it does't be changed, we should use original data - mergeAggregateStates(mapped, src_mapped, arena, /*clear_states*/ false); - } - }); - - current.resetRetractedPool(); - } -} - -AggregatedDataVariantsPtr Aggregator::mergeRetractedGroups(ManyAggregatedDataVariants & data_variants) const -{ - auto prepared_data_ptr = prepareVariantsToMerge(data_variants, /*always_merge_into_empty*/ true); - if (prepared_data_ptr->empty()) - return {}; - - if (unlikely(params.overflow_row)) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Overflow row processing is not implemented in streaming aggregation"); - - auto & first = *prepared_data_ptr->at(0); - if (first.type == AggregatedDataVariants::Type::without_key) - { - if (std::ranges::none_of(*prepared_data_ptr, [](auto & variants) { return RetractedDataEx::hasRetracted(variants->without_key); })) - return {}; /// Skip if no retracted - - for (size_t result_num = 1, size = prepared_data_ptr->size(); result_num < size; ++result_num) - { - auto & src_without_key = 
(*prepared_data_ptr)[result_num]->without_key; - if (RetractedDataEx::hasRetracted(src_without_key)) - mergeAggregateStates(first.without_key, RetractedDataEx::getRetracted(src_without_key), first.aggregates_pool, /*clear_states*/ true); - else - /// If retracted data not exist, assume it does't be changed, we should use original data - mergeAggregateStates(first.without_key, src_without_key, first.aggregates_pool, /*clear_states*/ false); - } - } - -#define M(NAME) \ - else if (first.type == AggregatedDataVariants::Type::NAME) \ - mergeRetractedGroupsImpl(*prepared_data_ptr, first.aggregates_pool); - - APPLY_FOR_VARIANTS_SINGLE_LEVEL_STREAMING(M) - APPLY_FOR_VARIANTS_ALL_TWO_LEVEL(M) -#undef M - else throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); - - return prepared_data_ptr->at(0); -} - -template -void Aggregator::mergeRetractedIntoImpl(Method & method, Method & retracted_method, Arena * arena) const -{ - using Table = typename Method::Data; - Table & table = method.data; - Table & retracted_table = retracted_method.data; - - retracted_table.forEachValue([&](const auto & key, auto & retracted_mapped) { - - auto find_it = table.find(key); - assert(find_it); - - auto & mapped = find_it->getMapped(); - assert(!RetractedDataEx::hasRetracted(mapped)); - UpdatedDataEx::setUpdated(mapped); - - /// For old impl, no retracted data for new group - if (!retracted_mapped) - return; - - auto & retracted = RetractedDataEx::getRetracted(mapped); - auto tmp_retracted = arena->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(tmp_retracted); - retracted = tmp_retracted; - mergeAggregateStates(retracted, retracted_mapped, arena, /*clear_states*/ true); - }); -} - -void Aggregator::mergeRetractedInto(AggregatedDataVariants & result, AggregatedDataVariants && retracted_result) const -{ - assert(expanded_data_type == ExpandedDataType::UpdatedWithRetracted); - if (result.type != retracted_result.type) [[unlikely]] - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Don't merge retracted aggregation result, the current data variants type is {}, but retracted data variants type is {}", - magic_enum::enum_name(result.type), - magic_enum::enum_name(retracted_result.type)); - - Arena * arena = result.retracted_pool.get(); - if (result.type == AggregatedDataVariants::Type::without_key) - { - if (retracted_result.without_key) - { - assert(!RetractedDataEx::hasRetracted(result.without_key)); - auto & retracted = RetractedDataEx::getRetracted(result.without_key); - auto tmp_retracted = arena->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(tmp_retracted); - retracted = tmp_retracted; - mergeAggregateStates(retracted, retracted_result.without_key, arena, /*clear_states*/ true); - } - } - -#define M(NAME, IS_TWO_LEVEL) \ - else if (result.type == AggregatedDataVariants::Type::NAME) \ - mergeRetractedIntoImpl(*result.NAME, *retracted_result.NAME, arena); - - APPLY_FOR_AGGREGATED_VARIANTS_STREAMING(M) -#undef M - - retracted_result.reset(); -} - void Aggregator::updateMetrics(const AggregatedDataVariants & variants, AggregatedDataMetrics & metrics) const { switch (variants.type) diff --git a/src/Interpreters/Streaming/Aggregator.h b/src/Interpreters/Streaming/Aggregator.h index 945260170a0..32b1a2e141d 100644 --- a/src/Interpreters/Streaming/Aggregator.h +++ b/src/Interpreters/Streaming/Aggregator.h @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include 
#include @@ -76,11 +76,10 @@ namespace Streaming * best suited for different cases, and this approach is just one of them, chosen for a combination of reasons. */ -enum class AggregateStateType +enum class ConvertType : uint8_t { - Normal, - OnlyUpdated, - OnlyRetracted, + Normal = 0, + OnlyUpdates = 1, }; /// using TimeBucketAggregatedDataWithUInt16Key = TimeBucketHashMap>; @@ -129,7 +128,6 @@ SERDE struct AggregatedDataVariants : private boost::noncopyable /// Pools for states of aggregate functions. Ownership will be later transferred to ColumnAggregateFunction. Arenas aggregates_pools; Arena * aggregates_pool{}; /// The pool that is currently used for allocation. - std::unique_ptr retracted_pool; /// Use an independent pool to manage retracted data, which will be cleared after each finalization /** Specialization for the case when there are no keys, and for keys not fitted into max_rows_to_group_by. */ @@ -371,17 +369,16 @@ SERDE struct AggregatedDataVariants : private boost::noncopyable /// proton: ends; } + /// \param reset - clean up all in memory states and the corresponding arena pools used to hold these states void reset(); - void resetAggregatesPool() + void resetAndCreateAggregatesPools() { aggregates_pools = Arenas(1, std::make_shared()); aggregates_pool = aggregates_pools.back().get(); aggregates_pool->enableRecycle(true); } - void resetRetractedPool() { retracted_pool = std::make_unique(); } - /// Number of rows (different keys). size_t size() const { @@ -667,8 +664,7 @@ class Aggregator final WindowParamsPtr window_params; - bool tracking_changes = false; - bool tracking_updated = false; + TrackingUpdatesType tracking_updates_type; /// proton: ends /// proton: starts @@ -690,8 +686,7 @@ class Aggregator final ssize_t delta_col_pos_ = -1, size_t window_keys_num_ = 0, WindowParamsPtr window_params_ = nullptr, - bool tracking_changes_ = false, - bool tracking_updated_ = false) + TrackingUpdatesType tracking_updates_type_ = TrackingUpdatesType::None) : src_header(src_header_), intermediate_header(intermediate_header_), keys(keys_), aggregates(aggregates_), keys_size(keys.size()), aggregates_size(aggregates.size()), @@ -709,8 +704,7 @@ class Aggregator final delta_col_pos(delta_col_pos_), window_keys_num(window_keys_num_), window_params(window_params_), - tracking_changes(tracking_changes_), - tracking_updated(tracking_updated_) + tracking_updates_type(tracking_updates_type_) { } /// proton: ends @@ -750,17 +744,21 @@ class Aggregator final /// Process one block. Return {should_abort, need_finalization} pair /// should_abort: if the processing should be aborted (with group_by_overflow_mode = 'break') return true, otherwise false. /// need_finalization : only for UDA aggregation. 
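// ConvertType::Normal vs ConvertType::OnlyUpdates in miniature: Normal finalizes every group,
// OnlyUpdates only re-emits groups whose updated flag is set (as needed by `emit on update`).
// A std-only sketch under assumed, illustrative types; the real code walks the hash-table
// variants and fills output columns rather than a vector of pairs.
#include <cstdint>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

enum class ConvertTypeDemo { Normal, OnlyUpdates };

struct GroupState
{
    int64_t sum = 0;
    bool updated = false;
};

std::vector<std::pair<std::string, int64_t>>
finalizeGroups(const std::unordered_map<std::string, GroupState> & groups, ConvertTypeDemo type)
{
    std::vector<std::pair<std::string, int64_t>> rows;
    for (const auto & [key, state] : groups)
    {
        if (type == ConvertTypeDemo::OnlyUpdates && !state.updated)
            continue; /// skip groups not touched since the last emit
        rows.emplace_back(key, state.sum);
    }
    return rows;
}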
If there is no UDA, always false - std::pair executeOnBlock(const Block & block, + std::pair executeOnBlock( + const Block & block, AggregatedDataVariants & result, ColumnRawPtrs & key_columns, - AggregateColumns & aggregate_columns, /// Passed to not create them anew for each block - bool & no_more_keys) const; + AggregateColumns & aggregate_columns /// Passed to not create them anew for each block + ) const; - std::pair executeOnBlock(Columns columns, - size_t row_begin, size_t row_end, + std::pair executeOnBlock( + Columns columns, + size_t row_begin, + size_t row_end, AggregatedDataVariants & result, - ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, /// Passed to not create them anew for each block - bool & no_more_keys) const; + ColumnRawPtrs & key_columns, + AggregateColumns & aggregate_columns /// Passed to not create them anew for each block + ) const; /// Execute and retract state for changed groups: /// 1) For new group: @@ -780,15 +778,12 @@ class Aggregator final size_t row_begin, size_t row_end, AggregatedDataVariants & result, + AggregatedDataVariants & retracted_result, ColumnRawPtrs & key_columns, - AggregateColumns & aggregate_columns, /// Passed to not create them anew for each block - bool & no_more_keys) const; - - bool mergeOnBlock(Block block, AggregatedDataVariants & result, bool & no_more_keys) const; + AggregateColumns & aggregate_columns /// Passed to not create them anew for each block + ) const; /** Convert the aggregation data structure into a block. - * If overflow_row = true, then aggregates for rows that are not included in max_rows_to_group_by are put in the first block. - * * If final = false, then ColumnAggregateFunction is created as the aggregation columns with the state of the calculations, * which can then be combined with other states (for distributed query processing or checkpoint). * If final = true, then columns with ready values are created as aggregate columns. @@ -808,9 +803,11 @@ class Aggregator final * a. 
SELECT count(), avg(i), sum(k) FROM ( <-- second level global aggr, need prune its state at this level * SELECT avg(i) AS i, sum(k) AS k FROM my_stream GROUP BY device_id <-- first level global aggr, don't prune states * ); + * + * \param max_threads - limits max threads for converting two level aggregate state in parallel */ - BlocksList convertToBlocks(AggregatedDataVariants & data_variants, bool final, bool clear_states, size_t max_threads) const; - BlocksList mergeAndConvertToBlocks(ManyAggregatedDataVariants & data_variants, bool final, bool clear_states, size_t max_threads) const; + BlocksList convertToBlocks(AggregatedDataVariants & data_variants, bool final, size_t max_threads) const; + BlocksList mergeAndConvertToBlocks(ManyAggregatedDataVariants & data_variants, bool final, size_t max_threads) const; /// For Tumble/Session window function, there is only one bucket /// For Hop window function, merge multiple gcd windows (buckets) to a hop window @@ -818,30 +815,22 @@ class Aggregator final /// gcd_bucket1 - [00:00, 00:02) /// => result block - [00:00, 00:04) /// gcd_bucket2 - [00:02, 00:04) - Block spliceAndConvertBucketsToBlock( - AggregatedDataVariants & variants, bool final, bool clear_states, const std::vector & gcd_buckets) const; - Block mergeAndSpliceAndConvertBucketsToBlock( - ManyAggregatedDataVariants & variants, bool final, bool clear_states, const std::vector & gcd_buckets) const; - - /// Convert the `updated data` (different with `normal data`) - BlocksList convertUpdatedToBlocks(AggregatedDataVariants & data_variants) const; + Block spliceAndConvertBucketsToBlock(AggregatedDataVariants & variants, bool final, const std::vector & gcd_buckets) const; + Block mergeAndSpliceAndConvertBucketsToBlock(ManyAggregatedDataVariants & variants, bool final, const std::vector & gcd_buckets) const; - /// \return: merged updated data if exists - /// NOTE: The merged data is as `normal data`, which should use `convertToBlocks` to convert - AggregatedDataVariantsPtr mergeUpdatedGroups(ManyAggregatedDataVariants & data_variants) const; + /// Only convert the states of update groups tracked + BlocksList convertUpdatesToBlocks(AggregatedDataVariants & data_variants) const; - /// Convert the `retracted data` (different with `normal data`) - BlocksList convertRetractedToBlocks(AggregatedDataVariants & data_variants) const; + /// \return: merged updated data if exists, when there is no update data, return nullptr + AggregatedDataVariantsPtr mergeUpdateGroups(ManyAggregatedDataVariants & data_variants) const; - /// \return: merged retracted data if exists - /// NOTE: The merged data is as `normal data`, which should use `convertToBlocks` to convert - AggregatedDataVariantsPtr mergeRetractedGroups(ManyAggregatedDataVariants & data_variants) const; + /// For some streaming queries with `emit on update` or `emit changelog`, need tracking updates (with retract) + bool needTrackUpdates() const { return params.tracking_updates_type != TrackingUpdatesType::None; } + TrackingUpdatesType trackingUpdatesType() const { return params.tracking_updates_type; } - /// Used for merge legacy retracted data into result - void mergeRetractedInto(AggregatedDataVariants & result, AggregatedDataVariants && retracted_result) const; - - bool hasExpandedData() const { return expanded_data_type != ExpandedDataType::None; } - ExpandedDataType expandedDataType() const { return expanded_data_type; } + /// Used for merge changed groups and return the of changed groups + std::pair + 
mergeRetractedGroups(ManyAggregatedDataVariants & aggregated_data, ManyAggregatedDataVariants & retracted_data) const; std::vector bucketsBefore(const AggregatedDataVariants & result, Int64 max_bucket) const; void removeBucketsBefore(AggregatedDataVariants & result, Int64 max_bucket) const; @@ -849,22 +838,12 @@ class Aggregator final /// If @p always_merge_into_empty is true, always add an empty variants at front even if there is only one ManyAggregatedDataVariantsPtr prepareVariantsToMerge(ManyAggregatedDataVariants & data_variants, bool always_merge_into_empty = false) const; - using BucketToBlocks = std::map; - /// Merge partially aggregated blocks separated to buckets into one data structure. - void mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVariants & result, size_t max_threads); - - /// Merge several partially aggregated blocks into one. - /// Precondition: for all blocks block.info.is_overflows flag must be the same. - /// (either all blocks are from overflow data or none blocks are). - /// The resulting block has the same value of is_overflows flag. - Block mergeBlocks(BlocksList & blocks, bool final, bool clear_states, bool only_updated); - /** Split block with partially-aggregated data to many blocks, as if two-level method of aggregation was used. * This is needed to simplify merging of that data with other results, that are already two-level. */ std::vector convertBlockToTwoLevel(const Block & block) const; - void initStatesForWithoutKeyOrOverflow(AggregatedDataVariants & data_variants) const; + void initStatesForWithoutKey(AggregatedDataVariants & data_variants) const; /// For external aggregation. void writeToTemporaryFile(AggregatedDataVariants & data_variants, const String & tmp_path) const; @@ -940,8 +919,6 @@ class Aggregator final bool all_aggregates_has_trivial_destructor = false; - ExpandedDataType expanded_data_type = ExpandedDataType::None; - /// How many RAM were used to process the query before processing the first block. Int64 memory_usage_before_aggregation = 0; @@ -971,8 +948,7 @@ class Aggregator final /** Create states of aggregate functions for one key. */ - template - void createAggregateStates(AggregateDataPtr & aggregate_data) const; + void createAggregateStates(AggregateDataPtr & aggregate_data, bool prefix_with_updates_tracking_state = true) const; /** Call `destroy` methods for states of aggregate functions. * Used in the exception handler for aggregation, since RAII in this case is not applicable. @@ -984,35 +960,19 @@ class Aggregator final size_t row_begin, size_t row_end, ColumnRawPtrs & key_columns, - AggregateFunctionInstruction * aggregate_instructions, - bool no_more_keys, - AggregateDataPtr overflow_row = nullptr) const; + AggregateFunctionInstruction * aggregate_instructions) const; /// Process one data block, aggregate the data into a hash table. template - bool executeImpl( - Method & method, - Arena * aggregates_pool, - size_t row_begin, - size_t row_end, - ColumnRawPtrs & key_columns, - AggregateFunctionInstruction * aggregate_instructions, - bool no_more_keys, - AggregateDataPtr overflow_row) const; - - /// Specialization for a particular value no_more_keys. 
- template bool executeImplBatch( Method & method, - typename Method::State & state, Arena * aggregates_pool, size_t row_begin, size_t row_end, - AggregateFunctionInstruction * aggregate_instructions, - AggregateDataPtr overflow_row) const; + ColumnRawPtrs & key_columns, + AggregateFunctionInstruction * aggregate_instructions) const; /// For case when there are no keys (all aggregate into one row). For UDA with own strategy, return 'true' means the UDA should emit after execution - template bool executeWithoutKeyImpl( AggregatedDataWithoutKey & res, size_t row_begin, @@ -1020,16 +980,6 @@ class Aggregator final AggregateFunctionInstruction * aggregate_instructions, Arena * arena) const; -#if 0 /// Unused for now - static void executeOnIntervalWithoutKeyImpl( - AggregatedDataWithoutKey & res, - size_t row_begin, - size_t row_end, - AggregateFunctionInstruction * aggregate_instructions, - Arena * arena, - const IColumn * delta_col); -#endif - template void writeToTemporaryFileImpl( AggregatedDataVariants & data_variants, @@ -1046,7 +996,7 @@ class Aggregator final /// Merge data from hash table `src` into `dst`. using EmptyKeyHandler = void *; - template + template void mergeDataImpl( Table & table_dst, Table & table_src, @@ -1061,7 +1011,7 @@ class Aggregator final template Block convertToBlockImpl( - Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, bool final, size_t rows, bool clear_states, AggregateStateType type) const; + Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, bool final, size_t rows, bool clear_states, ConvertType type) const; template void insertAggregatesIntoColumns( @@ -1070,13 +1020,12 @@ class Aggregator final Arena * arena, bool clear_states) const; - template Block insertResultsIntoColumns( PaddedPODArray & places, OutputBlockColumns && out_cols, Arena * arena, bool clear_states) const; - template + template Block convertToBlockImplFinal( - Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, size_t rows, bool clear_states) const; + Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, size_t rows, bool clear_states, ConvertType type) const; template Block convertToBlockImplNotFinal(Method & method, Table & data, Arenas & aggregates_pools, size_t rows) const; @@ -1089,7 +1038,7 @@ class Aggregator final bool final, bool clear_states, Int64 bucket, - AggregateStateType type = AggregateStateType::Normal) const; + ConvertType type = ConvertType::Normal) const; /// proton: starts. template @@ -1111,58 +1060,34 @@ class Aggregator final void serializeAggregateStates(const AggregateDataPtr & place, WriteBuffer & wb) const; void deserializeAggregateStates(AggregateDataPtr & place, ReadBuffer & rb, Arena * arena) const; - void clearDataVariants(AggregatedDataVariants & data_variants) const; - - /// @return does need abort ? 
- bool checkAndProcessResult(AggregatedDataVariants & result, bool & no_more_keys) const; + /// \return true means execution must be aborted, false means normal + bool checkAndProcessResult(AggregatedDataVariants & result) const; template bool executeAndRetractImpl( Method & method, Arena * aggregates_pool, + Method & retracted_method, Arena * retracted_pool, size_t row_begin, size_t row_end, ColumnRawPtrs & key_columns, AggregateFunctionInstruction * aggregate_instructions) const; - template - void mergeUpdatedGroupsImpl(ManyAggregatedDataVariants & non_empty_data, Arena * arena) const; template - void mergeRetractedGroupsImpl(ManyAggregatedDataVariants & non_empty_data, Arena * arena) const; + void mergeRetractedGroupsImpl(ManyAggregatedDataVariants & aggregated_data, ManyAggregatedDataVariants & retracted_data) const; - template - void mergeRetractedIntoImpl(Method & method, Method & retracted_method, Arena * arena) const; + template + void mergeUpdateGroupsImpl(ManyAggregatedDataVariants & non_empty_data, Arena * arena) const; /// proton: ends. - Block prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows, bool clear_states, AggregateStateType type = AggregateStateType::Normal) const; - Block prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final, bool clear_states, AggregateStateType type = AggregateStateType::Normal) const; - BlocksList prepareBlocksAndFillTwoLevel(AggregatedDataVariants & data_variants, bool final, bool clear_states, size_t max_threads, AggregateStateType type = AggregateStateType::Normal) const; + Block prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool clear_states, ConvertType type = ConvertType::Normal) const; + Block prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final, bool clear_states, ConvertType type = ConvertType::Normal) const; + BlocksList prepareBlocksAndFillTwoLevel(AggregatedDataVariants & data_variants, bool final, bool clear_states, size_t max_threads, ConvertType type = ConvertType::Normal) const; template BlocksList prepareBlocksAndFillTwoLevelImpl( - AggregatedDataVariants & data_variants, Method & method, bool final, bool clear_states, ThreadPool * thread_pool, AggregateStateType type) const; - - template - void mergeStreamsImplCase( - Block & block, - Arena * aggregates_pool, - Method & method, - Table & data, - AggregateDataPtr overflow_row) const; - - template - void mergeStreamsImpl( - Block & block, - Arena * aggregates_pool, - Method & method, - Table & data, - AggregateDataPtr overflow_row, - bool no_more_keys) const; - - void mergeWithoutKeyStreamsImpl( - Block & block, - AggregatedDataVariants & result) const; + AggregatedDataVariants & data_variants, Method & method, bool final, bool clear_states, ThreadPool * thread_pool, ConvertType type) const; template void mergeBucketImpl( @@ -1187,9 +1112,8 @@ class Aggregator final * If it is exceeded, then, depending on the group_by_overflow_mode, either * - throws an exception; * - returns false, which means that execution must be aborted; - * - sets the variable no_more_keys to true. 
*/ - bool checkLimits(size_t result_size, bool & no_more_keys) const; + bool checkLimits(size_t result_size) const; void prepareAggregateInstructions( Columns columns, @@ -1213,9 +1137,9 @@ class Aggregator final /// Existed versions: /// STATE V1 - Legacy version (REVISION 1) /// STATE V2 - REVISION 1 (Enable revision) - /// STATE V3 - REVISION 3 (Add expanded data) + /// STATE V3 - REVISION 3 (Add updates tracking state) static constexpr UInt64 STATE_V2_MIN_REVISION = 1; - static constexpr UInt64 STATE_V3_MIN_REVISION = 3; + // static constexpr UInt64 STATE_V3_MIN_REVISION = 3; /// will enable it later VersionType getVersionFromRevision(UInt64 revision) const; VersionType getVersion() const; @@ -1224,11 +1148,9 @@ class Aggregator final void recover(AggregatedDataVariants & data_variants, ReadBuffer & rb) const; private: - /// [Version-3] void doCheckpointV3(const AggregatedDataVariants & data_variants, WriteBuffer & wb) const; void doRecoverV3(AggregatedDataVariants & data_variants, ReadBuffer & rb) const; - /// [Version-2] void doCheckpointV2(const AggregatedDataVariants & data_variants, WriteBuffer & wb) const; void doRecoverV2(AggregatedDataVariants & data_variants, ReadBuffer & rb) const; diff --git a/src/Interpreters/Streaming/UpdatesTrackingData.h b/src/Interpreters/Streaming/UpdatesTrackingData.h new file mode 100644 index 00000000000..bd44c79e3d7 --- /dev/null +++ b/src/Interpreters/Streaming/UpdatesTrackingData.h @@ -0,0 +1,105 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ +using AggregateDataPtr = char *; +using ConstAggregateDataPtr = const char *; + +namespace Streaming +{ +SERDE struct TrackingUpdates +{ + static ALWAYS_INLINE TrackingUpdates & data(AggregateDataPtr __restrict place) { return *reinterpret_cast(place); } + static ALWAYS_INLINE const TrackingUpdates & data(ConstAggregateDataPtr __restrict place) { return *reinterpret_cast(place); } + + static ALWAYS_INLINE bool empty(ConstAggregateDataPtr __restrict place) { return data(place).updates == 0; } + static ALWAYS_INLINE bool updated(ConstAggregateDataPtr __restrict place) { return data(place).updated_since_last_finalization; } + static ALWAYS_INLINE void setUpdated(AggregateDataPtr __restrict place) { data(place).updated_since_last_finalization = true; } + static ALWAYS_INLINE void resetUpdated(AggregateDataPtr __restrict place) { data(place).updated_since_last_finalization = false; } + + static void addBatch(size_t row_begin, size_t row_end, AggregateDataPtr * places, const IColumn * delta_col) + { + if (delta_col == nullptr) + { + for (size_t i = row_begin; i < row_end; ++i) + if (places[i]) + data(places[i]).add(); + } + else + { + const auto & delta_flags = assert_cast(*delta_col).getData(); + for (size_t i = row_begin; i < row_end; ++i) + { + if (places[i]) + { + if (delta_flags[i] >= 0) + data(places[i]).add(); + else + data(places[i]).negate(); + } + } + } + } + + static void addBatchSinglePlace(size_t row_begin, size_t row_end, AggregateDataPtr __restrict place, const IColumn * delta_col) + { + if (!place) + return; + + auto & data_ex = data(place); + if (delta_col == nullptr) + data_ex.updates += row_end - row_begin; + else + { + const auto & delta_flags = assert_cast(*delta_col).getData(); + data_ex.updates = std::accumulate(delta_flags.begin(), delta_flags.end(), data_ex.updates); + } + + data_ex.updated_since_last_finalization = true; + } + + static void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & wb) + { + const auto & data_ex = data(place); + 
writeIntBinary(data_ex.updates, wb); + writeBinary(data_ex.updated_since_last_finalization, wb); + } + + static void deserialize(AggregateDataPtr __restrict place, ReadBuffer & rb) + { + auto & data_ex = data(place); + readIntBinary(data_ex.updates, rb); + readBinary(data_ex.updated_since_last_finalization, rb); + } + + ALWAYS_INLINE void add() + { + ++updates; + updated_since_last_finalization = true; + } + + ALWAYS_INLINE void negate() + { + --updates; + updated_since_last_finalization = true; + } + + /// Used to track whether the tracked target's changes sum to zero (net count of additions minus retractions) + UInt64 updates = 0; + + /// Used to track whether the tracked group has been updated since the last finalization + bool updated_since_last_finalization = true; +}; + +enum class TrackingUpdatesType : uint8_t +{ + None = 0, + Updates = 1, +}; + +} +} diff --git a/src/Processors/Transforms/Streaming/AggregatingHelper.cpp b/src/Processors/Transforms/Streaming/AggregatingHelper.cpp index 849b82b802c..c6bca59183f 100644 --- a/src/Processors/Transforms/Streaming/AggregatingHelper.cpp +++ b/src/Processors/Transforms/Streaming/AggregatingHelper.cpp @@ -30,18 +30,12 @@ Chunk mergeBlocksToChunk(BlocksList && blocks) return merged_chunk; } -Chunk convertToChunkImpl(AggregatedDataVariants & data, const AggregatingTransformParams & params, AggregateStateType type) +Chunk convertToChunkImpl(AggregatedDataVariants & data, const AggregatingTransformParams & params) { if (data.empty()) return {}; - BlocksList blocks; - if (type == AggregateStateType::OnlyUpdated) - blocks = params.aggregator.convertUpdatedToBlocks(data); - else if (type == AggregateStateType::OnlyRetracted) - blocks = params.aggregator.convertRetractedToBlocks(data); - else - blocks = params.aggregator.convertToBlocks(data, params.final, !params.params.keep_state, params.params.max_threads); + BlocksList blocks = params.aggregator.convertToBlocks(data, params.final, params.params.max_threads); /// FIXME: When global aggr states was converted two level hash table, the merged chunk may be too large return mergeBlocksToChunk(std::move(blocks)); @@ -52,12 +46,12 @@ namespace AggregatingHelper { Chunk convertToChunk(AggregatedDataVariants & data, const AggregatingTransformParams & params) { - return convertToChunkImpl(data, params, AggregateStateType::Normal); + return convertToChunkImpl(data, params); } Chunk mergeAndConvertToChunk(ManyAggregatedDataVariants & data, const AggregatingTransformParams & params) { - auto blocks = params.aggregator.mergeAndConvertToBlocks(data, params.final, !params.params.keep_state, params.params.max_threads); + auto blocks = params.aggregator.mergeAndConvertToBlocks(data, params.final, params.params.max_threads); /// FIXME: When global aggr states was converted two level hash table, the merged chunk may be too large return mergeBlocksToChunk(std::move(blocks)); } @@ -65,68 +59,51 @@ Chunk mergeAndConvertToChunk(ManyAggregatedDataVariants & data, const Aggregatin Chunk spliceAndConvertBucketsToChunk( AggregatedDataVariants & data, const AggregatingTransformParams & params, const std::vector & buckets) { - return convertToChunk(params.aggregator.spliceAndConvertBucketsToBlock(data, params.final, /*clear_states*/ false, buckets)); + return convertToChunk(params.aggregator.spliceAndConvertBucketsToBlock(data, params.final, buckets)); } Chunk mergeAndSpliceAndConvertBucketsToChunk( ManyAggregatedDataVariants & data, const AggregatingTransformParams & params, const std::vector & buckets) { - return
convertToChunk(params.aggregator.mergeAndSpliceAndConvertBucketsToBlock(data, params.final, /*clear_states*/ false, buckets)); + return convertToChunk(params.aggregator.mergeAndSpliceAndConvertBucketsToBlock(data, params.final, buckets)); } -ChunkPair convertToChangelogChunk(AggregatedDataVariants & data, const AggregatingTransformParams & params) -{ - if (data.empty()) - return {}; - - auto retracted_chunk = convertToChunkImpl(data, params, AggregateStateType::OnlyRetracted); - if (retracted_chunk) - { - auto retracted_delta_col = ColumnInt8::create(retracted_chunk.rows(), Int8(-1)); - retracted_chunk.addColumn(std::move(retracted_delta_col)); - retracted_chunk.setConsecutiveDataFlag(); - } - - auto chunk = convertToChunkImpl(data, params, AggregateStateType::OnlyUpdated); - if (chunk) - { - auto delta_col = ColumnInt8::create(chunk.rows(), Int8(1)); - chunk.addColumn(std::move(delta_col)); - } - return {std::move(retracted_chunk), std::move(chunk)}; -} - -ChunkPair mergeAndConvertToChangelogChunk(ManyAggregatedDataVariants & data, const AggregatingTransformParams & params) -{ - if (data.size() == 1) - return convertToChangelogChunk(*data[0], params); - - ChunkPair results; - auto & [retracted_chunk, chunk] = results; - - auto merged_retracted_data = params.aggregator.mergeRetractedGroups(data); - if (merged_retracted_data) - { - retracted_chunk = convertToChunk(*merged_retracted_data, params); - if (retracted_chunk) - { - auto retracted_delta_col = ColumnInt8::create(retracted_chunk.rows(), Int8(-1)); - retracted_chunk.addColumn(std::move(retracted_delta_col)); - retracted_chunk.setConsecutiveDataFlag(); - } - } - - auto merged_updated_data = params.aggregator.mergeUpdatedGroups(data); - if (merged_updated_data) - { - chunk = convertToChunk(*merged_updated_data, params); - if (chunk) - { - auto delta_col = ColumnInt8::create(chunk.rows(), Int8(1)); - chunk.addColumn(std::move(delta_col)); - } - } - return results; +ChunkPair + convertToChangelogChunk(AggregatedDataVariants & data, RetractedDataVariants & retracted_data, const AggregatingTransformParams & params) + { + if (data.empty()) + return {}; + + assert(!retracted_data.empty()); + + auto retracted_chunk = convertToChunk(retracted_data, params); + if (retracted_chunk) + { + auto retracted_delta_col = ColumnInt8::create(retracted_chunk.rows(), Int8(-1)); + retracted_chunk.addColumn(std::move(retracted_delta_col)); + retracted_chunk.setConsecutiveDataFlag(); + } + retracted_data.reset(); /// Clean up retract data after finalized + + auto chunk = convertToChunk(data, params); + if (chunk) + { + auto delta_col = ColumnInt8::create(chunk.rows(), Int8(1)); + chunk.addColumn(std::move(delta_col)); + } + + return {std::move(retracted_chunk), std::move(chunk)}; + } + + ChunkPair mergeAndConvertToChangelogChunk( + ManyAggregatedDataVariants & data, ManyRetractedDataVariants & retracted_data, const AggregatingTransformParams & params) + { + auto [merged_data, merged_retracted_data] = params.aggregator.mergeRetractedGroups(data, retracted_data); + if (!merged_data) + return {}; + + assert(merged_retracted_data); + return convertToChangelogChunk(*merged_data, *merged_retracted_data, params); } } } diff --git a/src/Processors/Transforms/Streaming/AggregatingHelper.h b/src/Processors/Transforms/Streaming/AggregatingHelper.h index 85b177b5b51..5ca32f6fc00 100644 --- a/src/Processors/Transforms/Streaming/AggregatingHelper.h +++ b/src/Processors/Transforms/Streaming/AggregatingHelper.h @@ -38,13 +38,16 @@ Chunk mergeAndSpliceAndConvertBucketsToChunk( 
ManyAggregatedDataVariants & data, const AggregatingTransformParams & params, const std::vector & buckets); /// Only used for emit changelog -/// @brief only convert the state of changed groups (retracted: last state, aggregated: current state) -/// \data: current aggregated state of all groups (contains retracted states and updated states) +/// @brief Based on the new/updated groups in @p retracted_data, convert only the state of changed groups (retracted: last state, aggregated: current state) +/// \data: current aggregated state of all groups +/// \retracted_data: only holds the last state of changed groups (i.e. new/updated/deleted) /// @returns /// retracted_chunk: just contains retracted data of changed groups /// aggregated_chunk: just contains aggregated data of changed groups -ChunkPair convertToChangelogChunk(AggregatedDataVariants & data, const AggregatingTransformParams & params); -ChunkPair mergeAndConvertToChangelogChunk(ManyAggregatedDataVariants & data, const AggregatingTransformParams & params); +ChunkPair +convertToChangelogChunk(AggregatedDataVariants & data, RetractedDataVariants & retracted_data, const AggregatingTransformParams & params); +ChunkPair mergeAndConvertToChangelogChunk( + ManyAggregatedDataVariants & data, ManyRetractedDataVariants & retracted_data, const AggregatingTransformParams & params); } } diff --git a/src/Processors/Transforms/Streaming/AggregatingTransform.cpp b/src/Processors/Transforms/Streaming/AggregatingTransform.cpp index b9fa8205e75..22489fc9bc3 100644 --- a/src/Processors/Transforms/Streaming/AggregatingTransform.cpp +++ b/src/Processors/Transforms/Streaming/AggregatingTransform.cpp @@ -189,22 +189,11 @@ void AggregatingTransform::consume(Chunk chunk) std::pair AggregatingTransform::executeOrMergeColumns(Chunk & chunk, size_t num_rows) { auto columns = chunk.detachColumns(); - if (params->only_merge) - { - auto block = getInputs().front().getHeader().cloneWithColumns(columns); - materializeBlockInplace(block); - /// FIXME - /// Blocking finalization during execution on current variant - std::lock_guard lock(variants_mutex); - auto success = params->aggregator.mergeOnBlock(block, variants, no_more_keys); - return {!success, false}; - } - else - { - /// Blocking finalization during execution on current variant - std::lock_guard lock(variants_mutex); - return params->aggregator.executeOnBlock(std::move(columns), 0, num_rows, variants, key_columns, aggregate_columns, no_more_keys); - } + assert(!params->only_merge && !no_more_keys); + + /// Blocking finalization during execution on current variant + std::lock_guard lock(variants_mutex); + return params->aggregator.executeOnBlock(std::move(columns), 0, num_rows, variants, key_columns, aggregate_columns); } void AggregatingTransform::emitVersion(Chunk & chunk) diff --git a/src/Processors/Transforms/Streaming/AggregatingTransformWithSubstream.cpp b/src/Processors/Transforms/Streaming/AggregatingTransformWithSubstream.cpp index fd1cda27554..208549bb6f4 100644 --- a/src/Processors/Transforms/Streaming/AggregatingTransformWithSubstream.cpp +++ b/src/Processors/Transforms/Streaming/AggregatingTransformWithSubstream.cpp @@ -226,10 +226,10 @@ std::pair AggregatingTransformWithSubstream::executeOrMergeColumns(C /// according to partition keys auto num_rows = chunk.getNumRows(); - assert(!params->only_merge); + assert(!params->only_merge && !no_more_keys); return params->aggregator.executeOnBlock( -
chunk.detachColumns(), 0, num_rows, substream_ctx->variants, key_columns, aggregate_columns); } SubstreamContextPtr AggregatingTransformWithSubstream::getOrCreateSubstreamContext(const SubstreamID & id) @@ -270,7 +270,7 @@ void AggregatingTransformWithSubstream::checkpoint(CheckpointContextPtr ckpt_ctx for (const auto & [id, substream_ctx] : substream_contexts) { assert(id == substream_ctx->id); - serialize(*substream_ctx, wb, getVersion()); + substream_ctx->serialize(wb, getVersion()); } }); } @@ -284,7 +284,7 @@ void AggregatingTransformWithSubstream::recover(CheckpointContextPtr ckpt_ctx) for (size_t i = 0; i < num_substreams; ++i) { auto substream_ctx = std::make_shared(this); - deserialize(*substream_ctx, rb, version_); + substream_ctx->deserialize(rb, version_); substream_contexts.emplace(substream_ctx->id, std::move(substream_ctx)); } }); @@ -294,7 +294,7 @@ void SubstreamContext::serialize(WriteBuffer & wb, VersionType version) const { DB::Streaming::serialize(id, wb); - DB::serialize(variants, wb, aggregating_transform->params->aggregator); + variants.serialize(wb, aggregating_transform->params->aggregator); DB::writeIntBinary(finalized_watermark, wb); @@ -312,7 +312,7 @@ void SubstreamContext::deserialize(ReadBuffer & rb, VersionType version) { DB::Streaming::deserialize(id, rb); - DB::deserialize(variants, rb, aggregating_transform->params->aggregator); + variants.deserialize(rb, aggregating_transform->params->aggregator); DB::readIntBinary(finalized_watermark, rb); diff --git a/src/Processors/Transforms/Streaming/GlobalAggregatingTransform.cpp b/src/Processors/Transforms/Streaming/GlobalAggregatingTransform.cpp index 3049e4bebce..365c5621a37 100644 --- a/src/Processors/Transforms/Streaming/GlobalAggregatingTransform.cpp +++ b/src/Processors/Transforms/Streaming/GlobalAggregatingTransform.cpp @@ -9,7 +9,6 @@ namespace ErrorCodes { extern const int NOT_IMPLEMENTED; extern const int UNSUPPORTED; -extern const int RECOVER_CHECKPOINT_FAILED; } namespace Streaming @@ -41,58 +40,35 @@ GlobalAggregatingTransform::GlobalAggregatingTransform( if (unlikely(params->params.overflow_row)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Overflow row processing is not implemented in global aggregation"); + /// Need extra retracted data if (params->emit_changelog) { if (params->emit_version) throw Exception(ErrorCodes::UNSUPPORTED, "'emit_version()' is not supported in global aggregation emit changelog"); - bool retract_enabled = false; + ManyRetractedDataVariants retracted_data(many_data->variants.size()); + for (auto & elem : retracted_data) + elem = std::make_shared(); + many_data->setField( - {retract_enabled, + {std::move(retracted_data), /// Field serializer - [](const std::any & field, WriteBuffer & wb, [[maybe_unused]] VersionType version) { - assert(version >= IMPL_V2_MIN_VERSION); - DB::writeBoolText(std::any_cast(field), wb); + [this](const std::any & field, WriteBuffer & wb, VersionType) { + const auto & data = std::any_cast(field); + DB::writeIntBinary(data.size(), wb); + for (const auto & elem : data) + elem->serialize(wb, params->aggregator); }, /// Field deserializer - [this](std::any & field, ReadBuffer & rb, VersionType version) { - if (version >= IMPL_V2_MIN_VERSION) - { - DB::readBoolText(std::any_cast(field), rb); - } - else + [this](std::any & field, ReadBuffer & rb, VersionType) { + auto & data = std::any_cast(field); + size_t num; + DB::readIntBinary(num, rb); + data.resize(num); + for (auto & elem : data) { - /// Convert old impl to new impl V2 - if 
(params->aggregator.expandedDataType() != ExpandedDataType::UpdatedWithRetracted) - throw Exception( - ErrorCodes::RECOVER_CHECKPOINT_FAILED, - "Failed to recover aggregation checkpoint. Recover old version '{}' checkpoint, checkpointed need retracted, " - "but " - "current not need", - version); - - size_t retracted_num; - DB::readIntBinary(retracted_num, rb); - if (retracted_num != many_data->variants.size()) - throw Exception( - ErrorCodes::RECOVER_CHECKPOINT_FAILED, - "Failed to recover aggregation checkpoint. Recover old version '{}' checkpoint but the scale of the pipeline " - "is " - "inconsistent, checkpointed={}, current={}", - version, - retracted_num, - many_data->variants.size()); - - bool has_retracted = false; - for (auto & current : many_data->variants) - { - AggregatedDataVariants retracted; - DB::deserialize(retracted, rb, params->aggregator); - has_retracted |= retracted.size() > 0; - params->aggregator.mergeRetractedInto(*current, std::move(retracted)); - } - - std::any_cast(field) = many_data->emited_version > 0 || has_retracted; /// retracted enabled + elem = std::make_shared(); + elem->deserialize(rb, params->aggregator); } }}); } @@ -126,18 +102,15 @@ std::pair GlobalAggregatingTransform::executeOrMergeColumns(Chunk & { if (params->emit_changelog) { - assert(!params->only_merge); + assert(!params->only_merge && !no_more_keys); + + auto & retracted_variants = many_data->getField()[current_variant]; + auto & aggregated_variants = many_data->variants[current_variant]; + /// Blocking finalization during execution on current variant std::lock_guard lock(variants_mutex); - - /// Enable retract after first finalization - auto retract_enabled = many_data->getField(); - if (retract_enabled) [[likely]] - return params->aggregator.executeAndRetractOnBlock( - chunk.detachColumns(), 0, num_rows, variants, key_columns, aggregate_columns, no_more_keys); - else - return params->aggregator.executeOnBlock( - chunk.detachColumns(), 0, num_rows, variants, key_columns, aggregate_columns, no_more_keys); + return params->aggregator.executeAndRetractOnBlock( + chunk.detachColumns(), 0, num_rows, *aggregated_variants, *retracted_variants, key_columns, aggregate_columns); } else return AggregatingTransform::executeOrMergeColumns(chunk, num_rows); @@ -154,9 +127,8 @@ void GlobalAggregatingTransform::finalize(const ChunkContextPtr & chunk_ctx) if (params->emit_changelog) { - auto [retracted_chunk, chunk] = AggregatingHelper::mergeAndConvertToChangelogChunk(many_data->variants, *params); - /// Enable retract after first finalization - many_data->getField() |= chunk.rows(); + auto [retracted_chunk, chunk] = AggregatingHelper::mergeAndConvertToChangelogChunk( + many_data->variants, many_data->getField(), *params); chunk.setChunkContext(chunk_ctx); setCurrentChunk(std::move(chunk), std::move(retracted_chunk)); diff --git a/src/Processors/Transforms/Streaming/GlobalAggregatingTransform.h b/src/Processors/Transforms/Streaming/GlobalAggregatingTransform.h index 975fe4e115f..474824e1977 100644 --- a/src/Processors/Transforms/Streaming/GlobalAggregatingTransform.h +++ b/src/Processors/Transforms/Streaming/GlobalAggregatingTransform.h @@ -28,8 +28,6 @@ class GlobalAggregatingTransform final : public AggregatingTransform bool prepareFinalization(Int64 min_watermark) override; void finalize(const ChunkContextPtr & chunk_ctx) override; - - static constexpr VersionType IMPL_V2_MIN_VERSION = 3; }; } diff --git a/src/Processors/Transforms/Streaming/GlobalAggregatingTransformWithSubstream.cpp 
b/src/Processors/Transforms/Streaming/GlobalAggregatingTransformWithSubstream.cpp index d59f40c2199..b682e02ea85 100644 --- a/src/Processors/Transforms/Streaming/GlobalAggregatingTransformWithSubstream.cpp +++ b/src/Processors/Transforms/Streaming/GlobalAggregatingTransformWithSubstream.cpp @@ -9,7 +9,6 @@ namespace ErrorCodes { extern const int NOT_IMPLEMENTED; extern const int UNSUPPORTED; -extern const int RECOVER_CHECKPOINT_FAILED; } namespace Streaming @@ -29,41 +28,19 @@ GlobalAggregatingTransformWithSubstream::GlobalAggregatingTransformWithSubstream SubstreamContextPtr GlobalAggregatingTransformWithSubstream::getOrCreateSubstreamContext(const SubstreamID & id) { auto substream_ctx = AggregatingTransformWithSubstream::getOrCreateSubstreamContext(id); - /// Need extra retracted data for old version impl if (params->emit_changelog && !substream_ctx->hasField()) { - bool retract_enabled = false; substream_ctx->setField( - {retract_enabled, + {std::make_shared(), /// Field serializer - [](const std::any & field, WriteBuffer & wb, VersionType version) { - assert(version >= IMPL_V2_MIN_VERSION); - DB::writeBoolText(std::any_cast(field), wb); + [this](const std::any & field, WriteBuffer & wb, VersionType) { + const auto & data = std::any_cast(field); + data->serialize(wb, params->aggregator); }, /// Field deserializer - [substream_ctx, this](std::any & field, ReadBuffer & rb, VersionType version) { - if (version >= IMPL_V2_MIN_VERSION) - { - DB::readBoolText(std::any_cast(field), rb); - } - else - { - /// Convert old impl to new impl V2 - if (params->aggregator.expandedDataType() != ExpandedDataType::UpdatedWithRetracted) - throw Exception( - ErrorCodes::RECOVER_CHECKPOINT_FAILED, - "Failed to recover aggregation checkpoint. Recover old version '{}' checkpoint, checkpointed need retracted, " - "but " - "current not need", - version); - - AggregatedDataVariants retracted; - DB::deserialize(retracted, rb, params->aggregator); - bool has_retracted = retracted.size() > 0; - params->aggregator.mergeRetractedInto(substream_ctx->variants, std::move(retracted)); - - std::any_cast(field) = substream_ctx->emited_version > 0 || has_retracted; /// retracted enabled - } + [this](std::any & field, ReadBuffer & rb, VersionType) { + auto & data = std::any_cast(field); + data->deserialize(rb, params->aggregator); }}); } return substream_ctx; @@ -74,15 +51,14 @@ GlobalAggregatingTransformWithSubstream::executeOrMergeColumns(Chunk & chunk, co { if (params->emit_changelog) { - assert(!params->only_merge); + assert(!params->only_merge && !no_more_keys); + auto num_rows = chunk.getNumRows(); - auto retract_enabled = substream_ctx->getField(); - if (retract_enabled) [[likely]] - return params->aggregator.executeAndRetractOnBlock( - chunk.detachColumns(), 0, num_rows, substream_ctx->variants, key_columns, aggregate_columns, no_more_keys); - else - return params->aggregator.executeOnBlock( - chunk.detachColumns(), 0, num_rows, substream_ctx->variants, key_columns, aggregate_columns, no_more_keys); + auto & retracted_variants = substream_ctx->getField(); + auto & aggregated_variants = substream_ctx->variants; + + return params->aggregator.executeAndRetractOnBlock( + chunk.detachColumns(), 0, num_rows, aggregated_variants, *retracted_variants, key_columns, aggregate_columns); } else return AggregatingTransformWithSubstream::executeOrMergeColumns(chunk, substream_ctx); @@ -111,10 +87,8 @@ void GlobalAggregatingTransformWithSubstream::finalize(const SubstreamContextPtr auto start = MonotonicMilliseconds::now(); if 
(params->emit_changelog) { - auto [retracted_chunk, chunk] = AggregatingHelper::convertToChangelogChunk(variants, *params); - /// Enable retract after first finalization - substream_ctx->getField() |= chunk.rows(); - + auto [retracted_chunk, chunk] + = AggregatingHelper::convertToChangelogChunk(variants, *substream_ctx->getField(), *params); chunk.setChunkContext(chunk_ctx); setCurrentChunk(std::move(chunk), std::move(retracted_chunk)); } diff --git a/src/Processors/Transforms/Streaming/GlobalAggregatingTransformWithSubstream.h b/src/Processors/Transforms/Streaming/GlobalAggregatingTransformWithSubstream.h index 72bc161bf7c..27c69ba6ac5 100644 --- a/src/Processors/Transforms/Streaming/GlobalAggregatingTransformWithSubstream.h +++ b/src/Processors/Transforms/Streaming/GlobalAggregatingTransformWithSubstream.h @@ -21,8 +21,6 @@ class GlobalAggregatingTransformWithSubstream final : public AggregatingTransfor private: void finalize(const SubstreamContextPtr & substream_ctx, const ChunkContextPtr & chunk_ctx) override; - - static constexpr VersionType IMPL_V2_MIN_VERSION = 3; }; } From 7a78f20e1dd244775b2d2f92fb8da74c2510bfcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lisen=20=E6=9D=A8?= Date: Sat, 3 Feb 2024 15:13:44 +0800 Subject: [PATCH 5/5] fix comments 2 * rename UpdatesTrackingData.h to TrackingUpdatesData.h * use temp arena instead of shared ptr of arena --- src/Common/serde.h | 44 ------------------ src/Interpreters/Streaming/Aggregator.cpp | 45 ++++++++----------- src/Interpreters/Streaming/Aggregator.h | 5 ++- src/Interpreters/Streaming/HashJoin.cpp | 32 ++++++------- ...esTrackingData.h => TrackingUpdatesData.h} | 0 src/Interpreters/Streaming/joinData.cpp | 12 ++--- .../tests/gtest_streaming_hash_join.cpp | 6 +-- .../Streaming/AggregatingTransform.cpp | 4 +- .../Streaming/ChangelogConvertTransform.cpp | 8 ++-- 9 files changed, 53 insertions(+), 103 deletions(-) rename src/Interpreters/Streaming/{UpdatesTrackingData.h => TrackingUpdatesData.h} (100%) diff --git a/src/Common/serde.h b/src/Common/serde.h index d6e51e17dc0..b6bfa951081 100644 --- a/src/Common/serde.h +++ b/src/Common/serde.h @@ -1,53 +1,9 @@ #pragma once -#include -#include #include namespace DB { -/// REQUIRES: The object must support versioned serialization/deserialization -template -concept VersionedSerializable - = requires(const S & s, WB & wb, VersionType version, Args &&... args) { s.serialize(wb, version, std::forward(args)...); }; - -template -concept VersionedDeserializable - = requires(S & s, RB & rb, VersionType version, Args &&... args) { s.deserialize(rb, version, std::forward(args)...); }; - -template S> -void ALWAYS_INLINE serialize(const S & s, WB & wb, VersionType version, Args &&... args) -{ - s.serialize(wb, version, std::forward(args)...); } - -template S> -void ALWAYS_INLINE deserialize(S & s, RB & rb, VersionType version, Args &&... args) -{ - s.deserialize(rb, version, std::forward(args)...); -} - -/// With owned versions -template -concept Serializable - = requires(const S & s, WB & wb, Args &&... args) { s.serialize(wb, std::forward(args)...); }; - -template -concept Deserializable - = requires(S & s, RB & rb, Args &&... args) { s.deserialize(rb, std::forward(args)...); }; - -template S> -void ALWAYS_INLINE serialize(const S & s, WB & wb, Args &&... args) -{ - s.serialize(wb, std::forward(args)...); -} - -template S> -void ALWAYS_INLINE deserialize(S & s, RB & rb, Args &&...
args) -{ - s.deserialize(rb, std::forward(args)...); -} - /// macro tag to indicate the data members or struct or class will /// be serialized / deserialized via network or file system IO. /// Hence, data structure versioning / backward / forward compatibility diff --git a/src/Interpreters/Streaming/Aggregator.cpp b/src/Interpreters/Streaming/Aggregator.cpp index d82dc8b1f8e..8de5a063791 100644 --- a/src/Interpreters/Streaming/Aggregator.cpp +++ b/src/Interpreters/Streaming/Aggregator.cpp @@ -109,8 +109,9 @@ template BlocksList convertBucketsInParallel(ThreadPool * thread_pool, const std::vector & buckets, BucketConverter && bucket_converter) { std::atomic next_bucket_idx_to_merge = 0; - auto converter = [&](Arena * pool, const std::atomic_flag * cancelled) { + auto converter = [&](const std::atomic_flag * cancelled) { BlocksList blocks; + Arena arena; while (true) { if (cancelled && cancelled->test()) @@ -121,7 +122,7 @@ BlocksList convertBucketsInParallel(ThreadPool * thread_pool, const std::vector< break; auto bucket = buckets[bucket_idx]; - blocks.splice(blocks.end(), bucket_converter(bucket, pool)); + blocks.splice(blocks.end(), bucket_converter(bucket, &arena)); } return blocks; }; @@ -129,16 +130,10 @@ BlocksList convertBucketsInParallel(ThreadPool * thread_pool, const std::vector< size_t num_threads = thread_pool ? std::min(thread_pool->getMaxThreads(), buckets.size()) : 1; if (num_threads <= 1) { - auto arena = std::make_shared(); - return converter(arena.get(), nullptr); + return converter(nullptr); } /// Process in parallel - Arenas pools; - pools.reserve(num_threads); - for (size_t i = pools.size(); i < num_threads; ++i) - pools.push_back(std::make_shared()); - auto results = std::make_shared>(); results->resize(num_threads); thread_pool->setMaxThreads(num_threads); @@ -148,10 +143,10 @@ BlocksList convertBucketsInParallel(ThreadPool * thread_pool, const std::vector< for (size_t thread_id = 0; thread_id < num_threads; ++thread_id) { - thread_pool->scheduleOrThrowOnError([&pools, thread_id, group = CurrentThread::getGroup(), results, &converter, &cancelled] { + thread_pool->scheduleOrThrowOnError([thread_id, group = CurrentThread::getGroup(), results, &converter, &cancelled] { CurrentThread::attachToIfDetached(group); SCOPE_EXIT_SAFE( CurrentThread::detachQueryIfNotDetached() ); - (*results)[thread_id] = converter(pools[thread_id].get(), &cancelled); + (*results)[thread_id] = converter(&cancelled); }); } @@ -857,7 +852,7 @@ template [&](AggregateDataPtr & aggregate_data) { auto data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(data, /*prefix_with_updates_tracking_state*/ false); + createAggregateStates(data, /*prefix_with_updates_tracking_state=*/ false); aggregate_data = data; }, state.getKeyData(), @@ -1483,7 +1478,7 @@ Block NO_INLINE Aggregator::convertToBlockImplFinal( PaddedPODArray places; places.reserve(rows); - bool only_updates = (type == ConvertType::OnlyUpdates); + bool only_updates = (type == ConvertType::Updates); data.forEachValue([&](const auto & key, auto & mapped) { @@ -1617,11 +1612,11 @@ Block Aggregator::prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_va assert(data_variants.type == AggregatedDataVariants::Type::without_key); - if (type == ConvertType::OnlyUpdates && !TrackingUpdates::updated(data_variants.without_key)) + if (type == ConvertType::Updates && !TrackingUpdates::updated(data_variants.without_key)) return res_header.cloneEmpty(); AggregatedDataWithoutKey & data = 
[&]() -> AggregateDataPtr & { - if (type == ConvertType::OnlyUpdates) + if (type == ConvertType::Updates) { TrackingUpdates::resetUpdated(data_variants.without_key); return data_variants.without_key; @@ -1692,7 +1687,7 @@ BlocksList Aggregator::prepareBlocksAndFillTwoLevelImpl( { return convertBucketsInParallel(thread_pool, method.data.buckets(), [&](Int64 bucket, Arena * arena) -> BlocksList { /// Skip no changed bucket if only updated is requested - if (type == ConvertType::OnlyUpdates && !method.data.isBucketUpdated(bucket)) + if (type == ConvertType::Updates && !method.data.isBucketUpdated(bucket)) return {}; return {convertOneBucketToBlockImpl(data_variants, method, arena, final, clear_states, bucket, type)}; @@ -2331,8 +2326,7 @@ void Aggregator::checkpoint(const AggregatedDataVariants & data_variants, WriteB if (version <= 1) return const_cast(this)->doCheckpointLegacy(data_variants, wb); - - if (version <= 2) + else if (version <= 2) return doCheckpointV2(data_variants, wb); else return doCheckpointV3(data_variants, wb); @@ -2351,8 +2345,7 @@ void Aggregator::recover(AggregatedDataVariants & data_variants, ReadBuffer & rb /// FIXME: Legacy layout needs to be cleaned after no use if (recovered_version <= 1) return const_cast(this)->doRecoverLegacy(data_variants, rb); - - if (recovered_version <= 2) + else if (recovered_version <= 2) return doRecoverV2(data_variants, rb); else return doRecoverV3(data_variants, rb); @@ -3220,7 +3213,7 @@ void Aggregator::doCheckpointV3(const AggregatedDataVariants & data_variants, Wr /// 1) Without key: [uint8][uint16][aggr-func-state-without-key] /// 2) Otherwise: [uint8][uint16][aggr-func-state-for-overflow-row][is_two_level][aggr-func-state-in-hash-map] bool inited = !data_variants.empty(); - writeBoolText(inited, wb); + writeBinary(inited, wb); if (!inited) return; /// No aggregated data yet @@ -3259,7 +3252,7 @@ void Aggregator::doCheckpointV3(const AggregatedDataVariants & data_variants, Wr void Aggregator::doRecoverV3(AggregatedDataVariants & data_variants, ReadBuffer & rb) const { bool inited = !data_variants.empty(); - readBoolText(inited, rb); + readBinary(inited, rb); if (!inited) return; @@ -3393,11 +3386,11 @@ BlocksList Aggregator::convertUpdatesToBlocks(AggregatedDataVariants & data_vari constexpr bool final = true; constexpr bool clear_states = false; if (data_variants.type == AggregatedDataVariants::Type::without_key) - blocks.emplace_back(prepareBlockAndFillWithoutKey(data_variants, final, clear_states, ConvertType::OnlyUpdates)); + blocks.emplace_back(prepareBlockAndFillWithoutKey(data_variants, final, clear_states, ConvertType::Updates)); else if (!data_variants.isTwoLevel()) - blocks.emplace_back(prepareBlockAndFillSingleLevel(data_variants, final, clear_states, ConvertType::OnlyUpdates)); + blocks.emplace_back(prepareBlockAndFillSingleLevel(data_variants, final, clear_states, ConvertType::Updates)); else - blocks.splice(blocks.end(), prepareBlocksAndFillTwoLevel(data_variants, final, clear_states, /*max_threads*/ 1, ConvertType::OnlyUpdates)); + blocks.splice(blocks.end(), prepareBlocksAndFillTwoLevel(data_variants, final, clear_states, /*max_threads*/ 1, ConvertType::Updates)); size_t rows = 0; size_t bytes = 0; @@ -3457,7 +3450,7 @@ void NO_INLINE Aggregator::mergeUpdateGroupsImpl(ManyAggregatedDataVariants & no auto & dst = dst_it->getMapped(); dst = nullptr; /// exception-safety - if you can not allocate memory or create states, then destructors will not be called. 
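/// The destination entry's states are created without the per-group updates-tracking prefix (note the `false` flag passed below).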
auto aggregate_data = arena->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(aggregate_data, /*prefix_with_updates_tracking_state*/ false); + createAggregateStates(aggregate_data, /*prefix_with_updates_tracking_state=*/ false); dst = aggregate_data; } }; diff --git a/src/Interpreters/Streaming/Aggregator.h b/src/Interpreters/Streaming/Aggregator.h index 32b1a2e141d..37aeac85bfe 100644 --- a/src/Interpreters/Streaming/Aggregator.h +++ b/src/Interpreters/Streaming/Aggregator.h @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include @@ -79,7 +79,7 @@ namespace Streaming enum class ConvertType : uint8_t { Normal = 0, - OnlyUpdates = 1, + Updates = 1, }; /// using TimeBucketAggregatedDataWithUInt16Key = TimeBucketHashMap>; @@ -376,6 +376,7 @@ SERDE struct AggregatedDataVariants : private boost::noncopyable { aggregates_pools = Arenas(1, std::make_shared()); aggregates_pool = aggregates_pools.back().get(); + /// Enable GC for arena by default. For cases like global aggregation, we will disable it further in \init aggregates_pool->enableRecycle(true); } diff --git a/src/Interpreters/Streaming/HashJoin.cpp b/src/Interpreters/Streaming/HashJoin.cpp index 108cc609cc1..91da977f3eb 100644 --- a/src/Interpreters/Streaming/HashJoin.cpp +++ b/src/Interpreters/Streaming/HashJoin.cpp @@ -2546,9 +2546,9 @@ void HashJoin::serialize(WriteBuffer & wb, VersionType version) const /// Part-4: Buffered data of left/right join stream if (bidirectional_hash_join) - DB::serialize(left_data, wb, version); + left_data.serialize(wb, version); - DB::serialize(right_data, wb, version); + right_data.serialize(wb, version); /// Part-5: Asof type (Optional) bool need_asof = streaming_strictness == Strictness::Range || streaming_strictness == Strictness::Asof; @@ -2564,12 +2564,12 @@ void HashJoin::serialize(WriteBuffer & wb, VersionType version) const if (join_results.has_value()) { assert(retract_push_down && emit_changelog); - DB::serialize(*join_results, wb, version, *this); + join_results->serialize(wb, version, *this); } /// Part-7: Others DB::writeIntBinary(combined_watermark.load(), wb); - DB::serialize(join_metrics, wb, version); + join_metrics.serialize(wb, version); } void HashJoin::deserialize(ReadBuffer & rb, VersionType version) @@ -2664,9 +2664,9 @@ void HashJoin::deserialize(ReadBuffer & rb, VersionType version) /// Part-4: Buffered data of left/right join stream if (bidirectional_hash_join) - DB::deserialize(left_data, rb, version); + left_data.deserialize(rb, version); - DB::deserialize(right_data, rb, version); + right_data.deserialize(rb, version); /// Part-5: Asof type (Optional) bool need_asof = streaming_strictness == Strictness::Range || streaming_strictness == Strictness::Asof; @@ -2705,7 +2705,7 @@ void HashJoin::deserialize(ReadBuffer & rb, VersionType version) join_results.has_value()); assert(retract_push_down && emit_changelog); - DB::deserialize(*join_results, rb, version, *this); + join_results->deserialize(rb, version, *this); } /// Part-7: Others @@ -2713,7 +2713,7 @@ void HashJoin::deserialize(ReadBuffer & rb, VersionType version) DB::readIntBinary(recovered_combined_watermark, rb); combined_watermark = recovered_combined_watermark; - DB::deserialize(join_metrics, rb, version); + join_metrics.deserialize(rb, version); } void HashJoin::JoinResults::serialize(WriteBuffer & wb, VersionType version, const HashJoin & join) const @@ -2723,7 +2723,7 @@ void HashJoin::JoinResults::serialize(WriteBuffer & wb, 
VersionType version, con serializeHashJoinMapsVariants(blocks, *maps, wb, version, sample_block, join); if (version <= CachedBlockMetrics::SERDE_REQUIRED_MAX_VERSION) - DB::serialize(metrics, wb, version); + metrics.serialize(wb, version); } void HashJoin::JoinResults::deserialize(ReadBuffer & rb, VersionType version, const HashJoin & join) @@ -2733,7 +2733,7 @@ void HashJoin::JoinResults::deserialize(ReadBuffer & rb, VersionType version, co deserializeHashJoinMapsVariants(blocks, *maps, rb, version, pool, sample_block, join); if (version <= CachedBlockMetrics::SERDE_REQUIRED_MAX_VERSION) - DB::deserialize(metrics, rb, version); + metrics.deserialize(rb, version); } void HashJoin::JoinData::serialize(WriteBuffer & wb, VersionType version) const @@ -2748,7 +2748,7 @@ void HashJoin::JoinData::serialize(WriteBuffer & wb, VersionType version) const if (has_primary_key_hash_table) { SerializedRowRefListMultipleToIndices serialized_row_ref_list_multiple_to_indices; - DB::serialize(*buffered_data, wb, version, &serialized_row_ref_list_multiple_to_indices); + buffered_data->serialize(wb, version, &serialized_row_ref_list_multiple_to_indices); primary_key_hash_table->map.serialize( /*MappedSerializer*/ @@ -2758,7 +2758,7 @@ void HashJoin::JoinData::serialize(WriteBuffer & wb, VersionType version) const wb); } else - DB::serialize(*buffered_data, wb, version, nullptr); + buffered_data->serialize(wb, version, nullptr); } void HashJoin::JoinData::deserialize(ReadBuffer & rb, VersionType version) @@ -2789,7 +2789,7 @@ void HashJoin::JoinData::deserialize(ReadBuffer & rb, VersionType version) if (has_primary_key_hash_table) { DeserializedIndicesToRowRefListMultiple deserialized_indices_to_multiple_ref; - DB::deserialize(*buffered_data, rb, version, &deserialized_indices_to_multiple_ref); + buffered_data->deserialize(rb, version, &deserialized_indices_to_multiple_ref); primary_key_hash_table->map.deserialize( /*MappedDeserializer*/ @@ -2801,7 +2801,7 @@ void HashJoin::JoinData::deserialize(ReadBuffer & rb, VersionType version) rb); } else - DB::deserialize(*buffered_data, rb, version, nullptr); + buffered_data->deserialize(rb, version, nullptr); } void HashJoin::JoinGlobalMetrics::serialize(WriteBuffer & wb, VersionType) const @@ -2828,7 +2828,7 @@ void serializeHashJoinMapsVariants( SerializedRowRefListMultipleToIndices * serialized_row_ref_list_multiple_to_indices) { SerializedBlocksToIndices serialized_blocks_to_indices; - DB::serialize(blocks, wb, version, header, &serialized_blocks_to_indices); + blocks.serialize(wb, version, header, &serialized_blocks_to_indices); assert(maps.map_variants.size() >= 1); DB::writeIntBinary(static_cast(maps.map_variants.size()), wb); @@ -2883,7 +2883,7 @@ void deserializeHashJoinMapsVariants( DeserializedIndicesToRowRefListMultiple * deserialized_indices_to_multiple_ref) { DeserializedIndicesToBlocks deserialized_indices_to_blocks; - DB::deserialize(blocks, rb, version, header, &deserialized_indices_to_blocks); + blocks.deserialize(rb, version, header, &deserialized_indices_to_blocks); UInt16 maps_size; DB::readIntBinary(maps_size, rb); diff --git a/src/Interpreters/Streaming/UpdatesTrackingData.h b/src/Interpreters/Streaming/TrackingUpdatesData.h similarity index 100% rename from src/Interpreters/Streaming/UpdatesTrackingData.h rename to src/Interpreters/Streaming/TrackingUpdatesData.h diff --git a/src/Interpreters/Streaming/joinData.cpp b/src/Interpreters/Streaming/joinData.cpp index 7896803d732..9c2853a3d31 100644 --- a/src/Interpreters/Streaming/joinData.cpp +++ 
b/src/Interpreters/Streaming/joinData.cpp @@ -326,18 +326,18 @@ void BufferedStreamData::serialize( DB::writeIntBinary(block_id, wb); assert(current_hash_blocks); - DB::serialize(*current_hash_blocks, wb, version, sample_block, *join, serialized_row_ref_list_multiple_to_indices); + current_hash_blocks->serialize(wb, version, sample_block, *join, serialized_row_ref_list_multiple_to_indices); DB::writeIntBinary(static_cast(range_bucket_hash_blocks.size()), wb); for (const auto & [bucket, hash_blocks] : range_bucket_hash_blocks) { DB::writeIntBinary(bucket, wb); assert(hash_blocks); - DB::serialize(*hash_blocks, wb, version, sample_block, *join, serialized_row_ref_list_multiple_to_indices); + hash_blocks->serialize(wb, version, sample_block, *join, serialized_row_ref_list_multiple_to_indices); } if (version <= CachedBlockMetrics::SERDE_REQUIRED_MAX_VERSION) - DB::serialize(metrics, wb, version); + metrics.serialize(wb, version); } void BufferedStreamData::deserialize( @@ -358,7 +358,7 @@ void BufferedStreamData::deserialize( DB::readIntBinary(block_id, rb); assert(current_hash_blocks); - DB::deserialize(*current_hash_blocks, rb, version, sample_block, *join, deserialized_indices_to_row_ref_list_multiple); + current_hash_blocks->deserialize(rb, version, sample_block, *join, deserialized_indices_to_row_ref_list_multiple); UInt32 size; Int64 bucket; @@ -370,11 +370,11 @@ void BufferedStreamData::deserialize( assert(inserted); /// Init hash table join->initHashMaps(iter->second->maps->map_variants); - DB::deserialize(*iter->second, rb, version, sample_block, *join, deserialized_indices_to_row_ref_list_multiple); + iter->second->deserialize(rb, version, sample_block, *join, deserialized_indices_to_row_ref_list_multiple); } if (version <= CachedBlockMetrics::SERDE_REQUIRED_MAX_VERSION) - DB::deserialize(metrics, rb, version); + metrics.deserialize(rb, version); } HashBlocksPtr BufferedStreamData::newHashBlocks() diff --git a/src/Interpreters/Streaming/tests/gtest_streaming_hash_join.cpp b/src/Interpreters/Streaming/tests/gtest_streaming_hash_join.cpp index 244eb64a6a6..7b0fdefedfe 100644 --- a/src/Interpreters/Streaming/tests/gtest_streaming_hash_join.cpp +++ b/src/Interpreters/Streaming/tests/gtest_streaming_hash_join.cpp @@ -258,14 +258,14 @@ std::shared_ptr initHashJoin( void serdeAndCheck(const Streaming::HashJoin & join, Streaming::HashJoin & recovered_join, std::string_view msg) { WriteBufferFromOwnString wb; - DB::serialize(join, wb, ProtonRevision::getVersionRevision()); + join.serialize(wb, ProtonRevision::getVersionRevision()); auto original_string = wb.str(); ReadBufferFromOwnString rb(original_string); - DB::deserialize(recovered_join, rb, ProtonRevision::getVersionRevision()); + recovered_join.deserialize(rb, ProtonRevision::getVersionRevision()); WriteBufferFromOwnString wb2; - DB::serialize(recovered_join, wb2, ProtonRevision::getVersionRevision()); + recovered_join.serialize(wb2, ProtonRevision::getVersionRevision()); auto recovered_string = wb2.str(); ASSERT_EQ(original_string, recovered_string) << msg << ": (FAILED)\n"; diff --git a/src/Processors/Transforms/Streaming/AggregatingTransform.cpp b/src/Processors/Transforms/Streaming/AggregatingTransform.cpp index 22489fc9bc3..1bd1c496237 100644 --- a/src/Processors/Transforms/Streaming/AggregatingTransform.cpp +++ b/src/Processors/Transforms/Streaming/AggregatingTransform.cpp @@ -487,7 +487,7 @@ void AggregatingTransform::checkpoint(CheckpointContextPtr ckpt_ctx) } /// Serializing no shared data - DB::serialize(variants, wb, 
params->aggregator); + variants.serialize(wb, params->aggregator); DB::writeIntBinary(watermark, wb); @@ -543,7 +543,7 @@ void AggregatingTransform::recover(CheckpointContextPtr ckpt_ctx) } /// Serializing local or stable data during checkpointing - DB::deserialize(variants, rb, params->aggregator); + variants.deserialize(rb, params->aggregator); DB::readIntBinary(watermark, rb); diff --git a/src/Processors/Transforms/Streaming/ChangelogConvertTransform.cpp b/src/Processors/Transforms/Streaming/ChangelogConvertTransform.cpp index 2b9f8ceb183..7521957516b 100644 --- a/src/Processors/Transforms/Streaming/ChangelogConvertTransform.cpp +++ b/src/Processors/Transforms/Streaming/ChangelogConvertTransform.cpp @@ -407,7 +407,7 @@ void ChangelogConvertTransform::checkpoint(CheckpointContextPtr ckpt_ctx) { ckpt_ctx->coordinator->checkpoint(getVersion(), getLogicID(), ckpt_ctx, [this](WriteBuffer & wb) { SerializedBlocksToIndices serialized_blocks_to_indices; - DB::serialize(source_chunks, wb, getVersion(), getInputs().front().getHeader(), &serialized_blocks_to_indices); + source_chunks.serialize(wb, getVersion(), getInputs().front().getHeader(), &serialized_blocks_to_indices); index.serialize( /*MappedSerializer*/ @@ -420,7 +420,7 @@ void ChangelogConvertTransform::checkpoint(CheckpointContextPtr ckpt_ctx) DB::writeIntBinary(late_rows, wb); if (version <= CachedBlockMetrics::SERDE_REQUIRED_MAX_VERSION) - DB::serialize(cached_block_metrics, wb, getVersion()); + cached_block_metrics.serialize(wb, getVersion()); }); } @@ -428,7 +428,7 @@ void ChangelogConvertTransform::recover(CheckpointContextPtr ckpt_ctx) { ckpt_ctx->coordinator->recover(getLogicID(), ckpt_ctx, [this](VersionType version_, ReadBuffer & rb) { DeserializedIndicesToBlocks deserialized_indices_to_blocks; - DB::deserialize(source_chunks, rb, version_, getInputs().front().getHeader(), &deserialized_indices_to_blocks); + source_chunks.deserialize(rb, version_, getInputs().front().getHeader(), &deserialized_indices_to_blocks); index.deserialize( /*MappedDeserializer*/ @@ -442,7 +442,7 @@ void ChangelogConvertTransform::recover(CheckpointContextPtr ckpt_ctx) DB::readIntBinary(late_rows, rb); if (version_ <= CachedBlockMetrics::SERDE_REQUIRED_MAX_VERSION) - DB::deserialize(cached_block_metrics, rb, version_); + cached_block_metrics.deserialize(rb, version_); }); } }
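Note (editorial sketch, not part of the patch series): the snippet below illustrates, with a plain std::unordered_map, the updates-tracking idea these patches introduce. Each group carries a small header (a net `updates` counter plus an `updated_since_last_finalization` flag, mirroring TrackingUpdates), every change marks the flag, and finalization emits only the flagged groups before clearing the flag, the analogue of converting with ConvertType::Updates and then resetting the updated state. The names Group, applyRow and finalizeUpdatedGroups are invented for the example; the real code keeps the header as a prefix of each aggregate state inside the hash tables.

#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

struct TrackingHeader
{
    uint64_t updates = 0;                        // net count: additions minus retractions
    bool updated_since_last_finalization = true; // set on every change, cleared when emitted

    void add() { ++updates; updated_since_last_finalization = true; }
    void negate() { --updates; updated_since_last_finalization = true; }
};

struct Group
{
    TrackingHeader tracking; // plays the role of the per-group prefix in the patch
    int64_t sum = 0;         // stand-in for the real aggregate function state
};

using Groups = std::unordered_map<std::string, Group>;

// delta >= 0 is treated as an addition, delta < 0 as a retraction of a previous row.
void applyRow(Groups & groups, const std::string & key, int64_t value, int8_t delta)
{
    auto & group = groups[key];
    if (delta >= 0)
    {
        group.sum += value;
        group.tracking.add();
    }
    else
    {
        group.sum -= value;
        group.tracking.negate();
    }
}

// Emit only the groups whose header says they changed, then clear the flag.
std::vector<std::pair<std::string, int64_t>> finalizeUpdatedGroups(Groups & groups)
{
    std::vector<std::pair<std::string, int64_t>> emitted;
    for (auto & [key, group] : groups)
    {
        if (!group.tracking.updated_since_last_finalization)
            continue;
        emitted.emplace_back(key, group.sum);
        group.tracking.updated_since_last_finalization = false;
    }
    return emitted;
}

int main()
{
    Groups groups;
    applyRow(groups, "a", 10, 1);
    applyRow(groups, "b", 5, 1);
    for (const auto & [key, sum] : finalizeUpdatedGroups(groups))
        std::cout << key << " -> " << sum << '\n'; // both groups are new, both emitted

    applyRow(groups, "a", 3, 1); // only "a" changes after the first emit
    for (const auto & [key, sum] : finalizeUpdatedGroups(groups))
        std::cout << key << " -> " << sum << '\n'; // only "a" is emitted
    return 0;
}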