From c472e5b79f10046faea99ce861ef9fc14ac327d7 Mon Sep 17 00:00:00 2001 From: Stas Klinov Date: Thu, 6 Jul 2023 23:00:11 +0200 Subject: [PATCH] fixed crash at count distinct with multiple indexes; fixed #3635; added regressions to test 444 --- src/distinct.cpp | 91 ++++++++++++++++++++++++++++++----------- src/distinct.h | 69 ++++++------------------------- src/sphinxsort.cpp | 1 + test/test_444/model.bin | 2 +- test/test_444/test.xml | 55 +++++++++++++++++++++++++ 5 files changed, 136 insertions(+), 82 deletions(-) diff --git a/src/distinct.cpp b/src/distinct.cpp index f2ed732975..d37d482f8f 100644 --- a/src/distinct.cpp +++ b/src/distinct.cpp @@ -205,7 +205,7 @@ void UniqHLLTraits_c::ConvertToHash ( Container_t & tContainer ) for ( const auto & i : *tContainer.m_pArray ) pHash->Add(i); - m_dUnusedArray.Add ( tContainer.m_pArray ); + FreeContainer ( tContainer ); tContainer.m_pHash = pHash; tContainer.m_iHashIdx = 0; @@ -221,8 +221,7 @@ void UniqHLLTraits_c::ConvertToHLLDensePacked ( Container_t & tContainer ) assert ( pHLL && tContainer.m_pHash ); CopyHashToHLL ( *pHLL, *tContainer.m_pHash ); - - m_dUnusedHashes[tContainer.m_iHashIdx].Add ( tContainer.m_pHash ); + FreeContainer ( tContainer ); tContainer.m_pHLLDensePacked = pHLL; tContainer.m_eType = ContainterType_e::HLL_DENSE_PACKED; @@ -237,8 +236,7 @@ void UniqHLLTraits_c::ConvertToHLLDenseNonPacked ( Container_t & tContainer ) assert ( pHLL && tContainer.m_pHash ); CopyHashToHLL ( *pHLL, *tContainer.m_pHash ); - - m_dUnusedHashes[tContainer.m_iHashIdx].Add ( tContainer.m_pHash ); + FreeContainer ( tContainer ); tContainer.m_pHLLDenseNonPacked = pHLL; tContainer.m_eType = ContainterType_e::HLL_DENSE_NONPACKED; @@ -302,6 +300,64 @@ void UniqHLLTraits_c::SetAccuracy ( int iAccuracy ) m_dUnusedHashes.Resize ( sphLog2 ( m_iMaxHashSize ) - sphLog2const ( MIN_HASH_SIZE ) + 1 ); } +UniqHLLTraits_c::Container_t & UniqHLL_c::Get ( SphGroupKey_t tGroup ) +{ + int iLen = m_hGroups.GetLength(); + UniqHLLTraits_c::Container_t & tCont = m_hGroups.Acquire ( tGroup ); + + // need reset value in case of the Container_t just added + if ( iLen!=m_hGroups.GetLength() ) + tCont = UniqHLLTraits_c::Container_t(); + + return tCont; +} + +template +void CopyContainerTo ( SphGroupKey_t tGroup, const UniqHLLTraits_c::Container_t & tFrom, T & tRhs ) +{ + if ( tFrom.IsEmpty() ) + return; + + UniqHLLTraits_c::Container_t & tTo = tRhs.Get ( tGroup ); + if ( tTo.IsEmpty() ) + tTo.m_pArray = tRhs.AllocateArray(); + + if ( tFrom.m_eType==UniqHLLTraits_c::ContainterType_e::ARRAY ) + { + for ( auto i : *tFrom.m_pArray ) + tRhs.Add ( { tGroup, i, 1 } ); + + } else if ( tFrom.m_eType==UniqHLLTraits_c::ContainterType_e::HASH ) + { + int64_t i = 0; + SphAttr_t * pRes; + while ( ( pRes = tFrom.m_pHash->Iterate(i) ) != nullptr ) + tRhs.Add ( { tGroup, *pRes, 1 } ); + + } else + { + if ( tTo.m_eType==UniqHLLTraits_c::ContainterType_e::ARRAY ) + tRhs.ConvertToHash ( tTo ); + + if ( tTo.m_eType==UniqHLLTraits_c::ContainterType_e::HASH ) + { + assert ( tRhs.m_iAccuracy==tRhs.m_iAccuracy ); + + if ( tRhs.m_iAccuracy > UniqHLLTraits_c::NON_PACKED_HLL_THRESH ) + { + tRhs.ConvertToHLLDensePacked ( tTo ); + tTo.m_pHLLDensePacked->Merge ( *tFrom.m_pHLLDensePacked ); + } + else + { + tRhs.ConvertToHLLDenseNonPacked ( tTo ); + tTo.m_pHLLDenseNonPacked->Merge ( *tFrom.m_pHLLDenseNonPacked ); + } + } + } +} + + ///////////////////////////////////////////////////////////////////// UniqHLL_c & UniqHLL_c::operator = ( UniqHLL_c && tRhs ) { @@ -356,27 +412,14 @@ void UniqHLL_c::Reset() } -void UniqHLL_c::CopyTo ( UniqHLL_c & tRhs ) +void UniqHLL_c::CopyTo ( UniqHLL_c & tRhs ) const { int64_t iIterator = 0; std::pair tRes; while ( ( tRes = m_hGroups.Iterate ( iIterator ) ).second ) { - Container_t * pFrom = tRes.second; - if ( pFrom->IsEmpty() ) - continue; - - Container_t * pTo = tRhs.m_hGroups.Find ( tRes.first ); - // just move whole group if absent in RHS - if ( !pTo ) - { - tRhs.m_hGroups.Add ( tRes.first, *tRes.second ); - m_hGroups.Delete ( tRes.first ); - continue; - } - - // containers need to be merged - CopyContainerTo ( tRes.first, *pFrom, *pTo, tRhs ); + // can not move Container_t into dRhs as move invalidates this + CopyContainerTo ( tRes.first, *tRes.second, tRhs ); } } @@ -392,9 +435,9 @@ UniqHLLSingle_c & UniqHLLSingle_c::operator = ( UniqHLLSingle_c && tRhs ) } -void UniqHLLSingle_c::CopyTo ( UniqHLLSingle_c & tRhs ) +void UniqHLLSingle_c::CopyTo ( UniqHLLSingle_c & tRhs ) const { - CopyContainerTo ( 0, m_tContainer, tRhs.m_tContainer, tRhs ); + CopyContainerTo ( 0, m_tContainer, tRhs ); } @@ -402,4 +445,4 @@ void UniqHLLSingle_c::Reset() { m_tContainer.Reset(); BASE::Reset(); -} \ No newline at end of file +} diff --git a/src/distinct.h b/src/distinct.h index a7974a656d..0e15e90189 100644 --- a/src/distinct.h +++ b/src/distinct.h @@ -98,7 +98,7 @@ class UniqGrouped_T : public CSphVector int CountStart ( SphGroupKey_t & tOutGroup ); ///< starting counting distinct values, returns count and group key (0 if empty) int CountNext ( SphGroupKey_t & tOutGroup ); ///< continues counting distinct values, returns count and group key (0 if done) void Compact ( VecTraits_T & dRemoveGroups ); - void CopyTo (UniqGrouped_T & dRhs ); + void CopyTo (UniqGrouped_T & dRhs ) const; void Reset() { BASE::Resize(0); } void SetAccuracy ( int iAccuracy ) {} @@ -226,7 +226,7 @@ void UniqGrouped_T::Compact ( VecTraits_T& dRemoveGroups ) } template -void UniqGrouped_T::CopyTo ( UniqGrouped_T & dRhs ) +void UniqGrouped_T::CopyTo ( UniqGrouped_T & dRhs ) const { if ( m_bSorted && dRhs.m_bSorted ) { @@ -256,7 +256,7 @@ class UniqSingle_T : public CSphVector void Add ( const ValueWithGroupCount_t & tValue ); void Sort(); void Compact(); - void CopyTo ( UniqSingle_T & dRhs ); + void CopyTo ( UniqSingle_T & dRhs ) const; void Reset() { BASE::Resize(0); } int CountDistinct(); @@ -296,7 +296,7 @@ void UniqSingle_T::Compact() } template -void UniqSingle_T::CopyTo ( UniqSingle_T & dRhs ) +void UniqSingle_T::CopyTo ( UniqSingle_T & dRhs ) const { if ( m_bSorted && dRhs.m_bSorted ) { @@ -436,63 +436,13 @@ class UniqHLLTraits_c case ContainterType_e::HLL_DENSE_NONPACKED: m_dUnusedHLLDenseNonPacked.Add ( tContainer.m_pHLLDenseNonPacked ); break; default: assert ( 0 && "Unknown container type" ); break; } + tContainer.m_pArray = nullptr; } void ConvertToHash ( Container_t & tContainer ); void ConvertToHLLDensePacked ( Container_t & tContainer ); void ConvertToHLLDenseNonPacked ( Container_t & tContainer ); - template - void CopyContainerTo ( SphGroupKey_t tGroup, const Container_t & tFrom, Container_t & tTo, T & tRhs ) - { - if ( tFrom.IsEmpty() ) - return; - - if ( tTo.IsEmpty() ) - { - tTo.Reset(); - tTo = tFrom; - tTo.m_eType = ContainterType_e::ARRAY; - tTo.m_pArray = nullptr; - } - else - { - if ( tFrom.m_eType==ContainterType_e::ARRAY ) - { - for ( auto i : *tFrom.m_pArray ) - tRhs.Add ( { tGroup, i, 1 } ); - } - else if ( tFrom.m_eType==ContainterType_e::HASH ) - { - int64_t i = 0; - SphAttr_t * pRes; - while ( ( pRes = tFrom.m_pHash->Iterate(i) ) != nullptr ) - tRhs.Add ( { tGroup, *pRes, 1 } ); - } - else - { - if ( tTo.m_eType==ContainterType_e::ARRAY ) - tRhs.ConvertToHash ( tTo ); - - if ( tTo.m_eType==ContainterType_e::HASH ) - { - assert ( m_iAccuracy==tRhs.m_iAccuracy ); - - if ( m_iAccuracy > NON_PACKED_HLL_THRESH ) - { - tRhs.ConvertToHLLDensePacked ( tTo ); - tTo.m_pHLLDensePacked->Merge ( *tFrom.m_pHLLDensePacked ); - } - else - { - tRhs.ConvertToHLLDenseNonPacked ( tTo ); - tTo.m_pHLLDenseNonPacked->Merge ( *tFrom.m_pHLLDenseNonPacked ); - } - } - } - } - } - private: CSphVector m_dUnusedArray; CSphVector> m_dUnusedHashes; @@ -505,6 +455,9 @@ class UniqHLLTraits_c HLLDenseNonPacked_c * AllocateHLLDenseNonPacked(); Hash_c * AllocateHash ( int iIdx ); void MoveToLargerHash ( Container_t & tContainer ); + + template + friend void CopyContainerTo ( SphGroupKey_t tGroup, const UniqHLLTraits_c::Container_t & tFrom, T & tRhs ); }; class UniqHLL_c : public UniqHLLTraits_c @@ -522,7 +475,8 @@ class UniqHLL_c : public UniqHLLTraits_c int CountNext ( SphGroupKey_t & tOutGroup ); void Compact ( VecTraits_T& dRemoveGroups ); void Reset(); - void CopyTo ( UniqHLL_c & tRhs ); + void CopyTo ( UniqHLL_c & tRhs ) const; + Container_t & Get ( SphGroupKey_t tGroup ); private: OpenHashTable_T m_hGroups; @@ -557,8 +511,9 @@ class UniqHLLSingle_c : public UniqHLLTraits_c inline void Add ( const ValueWithGroupCount_t & tValue ) { AddToContainer ( m_tContainer, tValue.m_tValue ); } void Compact() {} int CountDistinct() { return m_tContainer.Estimate(); } - void CopyTo ( UniqHLLSingle_c & tRhs ); + void CopyTo ( UniqHLLSingle_c & tRhs ) const; void Reset(); + Container_t & Get ( SphGroupKey_t tGroup ) { return m_tContainer; } private: Container_t m_tContainer; diff --git a/src/sphinxsort.cpp b/src/sphinxsort.cpp index 7e98a6d933..bfc3c4e3d9 100644 --- a/src/sphinxsort.cpp +++ b/src/sphinxsort.cpp @@ -3042,6 +3042,7 @@ class CSphKBufferGroupSorter : public KBufferGroupSorter_T/idx2 } + +source src11 +{ + type = mysql + + sql_attr_uint = tag + sql_attr_uint = gr + sql_query = SELECT * FROM test_table1 WHERE document_id <= 1000 +} + +source src12 : src11 +{ + sql_query = SELECT * FROM test_table1 WHERE document_id >= 1001 +} + +index i11 +{ + source = src11 + path = /i11 +} + +index i12 +{ + source = src12 + path = /i12 +} @@ -139,6 +165,9 @@ select a, count(distinct id) from idx2, idx group by a; select a, count(distinct id) from idx group by a; select a, count(distinct id) from idx2 group by a; +select id, count(distinct tag) from i11, i12 group by gr limit 10 option max_matches = 100; +select id, count(distinct tag) from i11, i12 group by gr limit 10 option max_matches = 1000; + @@ -164,4 +193,30 @@ INSERT INTO `test_table` VALUES ( 4, 3, 1, 'main' ) + +CREATE TABLE `test_table1` +( + `document_id` int(11) NOT NULL default '0', + `tag` int(11) NOT NULL default '0', + `gr` int(11) NOT NULL default '0', + `body` varchar(255) NOT NULL default '' +) + + + +DROP TABLE IF EXISTS `test_table1` + + + + + (,,, 'test 1') + INSERT INTO test_table1 VALUES + + + + + (,,, 'test 12') + INSERT INTO test_table1 VALUES + +