Skip to content

Commit

Permalink
fixed crash at count distinct with multiple indexes; fixed #3635; add…
Browse files Browse the repository at this point in the history
…ed regressions to test 444
  • Loading branch information
tomatolog committed Jul 6, 2023
1 parent 1399e6f commit c472e5b
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 82 deletions.
91 changes: 67 additions & 24 deletions src/distinct.cpp
Expand Up @@ -205,7 +205,7 @@ void UniqHLLTraits_c::ConvertToHash ( Container_t & tContainer )
for ( const auto & i : *tContainer.m_pArray )
pHash->Add(i);

m_dUnusedArray.Add ( tContainer.m_pArray );
FreeContainer ( tContainer );

tContainer.m_pHash = pHash;
tContainer.m_iHashIdx = 0;
Expand All @@ -221,8 +221,7 @@ void UniqHLLTraits_c::ConvertToHLLDensePacked ( Container_t & tContainer )
assert ( pHLL && tContainer.m_pHash );

CopyHashToHLL ( *pHLL, *tContainer.m_pHash );

m_dUnusedHashes[tContainer.m_iHashIdx].Add ( tContainer.m_pHash );
FreeContainer ( tContainer );

tContainer.m_pHLLDensePacked = pHLL;
tContainer.m_eType = ContainterType_e::HLL_DENSE_PACKED;
Expand All @@ -237,8 +236,7 @@ void UniqHLLTraits_c::ConvertToHLLDenseNonPacked ( Container_t & tContainer )
assert ( pHLL && tContainer.m_pHash );

CopyHashToHLL ( *pHLL, *tContainer.m_pHash );

m_dUnusedHashes[tContainer.m_iHashIdx].Add ( tContainer.m_pHash );
FreeContainer ( tContainer );

tContainer.m_pHLLDenseNonPacked = pHLL;
tContainer.m_eType = ContainterType_e::HLL_DENSE_NONPACKED;
Expand Down Expand Up @@ -302,6 +300,64 @@ void UniqHLLTraits_c::SetAccuracy ( int iAccuracy )
m_dUnusedHashes.Resize ( sphLog2 ( m_iMaxHashSize ) - sphLog2const ( MIN_HASH_SIZE ) + 1 );
}

UniqHLLTraits_c::Container_t & UniqHLL_c::Get ( SphGroupKey_t tGroup )
{
int iLen = m_hGroups.GetLength();
UniqHLLTraits_c::Container_t & tCont = m_hGroups.Acquire ( tGroup );

// need reset value in case of the Container_t just added
if ( iLen!=m_hGroups.GetLength() )
tCont = UniqHLLTraits_c::Container_t();

return tCont;
}

template <typename T>
void CopyContainerTo ( SphGroupKey_t tGroup, const UniqHLLTraits_c::Container_t & tFrom, T & tRhs )
{
if ( tFrom.IsEmpty() )
return;

UniqHLLTraits_c::Container_t & tTo = tRhs.Get ( tGroup );
if ( tTo.IsEmpty() )
tTo.m_pArray = tRhs.AllocateArray();

if ( tFrom.m_eType==UniqHLLTraits_c::ContainterType_e::ARRAY )
{
for ( auto i : *tFrom.m_pArray )
tRhs.Add ( { tGroup, i, 1 } );

} else if ( tFrom.m_eType==UniqHLLTraits_c::ContainterType_e::HASH )
{
int64_t i = 0;
SphAttr_t * pRes;
while ( ( pRes = tFrom.m_pHash->Iterate(i) ) != nullptr )
tRhs.Add ( { tGroup, *pRes, 1 } );

} else
{
if ( tTo.m_eType==UniqHLLTraits_c::ContainterType_e::ARRAY )
tRhs.ConvertToHash ( tTo );

if ( tTo.m_eType==UniqHLLTraits_c::ContainterType_e::HASH )
{
assert ( tRhs.m_iAccuracy==tRhs.m_iAccuracy );

if ( tRhs.m_iAccuracy > UniqHLLTraits_c::NON_PACKED_HLL_THRESH )
{
tRhs.ConvertToHLLDensePacked ( tTo );
tTo.m_pHLLDensePacked->Merge ( *tFrom.m_pHLLDensePacked );
}
else
{
tRhs.ConvertToHLLDenseNonPacked ( tTo );
tTo.m_pHLLDenseNonPacked->Merge ( *tFrom.m_pHLLDenseNonPacked );
}
}
}
}


/////////////////////////////////////////////////////////////////////
UniqHLL_c & UniqHLL_c::operator = ( UniqHLL_c && tRhs )
{
Expand Down Expand Up @@ -356,27 +412,14 @@ void UniqHLL_c::Reset()
}


void UniqHLL_c::CopyTo ( UniqHLL_c & tRhs )
void UniqHLL_c::CopyTo ( UniqHLL_c & tRhs ) const
{
int64_t iIterator = 0;
std::pair<SphGroupKey_t, Container_t*> tRes;
while ( ( tRes = m_hGroups.Iterate ( iIterator ) ).second )
{
Container_t * pFrom = tRes.second;
if ( pFrom->IsEmpty() )
continue;

Container_t * pTo = tRhs.m_hGroups.Find ( tRes.first );
// just move whole group if absent in RHS
if ( !pTo )
{
tRhs.m_hGroups.Add ( tRes.first, *tRes.second );
m_hGroups.Delete ( tRes.first );
continue;
}

// containers need to be merged
CopyContainerTo ( tRes.first, *pFrom, *pTo, tRhs );
// can not move Container_t into dRhs as move invalidates this
CopyContainerTo ( tRes.first, *tRes.second, tRhs );
}
}

Expand All @@ -392,14 +435,14 @@ UniqHLLSingle_c & UniqHLLSingle_c::operator = ( UniqHLLSingle_c && tRhs )
}


void UniqHLLSingle_c::CopyTo ( UniqHLLSingle_c & tRhs )
void UniqHLLSingle_c::CopyTo ( UniqHLLSingle_c & tRhs ) const
{
CopyContainerTo ( 0, m_tContainer, tRhs.m_tContainer, tRhs );
CopyContainerTo ( 0, m_tContainer, tRhs );
}


void UniqHLLSingle_c::Reset()
{
m_tContainer.Reset();
BASE::Reset();
}
}
69 changes: 12 additions & 57 deletions src/distinct.h
Expand Up @@ -98,7 +98,7 @@ class UniqGrouped_T : public CSphVector<T>
int CountStart ( SphGroupKey_t & tOutGroup ); ///< starting counting distinct values, returns count and group key (0 if empty)
int CountNext ( SphGroupKey_t & tOutGroup ); ///< continues counting distinct values, returns count and group key (0 if done)
void Compact ( VecTraits_T<SphGroupKey_t> & dRemoveGroups );
void CopyTo (UniqGrouped_T & dRhs );
void CopyTo (UniqGrouped_T & dRhs ) const;
void Reset() { BASE::Resize(0); }
void SetAccuracy ( int iAccuracy ) {}

Expand Down Expand Up @@ -226,7 +226,7 @@ void UniqGrouped_T<T>::Compact ( VecTraits_T<SphGroupKey_t>& dRemoveGroups )
}

template <typename T>
void UniqGrouped_T<T>::CopyTo ( UniqGrouped_T & dRhs )
void UniqGrouped_T<T>::CopyTo ( UniqGrouped_T & dRhs ) const
{
if ( m_bSorted && dRhs.m_bSorted )
{
Expand Down Expand Up @@ -256,7 +256,7 @@ class UniqSingle_T : public CSphVector<T>
void Add ( const ValueWithGroupCount_t & tValue );
void Sort();
void Compact();
void CopyTo ( UniqSingle_T & dRhs );
void CopyTo ( UniqSingle_T & dRhs ) const;
void Reset() { BASE::Resize(0); }
int CountDistinct();

Expand Down Expand Up @@ -296,7 +296,7 @@ void UniqSingle_T<T>::Compact()
}

template <typename T>
void UniqSingle_T<T>::CopyTo ( UniqSingle_T & dRhs )
void UniqSingle_T<T>::CopyTo ( UniqSingle_T & dRhs ) const
{
if ( m_bSorted && dRhs.m_bSorted )
{
Expand Down Expand Up @@ -436,63 +436,13 @@ class UniqHLLTraits_c
case ContainterType_e::HLL_DENSE_NONPACKED: m_dUnusedHLLDenseNonPacked.Add ( tContainer.m_pHLLDenseNonPacked ); break;
default: assert ( 0 && "Unknown container type" ); break;
}
tContainer.m_pArray = nullptr;
}

void ConvertToHash ( Container_t & tContainer );
void ConvertToHLLDensePacked ( Container_t & tContainer );
void ConvertToHLLDenseNonPacked ( Container_t & tContainer );

template <typename T>
void CopyContainerTo ( SphGroupKey_t tGroup, const Container_t & tFrom, Container_t & tTo, T & tRhs )
{
if ( tFrom.IsEmpty() )
return;

if ( tTo.IsEmpty() )
{
tTo.Reset();
tTo = tFrom;
tTo.m_eType = ContainterType_e::ARRAY;
tTo.m_pArray = nullptr;
}
else
{
if ( tFrom.m_eType==ContainterType_e::ARRAY )
{
for ( auto i : *tFrom.m_pArray )
tRhs.Add ( { tGroup, i, 1 } );
}
else if ( tFrom.m_eType==ContainterType_e::HASH )
{
int64_t i = 0;
SphAttr_t * pRes;
while ( ( pRes = tFrom.m_pHash->Iterate(i) ) != nullptr )
tRhs.Add ( { tGroup, *pRes, 1 } );
}
else
{
if ( tTo.m_eType==ContainterType_e::ARRAY )
tRhs.ConvertToHash ( tTo );

if ( tTo.m_eType==ContainterType_e::HASH )
{
assert ( m_iAccuracy==tRhs.m_iAccuracy );

if ( m_iAccuracy > NON_PACKED_HLL_THRESH )
{
tRhs.ConvertToHLLDensePacked ( tTo );
tTo.m_pHLLDensePacked->Merge ( *tFrom.m_pHLLDensePacked );
}
else
{
tRhs.ConvertToHLLDenseNonPacked ( tTo );
tTo.m_pHLLDenseNonPacked->Merge ( *tFrom.m_pHLLDenseNonPacked );
}
}
}
}
}

private:
CSphVector<SmallArray_c *> m_dUnusedArray;
CSphVector<CSphVector<Hash_c *>> m_dUnusedHashes;
Expand All @@ -505,6 +455,9 @@ class UniqHLLTraits_c
HLLDenseNonPacked_c * AllocateHLLDenseNonPacked();
Hash_c * AllocateHash ( int iIdx );
void MoveToLargerHash ( Container_t & tContainer );

template <typename T>
friend void CopyContainerTo ( SphGroupKey_t tGroup, const UniqHLLTraits_c::Container_t & tFrom, T & tRhs );
};

class UniqHLL_c : public UniqHLLTraits_c
Expand All @@ -522,7 +475,8 @@ class UniqHLL_c : public UniqHLLTraits_c
int CountNext ( SphGroupKey_t & tOutGroup );
void Compact ( VecTraits_T<SphGroupKey_t>& dRemoveGroups );
void Reset();
void CopyTo ( UniqHLL_c & tRhs );
void CopyTo ( UniqHLL_c & tRhs ) const;
Container_t & Get ( SphGroupKey_t tGroup );

private:
OpenHashTable_T<SphGroupKey_t, Container_t> m_hGroups;
Expand Down Expand Up @@ -557,8 +511,9 @@ class UniqHLLSingle_c : public UniqHLLTraits_c
inline void Add ( const ValueWithGroupCount_t & tValue ) { AddToContainer ( m_tContainer, tValue.m_tValue ); }
void Compact() {}
int CountDistinct() { return m_tContainer.Estimate(); }
void CopyTo ( UniqHLLSingle_c & tRhs );
void CopyTo ( UniqHLLSingle_c & tRhs ) const;
void Reset();
Container_t & Get ( SphGroupKey_t tGroup ) { return m_tContainer; }

private:
Container_t m_tContainer;
Expand Down
1 change: 1 addition & 0 deletions src/sphinxsort.cpp
Expand Up @@ -3042,6 +3042,7 @@ class CSphKBufferGroupSorter : public KBufferGroupSorter_T<COMPGROUP,UNIQ,DISTIN
bool bUniqUpdated = false;
if ( !m_bMatchesFinalized && bCopyMeta )
{
// can not move m_tUniq into dRhs as move invalidates m_tUniq then breaks FinalizeMatches
m_tUniq.CopyTo ( dRhs.m_tUniq );
bUniqUpdated = true;
}
Expand Down

0 comments on commit c472e5b

Please sign in to comment.