Skip to content

Commit

Permalink
query optimizer improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
glookka committed Jul 21, 2022
1 parent 86d30f1 commit 68cf78a
Show file tree
Hide file tree
Showing 23 changed files with 1,633 additions and 537 deletions.
5 changes: 3 additions & 2 deletions src/CMakeLists.txt
Expand Up @@ -50,7 +50,7 @@ add_library ( lmanticore STATIC sphinx.cpp sphinxexcerpt.cpp sphinxquery.cpp sph
columnarlib.cpp collation.cpp fnv64.cpp histogram.cpp threads_detached.cpp hazard_pointer.cpp
timeout_queue.cpp dynamic_idx.cpp columnarrt.cpp columnarmisc.cpp exprtraits.cpp columnarexpr.cpp
sphinx_alter.cpp columnarsort.cpp binlog.cpp chunksearchctx.cpp client_task_info.cpp
indexfiles.cpp attrindex_builder.cpp queryfilter.cpp aggregate.cpp secondarylib.cpp )
indexfiles.cpp attrindex_builder.cpp queryfilter.cpp aggregate.cpp secondarylib.cpp costestimate.cpp )

add_library ( lstem STATIC sphinxsoundex.cpp sphinxmetaphone.cpp sphinxstemen.cpp sphinxstemru.cpp sphinxstemru.inl
sphinxstemcz.cpp sphinxstemar.cpp )
Expand Down Expand Up @@ -135,7 +135,8 @@ set ( HEADERS sphinxexcerpt.h sphinxfilter.h sphinxint.h sphinxjsonquery.h sphin
hazard_pointer.h task_info.h mini_timer.h collation.h fnv64.h histogram.h sortsetup.h dynamic_idx.h
indexsettings.h columnarlib.h fileio.h memio.h queryprofile.h columnarfilter.h columnargrouper.h fileutils.h
libutils.h conversion.h columnarsort.h sortcomp.h binlog_defs.h binlog.h ${MANTICORE_BINARY_DIR}/config/config.h
chunksearchctx.h lrucache.h indexfiles.h attrindex_builder.h queryfilter.h aggregate.h openhash.h secondarylib.h )
chunksearchctx.h lrucache.h indexfiles.h attrindex_builder.h queryfilter.h aggregate.h openhash.h secondarylib.h
costestimate.h )

set ( SEARCHD_H searchdaemon.h searchdconfig.h searchdddl.h searchdexpr.h searchdha.h searchdreplication.h searchdsql.h
searchdtask.h client_task_info.h taskflushattrs.h taskflushbinlog.h taskflushmutable.h taskglobalidf.h
Expand Down
218 changes: 218 additions & 0 deletions src/costestimate.cpp
@@ -0,0 +1,218 @@
//
//
// Copyright (c) 2018-2022, Manticore Software LTD (https://manticoresearch.com)
// All rights reserved
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License. You should have
// received a copy of the GPL license along with this program; if you
// did not, you can find it at http://www.gnu.org/
//

#include "costestimate.h"

#include "sphinxint.h"
#include <math.h>

/// Default CostEstimate_i implementation: a linear per-document cost model.
/// Every Cost_* helper multiplies a document count by an empirical per-document
/// weight and by SCALE, so that all partial costs share the same unit.
class CostEstimate_c : public CostEstimate_i
{
public:
			CostEstimate_c ( const CSphVector<SecondaryIndexInfo_t> & dSIInfo, const CSphVector<CSphFilterSettings> & dFilters, int64_t iTotalDocs, int iCutoff );

	float	CalcQueryCost() final;

private:
	/// common multiplier applied to every partial cost
	static constexpr float SCALE = 1.0f/1000000.0f;

	// per-document weights of the individual operations
	static constexpr float COST_PUSH				= 12.5f;
	static constexpr float COST_FILTER				= 8.5f;
	static constexpr float COST_COLUMNAR_FILTER		= 1.5f;
	static constexpr float COST_INTERSECT			= 5.0f;
	static constexpr float COST_INDEX_READ			= 1.0f;
	static constexpr float COST_INDEX_UNION_BITMAP	= 6.0f;
	static constexpr float COST_INDEX_UNION_COEFF	= 0.9f;
	static constexpr float COST_LOOKUP_READ			= 33.0f;

	// both vectors are held by reference; CalcQueryCost() indexes them with the same position
	const CSphVector<SecondaryIndexInfo_t> &	m_dSIInfo;
	const CSphVector<CSphFilterSettings> &		m_dFilters;

	int64_t	m_iTotalDocs = 0;
	int		m_iCutoff = 0;		///< negative values are treated as "no cutoff"

	float	Cost_Filter ( int64_t iDocs ) const				{ return COST_FILTER*float(iDocs)*SCALE; }
	float	Cost_BlockFilter ( int64_t iDocs ) const		{ return Cost_Filter ( iDocs/DOCINFO_INDEX_FREQ ); }
	float	Cost_ColumnarFilter ( int64_t iDocs ) const		{ return COST_COLUMNAR_FILTER*float(iDocs)*SCALE; }
	float	Cost_Push ( int64_t iDocs ) const				{ return COST_PUSH*float(iDocs)*SCALE; }
	float	Cost_Intersect ( int64_t iDocs ) const			{ return COST_INTERSECT*float(iDocs)*SCALE; }
	float	Cost_IndexRead ( int64_t iDocs ) const			{ return COST_INDEX_READ*float(iDocs)*SCALE; }
	float	Cost_IndexUnionBitmap ( int64_t iDocs ) const	{ return COST_INDEX_UNION_BITMAP*float(iDocs)*SCALE; }

	/// n*log2(n) cost of a queue-based union
	float	Cost_IndexUnionQueue ( int64_t iDocs ) const
	{
		// log2f(0) is -inf and 0*(-inf) is NaN, which would poison the whole estimate;
		// n*log2(n) is 0 for n==1 anyway, so everything up to 1 costs nothing
		if ( iDocs<=1 )
			return 0.0f;

		return COST_INDEX_UNION_COEFF*float(iDocs)*log2f ( float(iDocs) )*SCALE;
	}

	float	Cost_LookupRead ( int64_t iDocs ) const			{ return COST_LOOKUP_READ*float(iDocs)*SCALE; }

	bool	NeedBitmapUnion ( const CSphFilterSettings & tFilter, int64_t iRsetSize ) const;
};


/// binds to the caller's vectors by reference (nothing is copied) and remembers the scalar settings
CostEstimate_c::CostEstimate_c ( const CSphVector<SecondaryIndexInfo_t> & dSIInfo, const CSphVector<CSphFilterSettings> & dFilters, int64_t iTotalDocs, int iCutoff )
	: m_dSIInfo ( dSIInfo ), m_dFilters ( dFilters ), m_iTotalDocs ( iTotalDocs ), m_iCutoff ( iCutoff )
{
}


/// Tells whether the union for this filter is costed as a bitmap union (true) or a queue union (false).
/// @param tFilter		filter settings; only the type and the integer range bounds are inspected
/// @param iRsetSize	estimated number of matching documents, clamped to the cutoff when one is set
/// @return true when the range is wide enough AND the result set is a large enough share of all documents
bool CostEstimate_c::NeedBitmapUnion ( const CSphFilterSettings & tFilter, int64_t iRsetSize ) const
{
	// this needs to be in sync with iterator construction code
	const uint64_t BITMAP_ITERATOR_THRESH = 16;
	const float BITMAP_RATIO_THRESH = 0.002f;

	// without documents there is no meaningful ratio (and no division by zero)
	if ( m_iTotalDocs<=0 )
		return false;

	bool bFitsIteratorThresh = false;
	if ( tFilter.m_eType==SPH_FILTER_RANGE )
	{
		// the width is computed in unsigned arithmetic: max-min+1 overflows int64_t on very wide ranges.
		// comparing (max-min) against THRESH-1 avoids the +1 wrapping to 0 on the full 64-bit range.
		// an inverted (empty) range never fits
		if ( tFilter.m_iMinValue<=tFilter.m_iMaxValue )
		{
			const uint64_t uSpan = uint64_t ( tFilter.m_iMaxValue ) - uint64_t ( tFilter.m_iMinValue );
			bFitsIteratorThresh = uSpan >= BITMAP_ITERATOR_THRESH-1;
		}
	}
	else if ( tFilter.m_eType==SPH_FILTER_FLOATRANGE )
		bFitsIteratorThresh = true;

	if ( !bFitsIteratorThresh )
		return false;

	if ( m_iCutoff>=0 )
		iRsetSize = Min ( iRsetSize, int64_t ( m_iCutoff ) );

	const float fRsetRatio = float ( iRsetSize ) / float ( m_iTotalDocs );
	return fRsetRatio >= BITMAP_RATIO_THRESH;
}


/// Estimates the cost of running the query with the access path chosen for every entry of m_dSIInfo.
/// The result is the sum of:
///  - plain filter evaluation (over what is left after the iterators, or over the whole index via block filters);
///  - lookup reads, analyzer (columnar) evaluation, secondary index reads plus their unions;
///  - intersection of the iterators when there is more than one;
///  - pushing the surviving documents (limited by the cutoff when it is non-negative).
/// @return the accumulated cost; only meaningful relative to other values returned by this model
float CostEstimate_c::CalcQueryCost()
{
	int64_t iDocsToReadLookup = 0;
	int64_t iDocsToReadAnalyzer = 0;
	int64_t iDocsToReadIndex = 0;

	// 64-bit accumulators: the per-entry estimates are int64_t, so 32-bit sums would truncate or overflow
	int64_t iSumDocsFromIndexes = 0;
	int64_t iSumDocsFromFilters = 0;

	float fDocsAfterIndexes = 1.0f;
	float fDocsAfterFilters = 1.0f;
	int iToIntersect = 0;
	int iNumFilters = 0;
	int iNumAnalyzers = 0;
	int iNumIndexes = 0;

	for ( const auto & tInfo : m_dSIInfo )
	{
		const int64_t iDocs = tInfo.m_iRsetEstimate;

		// share of all documents expected to pass this entry; an empty index yields 0 instead of NaN/inf
		const float fProbability = m_iTotalDocs>0 ? float(iDocs) / float(m_iTotalDocs) : 0.0f;

		// lookups, analyzers and secondary indexes produce iterators; anything else is accounted as a filter
		bool bIterator = true;
		switch ( tInfo.m_eType )
		{
		case SecondaryIndexType_e::LOOKUP:
			iDocsToReadLookup += iDocs;
			break;

		case SecondaryIndexType_e::ANALYZER:
			iDocsToReadAnalyzer += iDocs;
			iNumAnalyzers++;
			break;

		case SecondaryIndexType_e::INDEX:
			iDocsToReadIndex += iDocs;
			iNumIndexes++;
			break;

		case SecondaryIndexType_e::FILTER:
			iNumFilters++;
			bIterator = false;
			break;

		default:
			bIterator = false;
			break;
		}

		if ( bIterator )
		{
			iSumDocsFromIndexes += iDocs;
			fDocsAfterIndexes *= fProbability;
			iToIntersect++;
		}
		else
			iSumDocsFromFilters += iDocs;

		fDocsAfterFilters *= fProbability;
	}

	float fCost = 0.0f;

	if ( iNumFilters )
	{
		if ( iToIntersect>0 )
		{
			// filters only see what the iterators let through
			const int64_t iDocsLeftToFilter = int64_t ( fDocsAfterIndexes*float(m_iTotalDocs) );
			fCost += Cost_Filter ( iDocsLeftToFilter );
		}
		else
		{
			// the idea is that block filter rejects most docs and 50% of the remaining docs are filtered out
			fCost += Cost_Filter ( Min ( iSumDocsFromFilters*2, m_iTotalDocs ) );
			fCost += Cost_BlockFilter ( m_iTotalDocs*iNumFilters );
		}
	}

	fCost += Cost_LookupRead ( iDocsToReadLookup );

	if ( iNumAnalyzers )
	{
		// minmax tree eval
		fCost += Cost_Filter ( int64_t ( float ( m_iTotalDocs/1024 )*1.33f*float(iNumAnalyzers) ) );

		// the idea is that minmax rejects most docs and 50% of the remaining docs are filtered out
		fCost += Cost_ColumnarFilter ( Min ( iDocsToReadAnalyzer*2, m_iTotalDocs ) );
	}

	if ( iNumIndexes )
	{
		fCost += Cost_IndexRead ( iDocsToReadIndex );

		// NOTE(review): m_dFilters is indexed with the position taken from m_dSIInfo,
		// so both vectors are presumably parallel (same length and order) - confirm against the caller
		ARRAY_FOREACH ( i, m_dSIInfo )
		{
			const auto & tSIInfo = m_dSIInfo[i];
			if ( tSIInfo.m_eType!=SecondaryIndexType_e::INDEX )
				continue;

			const int64_t iDocs = tSIInfo.m_iRsetEstimate;
			if ( NeedBitmapUnion ( m_dFilters[i], iDocs ) )
				fCost += Cost_IndexUnionBitmap ( iDocs );
			else
				fCost += Cost_IndexUnionQueue ( iDocs );
		}
	}

	if ( iToIntersect>1 )
		fCost += Cost_Intersect ( iSumDocsFromIndexes );

	int64_t iDocsToPush = int64_t ( fDocsAfterFilters*float(m_iTotalDocs) );
	if ( m_iCutoff>=0 )
		iDocsToPush = Min ( iDocsToPush, int64_t ( m_iCutoff ) );

	fCost += Cost_Push ( iDocsToPush );

	return fCost;
}

/////////////////////////////////////////////////////////////////////

/// Factory for the default cost estimator.
/// The object is allocated with new and returned as a raw pointer, so the caller is the one to delete it.
/// The estimator keeps references to both vectors; they must stay alive while it is used.
CostEstimate_i * CreateCostEstimate ( const CSphVector<SecondaryIndexInfo_t> & dSIInfo, const CSphVector<CSphFilterSettings> & dFilters, int64_t iTotalDocs, int iCutoff )
{
	CostEstimate_i * pEstimate = new CostEstimate_c ( dSIInfo, dFilters, iTotalDocs, iCutoff );
	return pEstimate;
}
34 changes: 34 additions & 0 deletions src/costestimate.h
@@ -0,0 +1,34 @@
//
// Copyright (c) 2018-2022, Manticore Software LTD (https://manticoresearch.com)
// All rights reserved
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License. You should have
// received a copy of the GPL license along with this program; if you
// did not, you can find it at http://www.gnu.org/
//

#ifndef _costestimate_
#define _costestimate_

#include "sphinx.h"

/// abstract interface of a query cost estimator
class CostEstimate_i
{
public:
	/// virtual, so that implementations can be destroyed through a pointer to the interface
	virtual			~CostEstimate_i() = default;

	/// @return the estimated cost of the query as a single number
	virtual float	CalcQueryCost() = 0;
};

/// per-entry input of the cost estimator: which access path is used and how many documents it is expected to yield
struct SecondaryIndexInfo_t
{
	CSphVector<SecondaryIndexType_e>	m_dCapabilities;							// presumably the access paths available for this entry - TODO confirm
	SecondaryIndexType_e				m_eType { SecondaryIndexType_e::FILTER };	///< access path the cost is calculated for
	SecondaryIndexType_e				m_eForce { SecondaryIndexType_e::NONE };	// presumably an access path forced from outside (NONE = nothing forced) - TODO confirm
	int64_t								m_iRsetEstimate { 0 };						///< estimated number of matching documents
	bool								m_bCreated { false };						// NOTE(review): not read by the estimator; meaning to be confirmed against the code that sets it
};

CostEstimate_i * CreateCostEstimate ( const CSphVector<SecondaryIndexInfo_t> & dSIInfo, const CSphVector<CSphFilterSettings> & dFilters, int64_t iTotalDocs, int iCutoff = -1 );

#endif // _costestimate_
1 change: 0 additions & 1 deletion src/queryprofile.cpp
Expand Up @@ -36,7 +36,6 @@ void QueryProfile_c::Start ( ESphQueryState eNew )
memset ( m_tmTotal, 0, sizeof(m_tmTotal) );
m_eState = eNew;
m_tmStamp = sphMicroTimer();
m_sEnablesIndexes = "";
}


Expand Down
4 changes: 1 addition & 3 deletions src/queryprofile.h
Expand Up @@ -79,9 +79,7 @@ class QueryProfile_c
int m_dSwitches [ SPH_QSTATE_TOTAL+1 ]; ///< number of switches to given state
int64_t m_tmTotal [ SPH_QSTATE_TOTAL+1 ]; ///< total time spent per state
CSphVector<BYTE> m_dPlan; ///< bson with plan
CSphString m_sEnablesIndexes; ///< enabled secondary indexes

/// create empty and stopped profile
/// create empty and stopped profile
QueryProfile_c();
virtual ~QueryProfile_c() {}

Expand Down
17 changes: 6 additions & 11 deletions src/searchd.cpp
Expand Up @@ -1829,7 +1829,7 @@ void SearchRequestBuilder_c::SendQuery ( const char * sIndexes, ISphOutputBuffer
tOut.SendInt ( q.m_dIndexHints.GetLength() );
for ( const auto & i : q.m_dIndexHints )
{
tOut.SendDword ( i.m_eHint );
tOut.SendDword ( (DWORD)i.m_dHints[int(SecondaryIndexType_e::INDEX)] );
tOut.SendString ( i.m_sIndex.cstr() );
}
}
Expand Down Expand Up @@ -2647,7 +2647,7 @@ bool ParseSearchQuery ( InputBuffer_c & tReq, ISphOutputBuffer & tOut, CSphQuery
tQuery.m_dIndexHints.Resize ( tReq.GetDword() );
for ( auto & i : tQuery.m_dIndexHints )
{
i.m_eHint = (IndexHint_e)tReq.GetDword();
i.m_dHints[int(SecondaryIndexType_e::INDEX)] = (IndexHint_e)tReq.GetDword();
i.m_sIndex = tReq.GetString();
}
}
Expand Down Expand Up @@ -2986,15 +2986,15 @@ static void FormatIndexHints ( const CSphQuery & tQuery, StringBuilder_c & tBuf
StrVec_t dUse, dForce, dIgnore;
for ( const auto & i : tQuery.m_dIndexHints )
{
switch ( i.m_eHint )
switch ( i.m_dHints[int(SecondaryIndexType_e::INDEX)] )
{
case INDEX_HINT_USE:
case IndexHint_e::USE:
dUse.Add(i.m_sIndex);
break;
case INDEX_HINT_FORCE:
case IndexHint_e::FORCE:
dForce.Add(i.m_sIndex);
break;
case INDEX_HINT_IGNORE:
case IndexHint_e::IGNORE_:
dIgnore.Add(i.m_sIndex);
break;
default:
Expand Down Expand Up @@ -15657,11 +15657,6 @@ static void HandleMysqlShowPlan ( RowBuffer_i & tOut, const QueryProfile_c & p,
tOut.PutString ( sPlan );
tOut.Commit();

tOut.PutString ( "enabled_indexes" );
tOut.PutString ( p.m_sEnablesIndexes.cstr() );

tOut.Commit();

tOut.Eof ( bMoreResultsFollow );
}

Expand Down
11 changes: 9 additions & 2 deletions src/searchdsql.cpp
Expand Up @@ -816,7 +816,7 @@ void SqlParser_c::AddIndexHint ( IndexHint_e eHint, const SqlNode_t & tValue )
{
IndexHint_t & tHint = m_pQuery->m_dIndexHints.Add();
tHint.m_sIndex = i;
tHint.m_eHint = eHint;
tHint.m_dHints[int(SecondaryIndexType_e::INDEX)] = eHint;
}
}

Expand Down Expand Up @@ -1398,7 +1398,14 @@ struct HintComp_fn

bool IsEq ( const IndexHint_t & tA, const IndexHint_t & tB ) const
{
return tA.m_sIndex==tB.m_sIndex && tA.m_eHint==tB.m_eHint;
if ( tA.m_sIndex!=tB.m_sIndex )
return false;

for ( int i = 0; i < int(SecondaryIndexType_e::TOTAL); i++ )
if ( tA.m_dHints[i] != tB.m_dHints[i] )
return false;

return true;
}
};

Expand Down

0 comments on commit 68cf78a

Please sign in to comment.