Skip to content

Commit

Permalink
fixed #234; added warning on unsupported infix length; fixed infix vs…
Browse files Browse the repository at this point in the history
… prefix length set; fixed models at tests 1,2,3,12,54,173,350
  • Loading branch information
tomatolog committed Apr 19, 2018
1 parent 546e229 commit 0c45098
Show file tree
Hide file tree
Showing 12 changed files with 31 additions and 58 deletions.
5 changes: 5 additions & 0 deletions src/searchd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19484,6 +19484,11 @@ ESphAddIndex AddIndex ( const char * szIndexName, const CSphConfigSection & hInd
sphWarning ( "dict=crc deprecated, use dict=keywords instead" );
if ( bWordDict && ( tSettings.m_dPrefixFields.GetLength() || tSettings.m_dInfixFields.GetLength() ) )
sphWarning ( "WARNING: index '%s': prefix_fields and infix_fields has no effect with dict=keywords, ignoring\n", szIndexName );
if ( bWordDict && tSettings.m_iMinInfixLen==1 )
{
sphWarn ( "min_infix_len must be greater than 1, changed to 2" );
tSettings.m_iMinInfixLen = 2;
}

if ( !bPercolate )
tIdx.m_pIndex = sphCreateIndexRT ( tSchema, szIndexName, iRamSize, hIndex["path"].cstr(), bWordDict );
Expand Down
23 changes: 15 additions & 8 deletions src/sphinx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1251,7 +1251,6 @@ class CSphTokenizerIndex : public CSphIndex
virtual bool Rename ( const char * ) { return false; }
virtual bool Lock () { return false; }
virtual void Unlock () {}
virtual void PostSetup() {}
virtual bool EarlyReject ( CSphQueryContext * , CSphMatch & ) const { return false; }
virtual const CSphSourceStats & GetStats () const { return g_tTmpDummyStat; }
virtual void GetStatus ( CSphIndexStatus* pRes ) const { assert (pRes); if ( pRes ) { pRes->m_iDiskUse = 0; pRes->m_iRamUse = 0;}}
Expand Down Expand Up @@ -1393,7 +1392,6 @@ class CSphIndex_VLN : public CSphIndex

virtual bool Lock ();
virtual void Unlock ();
virtual void PostSetup() {}

virtual bool MultiQuery ( const CSphQuery * pQuery, CSphQueryResult * pResult, int iSorters, ISphMatchSorter ** ppSorters, const CSphMultiQueryArgs & tArgs ) const;
virtual bool MultiQueryEx ( int iQueries, const CSphQuery * pQueries, CSphQueryResult ** ppResults, ISphMatchSorter ** ppSorters, const CSphMultiQueryArgs & tArgs ) const;
Expand Down Expand Up @@ -8637,6 +8635,12 @@ void CSphIndex::Setup ( const CSphIndexSettings & tSettings )
m_tSettings = tSettings;
}

/// default post-load fixup of index settings; derived indexes call this from their own PostSetup()
void CSphIndex::PostSetup ()
{
	// the fixup only applies when a dictionary is present and it is a keywords (word) dictionary
	if ( !m_pDict || !m_pDict->GetSettings().m_bWordDict )
		return;

	// infixes are enabled but no prefix length was set: allow prefixes of any length
	const bool bInfixWithoutPrefix = ( m_tSettings.m_iMinPrefixLen==0 && m_tSettings.m_iMinInfixLen>0 );
	if ( bInfixWithoutPrefix )
		m_tSettings.m_iMinPrefixLen = 1;
}

void CSphIndex::SetCacheSize ( int iMaxCachedDocs, int iMaxCachedHits )
{
Expand Down Expand Up @@ -9968,7 +9972,7 @@ class CSphHitBuilder
bool CreateIndexFiles ( const char * sDocName, const char * sHitName, const char * sSkipName, bool bInplace, int iWriteBuffer, CSphAutofile & tHit, SphOffset_t * pSharedOffset );
void HitReset ();
void cidxHit ( CSphAggregateHit * pHit, const CSphRowitem * pAttrs );
bool cidxDone ( int iMemLimit, int iMinInfixLen, int iMaxCodepointLen, DictHeader_t * pDictHeader );
bool cidxDone ( int iMemLimit, int & iMinInfixLen, int iMaxCodepointLen, DictHeader_t * pDictHeader );
int cidxWriteRawVLB ( int fd, CSphWordHit * pHit, int iHits, DWORD * pDocinfo, int iDocinfos, int iStride );

SphOffset_t GetHitfilePos () const { return m_wrHitlist.GetPos (); }
Expand Down Expand Up @@ -10574,7 +10578,7 @@ bool IndexBuildDone ( const BuildHeader_t & tBuildHeader, const WriteHeader_t &
}


bool CSphHitBuilder::cidxDone ( int iMemLimit, int iMinInfixLen, int iMaxCodepointLen, DictHeader_t * pDictHeader )
bool CSphHitBuilder::cidxDone ( int iMemLimit, int & iMinInfixLen, int iMaxCodepointLen, DictHeader_t * pDictHeader )
{
assert ( pDictHeader );

Expand All @@ -10593,7 +10597,10 @@ bool CSphHitBuilder::cidxDone ( int iMemLimit, int iMinInfixLen, int iMaxCodepoi
{
pDictHeader->m_iInfixCodepointBytes = iMaxCodepointLen;
if ( iMinInfixLen==1 )
sphWarn ( "min_infix_len must be greater 1, clamped" );
{
sphWarn ( "min_infix_len must be greater than 1, changed to 2" );
iMinInfixLen = 2;
}
}

if ( !m_pDict->DictEnd ( pDictHeader, iMemLimit, *m_pLastError, m_pThrottle ) )
Expand Down Expand Up @@ -14234,7 +14241,8 @@ bool CSphIndex_VLN::DoMerge ( const CSphIndex_VLN * pDstIndex, const CSphIndex_V
tFlush.m_dFieldMask.UnsetAll();
tHitBuilder.cidxHit ( &tFlush, NULL );

if ( !tHitBuilder.cidxDone ( iHitBufferSize, pSettings->m_tSettings.m_iMinInfixLen,
int iMinInfixLen = pSettings->m_tSettings.m_iMinInfixLen;
if ( !tHitBuilder.cidxDone ( iHitBufferSize, iMinInfixLen,
pSettings->m_pTokenizer->GetMaxCodepointLength(), &tBuildHeader ) )
return false;

Expand Down Expand Up @@ -17688,8 +17696,7 @@ bool sphExpandGetWords ( const char * sWord, const ExpansionContext_t & tCtx, IS
iPrefix++;

// do not expand prefixes under min length
int iMinLen = Max ( tCtx.m_iMinPrefixLen, tCtx.m_iMinInfixLen );
if ( iPrefix<iMinLen )
if ( iPrefix<tCtx.m_iMinPrefixLen )
return false;

int iBytes = sCodes - sPrefix;
Expand Down
2 changes: 1 addition & 1 deletion src/sphinx.h
Original file line number Diff line number Diff line change
Expand Up @@ -3367,7 +3367,7 @@ class CSphIndex : public ISphKeywordsStat
virtual void Unlock () = 0;

/// called when index is loaded and prepared to work
virtual void PostSetup() = 0;
virtual void PostSetup();

public:
/// return index document, bytes totals (FIXME? remove this in favor of GetStatus() maybe?)
Expand Down
12 changes: 3 additions & 9 deletions src/sphinxrt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1264,8 +1264,6 @@ struct RtIndex_t : public ISphRtIndex, public ISphNoncopyable, public ISphWordli
virtual void PostSetup();
virtual bool IsRT() const { return true; }

virtual void Setup ( const CSphIndexSettings & tSettings );

virtual int UpdateAttributes ( const CSphAttrUpdate & tUpd, int iIndex, CSphString & sError, CSphString & sWarning );
virtual bool SaveAttributes ( CSphString & sError ) const;
virtual DWORD GetAttributeStatus () const { return m_uDiskAttrStatus; }
Expand Down Expand Up @@ -4690,15 +4688,10 @@ bool RtIndex_t::LoadRamChunk ( DWORD uVersion, bool bRebuildInfixes )
}


/// store the index settings and flag the stripper as initialized
void RtIndex_t::Setup ( const CSphIndexSettings & tSettings )
{
	// take a copy of the caller's settings
	m_tSettings = tSettings;

	// NOTE(review): presumably prevents a later stripper (re)initialization; confirm against the users of this flag
	m_bStripperInited = true;
}


void RtIndex_t::PostSetup()
{
ISphRtIndex::PostSetup();

m_iMaxCodepointLength = m_pTokenizer->GetMaxCodepointLength();

// bigram filter
Expand Down Expand Up @@ -12279,6 +12272,7 @@ bool PercolateIndex_c::MultiQueryEx ( int , const CSphQuery * , CSphQueryResult

void PercolateIndex_c::PostSetup()
{
PercolateIndex_i::PostSetup();
m_iMaxCodepointLength = m_pTokenizer->GetMaxCodepointLength();

// bigram filter
Expand Down
33 changes: 0 additions & 33 deletions src/sphinxsearch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9366,36 +9366,6 @@ class ExtRanker_Export_c : public ExtRanker_T<RankerState_Export_fn>
// RANKER FACTORY
//////////////////////////////////////////////////////////////////////////

/// warn when a wildcard keyword is shorter than the configured minimum infix or prefix length
/// szWord is the keyword text as it appears in the query (NULL and empty strings are ignored);
/// the warning, if any, is written to pResult->m_sWarning and replaces whatever was stored there
static void CheckQueryWord ( const char * szWord, CSphQueryResult * pResult, const CSphIndexSettings & tSettings )
{
	// nothing to check when neither prefixes nor infixes are enabled, or there is no word at all
	if ( ( !tSettings.m_iMinPrefixLen && !tSettings.m_iMinInfixLen ) || !szWord )
		return;

	// an empty keyword has no wildcard; bail out before szWord[iLen-1] reads in front of the buffer
	const int iLen = (int) strlen ( szWord );
	if ( !iLen )
		return;

	const bool bHeadStar = szWord[0]=='*';
	// a lone "*" is a head star only; do not count the very same character twice
	const bool bTailStar = iLen>1 && szWord[iLen-1]=='*';
	if ( !bHeadStar && !bTailStar )
		return;

	// keyword length with the leading and trailing wildcard removed
	const int iLenWOStars = iLen - ( bHeadStar ? 1 : 0 ) - ( bTailStar ? 1 : 0 );

	// the infix limit takes precedence; the prefix limit is only reported when the infix check passed
	if ( tSettings.m_iMinInfixLen > 0 && iLenWOStars < tSettings.m_iMinInfixLen )
		pResult->m_sWarning.SetSprintf ( "Query word length is less than min infix length. word: '%s' ", szWord );
	else if ( tSettings.m_iMinPrefixLen > 0 && iLenWOStars < tSettings.m_iMinPrefixLen )
		pResult->m_sWarning.SetSprintf ( "Query word length is less than min prefix length. word: '%s' ", szWord );
}


static void CheckExtendedQuery ( const XQNode_t * pNode, CSphQueryResult * pResult, const CSphIndexSettings & tSettings )
{
ARRAY_FOREACH ( i, pNode->m_dWords )
CheckQueryWord ( pNode->m_dWords[i].m_sWord.cstr(), pResult, tSettings );

ARRAY_FOREACH ( i, pNode->m_dChildren )
CheckExtendedQuery ( pNode->m_dChildren[i], pResult, tSettings );
}


struct ExtQwordOrderbyQueryPos_t
{
bool IsLess ( const ExtQword_t * pA, const ExtQword_t * pB ) const
Expand Down Expand Up @@ -9430,9 +9400,6 @@ ISphRanker * sphCreateRanker ( const XQQuery_t & tXQ, const CSphQuery * pQuery,
// shortcut
const CSphIndex * pIndex = tTermSetup.m_pIndex;

// check the keywords
CheckExtendedQuery ( tXQ.m_pRoot, pResult, pIndex->GetSettings() );

// fill payload mask
DWORD uPayloadMask = 0;
for ( int i=0; i < pIndex->GetMatchSchema().GetFieldsCount(); i++ )
Expand Down
2 changes: 1 addition & 1 deletion test/test_001/model.bin

Large diffs are not rendered by default.

Loading

0 comments on commit 0c45098

Please sign in to comment.