Permalink
Browse files

fixed #234; added warning on unsupported infix length; fixed infix vs prefix length set; fixed models at tests 1,2,3,12,54,173,350
  • Loading branch information...
tomatolog committed Apr 19, 2018
1 parent 546e229 commit 0c4509898393993bc87194d2aca2070395ff7f83
@@ -19484,6 +19484,11 @@ ESphAddIndex AddIndex ( const char * szIndexName, const CSphConfigSection & hInd
sphWarning ( "dict=crc deprecated, use dict=keywords instead" );
if ( bWordDict && ( tSettings.m_dPrefixFields.GetLength() || tSettings.m_dInfixFields.GetLength() ) )
sphWarning ( "WARNING: index '%s': prefix_fields and infix_fields has no effect with dict=keywords, ignoring\n", szIndexName );
if ( bWordDict && tSettings.m_iMinInfixLen==1 )
{
sphWarn ( "min_infix_len must be greater than 1, changed to 2" );
tSettings.m_iMinInfixLen = 2;
}
if ( !bPercolate )
tIdx.m_pIndex = sphCreateIndexRT ( tSchema, szIndexName, iRamSize, hIndex["path"].cstr(), bWordDict );
@@ -1251,7 +1251,6 @@ class CSphTokenizerIndex : public CSphIndex
virtual bool Rename ( const char * ) { return false; }
virtual bool Lock () { return false; }
virtual void Unlock () {}
virtual void PostSetup() {}
virtual bool EarlyReject ( CSphQueryContext * , CSphMatch & ) const { return false; }
virtual const CSphSourceStats & GetStats () const { return g_tTmpDummyStat; }
virtual void GetStatus ( CSphIndexStatus* pRes ) const { assert (pRes); if ( pRes ) { pRes->m_iDiskUse = 0; pRes->m_iRamUse = 0;}}
@@ -1393,7 +1392,6 @@ class CSphIndex_VLN : public CSphIndex
virtual bool Lock ();
virtual void Unlock ();
virtual void PostSetup() {}
virtual bool MultiQuery ( const CSphQuery * pQuery, CSphQueryResult * pResult, int iSorters, ISphMatchSorter ** ppSorters, const CSphMultiQueryArgs & tArgs ) const;
virtual bool MultiQueryEx ( int iQueries, const CSphQuery * pQueries, CSphQueryResult ** ppResults, ISphMatchSorter ** ppSorters, const CSphMultiQueryArgs & tArgs ) const;
@@ -8637,6 +8635,12 @@ void CSphIndex::Setup ( const CSphIndexSettings & tSettings )
m_tSettings = tSettings;
}
/// Default post-setup step: adjusts the minimum prefix length for indexes
/// that use a word dictionary (m_bWordDict) and have infixes enabled.
void CSphIndex::PostSetup ()
{
	// only indexes that have a dictionary with the word-dict flag set are adjusted
	const bool bWordDict = m_pDict && m_pDict->GetSettings().m_bWordDict;
	if ( !bWordDict )
		return;

	// infixes are enabled but no prefix length was set: allow prefixes of any length
	const bool bInfixWithoutPrefix = m_tSettings.m_iMinInfixLen>0 && m_tSettings.m_iMinPrefixLen==0;
	if ( bInfixWithoutPrefix )
		m_tSettings.m_iMinPrefixLen = 1;
}
void CSphIndex::SetCacheSize ( int iMaxCachedDocs, int iMaxCachedHits )
{
@@ -9968,7 +9972,7 @@ class CSphHitBuilder
bool CreateIndexFiles ( const char * sDocName, const char * sHitName, const char * sSkipName, bool bInplace, int iWriteBuffer, CSphAutofile & tHit, SphOffset_t * pSharedOffset );
void HitReset ();
void cidxHit ( CSphAggregateHit * pHit, const CSphRowitem * pAttrs );
bool cidxDone ( int iMemLimit, int iMinInfixLen, int iMaxCodepointLen, DictHeader_t * pDictHeader );
bool cidxDone ( int iMemLimit, int & iMinInfixLen, int iMaxCodepointLen, DictHeader_t * pDictHeader );
int cidxWriteRawVLB ( int fd, CSphWordHit * pHit, int iHits, DWORD * pDocinfo, int iDocinfos, int iStride );
SphOffset_t GetHitfilePos () const { return m_wrHitlist.GetPos (); }
@@ -10574,7 +10578,7 @@ bool IndexBuildDone ( const BuildHeader_t & tBuildHeader, const WriteHeader_t &
}
bool CSphHitBuilder::cidxDone ( int iMemLimit, int iMinInfixLen, int iMaxCodepointLen, DictHeader_t * pDictHeader )
bool CSphHitBuilder::cidxDone ( int iMemLimit, int & iMinInfixLen, int iMaxCodepointLen, DictHeader_t * pDictHeader )
{
assert ( pDictHeader );
@@ -10593,7 +10597,10 @@ bool CSphHitBuilder::cidxDone ( int iMemLimit, int iMinInfixLen, int iMaxCodepoi
{
pDictHeader->m_iInfixCodepointBytes = iMaxCodepointLen;
if ( iMinInfixLen==1 )
sphWarn ( "min_infix_len must be greater 1, clamped" );
{
sphWarn ( "min_infix_len must be greater than 1, changed to 2" );
iMinInfixLen = 2;
}
}
if ( !m_pDict->DictEnd ( pDictHeader, iMemLimit, *m_pLastError, m_pThrottle ) )
@@ -14234,7 +14241,8 @@ bool CSphIndex_VLN::DoMerge ( const CSphIndex_VLN * pDstIndex, const CSphIndex_V
tFlush.m_dFieldMask.UnsetAll();
tHitBuilder.cidxHit ( &tFlush, NULL );
if ( !tHitBuilder.cidxDone ( iHitBufferSize, pSettings->m_tSettings.m_iMinInfixLen,
int iMinInfixLen = pSettings->m_tSettings.m_iMinInfixLen;
if ( !tHitBuilder.cidxDone ( iHitBufferSize, iMinInfixLen,
pSettings->m_pTokenizer->GetMaxCodepointLength(), &tBuildHeader ) )
return false;
@@ -17688,8 +17696,7 @@ bool sphExpandGetWords ( const char * sWord, const ExpansionContext_t & tCtx, IS
iPrefix++;
// do not expand prefixes under min length
int iMinLen = Max ( tCtx.m_iMinPrefixLen, tCtx.m_iMinInfixLen );
if ( iPrefix<iMinLen )
if ( iPrefix<tCtx.m_iMinPrefixLen )
return false;
int iBytes = sCodes - sPrefix;
@@ -3367,7 +3367,7 @@ class CSphIndex : public ISphKeywordsStat
virtual void Unlock () = 0;
/// called when index is loaded and prepared to work
virtual void PostSetup() = 0;
virtual void PostSetup();
public:
/// return index document, bytes totals (FIXME? remove this in favor of GetStatus() maybe?)
@@ -1264,8 +1264,6 @@ struct RtIndex_t : public ISphRtIndex, public ISphNoncopyable, public ISphWordli
virtual void PostSetup();
virtual bool IsRT() const { return true; }
virtual void Setup ( const CSphIndexSettings & tSettings );
virtual int UpdateAttributes ( const CSphAttrUpdate & tUpd, int iIndex, CSphString & sError, CSphString & sWarning );
virtual bool SaveAttributes ( CSphString & sError ) const;
virtual DWORD GetAttributeStatus () const { return m_uDiskAttrStatus; }
@@ -4690,15 +4688,10 @@ bool RtIndex_t::LoadRamChunk ( DWORD uVersion, bool bRebuildInfixes )
}
/// Stores a copy of the supplied index settings and raises the
/// m_bStripperInited flag.
void RtIndex_t::Setup ( const CSphIndexSettings & tSettings )
{
	// the two assignments do not depend on each other
	m_tSettings = tSettings;
	m_bStripperInited = true;
}
void RtIndex_t::PostSetup()
{
ISphRtIndex::PostSetup();
m_iMaxCodepointLength = m_pTokenizer->GetMaxCodepointLength();
// bigram filter
@@ -12279,6 +12272,7 @@ bool PercolateIndex_c::MultiQueryEx ( int , const CSphQuery * , CSphQueryResult
void PercolateIndex_c::PostSetup()
{
PercolateIndex_i::PostSetup();
m_iMaxCodepointLength = m_pTokenizer->GetMaxCodepointLength();
// bigram filter
@@ -9366,36 +9366,6 @@ class ExtRanker_Export_c : public ExtRanker_T<RankerState_Export_fn>
// RANKER FACTORY
//////////////////////////////////////////////////////////////////////////
/// Puts a warning into the query result when a wildcard keyword is shorter
/// than the index's minimum infix or prefix length.
///
/// Only keywords that start and/or end with '*' are examined; the stars are
/// not counted towards the keyword length. The infix limit is checked first,
/// the prefix limit only when the infix check did not fire.
///
/// @param szWord     keyword text; NULL and empty keywords are ignored
/// @param pResult    query result whose m_sWarning receives the message
/// @param tSettings  index settings supplying m_iMinPrefixLen / m_iMinInfixLen
static void CheckQueryWord ( const char * szWord, CSphQueryResult * pResult, const CSphIndexSettings & tSettings )
{
	// nothing to check when neither limit is set, or when there is no keyword;
	// the empty-string guard also keeps szWord[iLen-1] below from reading before the buffer
	if ( ( !tSettings.m_iMinPrefixLen && !tSettings.m_iMinInfixLen ) || !szWord || !*szWord )
		return;

	const int iLen = (int) strlen ( szWord );
	const bool bHeadStar = szWord[0]=='*';
	// a lone "*" is one star only; do not count the same character as both head and tail
	const bool bTailStar = iLen>1 && szWord[iLen-1]=='*';
	if ( !bHeadStar && !bTailStar )
		return;

	const int iLenWOStars = iLen - ( bHeadStar ? 1 : 0 ) - ( bTailStar ? 1 : 0 );

	if ( tSettings.m_iMinInfixLen > 0 && iLenWOStars < tSettings.m_iMinInfixLen )
		pResult->m_sWarning.SetSprintf ( "Query word length is less than min infix length. word: '%s' ", szWord );
	else if ( tSettings.m_iMinPrefixLen > 0 && iLenWOStars < tSettings.m_iMinPrefixLen )
		pResult->m_sWarning.SetSprintf ( "Query word length is less than min prefix length. word: '%s' ", szWord );
}
static void CheckExtendedQuery ( const XQNode_t * pNode, CSphQueryResult * pResult, const CSphIndexSettings & tSettings )
{
ARRAY_FOREACH ( i, pNode->m_dWords )
CheckQueryWord ( pNode->m_dWords[i].m_sWord.cstr(), pResult, tSettings );
ARRAY_FOREACH ( i, pNode->m_dChildren )
CheckExtendedQuery ( pNode->m_dChildren[i], pResult, tSettings );
}
struct ExtQwordOrderbyQueryPos_t
{
bool IsLess ( const ExtQword_t * pA, const ExtQword_t * pB ) const
@@ -9430,9 +9400,6 @@ ISphRanker * sphCreateRanker ( const XQQuery_t & tXQ, const CSphQuery * pQuery,
// shortcut
const CSphIndex * pIndex = tTermSetup.m_pIndex;
// check the keywords
CheckExtendedQuery ( tXQ.m_pRoot, pResult, pIndex->GetSettings() );
// fill payload mask
DWORD uPayloadMask = 0;
for ( int i=0; i < pIndex->GetMatchSchema().GetFieldsCount(); i++ )

Large diffs are not rendered by default.

Oops, something went wrong.
Oops, something went wrong.

0 comments on commit 0c45098

Please sign in to comment.