Skip to content
Permalink
Browse files

fixed #1068 indexer produces bad index with negative skiplist offset on large data; set index version to 58
  • Loading branch information
tomatolog committed Nov 15, 2019
1 parent 996de77 commit 6e3fc9e88941c9427410f9d0adeebb7dd09a900f
Showing 2 changed files with 7 additions and 7 deletions.
  1. +6 −6 src/sphinx.cpp
  2. +1 −1 src/sphinxint.h
int m_iDocs;
int m_iHits;
BYTE m_uHint;
int m_iSkiplistPos; ///< position in .spe file; not exactly likely to hit 2B
int64_t m_iSkiplistPos; ///< position in .spe file
};

struct DictBlock_t
tEntry.m_iHits = pBin->UnzipInt();
tEntry.m_uHint = (BYTE) pBin->ReadByte();
if ( tEntry.m_iDocs > m_iSkiplistBlockSize )
tEntry.m_iSkiplistPos = pBin->UnzipInt();
tEntry.m_iSkiplistPos = pBin->UnzipOffset();
else
tEntry.m_iSkiplistPos = 0;
}
if ( tWord.m_uHint )
m_wrDict.PutByte ( tWord.m_uHint );
if ( tWord.m_iDocs > m_iSkiplistBlockSize )
m_wrDict.ZipInt ( tWord.m_iSkiplistPos );
m_wrDict.ZipOffset ( tWord.m_iSkiplistPos );

// build infixes
if ( pInfixer )
m_wrTmpDict.PutByte ( pWord->m_uHint );
assert ( ( pWord->m_iDocs > m_iSkiplistBlockSize )==( pWord->m_iSkiplistPos!=0 ) );
if ( pWord->m_iDocs > m_iSkiplistBlockSize )
m_wrTmpDict.ZipInt ( pWord->m_iSkiplistPos );
m_wrTmpDict.ZipOffset ( pWord->m_iSkiplistPos );
}

tBlock.m_iLen = (int)( m_wrTmpDict.GetPos() - tBlock.m_iPos );
pWord->m_uHint = sphDoclistHintPack ( tEntry.m_iDocs, tEntry.m_iDoclistLength );
pWord->m_iSkiplistPos = 0;
if ( tEntry.m_iDocs > m_iSkiplistBlockSize )
pWord->m_iSkiplistPos = (int)( tEntry.m_iSkiplistOffset );
pWord->m_iSkiplistPos = tEntry.m_iSkiplistOffset;
}

SphWordID_t CSphDictKeywords::GetWordID ( BYTE * pWord )
m_uHint = ( m_iDocs>=DOCLIST_HINT_THRESH ) ? *m_pBuf++ : 0;
m_iDoclistHint = DoclistHintUnpack ( m_iDocs, m_uHint );
if ( m_iDocs > m_iSkiplistBlockSize )
m_iSkiplistOffset = sphUnzipInt ( m_pBuf );
m_iSkiplistOffset = sphUnzipOffset ( m_pBuf );
else
m_iSkiplistOffset = 0;

@@ -71,7 +71,7 @@ inline const char * strerrorm ( int errnum )
//////////////////////////////////////////////////////////////////////////

const DWORD INDEX_MAGIC_HEADER = 0x58485053; ///< my magic 'SPHX' header
const DWORD INDEX_FORMAT_VERSION = 57; ///< my format version
const DWORD INDEX_FORMAT_VERSION = 58; ///< my format version

const char MAGIC_SYNONYM_WHITESPACE = 1; // used internally in tokenizer only
const char MAGIC_CODE_SENTENCE = 2; // emitted from tokenizer on sentence boundary

0 comments on commit 6e3fc9e

Please sign in to comment.
You can’t perform that action at this time.