Fix #430, memory leak in tokenizer management
Tokenizer switched to a refcounted management model; much of the code refactored.
klirichek committed Sep 6, 2018
1 parent 116c5f1 commit 5ee7f20
Showing 20 changed files with 264 additions and 322 deletions.
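The common thread in the changes below: raw `ISphTokenizer *` pointers that had to be freed by hand with `SafeDelete` are replaced by `ISphTokenizerRefPtr_c`, a refcounted smart pointer, so the tokenizer is released automatically when its last holder lets go. The sketch below is not the project's implementation, only a minimal illustration of the intrusive-refcounting pattern such a pointer typically wraps; all names in it (`RefCounted_c`, `RefPtr_T`, `MyTokenizer_c`) are invented for the example.

```cpp
#include <atomic>
#include <utility>

// Toy intrusive refcounted base: the object carries its own counter and
// destroys itself when the last reference is released.
class RefCounted_c
{
public:
	void AddRef () const { ++m_iRefs; }
	void Release () const
	{
		if ( --m_iRefs==0 )
			delete this;
	}

protected:
	virtual ~RefCounted_c () = default;

private:
	mutable std::atomic<int> m_iRefs { 1 }; // the creator holds the first reference
};

// Toy smart pointer: copies AddRef, destruction Releases.
template<typename T>
class RefPtr_T
{
public:
	RefPtr_T () = default;
	explicit RefPtr_T ( T * pPtr ) : m_pPtr ( pPtr ) {}   // adopts the creator's reference
	RefPtr_T ( const RefPtr_T & rhs ) : m_pPtr ( rhs.m_pPtr )
	{
		if ( m_pPtr )
			m_pPtr->AddRef();
	}
	RefPtr_T & operator= ( RefPtr_T rhs )                 // copy-and-swap keeps the counts balanced
	{
		std::swap ( m_pPtr, rhs.m_pPtr );
		return *this;
	}
	~RefPtr_T ()
	{
		if ( m_pPtr )
			m_pPtr->Release();                            // replaces every manual SafeDelete
	}

	T * operator-> () const { return m_pPtr; }
	operator T * () const { return m_pPtr; }              // so `if ( pTok )` and raw-pointer APIs keep working

private:
	T * m_pPtr = nullptr;
};

// Hypothetical refcounted payload and usage.
class MyTokenizer_c : public RefCounted_c {};

int main ()
{
	RefPtr_T<MyTokenizer_c> pTok { new MyTokenizer_c };   // refcount == 1
	RefPtr_T<MyTokenizer_c> pCopy = pTok;                 // refcount == 2
	return 0;                                             // both holders release; the object deletes itself
}
```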
src/gtests_rtstuff.cpp: 3 changes (1 addition & 2 deletions)
@@ -191,7 +191,6 @@ class RT : public ::testing::Test

virtual void TearDown ()
{
SafeDelete ( pTok );
sphRTDone ();
DeleteIndexFiles ( RT_INDEX_FILE_NAME );
}
@@ -200,7 +199,7 @@ class RT : public ::testing::Test
CSphSchema tSrcSchema;
CSphString sError, sWarning;

ISphTokenizer * pTok = nullptr;
ISphTokenizerRefPtr_c pTok;

CSphDictSettings tDictSettings;
};
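With the fixture member switched from a raw pointer to the refcounted pointer, `TearDown()` no longer needs the `SafeDelete` call: whatever a test assigned to `pTok` is released when the fixture object is destroyed. A rough equivalent in standard C++, with `std::shared_ptr` standing in for the refcounted pointer and invented type names:

```cpp
#include <memory>

struct Tokenizer_c {};                        // stand-in for ISphTokenizer

// Before: raw pointer member, cleanup duty lives in TearDown().
struct FixtureRaw_c
{
	Tokenizer_c * m_pTok = nullptr;
	~FixtureRaw_c () { delete m_pTok; }       // forget this and every test leaks
};

// After: smart-pointer member, cleanup is automatic.
struct FixtureRef_c
{
	std::shared_ptr<Tokenizer_c> m_pTok;      // released by the implicit destructor
};

int main ()
{
	FixtureRef_c tFixture;
	tFixture.m_pTok = std::make_shared<Tokenizer_c>();
	return 0;                                 // no explicit delete anywhere
}
```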
src/gtests_tokenizer.cpp: 21 changes (5 additions & 16 deletions)
@@ -30,7 +30,7 @@ class Tokenizer : public ::testing::Test
if ( !( uMode & TOK_NO_SHORT ) )
tSettings.m_iMinWordLen = 2;

ISphTokenizer * pTokenizer = ISphTokenizer::Create ( tSettings, NULL, sError );
ISphTokenizerRefPtr_c pTokenizer { ISphTokenizer::Create ( tSettings, NULL, sError ) };
if ( !( uMode & TOK_NO_DASH ) )
{
Verify ( pTokenizer->SetCaseFolding ( "-, 0..9, A..Z->a..z, _, a..z, U+80..U+FF", sError ) );
@@ -51,20 +51,11 @@ class Tokenizer : public ::testing::Test
// however, Clone() adds backslash as a special
// and that must be done *after* SetCaseFolding, otherwise it's not special any more
ISphTokenizer * pTokenizer1 = pTokenizer->Clone ( SPH_CLONE_QUERY );
SafeDelete ( pTokenizer );

return pTokenizer1;
}


void TearDown () override
{
SafeDelete ( m_pTokenizer );
}


ISphTokenizer * m_pTokenizer = nullptr;
ISphTokenizer *& pTokenizer = m_pTokenizer;
ISphTokenizerRefPtr_c m_pTokenizer;
ISphTokenizerRefPtr_c& pTokenizer = m_pTokenizer;
CSphString sError;
};
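`CreateTestTokenizer()` builds a tokenizer, configures it, clones it for query mode (the clone has to happen after `SetCaseFolding`, as the comment notes), and previously had to `SafeDelete` the original before returning the clone. Held in a refcounted pointer, the original is released automatically when the local variable goes out of scope. A sketch of that shape, again with `std::shared_ptr` and invented names:

```cpp
#include <memory>

struct Tok_c                                           // invented stand-in for ISphTokenizer
{
	bool SetCaseFolding ( const char * ) { return true; }
	Tok_c * Clone () const { return new Tok_c ( *this ); }
};

std::shared_ptr<Tok_c> CreateTestTokenizer ()
{
	std::shared_ptr<Tok_c> pTokenizer { new Tok_c };   // local holder
	pTokenizer->SetCaseFolding ( "-, 0..9, a..z" );

	// clone *after* configuration, as the comment in the test helper explains
	std::shared_ptr<Tok_c> pClone { pTokenizer->Clone() };
	return pClone;                                     // pTokenizer releases the original here;
	                                                   // no SafeDelete before the return
}

int main ()
{
	auto pTok = CreateTestTokenizer();
	return 0;                                          // the clone is released here
}
```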

@@ -541,7 +532,7 @@ TEST_P ( TokenizerP, short_token_handling )
NULL
};

ISphTokenizer * pShortTokenizer = m_pTokenizer->Clone ( SPH_CLONE_QUERY );
ISphTokenizerRefPtr_c pShortTokenizer { m_pTokenizer->Clone ( SPH_CLONE_QUERY ) };
pShortTokenizer->AddPlainChar ( '*' );

CSphTokenizerSettings tSettings = pShortTokenizer->GetSettings ();
@@ -563,8 +554,6 @@ TEST_P ( TokenizerP, short_token_handling )
ASSERT_FALSE ( dTestsShort[iCur] );
iCur++;
}

SafeDelete ( pShortTokenizer );
}
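This test clones the fixture tokenizer and used to `SafeDelete` the clone only at the very end, so a failing `ASSERT_*` (which returns from the test body early) skipped the cleanup and leaked the clone. Kept in a refcounted pointer, the clone is released on every exit path. A minimal GoogleTest-flavoured sketch with an invented `Tok_c` type and `std::shared_ptr` as the stand-in:

```cpp
#include <gtest/gtest.h>
#include <memory>

struct Tok_c                                      // invented stand-in for ISphTokenizer
{
	Tok_c * Clone () const { return new Tok_c ( *this ); }
	void AddPlainChar ( char ) {}
	const char * GetToken () { return nullptr; }
};

TEST ( TokenizerSketch, clone_released_on_every_path )
{
	Tok_c tBase;
	std::shared_ptr<Tok_c> pShortTokenizer { tBase.Clone() }; // was: raw pointer + SafeDelete at the end
	pShortTokenizer->AddPlainChar ( '*' );

	// If this assertion fails, gtest returns from the test body right here.
	// With a raw pointer that used to skip the trailing SafeDelete; the smart
	// pointer's destructor still runs and frees the clone.
	ASSERT_TRUE ( pShortTokenizer->GetToken()==nullptr );
}
```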

TEST_P( TokenizerP, boundaries )
@@ -676,7 +665,7 @@ class QueryParser : public Tokenizer
tSchema.AddField ( "title" );
tSchema.AddField ( "body" );

CSphScopedPtr<ISphTokenizer> pBase ( sphCreateUTF8Tokenizer () );
ISphTokenizerRefPtr_c pBase ( sphCreateUTF8Tokenizer () );
CSphTokenizerSettings tTokenizerSetup;
tTokenizerSetup.m_iMinWordLen = 2;
tTokenizerSetup.m_sSynonymsFile = g_sTmpfile;
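Here a `CSphScopedPtr` (single owner, deletes at the end of its scope) gives way to the refcounted pointer, so a tokenizer built in a setup helper can be shared with other components that keep their own reference, without dangling pointers or double deletes. Roughly, in standard C++ terms with invented names:

```cpp
#include <memory>
#include <utility>

struct Tok_c {};                                    // invented stand-in for ISphTokenizer

struct QueryParserFixture_c
{
	std::shared_ptr<Tok_c> m_pTok;                  // the fixture keeps its own reference
	void SetTokenizer ( std::shared_ptr<Tok_c> pTok ) { m_pTok = std::move ( pTok ); }
};

int main ()
{
	QueryParserFixture_c tParser;
	{
		// A scoped (single-owner) pointer would delete the tokenizer at this block's
		// closing brace, leaving tParser.m_pTok dangling; shared ownership keeps it alive.
		std::shared_ptr<Tok_c> pBase = std::make_shared<Tok_c>();
		tParser.SetTokenizer ( pBase );
	}
	return 0;                                       // last reference (inside tParser) is released here
}
```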
src/indexer.cpp: 4 changes (1 addition & 3 deletions)
@@ -932,7 +932,7 @@ bool DoIndex ( const CSphConfigSection & hIndex, const char * sIndexName,
CSphDictSettings tDictSettings;
sphConfDictionary ( hIndex, tDictSettings );

ISphTokenizer * pTokenizer = ISphTokenizer::Create ( tTokSettings, NULL, sError );
ISphTokenizerRefPtr_c pTokenizer { ISphTokenizer::Create ( tTokSettings, NULL, sError ) };
if ( !pTokenizer )
sphDie ( "index '%s': %s", sIndexName, sError.cstr() );

@@ -1101,7 +1101,6 @@ bool DoIndex ( const CSphConfigSection & hIndex, const char * sIndexName,
{
fprintf ( stdout, "ERROR: index '%s': failed to configure some of the sources, will not index.\n", sIndexName );
SafeDelete ( pDict );
SafeDelete ( pTokenizer );
return false;
}

@@ -1157,7 +1156,6 @@ bool DoIndex ( const CSphConfigSection & hIndex, const char * sIndexName,
tDict.Save ( g_sBuildStops, g_iTopStops, g_bBuildFreqs );

SafeDelete ( pFieldFilter );
SafeDelete ( pTokenizer );

bOK = true;

src/indextool.cpp: 4 changes (2 additions & 2 deletions)
@@ -65,10 +65,10 @@ void ApplyMorphology ( CSphIndex * pIndex )
dInBuffer.Add(0);
dOutBuffer.Reserve ( dInBuffer.GetLength() );

CSphScopedPtr<ISphTokenizer> pTokenizer ( pIndex->GetTokenizer()->Clone ( SPH_CLONE_INDEX ) );
ISphTokenizerRefPtr_c pTokenizer { pIndex->GetTokenizer()->Clone ( SPH_CLONE_INDEX ) };
CSphDict * pDict = pIndex->GetDictionary();
BYTE * sBufferToDump = &dInBuffer[0];
if ( pTokenizer.Ptr() )
if ( pTokenizer )
{
pTokenizer->SetBuffer ( &dInBuffer[0], dInBuffer.GetLength() );
while ( BYTE * sToken = pTokenizer->GetToken() )
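The `if ( pTokenizer.Ptr() )` test becomes a plain `if ( pTokenizer )` because the refcounted pointer can be tested and dereferenced like the raw pointer it wraps, typically via an implicit conversion operator as in the first sketch above. A tiny hypothetical wrapper showing just that part (ownership handling omitted):

```cpp
// Minimal wrapper sketch: enough to show why `if ( pTokenizer )` works without .Ptr().
template<typename T>
class PtrView_T
{
public:
	explicit PtrView_T ( T * pPtr = nullptr ) : m_pPtr ( pPtr ) {}

	T * operator-> () const { return m_pPtr; }
	operator T * () const { return m_pPtr; }   // implicit raw-pointer conversion: `if ( p )`,
	                                           // passing to APIs that take T*, etc.
private:
	T * m_pPtr = nullptr;                      // (refcounting/ownership omitted in this sketch)
};

struct Tok_c { int GetToken () { return 0; } };

int main ()
{
	Tok_c tTok;
	PtrView_T<Tok_c> pTokenizer { &tTok };
	if ( pTokenizer )                          // no .Ptr() needed
		pTokenizer->GetToken();
	return 0;
}
```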
src/searchd.cpp: 4 changes (2 additions & 2 deletions)
@@ -18088,7 +18088,7 @@ bool RotateIndexGreedy ( const ServedIndex_c * pIndex, ServedDesc_t &tWlockedInd
}

// try to use new index
CSphScopedPtr<ISphTokenizer> pTokenizer ( tWlockedIndex.m_pIndex->LeakTokenizer () ); // FIXME! disable support of that old indexes and remove this bullshit
ISphTokenizerRefPtr_c pTokenizer { tWlockedIndex.m_pIndex->LeakTokenizer () }; // FIXME! disable support of that old indexes and remove this bullshit
CSphScopedPtr<CSphDict> pDictionary ( tWlockedIndex.m_pIndex->LeakDictionary () );

// bool bRolledBack = false;
Expand Down Expand Up @@ -18118,7 +18118,7 @@ bool RotateIndexGreedy ( const ServedIndex_c * pIndex, ServedDesc_t &tWlockedInd
sphWarning ( "rotating index '%s': %s", sIndex, tWlockedIndex.m_pIndex->GetLastWarning().cstr() );

if ( !tWlockedIndex.m_pIndex->GetTokenizer () )
tWlockedIndex.m_pIndex->SetTokenizer ( pTokenizer.LeakPtr () );
tWlockedIndex.m_pIndex->SetTokenizer ( pTokenizer );

if ( !tWlockedIndex.m_pIndex->GetDictionary () )
tWlockedIndex.m_pIndex->SetDictionary ( pDictionary.LeakPtr () );
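With the scoped pointer, handing the tokenizer back to the index meant calling `LeakPtr()` and trusting the receiver to delete it. With refcounting, `SetTokenizer ( pTokenizer )` can simply take another reference, and each side releases its own when done. In `std::shared_ptr` terms, with invented `Index_c`/`Tok_c` types:

```cpp
#include <memory>
#include <utility>

struct Tok_c {};                                          // invented stand-in for ISphTokenizer

struct Index_c
{
	std::shared_ptr<Tok_c> m_pTok;

	std::shared_ptr<Tok_c> LeakTokenizer ()               // hands out the reference it holds
	{
		return std::move ( m_pTok );
	}
	void SetTokenizer ( std::shared_ptr<Tok_c> pTok )     // takes its own reference; no LeakPtr needed
	{
		m_pTok = std::move ( pTok );
	}
	bool HasTokenizer () const { return m_pTok!=nullptr; }
};

int main ()
{
	Index_c tIndex;
	tIndex.SetTokenizer ( std::make_shared<Tok_c>() );

	// as in RotateIndexGreedy: stash the old tokenizer while the index is reloaded
	std::shared_ptr<Tok_c> pTokenizer = tIndex.LeakTokenizer();

	if ( !tIndex.HasTokenizer() )
		tIndex.SetTokenizer ( pTokenizer );               // was: SetTokenizer ( pTokenizer.LeakPtr() )
	return 0;                                             // remaining references are dropped here
}
```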
