Fix #430, memory leak in tokenizer management
Tokenizer switched to a refcounted management model; much of the code refactored.
klirichek committed Sep 6, 2018
1 parent 116c5f1 commit 5ee7f20
Showing 20 changed files with 264 additions and 322 deletions.
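The common thread in the changes below: raw `ISphTokenizer *` pointers that had to be freed by hand with `SafeDelete` are replaced by `ISphTokenizerRefPtr_c`, a refcounted smart pointer, so the tokenizer is released automatically when its last holder lets go. The sketch below is not the project's implementation, only a minimal illustration of the intrusive-refcounting pattern such a pointer typically wraps; all names in it (`RefCounted_c`, `RefPtr_T`, `MyTokenizer_c`) are invented for the example.

```cpp
#include <atomic>
#include <utility>

// Toy intrusive refcounted base: the object carries its own counter and
// destroys itself when the last reference is released.
class RefCounted_c
{
public:
	void AddRef () const { ++m_iRefs; }
	void Release () const
	{
		if ( --m_iRefs==0 )
			delete this;
	}

protected:
	virtual ~RefCounted_c () = default;

private:
	mutable std::atomic<int> m_iRefs { 1 }; // the creator holds the first reference
};

// Toy smart pointer: copies AddRef, destruction Releases.
template<typename T>
class RefPtr_T
{
public:
	RefPtr_T () = default;
	explicit RefPtr_T ( T * pPtr ) : m_pPtr ( pPtr ) {}   // adopts the creator's reference
	RefPtr_T ( const RefPtr_T & rhs ) : m_pPtr ( rhs.m_pPtr )
	{
		if ( m_pPtr )
			m_pPtr->AddRef();
	}
	RefPtr_T & operator= ( RefPtr_T rhs )                 // copy-and-swap keeps the counts balanced
	{
		std::swap ( m_pPtr, rhs.m_pPtr );
		return *this;
	}
	~RefPtr_T ()
	{
		if ( m_pPtr )
			m_pPtr->Release();                            // replaces every manual SafeDelete
	}

	T * operator-> () const { return m_pPtr; }
	operator T * () const { return m_pPtr; }              // so `if ( pTok )` and raw-pointer APIs keep working

private:
	T * m_pPtr = nullptr;
};

// Hypothetical refcounted payload and usage.
class MyTokenizer_c : public RefCounted_c {};

int main ()
{
	RefPtr_T<MyTokenizer_c> pTok { new MyTokenizer_c };   // refcount == 1
	RefPtr_T<MyTokenizer_c> pCopy = pTok;                 // refcount == 2
	return 0;                                             // both holders release; the object deletes itself
}
```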
src/gtests_rtstuff.cpp: 3 changes (1 addition & 2 deletions)
@@ -191,7 +191,6 @@ class RT : public ::testing::Test

virtual void TearDown ()
{
SafeDelete ( pTok );
sphRTDone ();
DeleteIndexFiles ( RT_INDEX_FILE_NAME );
}
@@ -200,7 +199,7 @@ class RT : public ::testing::Test
CSphSchema tSrcSchema;
CSphString sError, sWarning;

ISphTokenizer * pTok = nullptr;
ISphTokenizerRefPtr_c pTok;

CSphDictSettings tDictSettings;
};
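With the fixture member switched from a raw pointer to the refcounted pointer, `TearDown()` no longer needs the `SafeDelete` call: whatever a test assigned to `pTok` is released when the fixture object is destroyed. A rough equivalent in standard C++, with `std::shared_ptr` standing in for the refcounted pointer and invented type names:

```cpp
#include <memory>

struct Tokenizer_c {};                        // stand-in for ISphTokenizer

// Before: raw pointer member, cleanup duty lives in TearDown().
struct FixtureRaw_c
{
	Tokenizer_c * m_pTok = nullptr;
	~FixtureRaw_c () { delete m_pTok; }       // forget this and every test leaks
};

// After: smart-pointer member, cleanup is automatic.
struct FixtureRef_c
{
	std::shared_ptr<Tokenizer_c> m_pTok;      // released by the implicit destructor
};

int main ()
{
	FixtureRef_c tFixture;
	tFixture.m_pTok = std::make_shared<Tokenizer_c>();
	return 0;                                 // no explicit delete anywhere
}
```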
src/gtests_tokenizer.cpp: 21 changes (5 additions & 16 deletions)
@@ -30,7 +30,7 @@ class Tokenizer : public ::testing::Test
if ( !( uMode & TOK_NO_SHORT ) )
tSettings.m_iMinWordLen = 2;

ISphTokenizer * pTokenizer = ISphTokenizer::Create ( tSettings, NULL, sError );
ISphTokenizerRefPtr_c pTokenizer { ISphTokenizer::Create ( tSettings, NULL, sError ) };
if ( !( uMode & TOK_NO_DASH ) )
{
Verify ( pTokenizer->SetCaseFolding ( "-, 0..9, A..Z->a..z, _, a..z, U+80..U+FF", sError ) );
@@ -51,20 +51,11 @@ class Tokenizer : public ::testing::Test
// however, Clone() adds backslash as a special
// and that must be done *after* SetCaseFolding, otherwise it's not special any more
ISphTokenizer * pTokenizer1 = pTokenizer->Clone ( SPH_CLONE_QUERY );
SafeDelete ( pTokenizer );

return pTokenizer1;
}


void TearDown () override
{
SafeDelete ( m_pTokenizer );
}


ISphTokenizer * m_pTokenizer = nullptr;
ISphTokenizer *& pTokenizer = m_pTokenizer;
ISphTokenizerRefPtr_c m_pTokenizer;
ISphTokenizerRefPtr_c& pTokenizer = m_pTokenizer;
CSphString sError;
};
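`CreateTestTokenizer()` builds a tokenizer, configures it, clones it for query mode (the clone has to happen after `SetCaseFolding`, as the comment notes), and previously had to `SafeDelete` the original before returning the clone. Held in a refcounted pointer, the original is released automatically when the local variable goes out of scope. A sketch of that shape, again with `std::shared_ptr` and invented names:

```cpp
#include <memory>

struct Tok_c                                           // invented stand-in for ISphTokenizer
{
	bool SetCaseFolding ( const char * ) { return true; }
	Tok_c * Clone () const { return new Tok_c ( *this ); }
};

std::shared_ptr<Tok_c> CreateTestTokenizer ()
{
	std::shared_ptr<Tok_c> pTokenizer { new Tok_c };   // local holder
	pTokenizer->SetCaseFolding ( "-, 0..9, a..z" );

	// clone *after* configuration, as the comment in the test helper explains
	std::shared_ptr<Tok_c> pClone { pTokenizer->Clone() };
	return pClone;                                     // pTokenizer releases the original here;
	                                                   // no SafeDelete before the return
}

int main ()
{
	auto pTok = CreateTestTokenizer();
	return 0;                                          // the clone is released here
}
```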

@@ -541,7 +532,7 @@ TEST_P ( TokenizerP, short_token_handling )
NULL
};

ISphTokenizer * pShortTokenizer = m_pTokenizer->Clone ( SPH_CLONE_QUERY );
ISphTokenizerRefPtr_c pShortTokenizer { m_pTokenizer->Clone ( SPH_CLONE_QUERY ) };
pShortTokenizer->AddPlainChar ( '*' );

CSphTokenizerSettings tSettings = pShortTokenizer->GetSettings ();
@@ -563,8 +554,6 @@ TEST_P ( TokenizerP, short_token_handling )
ASSERT_FALSE ( dTestsShort[iCur] );
iCur++;
}

SafeDelete ( pShortTokenizer );
}
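This test clones the fixture tokenizer and used to `SafeDelete` the clone only at the very end, so a failing `ASSERT_*` (which returns from the test body early) skipped the cleanup and leaked the clone. Kept in a refcounted pointer, the clone is released on every exit path. A minimal GoogleTest-flavoured sketch with an invented `Tok_c` type and `std::shared_ptr` as the stand-in:

```cpp
#include <gtest/gtest.h>
#include <memory>

struct Tok_c                                      // invented stand-in for ISphTokenizer
{
	Tok_c * Clone () const { return new Tok_c ( *this ); }
	void AddPlainChar ( char ) {}
	const char * GetToken () { return nullptr; }
};

TEST ( TokenizerSketch, clone_released_on_every_path )
{
	Tok_c tBase;
	std::shared_ptr<Tok_c> pShortTokenizer { tBase.Clone() }; // was: raw pointer + SafeDelete at the end
	pShortTokenizer->AddPlainChar ( '*' );

	// If this assertion fails, gtest returns from the test body right here.
	// With a raw pointer that used to skip the trailing SafeDelete; the smart
	// pointer's destructor still runs and frees the clone.
	ASSERT_TRUE ( pShortTokenizer->GetToken()==nullptr );
}
```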

TEST_P( TokenizerP, boundaries )
@@ -676,7 +665,7 @@ class QueryParser : public Tokenizer
tSchema.AddField ( "title" );
tSchema.AddField ( "body" );

CSphScopedPtr<ISphTokenizer> pBase ( sphCreateUTF8Tokenizer () );
ISphTokenizerRefPtr_c pBase ( sphCreateUTF8Tokenizer () );
CSphTokenizerSettings tTokenizerSetup;
tTokenizerSetup.m_iMinWordLen = 2;
tTokenizerSetup.m_sSynonymsFile = g_sTmpfile;
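Here a `CSphScopedPtr` (single owner, deletes at the end of its scope) gives way to the refcounted pointer, so a tokenizer built in a setup helper can be shared with other components that keep their own reference, without dangling pointers or double deletes. Roughly, in standard C++ terms with invented names:

```cpp
#include <memory>
#include <utility>

struct Tok_c {};                                    // invented stand-in for ISphTokenizer

struct QueryParserFixture_c
{
	std::shared_ptr<Tok_c> m_pTok;                  // the fixture keeps its own reference
	void SetTokenizer ( std::shared_ptr<Tok_c> pTok ) { m_pTok = std::move ( pTok ); }
};

int main ()
{
	QueryParserFixture_c tParser;
	{
		// A scoped (single-owner) pointer would delete the tokenizer at this block's
		// closing brace, leaving tParser.m_pTok dangling; shared ownership keeps it alive.
		std::shared_ptr<Tok_c> pBase = std::make_shared<Tok_c>();
		tParser.SetTokenizer ( pBase );
	}
	return 0;                                       // last reference (inside tParser) is released here
}
```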
src/indexer.cpp: 4 changes (1 addition & 3 deletions)
@@ -932,7 +932,7 @@ bool DoIndex ( const CSphConfigSection & hIndex, const char * sIndexName,
CSphDictSettings tDictSettings;
sphConfDictionary ( hIndex, tDictSettings );

ISphTokenizer * pTokenizer = ISphTokenizer::Create ( tTokSettings, NULL, sError );
ISphTokenizerRefPtr_c pTokenizer { ISphTokenizer::Create ( tTokSettings, NULL, sError ) };
if ( !pTokenizer )
sphDie ( "index '%s': %s", sIndexName, sError.cstr() );

@@ -1101,7 +1101,6 @@ bool DoIndex ( const CSphConfigSection & hIndex, const char * sIndexName,
{
fprintf ( stdout, "ERROR: index '%s': failed to configure some of the sources, will not index.\n", sIndexName );
SafeDelete ( pDict );
SafeDelete ( pTokenizer );
return false;
}

@@ -1157,7 +1156,6 @@ bool DoIndex ( const CSphConfigSection & hIndex, const char * sIndexName,
tDict.Save ( g_sBuildStops, g_iTopStops, g_bBuildFreqs );

SafeDelete ( pFieldFilter );
SafeDelete ( pTokenizer );

bOK = true;

src/indextool.cpp: 4 changes (2 additions & 2 deletions)
@@ -65,10 +65,10 @@ void ApplyMorphology ( CSphIndex * pIndex )
dInBuffer.Add(0);
dOutBuffer.Reserve ( dInBuffer.GetLength() );

CSphScopedPtr<ISphTokenizer> pTokenizer ( pIndex->GetTokenizer()->Clone ( SPH_CLONE_INDEX ) );
ISphTokenizerRefPtr_c pTokenizer { pIndex->GetTokenizer()->Clone ( SPH_CLONE_INDEX ) };
CSphDict * pDict = pIndex->GetDictionary();
BYTE * sBufferToDump = &dInBuffer[0];
if ( pTokenizer.Ptr() )
if ( pTokenizer )
{
pTokenizer->SetBuffer ( &dInBuffer[0], dInBuffer.GetLength() );
while ( BYTE * sToken = pTokenizer->GetToken() )
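The `if ( pTokenizer.Ptr() )` test becomes a plain `if ( pTokenizer )` because the refcounted pointer can be tested and dereferenced like the raw pointer it wraps, typically via an implicit conversion operator as in the first sketch above. A tiny hypothetical wrapper showing just that part (ownership handling omitted):

```cpp
// Minimal wrapper sketch: enough to show why `if ( pTokenizer )` works without .Ptr().
template<typename T>
class PtrView_T
{
public:
	explicit PtrView_T ( T * pPtr = nullptr ) : m_pPtr ( pPtr ) {}

	T * operator-> () const { return m_pPtr; }
	operator T * () const { return m_pPtr; }   // implicit raw-pointer conversion: `if ( p )`,
	                                           // passing to APIs that take T*, etc.
private:
	T * m_pPtr = nullptr;                      // (refcounting/ownership omitted in this sketch)
};

struct Tok_c { int GetToken () { return 0; } };

int main ()
{
	Tok_c tTok;
	PtrView_T<Tok_c> pTokenizer { &tTok };
	if ( pTokenizer )                          // no .Ptr() needed
		pTokenizer->GetToken();
	return 0;
}
```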
src/searchd.cpp: 4 changes (2 additions & 2 deletions)
@@ -18088,7 +18088,7 @@ bool RotateIndexGreedy ( const ServedIndex_c * pIndex, ServedDesc_t &tWlockedInd
}

// try to use new index
CSphScopedPtr<ISphTokenizer> pTokenizer ( tWlockedIndex.m_pIndex->LeakTokenizer () ); // FIXME! disable support of that old indexes and remove this bullshit
ISphTokenizerRefPtr_c pTokenizer { tWlockedIndex.m_pIndex->LeakTokenizer () }; // FIXME! disable support of that old indexes and remove this bullshit
CSphScopedPtr<CSphDict> pDictionary ( tWlockedIndex.m_pIndex->LeakDictionary () );

// bool bRolledBack = false;
Expand Down Expand Up @@ -18118,7 +18118,7 @@ bool RotateIndexGreedy ( const ServedIndex_c * pIndex, ServedDesc_t &tWlockedInd
sphWarning ( "rotating index '%s': %s", sIndex, tWlockedIndex.m_pIndex->GetLastWarning().cstr() );

if ( !tWlockedIndex.m_pIndex->GetTokenizer () )
tWlockedIndex.m_pIndex->SetTokenizer ( pTokenizer.LeakPtr () );
tWlockedIndex.m_pIndex->SetTokenizer ( pTokenizer );

if ( !tWlockedIndex.m_pIndex->GetDictionary () )
tWlockedIndex.m_pIndex->SetDictionary ( pDictionary.LeakPtr () );
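With the scoped pointer, handing the tokenizer back to the index meant calling `LeakPtr()` and trusting the receiver to delete it. With refcounting, `SetTokenizer ( pTokenizer )` can simply take another reference, and each side releases its own when done. In `std::shared_ptr` terms, with invented `Index_c`/`Tok_c` types:

```cpp
#include <memory>
#include <utility>

struct Tok_c {};                                          // invented stand-in for ISphTokenizer

struct Index_c
{
	std::shared_ptr<Tok_c> m_pTok;

	std::shared_ptr<Tok_c> LeakTokenizer ()               // hands out the reference it holds
	{
		return std::move ( m_pTok );
	}
	void SetTokenizer ( std::shared_ptr<Tok_c> pTok )     // takes its own reference; no LeakPtr needed
	{
		m_pTok = std::move ( pTok );
	}
	bool HasTokenizer () const { return m_pTok!=nullptr; }
};

int main ()
{
	Index_c tIndex;
	tIndex.SetTokenizer ( std::make_shared<Tok_c>() );

	// as in RotateIndexGreedy: stash the old tokenizer while the index is reloaded
	std::shared_ptr<Tok_c> pTokenizer = tIndex.LeakTokenizer();

	if ( !tIndex.HasTokenizer() )
		tIndex.SetTokenizer ( pTokenizer );               // was: SetTokenizer ( pTokenizer.LeakPtr() )
	return 0;                                             // remaining references are dropped here
}
```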
