Skip to content

Commit

Permalink
Revert "FTS Tokenizer"
Browse files (browse the repository at this point in the history)
This reverts commit 0bed426.
  • Loading branch information
GeertBosch committed Mar 30, 2015
1 parent 465bb26 commit edc6739
Show file tree
Hide file tree
Showing 21 changed files with 100 additions and 355 deletions.
1 change: 0 additions & 1 deletion src/mongo/db/fts/SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ baseEnv.Library('base', [
'fts_spec.cpp',
'fts_spec_legacy.cpp',
'fts_language.cpp',
'fts_basic_tokenizer.cpp',
'fts_util.cpp',
'fts_element_iterator.cpp',
'stemmer.cpp',
Expand Down
90 changes: 0 additions & 90 deletions src/mongo/db/fts/fts_basic_tokenizer.cpp

This file was deleted.

79 changes: 0 additions & 79 deletions src/mongo/db/fts/fts_basic_tokenizer.h

This file was deleted.

6 changes: 0 additions & 6 deletions src/mongo/db/fts/fts_language.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@
#include <string>

#include "mongo/base/init.h"
#include "mongo/db/fts/fts_basic_tokenizer.h"
#include "mongo/stdx/memory.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/mongoutils/str.h"
#include "mongo/util/string_map.h"
Expand Down Expand Up @@ -81,10 +79,6 @@ namespace mongo {
LanguageMapV1 languageMapV1;
}

std::unique_ptr<FTSTokenizer> BasicFTSLanguage::createTokenizer() const {
return stdx::make_unique<BasicFTSTokenizer>(this);
}

MONGO_INITIALIZER_GROUP( FTSAllLanguagesRegistered, MONGO_NO_PREREQUISITES,
MONGO_NO_DEPENDENTS );

Expand Down
24 changes: 4 additions & 20 deletions src/mongo/db/fts/fts_language.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,8 @@ namespace mongo {

namespace fts {

class FTSTokenizer;

#define MONGO_FTS_LANGUAGE_DECLARE( language, name, version ) \
BasicFTSLanguage language; \
FTSLanguage language; \
MONGO_INITIALIZER_GENERAL( language, MONGO_NO_PREREQUISITES, \
( "FTSAllLanguagesRegistered" ) ) \
( ::mongo::InitializerContext* context ) { \
Expand Down Expand Up @@ -72,20 +70,12 @@ namespace mongo {
/** Create an uninitialized language. */
FTSLanguage();

virtual ~FTSLanguage() {}

/**
* Returns the language as a std::string in canonical form (lowercased English name). It is
* an error to call str() on an uninitialized language.
*/
const std::string& str() const;

/**
* Returns a new FTSTokenizer instance for this language.
* Lifetime is scoped to FTSLanguage (which are currently all process lifetime)
*/
virtual std::unique_ptr<FTSTokenizer> createTokenizer() const = 0;

/**
* Register std::string 'languageName' as a new language with text index version
* 'textIndexVersion'. Saves the resulting language to out-argument 'languageOut'.
Expand Down Expand Up @@ -130,15 +120,9 @@ namespace mongo {

typedef StatusWith<const FTSLanguage*> StatusWithFTSLanguage;


class BasicFTSLanguage : public FTSLanguage {
public:
std::unique_ptr<FTSTokenizer> createTokenizer() const override;
};

extern BasicFTSLanguage languagePorterV1;
extern BasicFTSLanguage languageEnglishV2;
extern BasicFTSLanguage languageFrenchV2;
extern FTSLanguage languagePorterV1;
extern FTSLanguage languageEnglishV2;
extern FTSLanguage languageFrenchV2;

}
}
31 changes: 17 additions & 14 deletions src/mongo/db/fts/fts_matcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
#include "mongo/platform/basic.h"

#include "mongo/db/fts/fts_matcher.h"
#include "mongo/db/fts/fts_tokenizer.h"
#include "mongo/db/fts/fts_element_iterator.h"
#include "mongo/platform/strcasestr.h"

Expand Down Expand Up @@ -97,13 +96,15 @@ namespace mongo {

bool FTSMatcher::_hasPositiveTerm_string( const FTSLanguage* language,
const string& raw ) const {
std::unique_ptr<FTSTokenizer> tokenizer(language->createTokenizer());

tokenizer->reset(raw.c_str(), _query.getCaseSensitive());

while (tokenizer->moveNext()) {
string word = tokenizer->get().toString();
if (_query.getPositiveTerms().count(word) > 0) {
Tokenizer i( *language, raw );
Stemmer stemmer( *language );
while ( i.more() ) {
Token t = i.next();
if ( t.type != Token::TEXT ) {
continue;
}
string word = stemmer.stem( _query.normalizeString( t.data ) );
if ( _query.getPositiveTerms().count( word ) > 0 ) {
return true;
}
}
Expand All @@ -129,12 +130,14 @@ namespace mongo {

bool FTSMatcher::_hasNegativeTerm_string( const FTSLanguage* language,
const string& raw ) const {
std::unique_ptr<FTSTokenizer> tokenizer(language->createTokenizer());

tokenizer->reset(raw.c_str(), _query.getCaseSensitive());

while (tokenizer->moveNext()) {
string word = tokenizer->get().toString();
Tokenizer i( *language, raw );
Stemmer stemmer( *language );
while ( i.more() ) {
Token t = i.next();
if ( t.type != Token::TEXT ) {
continue;
}
string word = stemmer.stem( _query.normalizeString( t.data ) );
if ( _query.getNegatedTerms().count( word ) > 0 ) {
return true;
}
Expand Down
Loading

0 comments on commit edc6739

Please sign in to comment.