Skip to content
Permalink
Browse files
fixed #625 lost form flag (exact) for sequence of lemmatizers; added regressions to test 207
  • Loading branch information
tomatolog committed Jan 9, 2019
1 parent 6b95d48 commit 33b4573529e7c7c7bce19bf9d54054866f30d99a
Showing with 55 additions and 1 deletion.
  1. +2 −0 src/sphinxaot.cpp
  2. +1 −1 test/test_207/model.bin
  3. +52 −0 test/test_207/test.xml
@@ -1520,6 +1520,7 @@ class CSphAotTokenizerRu : public CSphAotTokenizerTmpl
// ok, time to work on a next word
assert ( m_iCurrent<0 );
BYTE * pToken = Base::GetToken();
m_eTokenMorph = m_pTokenizer->GetTokenMorph();
if ( !pToken )
return nullptr;

@@ -1650,6 +1651,7 @@ class CSphAotTokenizer : public CSphAotTokenizerTmpl
// ok, time to work on a next word
assert ( m_iCurrent<0 );
BYTE * pToken = Base::GetToken();
m_eTokenMorph = m_pTokenizer->GetTokenMorph();
if ( !pToken )
return nullptr;

Large diffs are not rendered by default.

@@ -298,6 +298,48 @@ index test_over
min_word_len = 1
min_infix_len = 2
}

source src_multi_lem
{
type = mysql
<sql_settings/>
sql_query = SELECT 1, 11 as idd, 'маленькие ноги' as text UNION SELECT 2, 22 as idd, 'маленькая нога' as text
sql_query_pre = SET NAMES utf8
sql_attr_uint = idd
}

index multi_lem11
{
source = src_multi_lem
path = <data_path/>/multi_lem11
dict = keywords
charset_table = 0..9, A..Z->a..z, _, ., -, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F, U+0401->U+0435, U+0451->U+0435
morphology = lemmatize_ru_all, lemmatize_en_all
index_exact_words = 1
min_word_len = 1
}

index multi_lem12 : multi_lem11
{
source = src_multi_lem
path = <data_path/>/multi_lem12
morphology = lemmatize_ru_all, lemmatize_en_all, lemmatize_de_all
}

index multi_lem2 : multi_lem11
{
source = src_multi_lem
path = <data_path/>/multi_lem2
morphology = lemmatize_ru, lemmatize_en_all
}

index multi_lem3 : multi_lem11
{
source = src_multi_lem
path = <data_path/>/multi_lem3
morphology = lemmatize_ru_all, lemmatize_en
}

</config>

<db_create>
@@ -566,6 +608,16 @@ CREATE TABLE test_table
<sphinxql>SELECT id FROM test_en_all1 WHERE MATCH('stops'); show meta</sphinxql>
<sphinxql>SELECT id FROM test_en_all1 WHERE MATCH('stop'); show meta</sphinxql>

<!-- regression: the exact-form flag (=word) was lost when a sequence of lemmatizers is configured -->
<sphinxql>SELECT * FROM multi_lem11 WHERE MATCH('=маленькая')</sphinxql>
<sphinxql>SELECT * FROM multi_lem11 WHERE MATCH('=маленькие')</sphinxql>
<sphinxql>SELECT * FROM multi_lem12 WHERE MATCH('=маленькая')</sphinxql>
<sphinxql>SELECT * FROM multi_lem12 WHERE MATCH('=маленькие')</sphinxql>
<sphinxql>SELECT * FROM multi_lem2 WHERE MATCH('=маленькая')</sphinxql>
<sphinxql>SELECT * FROM multi_lem2 WHERE MATCH('=маленькие')</sphinxql>
<sphinxql>SELECT * FROM multi_lem3 WHERE MATCH('=маленькая')</sphinxql>
<sphinxql>SELECT * FROM multi_lem3 WHERE MATCH('=маленькие')</sphinxql>

</sphqueries>

</test>

0 comments on commit 33b4573

Please sign in to comment.