Skip to content
Permalink
Browse files

fixed #870 PQ to work with morphology and stemmers; added regressions…

… to test 321
  • Loading branch information...
tomatolog committed Aug 26, 2019
1 parent 912b048 commit 6b8c4242ef77c3fa4d0ccb7d76d81714b6728f0b
Showing with 38 additions and 12 deletions.
  1. +14 −1 src/sphinxpq.cpp
  2. +8 −10 src/sphinxpq.h
  3. +1 −1 test/test_321/model.bin
  4. +15 −0 test/test_321/test.xml
@@ -995,6 +995,19 @@ bool PercolateQwordSetup_c::QwordSetup ( ISphQword * pQword ) const
}


/// Resolve a keyword to its term ID in the attached term map.
///
/// When the proxy was built with morphology enabled, the word is first passed
/// through the morphology dictionary; only after that is it looked up in the
/// term map via DictMap_t::GetTerm().
///
/// @param pWord  keyword buffer; handed to the morphology dictionary (if any)
///               and then to the term map lookup
/// @return whatever DictMap_t::GetTerm() yields for the (possibly processed) word
SphWordID_t PercolateDictProxy_c::GetWordID ( BYTE * pWord )
{
	assert ( m_pDict );

	if ( m_bHasMorph )
	{
		// a morphology dictionary must be supplied whenever morphology is enabled
		assert ( m_pDictMorph );

		// the returned ID is intentionally discarded; the call is made for its
		// effect on pWord (presumably stems the buffer in place - TODO confirm)
		m_pDictMorph->GetWordID ( pWord );
	}

	// the map is stored as pointer-to-const; the lookup goes through a non-const pointer
	DictMap_t * pTermMap = const_cast<DictMap_t *>(m_pDict);
	return pTermMap->GetTerm ( pWord );
}


SphWordID_t DictMap_t::GetTerm ( BYTE * sWord ) const
{
const DictTerm_t * pTerm = m_hTerms.Find ( sphFNV64 ( sWord ) );
@@ -1007,7 +1020,7 @@ SphWordID_t DictMap_t::GetTerm ( BYTE * sWord ) const

PercolateMatchContext_t * PercolateIndex_c::CreateMatchContext ( const RtSegment_t * pSeg, const SegmentReject_t &tReject )
{
return new PercolateMatchContext_t ( pSeg, m_iMaxCodepointLength, m_pDict->HasMorphology (), this
return new PercolateMatchContext_t ( pSeg, m_iMaxCodepointLength, m_pDict->HasMorphology(), GetStatelessDict ( m_pDict ), this
, m_tSchema, tReject );
}

@@ -166,10 +166,12 @@ class PercolateDictProxy_c : public CSphDict
{
const DictMap_t * m_pDict = nullptr;
const bool m_bHasMorph = false;
CSphDictRefPtr_c m_pDictMorph { nullptr };

public:
explicit PercolateDictProxy_c ( bool bHasMorph )
explicit PercolateDictProxy_c ( bool bHasMorph, CSphDict * pDictMorph )
: m_bHasMorph ( bHasMorph )
, m_pDictMorph ( pDictMorph )
{
}

@@ -179,11 +181,7 @@ class PercolateDictProxy_c : public CSphDict
}

// only the methods below actually get called
SphWordID_t GetWordID ( BYTE * pWord ) final
{
assert ( m_pDict );
return const_cast<DictMap_t *>(m_pDict)->GetTerm ( pWord );
}
SphWordID_t GetWordID ( BYTE * pWord ) final;

SphWordID_t GetWordIDNonStemmed ( BYTE * pWord ) final
{
@@ -243,10 +241,10 @@ struct PercolateMatchContext_t : public PQMatchContextResult_t
const bool m_bUtf8 = false;
Warner_c m_dMsg;

PercolateMatchContext_t ( const RtSegment_t * pSeg, int iMaxCodepointLength, bool bHasMorph
, const PercolateIndex_i * pIndex, const ISphSchema &tSchema
, const SegmentReject_t &tReject )
: m_tDictMap ( bHasMorph )
PercolateMatchContext_t ( const RtSegment_t * pSeg, int iMaxCodepointLength, bool bHasMorph, CSphDict * pDictMorph
, const PercolateIndex_i * pIndex, const ISphSchema & tSchema
, const SegmentReject_t & tReject )
: m_tDictMap ( bHasMorph, pDictMorph )
, m_tSchema ( tSchema )
, m_tReject ( tReject )
, m_bUtf8 ( iMaxCodepointLength>1 )

Large diffs are not rendered by default.

@@ -57,6 +57,16 @@ index pq_mva
rt_attr_multi = mva3
}

index pq_stem
{
type = percolate
path = <data_path/>/pq_stem
rt_field = title
rt_attr_uint = gid

index_exact_words = 1
morphology = stem_ru
}

</config>

@@ -213,6 +223,11 @@ index pq_mva
<sphinxql>insert into pq_mva (query, tags) values('test1', 'q33')</sphinxql>
<sphinxql>select id, count(*) c, tags from pq_mva group by tags</sphinxql>

<!-- regression: stemmed terms were missed -->
<sphinxql>INSERT INTO pq_stem VALUES ( 'проверка' )</sphinxql>
<sphinxql>INSERT INTO pq_stem VALUES ( 'проверки' )</sphinxql>
<sphinxql>CALL PQ ('pq_stem', 'была проверка', 1 as docs, 0 as docs_json )</sphinxql>
<sphinxql>CALL PQ ('pq_stem', 'проверки делом', 1 as docs, 0 as docs_json )</sphinxql>
</sphqueries>

</test>

0 comments on commit 6b8c424

Please sign in to comment.
You can’t perform that action at this time.