Skip to content

Commit

Permalink
fixed #2504 daemon crash on snippet generation for empty documents or empty query; added regressions to test 62
Browse files Browse the repository at this point in the history
  • Loading branch information
Stanislav Klinov committed Jul 21, 2016
1 parent 7ce29e9 commit e14a9bc
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 4 deletions.
5 changes: 2 additions & 3 deletions src/sphinxexcerpt.cpp
Expand Up @@ -440,7 +440,7 @@ void SnippetsDocIndex_c::ParseQuery ( const char * sQuery, ISphTokenizer * pToke
if ( !m_bQueryMode )
{
// parse bag-of-words query
int iQueryLen = strlen ( sQuery ); // FIXME!!! get length as argument
int iQueryLen = sQuery ? strlen ( sQuery ) : 0; // FIXME!!! get length as argument
pTokenizer->SetBuffer ( (BYTE *)sQuery, iQueryLen );

BYTE * sWord = NULL;
Expand Down Expand Up @@ -2997,7 +2997,6 @@ static void TokenizeDocument ( T & tFunctor, const CSphHTMLStripper * pStripper,
const char * pStartPtr = pTokenizer->GetBufferPtr ();
const char * pLastTokenEnd = pStartPtr;
const char * pBufferEnd = pTokenizer->GetBufferEnd();
assert ( pStartPtr && pLastTokenEnd );

BYTE sNonStemmed [ 3*SPH_MAX_WORD_LEN+4];

Expand Down Expand Up @@ -3853,7 +3852,7 @@ void sphBuildExcerpt ( ExcerptQuery_t & tOptions, const CSphIndex * pIndex, cons
pStripper = NULL;

// FIXME!!! check on real data (~100 Mb) as stripper changes len
int iDataLen = strlen ( pData );
int iDataLen = pData ? strlen ( pData ) : 0;

DoHighlighting ( tOptions, pIndex->GetSettings(), tExtQuery, eExtQuerySPZ, pData, iDataLen, pDict, pDocTokenizer, pStripper,
sWarning, sError, pQueryTokenizer, tOptions.m_dRes );
Expand Down
2 changes: 1 addition & 1 deletion test/test_062/model.bin
@@ -1 +1 @@
a:1:{i:0;a:1:{i:0;a:86:{i:0;s:14:"^sphinx month$";i:1;a:1:{i:0;s:113:"[B]Sphinx[A] clusters scale to billions of documents, terabytes of data, and billions of queries per [B]month[A].";}i:2;s:16:"^sphinx queries$";i:3;a:1:{i:0;s:101:"Sphinx clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:4;s:16:"^clusters month$";i:5;a:1:{i:0;s:101:"Sphinx clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:6;s:12:"^*inx *bytes";i:7;a:1:{i:0;s:113:"[B]Sphinx[A] clusters scale to billions of documents, [B]terabytes[A] of data, and billions of queries per month.";}i:8;s:3:"*i*";i:9;a:1:{i:0;s:125:"[B]Sphinx[A] clusters scale to [B]billions[A] of documents, terabytes of data, and [B]billions[A] of [B]queries[A] per month.";}i:10;s:4:"*on*";i:11;a:1:{i:0;s:119:"Sphinx clusters scale to [B]billions[A] of documents, terabytes of data, and [B]billions[A] of queries per [B]month[A].";}i:12;s:2:"*s";i:13;a:1:{i:0;s:137:"Sphinx [B]clusters[A] scale to [B]billions[A] of [B]documents[A], [B]terabytes[A] of data, and [B]billions[A] of [B]queries[A] per month.";}i:14;s:16:""clusters scale"";i:15;a:1:{i:0;s:107:"Sphinx [B]clusters scale[A] to billions of documents, terabytes of data, and billions of queries per month.";}i:16;s:23:""clusters do not scale"";i:17;a:1:{i:0;s:101:"Sphinx clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:18;s:7:""of d*"";i:19;a:1:{i:0;s:113:"Sphinx clusters scale to billions [B]of documents[A], terabytes [B]of data[A], and billions of queries per month.";}i:20;s:18:"terabyte* << quer*";i:21;a:1:{i:0;s:113:"Sphinx clusters scale to billions of documents, [B]terabytes[A] of data, and billions of [B]queries[A] per month.";}i:22;s:17:"data << terabyte*";i:23;a:1:{i:0;s:101:"Sphinx clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:24;s:16:""sphinx 
scale"~3";i:25;a:1:{i:0;s:113:"[B]Sphinx[A] clusters [B]scale[A] to billions of documents, terabytes of data, and billions of queries per month.";}i:26;s:19:""sphinx billions"~3";i:27;a:1:{i:0;s:101:"Sphinx clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:28;s:19:""silly documents"/1";i:29;a:1:{i:0;s:107:"Sphinx clusters scale to billions of [B]documents[A], terabytes of data, and billions of queries per month.";}i:30;s:28:""clusters scale to billions"";i:31;a:1:{i:0;s:107:"Sphinx [B]clusters scale to billions[A] of documents, terabytes of data, and billions of queries per month.";}i:32;s:41:""queries per month" | month | "per month"";i:33;a:1:{i:0;s:107:"Sphinx clusters scale to billions of documents, terabytes of data, and billions of [B]queries per month[A].";}i:34;s:19:""of d*" | "of data"";i:35;a:1:{i:0;s:113:"Sphinx clusters scale to billions [B]of documents[A], terabytes [B]of data[A], and billions of queries per month.";}i:36;s:25:""of data" -"of hedgedogs"";i:37;a:1:{i:0;s:107:"Sphinx clusters scale to billions of documents, terabytes [B]of data[A], and billions of queries per month.";}i:38;s:21:""documents terabytes"";i:39;a:1:{i:0;s:101:"Sphinx clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:40;s:13:"@title sphinx";i:41;a:1:{i:0;s:107:"[B]Sphinx[A] clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:42;s:12:"@text sphinx";i:43;a:1:{i:0;s:107:"[B]Sphinx[A] clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:44;s:15:"@text[3] sphinx";i:45;a:1:{i:0;s:107:"[B]Sphinx[A] clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:46;s:18:"@text[3] documents";i:47;a:1:{i:0;s:101:"Sphinx clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:48;s:18:"@text[7] 
documents";i:49;a:1:{i:0;s:107:"Sphinx clusters scale to billions of [B]documents[A], terabytes of data, and billions of queries per month.";}i:50;s:6:"SPHINX";i:51;a:1:{i:0;s:107:"[B]Sphinx[A] clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:52;s:4:"SPH*";i:53;a:1:{i:0;s:107:"[B]Sphinx[A] clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:54;s:5:"*PHI*";i:55;a:1:{i:0;s:107:"[B]Sphinx[A] clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:56;s:4:"*INX";i:57;a:1:{i:0;s:107:"[B]Sphinx[A] clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:58;s:39:" "*mmitt* u" | ommitt* | "committed u" ";i:59;a:1:{i:0;s:45:"support is just committed to Sphinx code base";}i:60;s:26:" *ommitt* | "committed u" ";i:61;a:1:{i:0;s:52:"support is just <b>committed</b> to Sphinx code base";}i:62;s:22:" *ommitt* committed u ";i:63;a:1:{i:0;s:52:"support is just <b>committed</b> to Sphinx code base";}i:64;s:28:" committed* | "committed p" ";i:65;a:1:{i:0;s:52:"support is just <b>committed</b> to Sphinx code base";}i:66;s:24:" committed* committed p ";i:67;a:1:{i:0;s:52:"support is just <b>committed</b> to Sphinx code base";}i:68;s:26:" (support ("committed*")) ";i:69;a:1:{i:0;s:59:"<b>support</b> is just <b>committed</b> to Sphinx code base";}i:70;s:35:" (support ("code*" | "code test")) ";i:71;a:1:{i:0;s:47:"<b>support</b> is ... Sphinx <b>code</b> ... ";}i:72;s:16:"din abcingelonhe";i:73;a:1:{i:0;s:66:" ... Onenes <b>din</b> Pas ... ccc <b>abcingelonhe</b> cc ... ";}i:74;a:1:{i:0;s:13:"тест ... ";}i:75;a:1:{i:0;s:14:"тест ... ";}i:76;a:1:{i:0;s:14:"тест ... ";}i:77;a:1:{i:0;s:18:"тест на ... ";}i:78;a:1:{i:0;s:19:"тест на ... ";}i:79;a:1:{i:0;s:65:" ... begin begin <b>some</b> <b>starred</b> <b>text</b> and ... ";}i:80;a:1:{i:0;s:56:" ... 
mid mid mid <b>the</b> <b>right</b> <b>text</b> end";}i:81;a:1:{i:0;s:347:"Our company's core technology platform is based on <b>Microsoft</b> applications, including the Windows NT operating system and a SQL server relational database, all residing on scaleable hardware. The software is constructed using an advanced proprietary <b>XML</b> framework and resides on an N-tier architecture. The support of open systems ...";}i:82;a:1:{i:0;s:262:" ... database, <b>all</b> residing <b>on</b> scaleable hardware. <b>The software</b> is constructed using <b>an</b> advanced proprietary ... Power Builder <b>and</b> are dependent <b>on an</b> Oracle relational database. <b>running</b> fast <b>and runs</b> out";}i:83;a:1:{i:0;s:769:"Our company's core technology platform is based <b>on</b> Microsoft <b>applications</b>, including <b>the</b> Windows NT operating system <b>and</b> a SQL server relational database, <b>all</b> residing <b>on</b> scaleable hardware. <b>The software</b> is constructed using <b>an</b> advanced proprietary XML framework <b>and</b> resides <b>on an</b> N-tier architecture. <b>The</b> support of open systems <b>allows</b> integration with a large variety of existing commercial, proprietary <b>and</b> legacy <b>applications</b>.  Other <b>applications</b>, which are <b>also</b> operational in a Microsoft NT environment, have been developed using Power Builder <b>and</b> are dependent <b>on an</b> Oracle relational database. <b>running</b> fast <b>and runs</b> out";}i:84;a:1:{i:0;s:283:" ... database, <b>all</b> residing <b>on</b> scaleable hardware. <b>The</b> <b>software</b> is constructed using <b>an</b> advanced proprietary ... Power Builder <b>and</b> are dependent <b>on</b> <b>an</b> Oracle relational database. 
<b>running</b> fast <b>and</b> <b>runs</b> out";}i:85;a:1:{i:0;s:797:"Our company's core technology platform is based <b>on</b> Microsoft <b>applications</b>, including <b>the</b> Windows NT operating system <b>and</b> a SQL server relational database, <b>all</b> residing <b>on</b> scaleable hardware. <b>The</b> <b>software</b> is constructed using <b>an</b> advanced proprietary XML framework <b>and</b> resides <b>on</b> <b>an</b> N-tier architecture. <b>The</b> support of open systems <b>allows</b> integration with a large variety of existing commercial, proprietary <b>and</b> legacy <b>applications</b>.  Other <b>applications</b>, which are <b>also</b> operational in a Microsoft NT environment, have been developed using Power Builder <b>and</b> are dependent <b>on</b> <b>an</b> Oracle relational database. <b>running</b> fast <b>and</b> <b>runs</b> out";}}}}
a:1:{i:0;a:1:{i:0;a:98:{i:0;s:14:"^sphinx month$";i:1;a:1:{i:0;s:113:"[B]Sphinx[A] clusters scale to billions of documents, terabytes of data, and billions of queries per [B]month[A].";}i:2;s:16:"^sphinx queries$";i:3;a:1:{i:0;s:101:"Sphinx clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:4;s:16:"^clusters month$";i:5;a:1:{i:0;s:101:"Sphinx clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:6;s:12:"^*inx *bytes";i:7;a:1:{i:0;s:113:"[B]Sphinx[A] clusters scale to billions of documents, [B]terabytes[A] of data, and billions of queries per month.";}i:8;s:3:"*i*";i:9;a:1:{i:0;s:125:"[B]Sphinx[A] clusters scale to [B]billions[A] of documents, terabytes of data, and [B]billions[A] of [B]queries[A] per month.";}i:10;s:4:"*on*";i:11;a:1:{i:0;s:119:"Sphinx clusters scale to [B]billions[A] of documents, terabytes of data, and [B]billions[A] of queries per [B]month[A].";}i:12;s:2:"*s";i:13;a:1:{i:0;s:137:"Sphinx [B]clusters[A] scale to [B]billions[A] of [B]documents[A], [B]terabytes[A] of data, and [B]billions[A] of [B]queries[A] per month.";}i:14;s:16:""clusters scale"";i:15;a:1:{i:0;s:107:"Sphinx [B]clusters scale[A] to billions of documents, terabytes of data, and billions of queries per month.";}i:16;s:23:""clusters do not scale"";i:17;a:1:{i:0;s:101:"Sphinx clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:18;s:7:""of d*"";i:19;a:1:{i:0;s:113:"Sphinx clusters scale to billions [B]of documents[A], terabytes [B]of data[A], and billions of queries per month.";}i:20;s:18:"terabyte* << quer*";i:21;a:1:{i:0;s:113:"Sphinx clusters scale to billions of documents, [B]terabytes[A] of data, and billions of [B]queries[A] per month.";}i:22;s:17:"data << terabyte*";i:23;a:1:{i:0;s:101:"Sphinx clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:24;s:16:""sphinx 
scale"~3";i:25;a:1:{i:0;s:113:"[B]Sphinx[A] clusters [B]scale[A] to billions of documents, terabytes of data, and billions of queries per month.";}i:26;s:19:""sphinx billions"~3";i:27;a:1:{i:0;s:101:"Sphinx clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:28;s:19:""silly documents"/1";i:29;a:1:{i:0;s:107:"Sphinx clusters scale to billions of [B]documents[A], terabytes of data, and billions of queries per month.";}i:30;s:28:""clusters scale to billions"";i:31;a:1:{i:0;s:107:"Sphinx [B]clusters scale to billions[A] of documents, terabytes of data, and billions of queries per month.";}i:32;s:41:""queries per month" | month | "per month"";i:33;a:1:{i:0;s:107:"Sphinx clusters scale to billions of documents, terabytes of data, and billions of [B]queries per month[A].";}i:34;s:19:""of d*" | "of data"";i:35;a:1:{i:0;s:113:"Sphinx clusters scale to billions [B]of documents[A], terabytes [B]of data[A], and billions of queries per month.";}i:36;s:25:""of data" -"of hedgedogs"";i:37;a:1:{i:0;s:107:"Sphinx clusters scale to billions of documents, terabytes [B]of data[A], and billions of queries per month.";}i:38;s:21:""documents terabytes"";i:39;a:1:{i:0;s:101:"Sphinx clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:40;s:13:"@title sphinx";i:41;a:1:{i:0;s:107:"[B]Sphinx[A] clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:42;s:12:"@text sphinx";i:43;a:1:{i:0;s:107:"[B]Sphinx[A] clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:44;s:15:"@text[3] sphinx";i:45;a:1:{i:0;s:107:"[B]Sphinx[A] clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:46;s:18:"@text[3] documents";i:47;a:1:{i:0;s:101:"Sphinx clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:48;s:18:"@text[7] 
documents";i:49;a:1:{i:0;s:107:"Sphinx clusters scale to billions of [B]documents[A], terabytes of data, and billions of queries per month.";}i:50;s:6:"SPHINX";i:51;a:1:{i:0;s:107:"[B]Sphinx[A] clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:52;s:4:"SPH*";i:53;a:1:{i:0;s:107:"[B]Sphinx[A] clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:54;s:5:"*PHI*";i:55;a:1:{i:0;s:107:"[B]Sphinx[A] clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:56;s:4:"*INX";i:57;a:1:{i:0;s:107:"[B]Sphinx[A] clusters scale to billions of documents, terabytes of data, and billions of queries per month.";}i:58;s:39:" "*mmitt* u" | ommitt* | "committed u" ";i:59;a:1:{i:0;s:45:"support is just committed to Sphinx code base";}i:60;s:26:" *ommitt* | "committed u" ";i:61;a:1:{i:0;s:52:"support is just <b>committed</b> to Sphinx code base";}i:62;s:22:" *ommitt* committed u ";i:63;a:1:{i:0;s:52:"support is just <b>committed</b> to Sphinx code base";}i:64;s:28:" committed* | "committed p" ";i:65;a:1:{i:0;s:52:"support is just <b>committed</b> to Sphinx code base";}i:66;s:24:" committed* committed p ";i:67;a:1:{i:0;s:52:"support is just <b>committed</b> to Sphinx code base";}i:68;s:26:" (support ("committed*")) ";i:69;a:1:{i:0;s:59:"<b>support</b> is just <b>committed</b> to Sphinx code base";}i:70;s:35:" (support ("code*" | "code test")) ";i:71;a:1:{i:0;s:47:"<b>support</b> is ... Sphinx <b>code</b> ... ";}i:72;s:16:"din abcingelonhe";i:73;a:1:{i:0;s:66:" ... Onenes <b>din</b> Pas ... ccc <b>abcingelonhe</b> cc ... ";}i:74;a:1:{i:0;s:13:"тест ... ";}i:75;a:1:{i:0;s:14:"тест ... ";}i:76;a:1:{i:0;s:14:"тест ... ";}i:77;a:1:{i:0;s:18:"тест на ... ";}i:78;a:1:{i:0;s:19:"тест на ... ";}i:79;a:1:{i:0;s:65:" ... begin begin <b>some</b> <b>starred</b> <b>text</b> and ... ";}i:80;a:1:{i:0;s:56:" ... 
mid mid mid <b>the</b> <b>right</b> <b>text</b> end";}i:81;a:1:{i:0;s:347:"Our company's core technology platform is based on <b>Microsoft</b> applications, including the Windows NT operating system and a SQL server relational database, all residing on scaleable hardware. The software is constructed using an advanced proprietary <b>XML</b> framework and resides on an N-tier architecture. The support of open systems ...";}i:82;a:1:{i:0;s:262:" ... database, <b>all</b> residing <b>on</b> scaleable hardware. <b>The software</b> is constructed using <b>an</b> advanced proprietary ... Power Builder <b>and</b> are dependent <b>on an</b> Oracle relational database. <b>running</b> fast <b>and runs</b> out";}i:83;a:1:{i:0;s:769:"Our company's core technology platform is based <b>on</b> Microsoft <b>applications</b>, including <b>the</b> Windows NT operating system <b>and</b> a SQL server relational database, <b>all</b> residing <b>on</b> scaleable hardware. <b>The software</b> is constructed using <b>an</b> advanced proprietary XML framework <b>and</b> resides <b>on an</b> N-tier architecture. <b>The</b> support of open systems <b>allows</b> integration with a large variety of existing commercial, proprietary <b>and</b> legacy <b>applications</b>.  Other <b>applications</b>, which are <b>also</b> operational in a Microsoft NT environment, have been developed using Power Builder <b>and</b> are dependent <b>on an</b> Oracle relational database. <b>running</b> fast <b>and runs</b> out";}i:84;a:1:{i:0;s:283:" ... database, <b>all</b> residing <b>on</b> scaleable hardware. <b>The</b> <b>software</b> is constructed using <b>an</b> advanced proprietary ... Power Builder <b>and</b> are dependent <b>on</b> <b>an</b> Oracle relational database. 
<b>running</b> fast <b>and</b> <b>runs</b> out";}i:85;a:1:{i:0;s:797:"Our company's core technology platform is based <b>on</b> Microsoft <b>applications</b>, including <b>the</b> Windows NT operating system <b>and</b> a SQL server relational database, <b>all</b> residing <b>on</b> scaleable hardware. <b>The</b> <b>software</b> is constructed using <b>an</b> advanced proprietary XML framework <b>and</b> resides <b>on</b> <b>an</b> N-tier architecture. <b>The</b> support of open systems <b>allows</b> integration with a large variety of existing commercial, proprietary <b>and</b> legacy <b>applications</b>.  Other <b>applications</b>, which are <b>also</b> operational in a Microsoft NT environment, have been developed using Power Builder <b>and</b> are dependent <b>on</b> <b>an</b> Oracle relational database. <b>running</b> fast <b>and</b> <b>runs</b> out";}i:86;a:2:{s:10:"query_mode";b:0;s:11:"allow_empty";b:0;}i:87;a:1:{i:0;s:0:"";}i:88;a:1:{i:0;s:7:"test me";}i:89;a:2:{s:10:"query_mode";b:1;s:11:"allow_empty";b:0;}i:90;a:1:{i:0;s:0:"";}i:91;a:1:{i:0;s:7:"test me";}i:92;a:2:{s:10:"query_mode";b:0;s:11:"allow_empty";b:1;}i:93;a:1:{i:0;s:0:"";}i:94;a:1:{i:0;s:0:"";}i:95;a:2:{s:10:"query_mode";b:1;s:11:"allow_empty";b:1;}i:96;a:1:{i:0;s:0:"";}i:97;a:1:{i:0;s:0:"";}}}}
14 changes: 14 additions & 0 deletions test/test_062/test.xml
Expand Up @@ -193,6 +193,20 @@ $results[] = $client->BuildExcerpts ( array ( $doc ), 'hi2', $query, array('quer
$results[] = $client->BuildExcerpts ( array ( $doc ), 'hi2', $query, array('query_mode'=>false, 'limit'=>200) );
$results[] = $client->BuildExcerpts ( array ( $doc ), 'hi2', $query, array('query_mode'=>false, 'limit'=>0) );
// regression: daemon crashed on either an empty document or an empty query
$opts = array (
array('query_mode'=>false, 'allow_empty'=>false),
array('query_mode'=>true, 'allow_empty'=>false),
array('query_mode'=>false, 'allow_empty'=>true),
array('query_mode'=>true, 'allow_empty'=>true) );
foreach ( $opts as $o )
{
$results[] = $o;
$results[] = $client->BuildExcerpts ( array (''), 'test', 'test me', $o );
$results[] = $client->BuildExcerpts ( array ('test me'), 'test', '', $o );
}
]]></custom_test>

</test>

0 comments on commit e14a9bc

Please sign in to comment.