Skip to content

Commit

Permalink
Changed behavior of retry_count option for mirrors
Browse files Browse the repository at this point in the history
Now, if a query attempt to a remote agent fails, sphinx reselects a mirror
according to the current ha_strategy and retries with the newly selected
host. Previously, a mirror was selected only once, before the query, so
in case of failure sphinx could just try to connect again to the same
host several times.

Test added, documented.
  • Loading branch information
klirichek committed Jul 22, 2016
1 parent 2afbae7 commit 66ea9fc
Show file tree
Hide file tree
Showing 4 changed files with 160 additions and 8 deletions.
14 changes: 13 additions & 1 deletion doc/sphinx.xml
Original file line number Diff line number Diff line change
Expand Up @@ -11919,7 +11919,7 @@ Starting with 2.2.9-release, the value can additionally enumerate per agent
options such as:
<itemizedlist>
<listitem><para><link linkend="conf-ha-strategy">ha_strategy</link> - random,
roundrobin, nodeads, noerrors (replces index <link linkend="conf-ha-strategy">ha_strategy</link>
roundrobin, nodeads, noerrors (replaces index <link linkend="conf-ha-strategy">ha_strategy</link>
for particular agent)</para></listitem>
<listitem><para><link linkend="conf-agent-persistent">conn</link> - pconn,
persistent (same as <link linkend="conf-agent-persistent">agent_persistent</link>
Expand Down Expand Up @@ -14446,6 +14446,18 @@ Integer, specifies how many times sphinx will try to connect and query remote ag
fatal query error. Default is 0 (i.e. no retries). This value may be also specified on per-query basis using
'OPTION retry_count=XXX' clause. If per-query option exists, it will override the one specified in config.
</para>
<para>
Note that if you use <link linkend="conf-agent">agent mirrors</link> in the definition of your distributed
index, then before every connection attempt sphinx will select a different
mirror, according to the specified <link
linkend="conf-ha-strategy">ha_strategy</link>.
</para>
<para>
For example, if you have 10 mirrors and know for sure that at least one of them
is alive, then you are guaranteed to get an answer to a correct query by
specifying the options <code>ha_strategy = roundrobin</code> and
<code>agent_retry_count = 9</code> in your config.
</para>
</sect2>

<sect2 id="conf-agent-retry-delay"><title>agent_retry_delay</title>
Expand Down
25 changes: 18 additions & 7 deletions src/searchdha.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,7 @@ void AgentConn_t::SpecifyAndSelectMirror ( MetaAgentDesc_t * pMirrorChooser )
void AgentConn_t::Fail ( AgentStats_e eStat, const char* sMessage, ... )
{
Close ();
m_eState = AGENT_RETRY; // since it became AGENT_UNUSED after Close()
va_list ap;
va_start ( ap, sMessage );
m_sFailure.SetSprintfVa ( sMessage, ap );
Expand Down Expand Up @@ -2278,6 +2279,7 @@ void SetNextRetry ( AgentWorkContext_t * pCtx )
pCtx->m_iRetries++;
pCtx->m_tmWait = tmNextTry;
pCtx->m_pAgents->m_eState = AGENT_RETRY;
pCtx->m_pAgents->SpecifyAndSelectMirror ();
}

void ThdWorkParallel ( AgentWorkContext_t * pCtx )
Expand Down Expand Up @@ -2317,16 +2319,25 @@ void ThdWorkSequental ( AgentWorkContext_t * pCtx )

pCtx->m_iAgentsDone += RemoteQueryAgents ( pCtx );

int iToRetry = 0;
for ( int i=0; i<pCtx->m_iAgentCount; i++ )
iToRetry += ( pCtx->m_pAgents[i].m_eState==AGENT_RETRY );

if ( iToRetry )
pCtx->m_iRetries++;
bool bNeedRetry = false;
if ( pCtx->m_iRetriesMax )
{
for ( int i = 0; i<pCtx->m_iAgentCount; i++ )
if ( pCtx->m_pAgents[i].m_eState==AGENT_RETRY )
{
bNeedRetry = true;
break;
}
}

pCtx->m_pfn = NULL;
if ( iToRetry && pCtx->m_iRetriesMax && pCtx->m_iRetries<=pCtx->m_iRetriesMax )
if ( bNeedRetry && ++pCtx->m_iRetries<=pCtx->m_iRetriesMax )
{
pCtx->m_pfn = ThdWorkSequental;
for ( int i = 0; i<pCtx->m_iAgentCount; i++ )
if ( pCtx->m_pAgents[i].m_eState==AGENT_RETRY )
pCtx->m_pAgents[i].SpecifyAndSelectMirror ();
}
}

class CSphRemoteAgentsController : public ISphRemoteAgentsController
Expand Down
1 change: 1 addition & 0 deletions test/test_259/model.bin
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
a:1:{i:0;a:21:{i:0;a:3:{s:8:"sphinxql";s:18:"select * from dist";s:10:"total_rows";i:5;s:4:"rows";a:5:{i:0;a:3:{s:2:"id";s:1:"5";s:3:"idd";s:1:"3";s:6:"aagent";s:1:"2";}i:1;a:3:{s:2:"id";s:1:"6";s:3:"idd";s:2:"12";s:6:"aagent";s:1:"2";}i:2;a:3:{s:2:"id";s:1:"7";s:3:"idd";s:1:"4";s:6:"aagent";s:1:"2";}i:3;a:3:{s:2:"id";s:1:"8";s:3:"idd";s:1:"5";s:6:"aagent";s:1:"2";}i:4;a:3:{s:2:"id";s:1:"9";s:3:"idd";s:2:"13";s:6:"aagent";s:1:"2";}}}i:1;a:3:{s:8:"sphinxql";s:18:"select * from dist";s:10:"total_rows";i:4;s:4:"rows";a:4:{i:0;a:3:{s:2:"id";s:1:"1";s:3:"idd";s:1:"1";s:6:"aagent";s:1:"1";}i:1;a:3:{s:2:"id";s:1:"2";s:3:"idd";s:2:"10";s:6:"aagent";s:1:"1";}i:2;a:3:{s:2:"id";s:1:"3";s:3:"idd";s:1:"2";s:6:"aagent";s:1:"1";}i:3;a:3:{s:2:"id";s:1:"4";s:3:"idd";s:2:"11";s:6:"aagent";s:1:"1";}}}i:2;a:3:{s:8:"sphinxql";s:19:"select * from dist2";s:10:"total_rows";i:5;s:4:"rows";a:5:{i:0;a:3:{s:2:"id";s:1:"5";s:3:"idd";s:1:"3";s:6:"aagent";s:1:"2";}i:1;a:3:{s:2:"id";s:1:"6";s:3:"idd";s:2:"12";s:6:"aagent";s:1:"2";}i:2;a:3:{s:2:"id";s:1:"7";s:3:"idd";s:1:"4";s:6:"aagent";s:1:"2";}i:3;a:3:{s:2:"id";s:1:"8";s:3:"idd";s:1:"5";s:6:"aagent";s:1:"2";}i:4;a:3:{s:2:"id";s:1:"9";s:3:"idd";s:2:"13";s:6:"aagent";s:1:"2";}}}i:3;a:3:{s:8:"sphinxql";s:19:"select * from dist2";s:5:"error";s:102:"index dist2: agent <AGENT1_ADDRESS/>: remote query error: unknown local index 'loc2' in search request";s:5:"errno";i:1064;}i:4;a:3:{s:8:"sphinxql";s:19:"select * from dist3";s:5:"error";s:90:"index dist3: agent 127.0.0.1:65432: connect() failed: errno=111, Operation now in progress";s:5:"errno";i:1064;}i:5;a:3:{s:8:"sphinxql";s:19:"select * from dist3";s:10:"total_rows";i:4;s:4:"rows";a:4:{i:0;a:3:{s:2:"id";s:1:"1";s:3:"idd";s:1:"1";s:6:"aagent";s:1:"1";}i:1;a:3:{s:2:"id";s:1:"2";s:3:"idd";s:2:"10";s:6:"aagent";s:1:"1";}i:2;a:3:{s:2:"id";s:1:"3";s:3:"idd";s:1:"2";s:6:"aagent";s:1:"1";}i:3;a:3:{s:2:"id";s:1:"4";s:3:"idd";s:2:"11";s:6:"aagent";s:1:"1";}}}i:6;a:3:{s:8:"sphinxql";s:19:"select * from 
dist3";s:10:"total_rows";i:5;s:4:"rows";a:5:{i:0;a:3:{s:2:"id";s:1:"5";s:3:"idd";s:1:"3";s:6:"aagent";s:1:"2";}i:1;a:3:{s:2:"id";s:1:"6";s:3:"idd";s:2:"12";s:6:"aagent";s:1:"2";}i:2;a:3:{s:2:"id";s:1:"7";s:3:"idd";s:1:"4";s:6:"aagent";s:1:"2";}i:3;a:3:{s:2:"id";s:1:"8";s:3:"idd";s:1:"5";s:6:"aagent";s:1:"2";}i:4;a:3:{s:2:"id";s:1:"9";s:3:"idd";s:2:"13";s:6:"aagent";s:1:"2";}}}i:7;a:3:{s:8:"sphinxql";s:19:"select * from dist3";s:5:"error";s:90:"index dist3: agent 127.0.0.1:65432: connect() failed: errno=111, Operation now in progress";s:5:"errno";i:1064;}i:8;a:3:{s:8:"sphinxql";s:19:"select * from dist3";s:5:"error";s:90:"index dist3: agent 127.0.0.1:65432: connect() failed: errno=111, Operation now in progress";s:5:"errno";i:1064;}i:9;a:3:{s:8:"sphinxql";s:19:"select * from dist3";s:10:"total_rows";i:4;s:4:"rows";a:4:{i:0;a:3:{s:2:"id";s:1:"1";s:3:"idd";s:1:"1";s:6:"aagent";s:1:"1";}i:1;a:3:{s:2:"id";s:1:"2";s:3:"idd";s:2:"10";s:6:"aagent";s:1:"1";}i:2;a:3:{s:2:"id";s:1:"3";s:3:"idd";s:1:"2";s:6:"aagent";s:1:"1";}i:3;a:3:{s:2:"id";s:1:"4";s:3:"idd";s:2:"11";s:6:"aagent";s:1:"1";}}}i:10;a:3:{s:8:"sphinxql";s:40:"select * from dist2 OPTION retry_count=2";s:10:"total_rows";i:5;s:4:"rows";a:5:{i:0;a:3:{s:2:"id";s:1:"5";s:3:"idd";s:1:"3";s:6:"aagent";s:1:"2";}i:1;a:3:{s:2:"id";s:1:"6";s:3:"idd";s:2:"12";s:6:"aagent";s:1:"2";}i:2;a:3:{s:2:"id";s:1:"7";s:3:"idd";s:1:"4";s:6:"aagent";s:1:"2";}i:3;a:3:{s:2:"id";s:1:"8";s:3:"idd";s:1:"5";s:6:"aagent";s:1:"2";}i:4;a:3:{s:2:"id";s:1:"9";s:3:"idd";s:2:"13";s:6:"aagent";s:1:"2";}}}i:11;a:3:{s:8:"sphinxql";s:40:"select * from dist2 OPTION retry_count=2";s:5:"error";s:102:"index dist2: agent <AGENT1_ADDRESS/>: remote query error: unknown local index 'loc2' in search request";s:5:"errno";i:1064;}i:12;a:3:{s:8:"sphinxql";s:40:"select * from dist3 OPTION 
retry_count=3";s:10:"total_rows";i:5;s:4:"rows";a:5:{i:0;a:3:{s:2:"id";s:1:"5";s:3:"idd";s:1:"3";s:6:"aagent";s:1:"2";}i:1;a:3:{s:2:"id";s:1:"6";s:3:"idd";s:2:"12";s:6:"aagent";s:1:"2";}i:2;a:3:{s:2:"id";s:1:"7";s:3:"idd";s:1:"4";s:6:"aagent";s:1:"2";}i:3;a:3:{s:2:"id";s:1:"8";s:3:"idd";s:1:"5";s:6:"aagent";s:1:"2";}i:4;a:3:{s:2:"id";s:1:"9";s:3:"idd";s:2:"13";s:6:"aagent";s:1:"2";}}}i:13;a:3:{s:8:"sphinxql";s:40:"select * from dist3 OPTION retry_count=3";s:10:"total_rows";i:4;s:4:"rows";a:4:{i:0;a:3:{s:2:"id";s:1:"1";s:3:"idd";s:1:"1";s:6:"aagent";s:1:"1";}i:1;a:3:{s:2:"id";s:1:"2";s:3:"idd";s:2:"10";s:6:"aagent";s:1:"1";}i:2;a:3:{s:2:"id";s:1:"3";s:3:"idd";s:1:"2";s:6:"aagent";s:1:"1";}i:3;a:3:{s:2:"id";s:1:"4";s:3:"idd";s:2:"11";s:6:"aagent";s:1:"1";}}}i:14;a:3:{s:8:"sphinxql";s:40:"select * from dist3 OPTION retry_count=3";s:10:"total_rows";i:5;s:4:"rows";a:5:{i:0;a:3:{s:2:"id";s:1:"5";s:3:"idd";s:1:"3";s:6:"aagent";s:1:"2";}i:1;a:3:{s:2:"id";s:1:"6";s:3:"idd";s:2:"12";s:6:"aagent";s:1:"2";}i:2;a:3:{s:2:"id";s:1:"7";s:3:"idd";s:1:"4";s:6:"aagent";s:1:"2";}i:3;a:3:{s:2:"id";s:1:"8";s:3:"idd";s:1:"5";s:6:"aagent";s:1:"2";}i:4;a:3:{s:2:"id";s:1:"9";s:3:"idd";s:2:"13";s:6:"aagent";s:1:"2";}}}i:15;a:3:{s:8:"sphinxql";s:40:"select * from dist3 OPTION retry_count=3";s:10:"total_rows";i:4;s:4:"rows";a:4:{i:0;a:3:{s:2:"id";s:1:"1";s:3:"idd";s:1:"1";s:6:"aagent";s:1:"1";}i:1;a:3:{s:2:"id";s:1:"2";s:3:"idd";s:2:"10";s:6:"aagent";s:1:"1";}i:2;a:3:{s:2:"id";s:1:"3";s:3:"idd";s:1:"2";s:6:"aagent";s:1:"1";}i:3;a:3:{s:2:"id";s:1:"4";s:3:"idd";s:2:"11";s:6:"aagent";s:1:"1";}}}i:16;a:3:{s:8:"sphinxql";s:40:"select * from dist3 OPTION 
retry_count=3";s:10:"total_rows";i:5;s:4:"rows";a:5:{i:0;a:3:{s:2:"id";s:1:"5";s:3:"idd";s:1:"3";s:6:"aagent";s:1:"2";}i:1;a:3:{s:2:"id";s:1:"6";s:3:"idd";s:2:"12";s:6:"aagent";s:1:"2";}i:2;a:3:{s:2:"id";s:1:"7";s:3:"idd";s:1:"4";s:6:"aagent";s:1:"2";}i:3;a:3:{s:2:"id";s:1:"8";s:3:"idd";s:1:"5";s:6:"aagent";s:1:"2";}i:4;a:3:{s:2:"id";s:1:"9";s:3:"idd";s:2:"13";s:6:"aagent";s:1:"2";}}}i:17;a:3:{s:8:"sphinxql";s:40:"select * from dist3 OPTION retry_count=3";s:10:"total_rows";i:4;s:4:"rows";a:4:{i:0;a:3:{s:2:"id";s:1:"1";s:3:"idd";s:1:"1";s:6:"aagent";s:1:"1";}i:1;a:3:{s:2:"id";s:1:"2";s:3:"idd";s:2:"10";s:6:"aagent";s:1:"1";}i:2;a:3:{s:2:"id";s:1:"3";s:3:"idd";s:1:"2";s:6:"aagent";s:1:"1";}i:3;a:3:{s:2:"id";s:1:"4";s:3:"idd";s:2:"11";s:6:"aagent";s:1:"1";}}}i:18;a:3:{s:8:"sphinxql";s:40:"select * from dist3 OPTION retry_count=1";s:10:"total_rows";i:5;s:4:"rows";a:5:{i:0;a:3:{s:2:"id";s:1:"5";s:3:"idd";s:1:"3";s:6:"aagent";s:1:"2";}i:1;a:3:{s:2:"id";s:1:"6";s:3:"idd";s:2:"12";s:6:"aagent";s:1:"2";}i:2;a:3:{s:2:"id";s:1:"7";s:3:"idd";s:1:"4";s:6:"aagent";s:1:"2";}i:3;a:3:{s:2:"id";s:1:"8";s:3:"idd";s:1:"5";s:6:"aagent";s:1:"2";}i:4;a:3:{s:2:"id";s:1:"9";s:3:"idd";s:2:"13";s:6:"aagent";s:1:"2";}}}i:19;a:3:{s:8:"sphinxql";s:40:"select * from dist3 OPTION retry_count=1";s:5:"error";s:90:"index dist3: agent 127.0.0.1:65432: connect() failed: errno=111, Operation now in progress";s:5:"errno";i:1064;}i:20;a:3:{s:8:"sphinxql";s:40:"select * from dist3 OPTION retry_count=1";s:10:"total_rows";i:4;s:4:"rows";a:4:{i:0;a:3:{s:2:"id";s:1:"1";s:3:"idd";s:1:"1";s:6:"aagent";s:1:"1";}i:1;a:3:{s:2:"id";s:1:"2";s:3:"idd";s:2:"10";s:6:"aagent";s:1:"1";}i:2;a:3:{s:2:"id";s:1:"3";s:3:"idd";s:1:"2";s:6:"aagent";s:1:"1";}i:3;a:3:{s:2:"id";s:1:"4";s:3:"idd";s:2:"11";s:6:"aagent";s:1:"1";}}}}}
128 changes: 128 additions & 0 deletions test/test_259/test.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
<?xml version="1.0" encoding="utf-8"?>
<test>

<name>ha syntax and round-robin strategies</name>

<num_agents>3</num_agents>

<config>
searchd
{
<searchd_settings/>
workers = threads
agent_retry_delay = 100
}

<agent0>
index dist
{
type = distributed
agent = <agent1_address/>:loc1|<agent2_address/>:loc2
ha_strategy = roundrobin
}
index dist2
{
type = distributed
agent = <agent1_address/>|<agent2_address/>:loc2

This comment has been minimized.

Copy link
@razvanphp

razvanphp Sep 16, 2016

here agent1 doesn't have the loc2 index, so it always fails:

ERROR 1064: index dist2: agent <AGENT1_ADDRESS/>: remote query error: unknown local index 'loc2' in search request
ha_strategy = roundrobin
}
index dist3
{
type = distributed
agent = 127.0.0.1:65432:loc|127.0.0.1:65432:loc|<agent1_address/>:loc1|<agent2_address/>:loc2
ha_strategy = roundrobin
}

</agent0>

<agent1>
source src_a1
{
type = mysql
<sql_settings/>

sql_query = SELECT id, idd, 1 as aagent, body FROM test_table where id in ( 1,2,3,4 )
sql_attr_uint = aagent
sql_attr_uint = idd
}

index loc1
{
source = src_a1
docinfo = extern
path = <data_path/>/a1
}
</agent1>

<agent2>
source src_a2
{
type = mysql
<sql_settings/>

sql_query = SELECT id, idd, 2 as aagent, body FROM test_table where id in ( 5,6,7, 8, 9 )
sql_attr_uint = aagent
sql_attr_uint = idd
}

index loc2
{
source = src_a2
docinfo = extern
path = <data_path/>/a2
}
</agent2>

</config>

<db_create>
CREATE TABLE `test_table`
(
`id` int(11) NOT NULL default '0',
`idd` int(11) NOT NULL default '0',
`body` varchar(1024) NOT NULL default ''
)
</db_create>

<db_drop>
DROP TABLE IF EXISTS `test_table`
</db_drop>

<db_insert>
INSERT INTO `test_table` VALUES
( 1, 1, 'the dog' ),
( 2, 10, 'the cat' ),
( 3, 2, 'the bird' ),
( 4, 11, 'cat eats bird' ),
( 5, 3, 'dog eats cat' ),
( 6, 12, 'bird' ),
( 7, 4, 'the cat' ),
( 8, 5, 'eats' ),
( 9, 13, 'the' )
</db_insert>

<sphqueries>
<sphinxql>select * from dist</sphinxql>
<sphinxql>select * from dist</sphinxql>
<sphinxql>select * from dist2</sphinxql>
<sphinxql>select * from dist2</sphinxql>
<sphinxql>select * from dist3</sphinxql>
<sphinxql>select * from dist3</sphinxql>
<sphinxql>select * from dist3</sphinxql>
<sphinxql>select * from dist3</sphinxql>
<sphinxql>select * from dist3</sphinxql>
<sphinxql>select * from dist3</sphinxql>
<sphinxql>select * from dist2 OPTION retry_count=2</sphinxql>
<sphinxql>select * from dist2 OPTION retry_count=2</sphinxql>
<sphinxql>select * from dist3 OPTION retry_count=3</sphinxql>
<sphinxql>select * from dist3 OPTION retry_count=3</sphinxql>
<sphinxql>select * from dist3 OPTION retry_count=3</sphinxql>
<sphinxql>select * from dist3 OPTION retry_count=3</sphinxql>
<sphinxql>select * from dist3 OPTION retry_count=3</sphinxql>
<sphinxql>select * from dist3 OPTION retry_count=3</sphinxql>
<sphinxql>select * from dist3 OPTION retry_count=1</sphinxql>
<sphinxql>select * from dist3 OPTION retry_count=1</sphinxql>
<sphinxql>select * from dist3 OPTION retry_count=1</sphinxql>
</sphqueries>

</test>

0 comments on commit 66ea9fc

Please sign in to comment.