Implements Mirror Queue Sync in Batches #344
rabbit_mirror_queue_master.erl
@@ -18,6 +18,7 @@
 -export([init/3, terminate/2, delete_and_terminate/2,
          purge/1, purge_acks/1, publish/6, publish_delivered/5,
+         batch_publish/4, batch_publish_delivered/4,
          discard/4, fetch/2, drop/2, ack/2, requeue/2, ackfold/4, fold/3,
          len/1, is_empty/1, depth/1, drain_confirmed/1,
          dropwhile/2, fetchwhile/4, set_ram_duration_target/2, ram_duration/1,
@@ -147,13 +148,15 @@ sync_mirrors(HandleInfo, EmitStats,
                     QName, "Synchronising: " ++ Fmt ++ "~n", Params)
           end,
     Log("~p messages to synchronise", [BQ:len(BQS)]),
-    {ok, #amqqueue{slave_pids = SPids}} = rabbit_amqqueue:lookup(QName),
+    {ok, #amqqueue{slave_pids = SPids} = Q} = rabbit_amqqueue:lookup(QName),
+    SyncBatchSize = rabbit_mirror_queue_misc:sync_batch_size(Q),
+    Log("batch size: ~p", [SyncBatchSize]),
     Ref = make_ref(),
     Syncer = rabbit_mirror_queue_sync:master_prepare(Ref, QName, Log, SPids),
     gm:broadcast(GM, {sync_start, Ref, Syncer, SPids}),
     S = fun(BQSN) -> State#state{backing_queue_state = BQSN} end,
     case rabbit_mirror_queue_sync:master_go(
-           Syncer, Ref, Log, HandleInfo, EmitStats, BQ, BQS) of
+           Syncer, Ref, Log, HandleInfo, EmitStats, SyncBatchSize, BQ, BQS) of
         {shutdown,  R, BQS1} -> {stop, R, S(BQS1)};
         {sync_died, R, BQS1} -> Log("~p", [R]),
                                 {ok, S(BQS1)};
@@ -241,6 +244,27 @@ publish(Msg = #basic_message { id = MsgId }, MsgProps, IsDelivered, ChPid, Flow,
     BQS1 = BQ:publish(Msg, MsgProps, IsDelivered, ChPid, Flow, BQS),
     ensure_monitoring(ChPid, State #state { backing_queue_state = BQS1 }).
 
+batch_publish(Publishes, ChPid, Flow,
+              State = #state { gm                  = GM,
+                               seen_status         = SS,
+                               backing_queue       = BQ,
+                               backing_queue_state = BQS }) ->
+    {Publishes1, false, MsgSizes} =
+        lists:foldl(fun ({Msg = #basic_message { id = MsgId },
+                          MsgProps, _IsDelivered}, {Pubs, false, Sizes}) ->
+                            {[{Msg, MsgProps, true} | Pubs], %% [0]
+                             false = dict:is_key(MsgId, SS), %% ASSERTION
+                             Sizes + rabbit_basic:msg_size(Msg)}
+                    end, {[], false, 0}, Publishes),
+    Publishes2 = lists:reverse(Publishes1),
+    ok = gm:broadcast(GM, {batch_publish, ChPid, Flow, Publishes2},
+                      MsgSizes),
+    BQS1 = BQ:batch_publish(Publishes2, ChPid, Flow, BQS),
+    ensure_monitoring(ChPid, State #state { backing_queue_state = BQS1 }).
+%% [0] When the slave process the publish instruction, it sets the
+%% IsDelivered flag to true, so to avoid iterating over the messages
+%% again at the slave, we do it here.
+
 publish_delivered(Msg = #basic_message { id = MsgId }, MsgProps,
                   ChPid, Flow, State = #state { gm          = GM,
                                                 seen_status = SS,

Review discussion on the "[0]" code comment:

Is this meant to say "when the slave process handles the publish command"?

Yes.
@@ -253,6 +277,23 @@ publish_delivered(Msg = #basic_message { id = MsgId }, MsgProps,
     State1 = State #state { backing_queue_state = BQS1 },
     {AckTag, ensure_monitoring(ChPid, State1)}.
 
+batch_publish_delivered(Publishes, ChPid, Flow,
+                        State = #state { gm                  = GM,
+                                         seen_status         = SS,
+                                         backing_queue       = BQ,
+                                         backing_queue_state = BQS }) ->
+    {false, MsgSizes} =
+        lists:foldl(fun ({Msg = #basic_message { id = MsgId }, _MsgProps},
+                         {false, Sizes}) ->
+                            {false = dict:is_key(MsgId, SS), %% ASSERTION
+                             Sizes + rabbit_basic:msg_size(Msg)}
+                    end, {false, 0}, Publishes),
+    ok = gm:broadcast(GM, {batch_publish_delivered, ChPid, Flow, Publishes},
+                      MsgSizes),
+    {AckTags, BQS1} = BQ:batch_publish_delivered(Publishes, ChPid, Flow, BQS),
+    State1 = State #state { backing_queue_state = BQS1 },
+    {AckTags, ensure_monitoring(ChPid, State1)}.
+
 discard(MsgId, ChPid, Flow, State = #state { gm                  = GM,
                                              backing_queue       = BQ,
                                              backing_queue_state = BQS,
rabbit_mirror_queue_misc.erl
@@ -22,7 +22,7 @@
          initial_queue_node/2, suggested_queue_nodes/1,
          is_mirrored/1, update_mirrors/2, validate_policy/1,
          maybe_auto_sync/1, maybe_drop_master_after_sync/1,
-         log_info/3, log_warning/3]).
+         sync_batch_size/1, log_info/3, log_warning/3]).
 
 %% for testing only
 -export([module/1]).
@@ -38,11 +38,16 @@
                     [policy_validator, <<"ha-params">>, ?MODULE]}},
                    {mfa, {rabbit_registry, register,
                     [policy_validator, <<"ha-sync-mode">>, ?MODULE]}},
+                   {mfa, {rabbit_registry, register,
+                    [policy_validator, <<"ha-sync-batch-size">>, ?MODULE]}},
                    {mfa, {rabbit_registry, register,
                     [policy_validator, <<"ha-promote-on-shutdown">>, ?MODULE]}},
                    {requires, rabbit_registry},
                    {enables, recovery}]}).
 
+%% For compatibility with versions that don't support sync batching.
+-define(DEFAULT_BATCH_SIZE, 1).
+
 %%----------------------------------------------------------------------------
 
 -ifdef(use_specs).

Review discussion on ?DEFAULT_BATCH_SIZE:

Are we talking about pre-3.6.0 versions here? Mixed 3.6.0/3.5.x clusters are not allowed, so we can use a different default.

You are right. I think this constant came to life in my first POCs, but it is not required anymore. Probably not used in the code.

It is used via sync_batch_size/1. Thoughts?

Changing the default to 16K leads to sync test failures.

Looking at the code I remembered the original purpose. The idea is for it to be 1, so you either use non-batch sync, or batched sync in case the policy has been set. The logic that assumes the policy batch size either …

OK, that makes more sense. I had changes that moved the default to the app file, bumped the default and simplified …

I see no reason not to batch all the time, only making the batch size configurable (with 16K or so by default).

The problem is finding the right batch size. 16K for big messages is too much. It can even cause a network partition (the reason why we have a 2 GB maximum message size in the first place). Finding the right value depends on the workload (this is explained in the related rabbitmq-website PR), but if we provide a default, I think it has to be lower.

Messages that are hundreds of MB in size are probably very rare. Most messages in common workloads are < 4K in size. We can go with 4096 as the default value and those with large messages can adjust it. 4K messages * 4 KiB per message = 16 MiB of payload, not particularly excessive.

@carlhoerberg can you please help us pick the default batch size for eager (full) mirror sync? Maybe you have some stats on median/95th percentile message size distribution at cloudamqp, or any other data that can help us here?
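A quick sanity check of the arithmetic in that last estimate (an Erlang shell evaluation added here for illustration, not part of the PR): 4096 messages of 4 KiB each come to 16 MiB per sync batch.

    %% 4096 messages * 4096 bytes each, expressed in MiB
    1> (4096 * 4 * 1024) / (1024 * 1024).
    16.0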
@@ -332,6 +337,14 @@ module(Mode) when is_binary(Mode) ->
         end
     end.
 
+validate_mode(Mode) ->
+    case module(Mode) of
+        {ok, _Module} ->
+            ok;
+        not_mirrored ->
+            {error, "~p is not a valid ha-mode value", [Mode]}
+    end.
+
 is_mirrored(Q) ->
     case module(Q) of
         {ok, _}  -> true;
@@ -355,6 +368,16 @@ maybe_auto_sync(Q = #amqqueue{pid = QPid}) ->
             ok
     end.
 
+sync_batch_size(#amqqueue{} = Q) ->
+    case policy(<<"ha-sync-batch-size">>, Q) of
+        none -> %% we need this case because none > 1 == true
+            ?DEFAULT_BATCH_SIZE;
+        BatchSize when BatchSize > 1 ->
+            BatchSize;
+        _ ->
+            ?DEFAULT_BATCH_SIZE
+    end.
+
 update_mirrors(OldQ = #amqqueue{pid = QPid},
                NewQ = #amqqueue{pid = QPid}) ->
     case {is_mirrored(OldQ), is_mirrored(NewQ)} of
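The explicit none clause above relies on Erlang's term ordering, in which any atom compares greater than any number, so a missing policy value would otherwise slip through the BatchSize > 1 guard. A quick shell check of the claim in the code comment (added here for illustration, not part of the PR):

    %% atoms sort after numbers, so 'none' (meaning "no policy set")
    %% compares greater than any batch size
    1> none > 1.
    true
    2> none > 16384.
    true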
@@ -410,25 +433,37 @@ validate_policy(KeyList) ->
     Mode = proplists:get_value(<<"ha-mode">>, KeyList, none),
     Params = proplists:get_value(<<"ha-params">>, KeyList, none),
     SyncMode = proplists:get_value(<<"ha-sync-mode">>, KeyList, none),
+    SyncBatchSize = proplists:get_value(
+                      <<"ha-sync-batch-size">>, KeyList, none),
     PromoteOnShutdown = proplists:get_value(
                           <<"ha-promote-on-shutdown">>, KeyList, none),
-    case {Mode, Params, SyncMode, PromoteOnShutdown} of
-        {none, none, none, none} ->
+    case {Mode, Params, SyncMode, SyncBatchSize, PromoteOnShutdown} of
+        {none, none, none, none, none} ->
             ok;
-        {none, _, _, _} ->
+        {none, _, _, _, _} ->
             {error, "ha-mode must be specified to specify ha-params, "
              "ha-sync-mode or ha-promote-on-shutdown", []};
         _ ->
-            case module(Mode) of
-                {ok, M} -> case M:validate_policy(Params) of
-                               ok -> case validate_sync_mode(SyncMode) of
-                                         ok -> validate_pos(PromoteOnShutdown);
-                                         E  -> E
-                                     end;
-                               E -> E
-                           end;
-                _ -> {error, "~p is not a valid ha-mode value", [Mode]}
-            end
+            validate_policies(
+              [{Mode, fun validate_mode/1},
+               {Params, ha_params_validator(Mode)},
+               {SyncMode, fun validate_sync_mode/1},
+               {SyncBatchSize, fun validate_sync_batch_size/1},
+               {PromoteOnShutdown, fun validate_pos/1}])
     end.
 
+ha_params_validator(Mode) ->
+    fun(Val) ->
+            {ok, M} = module(Mode),
+            M:validate_policy(Val)
+    end.
+
+validate_policies([]) ->
+    ok;
+validate_policies([{Val, Validator} | Rest]) ->
+    case Validator(Val) of
+        ok -> validate_policies(Rest);
+        E  -> E
+    end.
+
 validate_sync_mode(SyncMode) ->
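The change above replaces the nested case expressions with a flat validator chain. Below is a self-contained sketch of the same "first error wins" pattern; the module name, run/0 and the PositiveInt validator are illustrative only and are not part of the PR.

    -module(validator_chain_example).
    -export([run/0]).

    %% Same shape as validate_policies/1 above: apply each validator to
    %% its value and stop at the first error.
    validate_all([])                        -> ok;
    validate_all([{Val, Validator} | Rest]) ->
        case Validator(Val) of
            ok -> validate_all(Rest);
            E  -> E
        end.

    run() ->
        PositiveInt = fun (N) when is_integer(N), N > 0 -> ok;
                          (N) -> {error, "expected a positive integer, got ~p", [N]}
                      end,
        ok            = validate_all([{1, PositiveInt}, {5, PositiveInt}]),
        {error, _, _} = validate_all([{1, PositiveInt}, {0, PositiveInt}]),
        ok.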
@@ -440,6 +475,14 @@ validate_sync_mode(SyncMode) ->
                       "or \"automatic\", got ~p", [Mode]}
     end.
 
+validate_sync_batch_size(none) ->
+    ok;
+validate_sync_batch_size(N) when is_integer(N) andalso N > 0 ->
+    ok;
+validate_sync_batch_size(N) ->
+    {error, "ha-sync-batch-size takes an integer greather than 0, "
+     "~p given", [N]}.
+
 validate_pos(PromoteOnShutdown) ->
     case PromoteOnShutdown of
         <<"always">> -> ok;
Review discussion on the use of foldl:

If we reverse the list after foldl, how about using foldr?

Mostly from the docs: http://erlang.org/doc/man/lists.html#foldl-3

Also, AFAIK lists:reverse is a BIF.

I'm fine with changing this to foldr if you think it's required.

Let's keep foldl.
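For reference, a standalone sketch of the two alternatives discussed; mark_delivered/1 is a hypothetical stand-in for the per-message work done inside batch_publish/4's fold, and both variants produce the batch in its original order.

    %% foldl builds the result reversed and restores order with
    %% lists:reverse/1 (a BIF); this is the approach the lists(3)
    %% documentation recommends.
    with_foldl(Publishes) ->
        lists:reverse(lists:foldl(fun (P, Acc) -> [mark_delivered(P) | Acc] end,
                                  [], Publishes)).

    %% foldr builds the result in order directly, at the cost of not
    %% being tail-recursive.
    with_foldr(Publishes) ->
        lists:foldr(fun (P, Acc) -> [mark_delivered(P) | Acc] end, [], Publishes).

    %% Hypothetical helper: mark each publish as delivered, mirroring the
    %% transformation applied in batch_publish/4.
    mark_delivered({Msg, MsgProps, _IsDelivered}) -> {Msg, MsgProps, true}.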