
Initial transplantation/checkin of message passing & dropping simulator.

Commit 16a0f67dab1dba9cc040ef9f5207e97c082aa86e (root commit, 0 parents), committed by @slfritchie on Apr 1, 2011
BIN rebar (binary file not shown)
rebar.config
@@ -0,0 +1,4 @@
+%% -*- mode: erlang;erlang-indent-level: 4;indent-tabs-mode: nil -*-
+%% ex: ts=4 sw=4 ft=erlang et
+
+{cover_enabled, true}.
distrib_counter_bad1_sim.erl
@@ -0,0 +1,123 @@
+%%%-------------------------------------------------------------------
+%%% @author Scott Lystig Fritchie <fritchie@snookles.com>
+%%% @copyright (C) 2011, Scott Lystig Fritchie
+%%% @doc
+%%%
+%%% @end
+%%% Created : 26 Mar 2011 by Scott Lystig Fritchie <fritchie@snookles.com>
+%%%-------------------------------------------------------------------
+-module(distrib_counter_bad1_sim).
+
+-compile(export_all).
+
+-include_lib("eqc/include/eqc.hrl").
+
+%%% Generators
+
+%% required
+gen_initial_ops(NumClients, NumServers, _NumKeys, _Props) ->
+    list(gen_counter_op(NumClients, NumServers)).
+
+gen_counter_op(NumClients, NumServers) ->
+    ?LET(ClientI, choose(1, NumClients),
+         {lists:nth(ClientI, all_clients()),
+          {counter_op, lists:sublist(all_servers(), NumServers)}}).
+
+%% required
+gen_client_initial_states(NumClients, _Props) ->
+    Clients = lists:sublist(all_clients(), 1, NumClients),
+    [{Clnt, [], fun counter_client1/2} || Clnt <- Clients].
+
+%% required
+gen_server_initial_states(NumServers, _Props) ->
+    Servers = lists:sublist(all_servers(), 1, NumServers),
+    [{Server, gen_nat_nat2(5, 1), fun counter_server1/2} || Server <- Servers].
+
+gen_nat_nat2(A, B) ->
+    %% Use nat() A/(A+B) of the time, nat()*nat() B/(A+B) of the time
+    slf_msgsim_qc:gen_nat_nat2(A, B).
+
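%% An illustrative sketch (not part of this commit) of what the
%% slf_msgsim_qc:gen_nat_nat2/2 helper could look like, using only the
%% stock EQC combinators frequency/1 and nat/0 (the _sketch name is
%% hypothetical):

gen_nat_nat2_sketch(A, B) ->
    %% nat() with weight A, nat()*nat() with weight B,
    %% i.e. nat() is chosen A/(A+B) of the time.
    frequency([{A, nat()},
               {B, ?LET({X, Y}, {nat(), nat()}, X * Y)}]).
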
+%%% Verify our properties
+
+%% required
+verify_property(NumClients, NumServers, _Props, F1, F2, Ops,
+                _Sched0, Runnable, Sched1, Trc, UTrc) ->
+    NumMsgs = length([x || {bang,_,_,_,_} <- Trc]),
+    NumDrops = length([x || {drop,_,_,_,_} <- Trc]),
+    NumTimeouts = length([x || {recv,_,scheduler,_,timeout} <- Trc]),
+    NumCrashes = length([x || {process_crash,_,_,_,_,_} <- Trc]),
+    Emitted = [Count || {_Clnt,_Step,{counter,Count}} <- UTrc,
+                        Count /= timeout],
+    Steps = slf_msgsim:get_step(Sched1),
+    ?WHENFAIL(
+       io:format("Failed:\nF1 = ~p\nF2 = ~p\nEnd2 = ~P\n"
+                 "Runnable = ~p, Receivable = ~p\n"
+                 "Emitted counters = ~w\n",
+                 [F1, F2, Sched1, 250,
+                  slf_msgsim:runnable_procs(Sched1),
+                  slf_msgsim:receivable_procs(Sched1),
+                  Emitted]),
+       classify(NumDrops /= 0, at_least_1_msg_dropped,
+       measure("clients     ", NumClients,
+       measure("servers     ", NumServers,
+       measure("sched steps ", Steps,
+       measure("crashes     ", NumCrashes,
+       measure("# ops       ", length(Ops),
+       measure("# emitted   ", length(Emitted),
+       measure("msgs sent   ", NumMsgs,
+       measure("msgs dropped", NumDrops,
+       measure("timeouts    ", NumTimeouts,
+       begin
+           conjunction([{runnable, Runnable == false},
+                        {ops_finish, length(Ops) == length(UTrc)},
+                        {emits_unique, length(Emitted) ==
+                                           length(lists:usort(Emitted))},
+                        {not_retro, Emitted == lists:sort(Emitted)}])
+       end))))))))))).
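%% The four conjuncts above carry the labels that appear in QuickCheck's
%% output below: runnable (the simulation ended with the Runnable flag
%% false, i.e. nothing left to run), ops_finish (every op left exactly
%% one entry in the user trace UTrc), emits_unique (no counter value was
%% emitted twice), and not_retro (counters were emitted in nondecreasing
%% order, never "retrograde").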
+
+%%% Protocol implementation
+
+%% Known to be flawed: ask each server for its counter, then
+%% choose the max of all responses. The servers are naive
+%% and are not keeping per-key counters but rather a single
+%% counter for the entire server.
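%% A hand-run of the flaw (values mirror the shrunk counterexample in
%% the notes file below): with s1 starting at 1 and s2 at 0, the first
%% op gathers both replies and emits lists:max([1,0]) = 1. If a
%% partition then drops the second op's {incr_counter,...} message to
%% s1, the client times out holding only s2's reply {s2,1} and emits
%% lists:max([1]) = 1 again -- exactly the duplicate that the
%% emits_unique property catches.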
+
+counter_client1({counter_op, Servers}, _St) ->
+    [slf_msgsim:bang(Server, {incr_counter, slf_msgsim:self()}) ||
+        Server <- Servers],
+    {recv_timeout, fun counter_client1_reply/2, {Servers, []}}.
+
+counter_client1_reply({incr_counter_reply, Server, Count},
+                      {Waiting, Replies}) ->
+    Replies2 = [{Server, Count}|Replies],
+    case Waiting -- [Server] of
+        [] ->
+            Val = make_val(Replies2),
+            slf_msgsim:add_utrace({counter, Val}),
+            {recv_general, same, unused};
+        Waiting2 ->
+            {recv_timeout, same, {Waiting2, Replies2}}
+    end;
+counter_client1_reply(timeout, {Waiting, Replies}) ->
+    Val = if length(Waiting) > length(Replies) ->
+                  timeout;
+             true ->
+                  make_val(Replies)
+          end,
+    slf_msgsim:add_utrace({counter, Val}),
+    {recv_general, same, unused}.
+
+counter_server1({incr_counter, From}, Count) ->
+    slf_msgsim:bang(From, {incr_counter_reply, slf_msgsim:self(), Count}),
+    {recv_general, same, Count + 1}.
+
+make_val(Replies) ->
+    lists:max([Counter || {_Server, Counter} <- Replies]).
+
+%%% Misc....
+
+all_clients() ->
+    [c1, c2, c3, c4, c5, c6, c7, c8, c9].
+
+all_servers() ->
+    [s1, s2, s3, s4, s5, s6, s7, s8, s9].
@@ -0,0 +1,150 @@
+NOTE: Search for %% comments for hints about what to look for when something goes wrong.
+
+Summary: Found an error: duplicate counters seen by the same client.
+
+(rk@sbb)75> eqc:quickcheck(eqc:numtests(100,slf_msgsim_qc:prop_simulate(distrib_counter_bad1_sim, []))).
+.......................Failed! After 24 tests.
+%% 1 client, 2 servers, 1 key (ignored)
+{1,2,1}
+%% 2 counter ops, both by the same client
+{[{c1,{counter_op,[s1,s2]}},{c1,{counter_op,[s1,s2]}}],
+ [{c1,[],#Fun<distrib_counter_bad1_sim.5.52918649>}],
+ [{s1,6,#Fun<distrib_counter_bad1_sim.4.5305677>},
+ {s2,5,#Fun<distrib_counter_bad1_sim.4.5305677>}],
+%% Schedule isn't weird
+ [c1,s2,c1,s1],
+%% A single partition dropping c1 -> s1 messages during steps 4-12
+ [{partition,[c1,c1,c1],[s1,s1,s1],4,12}]}
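%% How to read the counterexample: the first tuple is {NumClients,
%% NumServers, NumKeys}, and the second appears to be {initial ops,
%% client initial states, server initial states, scheduler order,
%% partitions}, where {partition, Froms, Tos, Start, End} drops
%% messages sent from Froms to Tos during simulated steps Start..End.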
+Failed:
+F1 = {1,2,1}
+F2 = {[{c1,{counter_op,[s1,s2]}},{c1,{counter_op,[s1,s2]}}],
+ [{c1,[],#Fun<distrib_counter_bad1_sim.5.52918649>}],
+ [{s1,6,#Fun<distrib_counter_bad1_sim.4.5305677>},
+ {s2,5,#Fun<distrib_counter_bad1_sim.4.5305677>}],
+ [c1,s2,c1,s1],
+ [{partition,[c1,c1,c1],[s1,s1,s1],4,12}]}
+End2 = {sched,15,6,
+ [c1,s2,c1,s1],
+ [],
+ [{c1,{proc,c1,unused,[],
+ {[],[]},
+ {[],[]},
+ outbox,#Fun<distrib_counter_bad1_sim.5.52918649>,
+ undefined}},
+ {s1,{proc,s1,7,[],
+ {[],[]},
+ {[],[]},
+ delayed,#Fun<distrib_counter_bad1_sim.4.5305677>,
+ undefined}},
+ {s2,{proc,s2,7,[],
+ {[],[]},
+ {[],[]},
+ delayed,#Fun<distrib_counter_bad1_sim.4.5305677>,
+ undefined}}],
+ [{recv,14,scheduler,c1,timeout},
+ {recv,13,s2,c1,{incr_counter_reply,s2,6}},
+ {deliver,12,s2,c1,{incr_counter_reply,s2,6}},
+ {bang,11,s2,c1,{incr_counter_reply,s2,6}},
+ {recv,11,c1,s2,{incr_counter,c1}},
+ {deliver,10,c1,s2,{incr_counter,c1}},
+ {bang,9,c1,s2,{incr_counter,c1}},
+ {drop,9,c1,s1,{incr_counter,c1}},
+ {recv,9,scheduler,c1,{counter_op,[s1,s2]}},
+ {recv,8,s2,c1,{incr_counter_reply,s2,5}},
+ {deliver,7,s2,c1,{incr_counter_reply,s2,5}},
+ {recv,6,s1,c1,{incr_counter_reply,s1,6}},
+ {deliver,5,s1,c1,{incr_counter_reply,s1,6}},
+ {bang,4,s2,c1,{incr_counter_reply,s2,5}},
+ {recv,4,c1,s2,{incr_counter,c1}},
+ {deliver,3,c1,s2,{incr_counter,c1}},
+ {bang,2,s1,c1,{incr_counter_reply,s1,6}},
+ {recv,2,c1,s1,{incr_counter,c1}},
+ {deliver,1,c1,s1,{incr_counter,c1}},
+ {bang,0,c1,s2,{incr_counter,c1}},
+ {bang,0,c1,s1,{incr_counter,c1}},
+ {recv,0,scheduler,c1,{counter_op,[s1,s2]}}],
+ [{c1,14,{counter,6}},{c1,8,{counter,6}}],
+ [{{c1,s1},[4,5,6,7,8,9,10,11,12]}]}
+Runnable = [], Receivable = []
+%% Duplicate counters are bad
+Emitted counters = [6,6]
+runnable: passed
+ops_finish: passed
+%% ... and the emits_unique property caught the duplicates
+emits_unique: failed
+not_retro: passed
+Shrinking.........(9 times)
+{1,2,1}
+%% Same ops with same actors
+{[{c1,{counter_op,[s1,s2]}},{c1,{counter_op,[s1,s2]}}],
+ [{c1,[],#Fun<distrib_counter_bad1_sim.5.52918649>}],
+ [{s1,1,#Fun<distrib_counter_bad1_sim.4.5305677>},
+ {s2,0,#Fun<distrib_counter_bad1_sim.4.5305677>}],
+%% Scheduler simplified slightly
+ [s1,c1,s2],
+%% The c1 -> s1 partition has been shortened slightly (now steps 1-9).
+ [{partition,[c1],[s1],1,9}]}
+Failed:
+F1 = {1,2,1}
+F2 = {[{c1,{counter_op,[s1,s2]}},{c1,{counter_op,[s1,s2]}}],
+ [{c1,[],#Fun<distrib_counter_bad1_sim.5.52918649>}],
+%% Server s1 starts with counter = 1
+%% Server s2 starts with counter = 0
+ [{s1,1,#Fun<distrib_counter_bad1_sim.4.5305677>},
+ {s2,0,#Fun<distrib_counter_bad1_sim.4.5305677>}],
+ [s1,c1,s2],
+%% The network partition will interfere with c1's second request, look
+%% for the timeout that it receives at the end of the trace.
+ [{partition,[c1],[s1],1,9}]}
+End2 = {sched,15,6,
+ [s1,c1,s2],
+ [],
+ [{c1,{proc,c1,unused,[],
+ {[],[]},
+ {[],[]},
+ outbox,#Fun<distrib_counter_bad1_sim.5.52918649>,
+ undefined}},
+ {s1,{proc,s1,2,[],
+ {[],[]},
+ {[],[]},
+ delayed,#Fun<distrib_counter_bad1_sim.4.5305677>,
+ undefined}},
+ {s2,{proc,s2,2,[],
+ {[],[]},
+ {[],[]},
+ delayed,#Fun<distrib_counter_bad1_sim.4.5305677>,
+ undefined}}],
+%% This is the scheduler trace, in reverse order. Note that the
+%% partition has caused a timeout at simulated time = 14.
+ [{recv,14,scheduler,c1,timeout},
+ {recv,13,s2,c1,{incr_counter_reply,s2,1}},
+ {deliver,12,s2,c1,{incr_counter_reply,s2,1}},
+ {bang,11,s2,c1,{incr_counter_reply,s2,1}},
+ {recv,11,c1,s2,{incr_counter,c1}},
+ {deliver,10,c1,s2,{incr_counter,c1}},
+ {bang,9,c1,s2,{incr_counter,c1}},
+ {drop,9,c1,s1,{incr_counter,c1}},
+ {recv,9,scheduler,c1,{counter_op,[s1,s2]}},
+ {recv,8,s2,c1,{incr_counter_reply,s2,0}},
+ {deliver,7,s2,c1,{incr_counter_reply,s2,0}},
+ {recv,6,s1,c1,{incr_counter_reply,s1,1}},
+ {deliver,5,s1,c1,{incr_counter_reply,s1,1}},
+ {bang,4,s2,c1,{incr_counter_reply,s2,0}},
+ {recv,4,c1,s2,{incr_counter,c1}},
+ {deliver,3,c1,s2,{incr_counter,c1}},
+ {bang,2,s1,c1,{incr_counter_reply,s1,1}},
+ {recv,2,c1,s1,{incr_counter,c1}},
+ {deliver,1,c1,s1,{incr_counter,c1}},
+ {bang,0,c1,s2,{incr_counter,c1}},
+ {bang,0,c1,s1,{incr_counter,c1}},
+ {recv,0,scheduler,c1,{counter_op,[s1,s2]}}],
+%% This is the user trace, in reverse order. After the timeout,
+%% client c1 uses the single reply that it received (at time = 12)
+%% and emits a duplicate counter.
+ [{c1,14,{counter,1}},{c1,8,{counter,1}}],
+ [{{c1,s1},[1,2,3,4,5,6,7,8,9]}]}
+Runnable = [], Receivable = []
+%% Bummer, non-unique counters
+Emitted counters = [1,1]
+emits_unique: failed
+false
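%% (false is eqc:quickcheck/1's return value when the property fails.)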