From ff5213d504ea6642d542586c65e0bfd282b0b8f3 Mon Sep 17 00:00:00 2001 From: Dave Smith Date: Thu, 29 Jul 2010 10:29:06 -0600 Subject: [PATCH 01/72] Bumping bitcask dep to tip; need latest fix for file handle leaks --- rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index 0552d631bb..2c0f56c8f4 100644 --- a/rebar.config +++ b/rebar.config @@ -12,7 +12,7 @@ {erlang_js, "0\.4", {hg, "http://bitbucket.org/basho/erlang_js", "erlang_js-0.4"}}, {bitcask, "1.0.2", {hg, "http://bitbucket.org/basho/bitcask", - "bitcask-1.0.2"}}, + "tip"}}, {ebloom, "1.0.1", {hg, "http://bitbucket.org/basho/ebloom", "ebloom-1.0.1"}} ]}. From 414afc1c1d6770f8179eb97e844560f6baed0071 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Thu, 29 Jul 2010 17:33:10 -0400 Subject: [PATCH 02/72] Fix for bug #523 --- src/riak_kv_wm_mapred.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/riak_kv_wm_mapred.erl b/src/riak_kv_wm_mapred.erl index 05c7544f17..03f369a03a 100644 --- a/src/riak_kv_wm_mapred.erl +++ b/src/riak_kv_wm_mapred.erl @@ -102,7 +102,7 @@ process_post(RD, #state{inputs=Inputs, mrquery=Query, timeout=Timeout}=State) -> ?DEFAULT_TIMEOUT); is_binary(Inputs) -> Client:mapred_bucket(Inputs, Query, fun riak_kv_mapred_json:jsonify_not_found/1, - ?DEFAULT_TIMEOUT) + Timeout) end, RD1 = wrq:set_resp_header("Content-Type", "application/json", RD), case Results of From 7ce2f859374e6b23568d0c41452c4f86a4ce5acb Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Thu, 29 Jul 2010 17:37:19 -0400 Subject: [PATCH 03/72] Fix for #523 --- src/riak_kv_wm_mapred.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/riak_kv_wm_mapred.erl b/src/riak_kv_wm_mapred.erl index 03f369a03a..5c462168db 100644 --- a/src/riak_kv_wm_mapred.erl +++ b/src/riak_kv_wm_mapred.erl @@ -99,7 +99,7 @@ process_post(RD, #state{inputs=Inputs, mrquery=Query, timeout=Timeout}=State) -> Results = if is_list(Inputs) -> Client:mapred(Inputs, Query, fun riak_kv_mapred_json:jsonify_not_found/1, - ?DEFAULT_TIMEOUT); + Timeout); is_binary(Inputs) -> Client:mapred_bucket(Inputs, Query, fun riak_kv_mapred_json:jsonify_not_found/1, Timeout) From 2bc8b94e76a186ce6f79e3608ed4b6a11acff788 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Fri, 30 Jul 2010 10:21:30 -0400 Subject: [PATCH 04/72] Bumping bitcask vsn to 1.0.3 --- rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index 2c0f56c8f4..955cf7a083 100644 --- a/rebar.config +++ b/rebar.config @@ -11,7 +11,7 @@ "tip"}}, {erlang_js, "0\.4", {hg, "http://bitbucket.org/basho/erlang_js", "erlang_js-0.4"}}, - {bitcask, "1.0.2", {hg, "http://bitbucket.org/basho/bitcask", + {bitcask, "1.0.3", {hg, "http://bitbucket.org/basho/bitcask", "tip"}}, {ebloom, "1.0.1", {hg, "http://bitbucket.org/basho/ebloom", "ebloom-1.0.1"}} From a90f72e7b6ea861171192494c8ae557b31a025d0 Mon Sep 17 00:00:00 2001 From: kevsmith Date: Sat, 31 Jul 2010 22:58:36 -0400 Subject: [PATCH 05/72] Improving JS VM allocation and tracking --- src/riak_kv_js_manager.erl | 246 +++++++++++++++++++++++++------------ src/riak_kv_js_vm.erl | 54 ++++---- src/riak_kv_vnode.erl | 14 ++- 3 files changed, 210 insertions(+), 104 deletions(-) diff --git a/src/riak_kv_js_manager.erl b/src/riak_kv_js_manager.erl index b356dd3865..19a6ddfb8a 100644 --- a/src/riak_kv_js_manager.erl +++ b/src/riak_kv_js_manager.erl @@ -21,83 +21,122 @@ %% ------------------------------------------------------------------- %% @doc dispatch work to JavaScript VMs - -module(riak_kv_js_manager). -behaviour(gen_server). %% API --export([start_link/1, dispatch/1, blocking_dispatch/1, add_to_manager/0, reload/1, reload/0]). +-export([start_link/1, + add_vm/0, + reload/0, + reload/1, + mark_idle/0, + reserve_vm/0, + dispatch/2, + blocking_dispatch/2, + pool_size/0]). %% gen_server callbacks --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). - --record(state, {tid}). - -dispatch(JSCall) -> - case select_random() of - no_vms -> - {error, no_vms}; - Target -> - JobId = {Target, make_ref()}, - riak_kv_js_vm:dispatch(Target, self(), JobId, JSCall), - {ok, JobId} - end. +-export([init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). -blocking_dispatch(JSCall) -> - case select_random() of - no_vms -> - {error, no_vms}; - Target -> - JobId = {Target, make_ref()}, - riak_kv_js_vm:blocking_dispatch(Target, JobId, JSCall) - end. +-define(SERVER, ?MODULE). + +-record('DOWN', {ref, type, pid, info}). +-record(vm_state, {pid, needs_reload=false}). +-record(state, {master, idle, reserve}). + +start_link(ChildCount) -> + gen_server:start_link({local, ?SERVER}, ?MODULE, [ChildCount], []). -%% Hack to allow riak-admin to trigger a reload reload([]) -> reload(). - reload() -> - gen_server:call(?MODULE, reload_all_vm). + gen_server:call(?SERVER, reload_vms). -add_to_manager() -> - gen_server:cast(?MODULE, {add_child, self()}). +add_vm() -> + gen_server:cast(?SERVER, {add_vm, self()}). -start_link(ChildCount) -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [ChildCount], []). +mark_idle() -> + gen_server:call(?SERVER, {mark_idle, self()}). + +dispatch(JSCall, Tries) -> + dispatch(JSCall, Tries, Tries). + +blocking_dispatch(JSCall, Tries) -> + blocking_dispatch(JSCall, Tries, Tries). + +reserve_vm() -> + gen_server:call(?SERVER, reserve_vm). + +pool_size() -> + gen_server:call(?SERVER, pool_size). init([ChildCount]) -> - Tid = ets:new(?MODULE, [named_table]), - start_children(ChildCount), - {ok, #state{tid=Tid}}. - -handle_call(reload_all_vm, _From, #state{tid=Tid}=State) -> - ets:safe_fixtable(Tid, true), - reload_children(ets:first(Tid), Tid), - ets:safe_fixtable(Tid, false), + Master = ets:new(jsvm_master, [private, {keypos, 2}]), + Idle = ets:new(jsvm_idle, [private]), + start_vms(ChildCount), + {ok, #state{master=Master, idle=Idle}}. + +handle_call({mark_idle, VM}, _From, #state{master=Master, + idle=Idle}=State) -> + case needs_reload(Master, VM) of + true -> + riak_kv_js_vm:reload(VM), + clear_reload(Master, VM); + false -> + ok + end, + ets:insert(Idle, {VM}), + {reply, ok, State}; + +handle_call(reload_vms, _From, #state{master=Master, idle=Idle}=State) -> + reload_idle_vms(Idle), + mark_pending_reloads(Master, Idle), riak_kv_vnode:purge_mapcaches(), {reply, ok, State}; +handle_call(reserve_vm, _From, #state{idle=Idle}=State) -> + try + ets:safe_fixtable(Idle, true), + Reply = case ets:first(Idle) of + '$end_of_table' -> + {error, no_vms}; + VM -> + ets:delete(Idle, VM), + {ok, VM} + end, + {reply, Reply, State} + after + ets:safe_fixtable(Idle, false) + end; + +handle_call(pool_size, _From, #state{idle=Idle}=State) -> + {reply, ets:info(Idle, size), State}; + handle_call(_Request, _From, State) -> {reply, ignore, State}. -handle_cast({add_child, ChildPid}, #state{tid=Tid}=State) -> - erlang:monitor(process, ChildPid), - ets:insert_new(Tid, {ChildPid}), +handle_cast({add_vm, VMPid}, #state{master=Master, idle=Idle}=State) -> + erlang:monitor(process, VMPid), + VMState = #vm_state{pid=VMPid}, + ets:insert(Master, VMState), + ets:insert(Idle, {VMPid}), {noreply, State}; + handle_cast(_Msg, State) -> {noreply, State}. -handle_info({'DOWN', _MRef, _Type, Pid, _Info}, #state{tid=Tid}=State) -> - case ets:lookup(Tid, Pid) of - [] -> - {noreply, State}; - [{Pid}] -> - ets:delete(?MODULE, Pid), - riak_kv_js_sup:start_js(self()), - {noreply, State} - end; +handle_info(#'DOWN'{pid=Pid}, #state{master=Master, idle=Idle}=State) -> + ets:delete(Master, Pid), + ets:delete(Idle, Pid), + riak_kv_js_sup:start_js(self()), + {noreply, State}; + handle_info(_Info, State) -> {noreply, State}. @@ -108,39 +147,88 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. %% Internal functions -start_children(0) -> - ok; -start_children(Count) -> - riak_kv_js_sup:start_js(self()), - start_children(Count - 1). +needs_reload(Master, VMPid) -> + [VMState] = ets:lookup(Master, VMPid), + VMState#vm_state.needs_reload. + +clear_reload(Master, VMPid) -> + [VMState] = ets:lookup(Master, VMPid), + VMState1 = VMState#vm_state{needs_reload=false}, + ets:insert(Master, VMState1). -select_random() -> - case ets:match(?MODULE, {'$1'}) of +is_vm_idle(Idle, VMPid) -> + case ets:lookup(Idle, {VMPid}) of [] -> - no_vms; - Members -> - {T1, T2, T3} = erlang:now(), - random:seed(T1, T2, T3), - Pos = pick_pos(erlang:get(?MODULE), length(Members)), - [Member] = lists:nth(Pos, Members), - Member + false; + _ -> + true end. -pick_pos(undefined, Size) -> - Pos = random:uniform(Size), - erlang:put(?MODULE, Pos), - Pos; -pick_pos(OldPos, Size) -> - case random:uniform(Size) of - OldPos -> - pick_pos(OldPos, Size); - Pos -> - erlang:put(?MODULE, Pos), - Pos +start_vms(0) -> + ok; +start_vms(Count) -> + riak_kv_js_sup:start_js(self()), + start_vms(Count - 1). + +reload_idle_vms(Tid) -> + try + ets:safe_fixtable(Tid, true), + reload_idle_vms(ets:first(Tid), Tid) + after + ets:safe_fixtable(Tid, false) end. -reload_children('$end_of_table', _Tid) -> +reload_idle_vms('$end_of_table', _Tid) -> ok; -reload_children(Current, Tid) -> +reload_idle_vms(Current, Tid) -> riak_kv_js_vm:reload(Current), - reload_children(ets:next(Tid, Current), Tid). + reload_idle_vms(ets:next(Tid), Tid). + +mark_pending_reloads(Master, Idle) -> + try + ets:safe_fixtable(Master, true), + mark_pending_reloads(ets:first(Master), Master, Idle) + after + ets:safe_fixtable(Master, false) + end. + +mark_pending_reloads('$end_of_table', _Master, _Idle) -> + ok; +mark_pending_reloads(VMState, Master, Idle) -> + case is_vm_idle(Idle, VMState#vm_state.pid) of + true -> + ok; + false -> + VMState1 = VMState#vm_state{needs_reload=true}, + ets:insert(Master, VMState1) + end, + mark_pending_reloads(ets:next(Master), Master, Idle). + +dispatch(_JSCall, _MaxCount, 0) -> + {error, no_vms}; +dispatch(JSCall, MaxCount, Count) -> + case reserve_vm() of + {ok, VM} -> + JobId = {VM, make_ref()}, + riak_kv_js_vm:dispatch(VM, self(), JobId, JSCall), + {ok, JobId}; + {error, no_vms} -> + ScalingFactor = (1 + (MaxCount - Count)) * + (0.1 + random:uniform(100) * 0.001), + timer:sleep(erlang:round(500 * ScalingFactor)), + dispatch(JSCall, MaxCount, Count - 1) + end. + +blocking_dispatch(_JSCall, _MaxCount, 0) -> + {error, no_vms}; +blocking_dispatch(JSCall, MaxCount, Count) -> + case reserve_vm() of + {ok, VM} -> + JobId = {VM, make_ref()}, + riak_kv_js_vm:blocking_dispatch(VM, JobId, JSCall); + {error, no_vms} -> + ScalingFactor = (1 + (MaxCount - Count)) * + (0.1 + random:uniform(100) * 0.001), + timer:sleep(erlang:round(500 * ScalingFactor)), + blocking_dispatch(JSCall, MaxCount, Count - 1) + end. diff --git a/src/riak_kv_js_vm.erl b/src/riak_kv_js_vm.erl index 5e36561c89..56f2a36257 100644 --- a/src/riak_kv_js_vm.erl +++ b/src/riak_kv_js_vm.erl @@ -58,7 +58,7 @@ init([Manager]) -> {ok, Ctx} -> error_logger:info_msg("Spidermonkey VM (thread stack: ~pMB, max heap: ~pMB) host starting (~p)~n", [StackSize, HeapSize, self()]), - riak_kv_js_manager:add_to_manager(), + riak_kv_js_manager:add_vm(), erlang:monitor(process, Manager), {ok, #state{manager=Manager, ctx=Ctx}}; Error -> @@ -78,13 +78,18 @@ handle_call({dispatch, _JobId, {{jsanon, JS}, Reduced, Arg}}, _From, #state{ctx= {Error, undefined, NewState} -> {Error, NewState} end, + riak_kv_js_manager:mark_idle(), {reply, Reply, UpdatedState}; %% Reduce phase with named function handle_call({dispatch, _JobId, {{jsfun, JS}, Reduced, Arg}}, _From, #state{ctx=Ctx}=State) -> - {reply, invoke_js(Ctx, JS, [Reduced, Arg]), State}; + Reply = invoke_js(Ctx, JS, [Reduced, Arg]), + riak_kv_js_manager:mark_idle(), + {reply, Reply, State}; %% Pre-commit hook with named function handle_call({dispatch, _JobId, {{jsfun, JS}, Obj}}, _From, #state{ctx=Ctx}=State) -> - {reply, invoke_js(Ctx, JS, [riak_object:to_json(Obj)]), State}; + Reply = invoke_js(Ctx, JS, [riak_object:to_json(Obj)]), + riak_kv_js_manager:mark_idle(), + {reply, Reply, State}; handle_call(_Request, _From, State) -> {reply, ignore, State}. @@ -98,26 +103,28 @@ handle_cast({dispatch, Requestor, _JobId, {Sender, {map, {jsanon, JS}, Arg, _Acc Value, KeyData, _BKey}}, #state{ctx=Ctx}=State) -> {Result, UpdatedState} = case define_anon_js(JS, State) of - {ok, FunName, NewState} -> - JsonValue = riak_object:to_json(Value), - JsonArg = jsonify_arg(Arg), - case invoke_js(Ctx, FunName, [JsonValue, KeyData, JsonArg]) of - {ok, R} -> - {{ok, R}, NewState}; - Error -> - {Error, State} - end; - {Error, undefined, NewState} -> - {Error, NewState} - end, - case Result of - {ok, ReturnValue} -> - riak_core_vnode:reply(Sender, {mapexec_reply, ReturnValue, Requestor}), - {noreply, UpdatedState}; - ErrorResult -> - riak_core_vnode:reply(Sender, {mapexec_error_noretry, Requestor, ErrorResult}), - {noreply, State} - end; + {ok, FunName, NewState} -> + JsonValue = riak_object:to_json(Value), + JsonArg = jsonify_arg(Arg), + case invoke_js(Ctx, FunName, [JsonValue, KeyData, JsonArg]) of + {ok, R} -> + {{ok, R}, NewState}; + Error -> + {Error, State} + end; + {Error, undefined, NewState} -> + {Error, NewState} + end, + FinalState = case Result of + {ok, ReturnValue} -> + riak_core_vnode:reply(Sender, {mapexec_reply, ReturnValue, Requestor}), + UpdatedState; + ErrorResult -> + riak_core_vnode:reply(Sender, {mapexec_error_noretry, Requestor, ErrorResult}), + State + end, + riak_kv_js_manager:mark_idle(), + {noreply, FinalState}; %% Map phase with named function handle_cast({dispatch, Requestor, _JobId, {Sender, {map, {jsfun, JS}, Arg, _Acc}, @@ -133,6 +140,7 @@ handle_cast({dispatch, Requestor, _JobId, {Sender, {map, {jsfun, JS}, Arg, _Acc} Error -> riak_core_vnode:reply(Sender, {mapexec_error_noretry, Requestor, Error}) end, + riak_kv_js_manager:mark_idle(), {noreply, State}; handle_cast(_Msg, State) -> {noreply, State}. diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index 525c241a90..40312aa28e 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -427,8 +427,18 @@ do_map({javascript, {map, FunTerm, Arg, _}=QTerm}, BKey, Mod, ModState, KeyData, case Mod:get(ModState, BKey) of {ok, Binary} -> V = binary_to_term(Binary), - riak_kv_js_manager:dispatch({Sender, QTerm, V, KeyData, BKey}), - map_executing; + case riak_kv_js_manager:dispatch({Sender, QTerm, V, KeyData, BKey}, 10) of + {ok, _JobId} -> + map_executing; + Error -> + case Error of + {error, no_vms} -> + error_logger:info_msg("JS call failed: All VMs busy~n"); + _ -> + error_logger:error_msg("JS call error: ~p~n", [Error]) + end, + Error + end; {error, notfound} -> {error, notfound} end; From 9ae79a42d4f3699954386d03815c0d8471e45147 Mon Sep 17 00:00:00 2001 From: kevsmith Date: Sat, 31 Jul 2010 23:18:04 -0400 Subject: [PATCH 06/72] Fixing up call sites for new riak_kv_js_manager API --- src/riak_kv_js_manager.erl | 2 ++ src/riak_kv_put_fsm.erl | 2 +- src/riak_kv_reduce_phase.erl | 6 +++--- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/riak_kv_js_manager.erl b/src/riak_kv_js_manager.erl index 19a6ddfb8a..6dcddc066c 100644 --- a/src/riak_kv_js_manager.erl +++ b/src/riak_kv_js_manager.erl @@ -205,6 +205,7 @@ mark_pending_reloads(VMState, Master, Idle) -> mark_pending_reloads(ets:next(Master), Master, Idle). dispatch(_JSCall, _MaxCount, 0) -> + error_logger:info_msg("JS call failed: All VMs are busy.~n"); {error, no_vms}; dispatch(JSCall, MaxCount, Count) -> case reserve_vm() of @@ -220,6 +221,7 @@ dispatch(JSCall, MaxCount, Count) -> end. blocking_dispatch(_JSCall, _MaxCount, 0) -> + error_logger:info_msg("JS call failed: All VMs are busy.~n"); {error, no_vms}; blocking_dispatch(JSCall, MaxCount, Count) -> case reserve_vm() of diff --git a/src/riak_kv_put_fsm.erl b/src/riak_kv_put_fsm.erl index 24bc2db052..17d0d1c2bc 100644 --- a/src/riak_kv_put_fsm.erl +++ b/src/riak_kv_put_fsm.erl @@ -335,7 +335,7 @@ invoke_hook(precommit, Mod0, Fun0, undefined, RObj) -> Fun = binary_to_atom(Fun0, utf8), wrap_hook(Mod, Fun, RObj); invoke_hook(precommit, undefined, undefined, JSName, RObj) -> - case riak_kv_js_manager:blocking_dispatch({{jsfun, JSName}, RObj}) of + case riak_kv_js_manager:blocking_dispatch({{jsfun, JSName}, RObj}, 5) of {ok, <<"fail">>} -> fail; {ok, [{<<"fail">>, Message}]} -> diff --git a/src/riak_kv_reduce_phase.erl b/src/riak_kv_reduce_phase.erl index 1f512fa86c..6e0017295e 100644 --- a/src/riak_kv_reduce_phase.erl +++ b/src/riak_kv_reduce_phase.erl @@ -91,11 +91,11 @@ perform_reduce({Lang,{reduce,FunTerm,Arg,_Acc}}, {javascript, _} -> case riak_kv_js_manager:blocking_dispatch({FunTerm, [riak_kv_mapred_json:jsonify_not_found(R) || R <- Reduced], - Arg}) of + Arg}, 5) of {ok, Data} when is_list(Data) -> {ok, [riak_kv_mapred_json:dejsonify_not_found(Datum) || Datum <- Data]}; - Data -> - Data + Error -> + throw(Error) end end catch _:R -> From 8b0bed89ea38d1a6439172630ec5f98edea34e87 Mon Sep 17 00:00:00 2001 From: kevsmith Date: Sat, 31 Jul 2010 23:19:32 -0400 Subject: [PATCH 07/72] Fixing compile errs and removing redundant logging --- src/riak_kv_js_manager.erl | 4 ++-- src/riak_kv_vnode.erl | 6 ------ 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/riak_kv_js_manager.erl b/src/riak_kv_js_manager.erl index 6dcddc066c..caae8e6944 100644 --- a/src/riak_kv_js_manager.erl +++ b/src/riak_kv_js_manager.erl @@ -205,7 +205,7 @@ mark_pending_reloads(VMState, Master, Idle) -> mark_pending_reloads(ets:next(Master), Master, Idle). dispatch(_JSCall, _MaxCount, 0) -> - error_logger:info_msg("JS call failed: All VMs are busy.~n"); + error_logger:info_msg("JS call failed: All VMs are busy.~n"), {error, no_vms}; dispatch(JSCall, MaxCount, Count) -> case reserve_vm() of @@ -221,7 +221,7 @@ dispatch(JSCall, MaxCount, Count) -> end. blocking_dispatch(_JSCall, _MaxCount, 0) -> - error_logger:info_msg("JS call failed: All VMs are busy.~n"); + error_logger:info_msg("JS call failed: All VMs are busy.~n"), {error, no_vms}; blocking_dispatch(JSCall, MaxCount, Count) -> case reserve_vm() of diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index 40312aa28e..8a48c106f4 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -431,12 +431,6 @@ do_map({javascript, {map, FunTerm, Arg, _}=QTerm}, BKey, Mod, ModState, KeyData, {ok, _JobId} -> map_executing; Error -> - case Error of - {error, no_vms} -> - error_logger:info_msg("JS call failed: All VMs busy~n"); - _ -> - error_logger:error_msg("JS call error: ~p~n", [Error]) - end, Error end; {error, notfound} -> From c328b4189c489a701c70f727721b0616c00a273e Mon Sep 17 00:00:00 2001 From: kevsmith Date: Sat, 31 Jul 2010 23:50:37 -0400 Subject: [PATCH 08/72] Converted map operation on vnode to blocking behavior --- include/riak_kv_vnode.hrl | 6 ++---- src/riak_kv_vnode.erl | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/include/riak_kv_vnode.hrl b/include/riak_kv_vnode.hrl index f5491b69ae..a158723799 100644 --- a/include/riak_kv_vnode.hrl +++ b/include/riak_kv_vnode.hrl @@ -22,7 +22,8 @@ -record(riak_kv_map_req_v1, { bkey :: {binary(), binary()}, qterm :: term(), - keydata :: term()}). + keydata :: term(), + from :: term()}). -record(riak_kv_vclock_req_v1, { bkeys = [] :: [{binary(), binary()}] @@ -34,6 +35,3 @@ -define(KV_DELETE_REQ, #riak_kv_delete_req_v1). -define(KV_MAP_REQ, #riak_kv_map_req_v1). -define(KV_VCLOCK_REQ, #riak_kv_vclock_req_v1). - - - diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index 8a48c106f4..6ddd4eaa71 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -106,13 +106,13 @@ list_keys(Preflist, Bucket, ReqId) -> riak_kv_vnode_master). map(Preflist, ClientPid, QTerm, BKey, KeyData) -> - riak_core_vnode_master:command(Preflist, - ?KV_MAP_REQ{ - qterm=QTerm, - bkey=BKey, - keydata=KeyData}, - {fsm, undefined, ClientPid}, - riak_kv_vnode_master). + riak_core_vnode_master:sync_spawn_command(Preflist, + ?KV_MAP_REQ{ + qterm=QTerm, + bkey=BKey, + keydata=KeyData, + from={fsm, undefined, ClientPid}}, + riak_kv_vnode_master). fold(Preflist, Fun, Acc0) -> riak_core_vnode_master:sync_spawn_command(Preflist, @@ -167,9 +167,9 @@ handle_command(?KV_DELETE_REQ{bkey=BKey, req_id=ReqId}, _Sender, {error, _Reason} -> {reply, {fail, Idx, ReqId}, NewState} end; -handle_command(?KV_MAP_REQ{bkey=BKey,qterm=QTerm,keydata=KeyData}, - Sender, State) -> - do_map(Sender,QTerm,BKey,KeyData,State,self()); +handle_command(?KV_MAP_REQ{bkey=BKey,qterm=QTerm,keydata=KeyData,from=From}, + _Sender, State) -> + do_map(From,QTerm,BKey,KeyData,State,self()); handle_command(?KV_VCLOCK_REQ{bkeys=BKeys}, _Sender, State) -> {reply, do_get_vclocks(BKeys, State), State}; handle_command(?FOLD_REQ{foldfun=Fun, acc0=Acc},_Sender,State) -> From e3bf70892e323e44e7d74a8897e98c7a86e8e9f4 Mon Sep 17 00:00:00 2001 From: kevsmith Date: Sun, 1 Aug 2010 00:08:21 -0400 Subject: [PATCH 09/72] Hooked up flow cache to reduce phase --- src/riak_kv_reduce_phase.erl | 48 ++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/src/riak_kv_reduce_phase.erl b/src/riak_kv_reduce_phase.erl index 6e0017295e..d736445bdf 100644 --- a/src/riak_kv_reduce_phase.erl +++ b/src/riak_kv_reduce_phase.erl @@ -82,23 +82,35 @@ terminate(_Reason, _State) -> perform_reduce({Lang,{reduce,FunTerm,Arg,_Acc}}, Reduced) -> - try - case {Lang, FunTerm} of - {erlang, {qfun,F}} -> - {ok, F(Reduced,Arg)}; - {erlang, {modfun,M,F}} -> - {ok, M:F(Reduced,Arg)}; - {javascript, _} -> - case riak_kv_js_manager:blocking_dispatch({FunTerm, + Key = erlang:phash2({FunTerm, Arg, Reduced}), + case luke_phase:check_cache(Key) of + not_found -> + try + case {Lang, FunTerm} of + {erlang, {qfun,F}} -> + Value = F(Reduced,Arg), + luke_phase:cache_value(Key, Value), + {ok, Value}; + {erlang, {modfun,M,F}} -> + Value = M:F(Reduced,Arg), + luke_phase:cache_value(Key, Value), + {ok, Value}; + {javascript, _} -> + case riak_kv_js_manager:blocking_dispatch({FunTerm, [riak_kv_mapred_json:jsonify_not_found(R) || R <- Reduced], - Arg}, 5) of - {ok, Data} when is_list(Data) -> - {ok, [riak_kv_mapred_json:dejsonify_not_found(Datum) || Datum <- Data]}; - Error -> - throw(Error) - end - end - catch _:R -> - error_logger:error_msg("Failed reduce: ~p~n", [R]), - {error, failed_reduce} + Arg}, 5) of + {ok, Data} when is_list(Data) -> + Data1 = [riak_kv_mapred_json:dejsonify_not_found(Datum) || Datum <- Data], + luke_phase:cache_value(Key, Data1), + {ok, Data1}; + Error -> + throw(Error) + end + end + catch _:R -> + error_logger:error_msg("Failed reduce: ~p~n", [R]), + {error, failed_reduce} + end; + Value -> + {ok, Value} end. From 4fc2d3031bbb9d4a073c445097bb5bbcf11df8fc Mon Sep 17 00:00:00 2001 From: kevsmith Date: Sun, 1 Aug 2010 10:38:04 -0400 Subject: [PATCH 10/72] Fixing error propagation;Wiring up mapcache to JS map functions --- src/riak_kv_js_vm.erl | 8 ++--- src/riak_kv_map_executor.erl | 28 ++++++++++----- src/riak_kv_vnode.erl | 67 +++++++++++++++++++++++++++++------- 3 files changed, 79 insertions(+), 24 deletions(-) diff --git a/src/riak_kv_js_vm.erl b/src/riak_kv_js_vm.erl index 56f2a36257..d6b7803fa5 100644 --- a/src/riak_kv_js_vm.erl +++ b/src/riak_kv_js_vm.erl @@ -99,7 +99,7 @@ handle_cast(reload, #state{ctx=Ctx}=State) -> {noreply, State}; %% Map phase with anonymous function -handle_cast({dispatch, Requestor, _JobId, {Sender, {map, {jsanon, JS}, Arg, _Acc}, +handle_cast({dispatch, _Requestor, JobId, {Sender, {map, {jsanon, JS}, Arg, _Acc}, Value, KeyData, _BKey}}, #state{ctx=Ctx}=State) -> {Result, UpdatedState} = case define_anon_js(JS, State) of @@ -117,10 +117,10 @@ handle_cast({dispatch, Requestor, _JobId, {Sender, {map, {jsanon, JS}, Arg, _Acc end, FinalState = case Result of {ok, ReturnValue} -> - riak_core_vnode:reply(Sender, {mapexec_reply, ReturnValue, Requestor}), + riak_core_vnode:send_command(Sender, {mapexec_reply, JobId, ReturnValue}), UpdatedState; ErrorResult -> - riak_core_vnode:reply(Sender, {mapexec_error_noretry, Requestor, ErrorResult}), + riak_core_vnode:send_command(Sender, {mapexec_error_noretry, JobId, ErrorResult}), State end, riak_kv_js_manager:mark_idle(), @@ -198,7 +198,7 @@ define_anon_js(JS, #state{ctx=Ctx, anon_funs=AnonFuns, next_funid=NextFunId}=Sta {ok, FunName, State#state{anon_funs=[{Hash, FunName}|AnonFuns], next_funid=NextFunId + 1}}; Error -> error_logger:warning_msg("Error defining anonymous Javascript function: ~p~n", [Error]), - {error, undefined, State} + {Error, undefined, State} end; FunName -> {ok, FunName, State} diff --git a/src/riak_kv_map_executor.erl b/src/riak_kv_map_executor.erl index c948b78735..a843376740 100644 --- a/src/riak_kv_map_executor.erl +++ b/src/riak_kv_map_executor.erl @@ -90,9 +90,16 @@ try_vnode(#state{qterm=QTerm, bkey=BKey, keydata=KeyData, vnodes=[{P, VN}|VNs], false -> try_vnode(StateData#state{vnodes=VNs}); true -> - riak_kv_vnode:map({P,VN},self(),QTerm,BKey,KeyData), - {ok, TRef} = timer:send_after(VNodeTimeout, self(), timeout), - StateData#state{vnodes=VNs, vnode_timer=TRef} + case riak_kv_vnode:map({P,VN},self(),QTerm,BKey,KeyData) of + {mapexec_reply, executing, _} -> + {ok, TRef} = timer:send_after(VNodeTimeout, self(), timeout), + StateData#state{vnodes=VNs, vnode_timer=TRef}; + {error, no_vms} -> + try_vnode(StateData); + Msg -> + gen_fsm:send_event(self(), Msg), + StateData#state{vnodes=VNs} + end end. wait(timeout, StateData=#state{bkey=BKey, keydata=KD, phase_pid=PhasePid,vnodes=[]}) -> @@ -101,21 +108,21 @@ wait(timeout, StateData=#state{bkey=BKey, keydata=KD, phase_pid=PhasePid,vnodes= wait(timeout, #state{timeout=Timeout}=StateData) -> case try_vnode(StateData) of {error, no_vnodes} -> - {stop, normal, StateData}; + {stop, normal, StateData}; NewState -> {next_state, wait, NewState, Timeout} end; wait({mapexec_error, _VN, _ErrMsg}, StateData=#state{bkey=BKey, keydata=KD, phase_pid=PhasePid,vnodes=[], vnode_timer=TRef}) -> - timer:cancel(TRef), + cancel_timer(TRef), riak_kv_phase_proto:mapexec_result(PhasePid, [{not_found, BKey, KD}]), {stop,normal,StateData}; wait({mapexec_error_noretry, _VN, ErrMsg}, #state{phase_pid=PhasePid, vnode_timer=TRef}=StateData) -> - timer:cancel(TRef), + cancel_timer(TRef), riak_kv_phase_proto:mapexec_error(PhasePid, ErrMsg), {stop, normal, StateData}; wait({mapexec_error, _VN, _ErrMsg}, #state{timeout=Timeout, vnode_timer=TRef}=StateData) -> - timer:cancel(TRef), + cancel_timer(TRef), case try_vnode(StateData) of {error, no_vnodes} -> {stop, normal, StateData}; @@ -125,7 +132,7 @@ wait({mapexec_error, _VN, _ErrMsg}, #state{timeout=Timeout, vnode_timer=TRef}=St wait({mapexec_reply, executing, _}, #state{timeout=Timeout}=StateData) -> {next_state, wait, StateData, Timeout}; wait({mapexec_reply, RetVal, _VN}, StateData=#state{phase_pid=PhasePid, vnode_timer=TRef}) -> - timer:cancel(TRef), + cancel_timer(TRef), riak_kv_phase_proto:mapexec_result(PhasePid, RetVal), {stop,normal,StateData}. @@ -151,3 +158,8 @@ terminate(Reason, _StateName, _State) -> %% @private code_change(_OldVsn, StateName, State, _Extra) -> {ok, StateName, State}. + +cancel_timer(undefined) -> + ok; +cancel_timer(TRef) -> + timer:cancel(TRef). diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index 6ddd4eaa71..beb425c7fa 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -49,10 +49,16 @@ -export([map_test/3]). -endif. +-record(mrjob, {cachekey :: term(), + bkey :: term(), + reqid :: term(), + target :: pid()}). + -record(state, {idx :: partition(), mod :: module(), modstate :: term(), mapcache :: term(), + mrjobs :: term(), in_handoff = false :: boolean()}). -record(putargs, {returnbody :: boolean(), @@ -111,7 +117,7 @@ map(Preflist, ClientPid, QTerm, BKey, KeyData) -> qterm=QTerm, bkey=BKey, keydata=KeyData, - from={fsm, undefined, ClientPid}}, + from=ClientPid}, riak_kv_vnode_master). fold(Preflist, Fun, Acc0) -> @@ -140,7 +146,7 @@ init([Index]) -> Configuration = app_helper:get_env(riak_kv), {ok, ModState} = Mod:start(Index, Configuration), schedule_clear_mapcache(), - {ok, #state{idx=Index, mod=Mod, modstate=ModState, mapcache=orddict:new()}}. + {ok, #state{idx=Index, mod=Mod, modstate=ModState, mapcache=orddict:new(), mrjobs=dict:new()}}. handle_command(?KV_PUT_REQ{bkey=BKey, object=Object, @@ -200,7 +206,38 @@ handle_command(purge_mapcache, _Sender, State) -> {noreply, State#state{mapcache=orddict:new()}}; handle_command(clear_mapcache, _Sender, State) -> schedule_clear_mapcache(), - {noreply, State#state{mapcache=orddict:new()}}. + {noreply, State#state{mapcache=orddict:new()}}; +handle_command({mapexec_error_noretry, JobId, Err}, _Sender, #state{mrjobs=Jobs}=State) -> + NewState = case dict:find(JobId, Jobs) of + {ok, Job} -> + Jobs1 = dict:erase(JobId, Jobs), + #mrjob{target=Target} = Job, + gen_fsm:send_event(Target, {mapexec_error_noretry, self(), Err}), + State#state{mrjobs=Jobs1}; + error -> + State + end, + {noreply, NewState}; +handle_command({mapexec_reply, JobId, Result}, _Sender, #state{mrjobs=Jobs, + mapcache=MapCache}=State) -> + NewState = case dict:find(JobId, Jobs) of + {ok, Job} -> + Jobs1 = dict:erase(JobId, Jobs), + #mrjob{cachekey=CacheKey, target=Target, bkey=BKey} = Job, + Cache = case orddict:find(BKey, MapCache) of + error -> + orddict:new(); + {ok, C} -> + C + end, + Cache1 = orddict:store(CacheKey, Result, Cache), + gen_fsm:send_event(Target, {mapexec_reply, Result, self()}), + MapCache1 = orddict:store(BKey, Cache1, MapCache), + State#state{mrjobs=Jobs1, mapcache=MapCache1}; + error -> + State + end, + {noreply, NewState}. handle_handoff_command(Req=?FOLD_REQ{}, Sender, State) -> handle_command(Req, Sender, State); @@ -399,10 +436,14 @@ do_diffobj_put(BKey={Bucket,_}, DiffObj, end. %% @private -do_map(Sender, QTerm, BKey, KeyData, #state{mod=Mod, modstate=ModState, mapcache=Cache}=State, VNode) -> - {Reply, NewState} = case do_map(QTerm, BKey, Mod, ModState, KeyData, Cache, VNode, Sender) of - map_executing -> - {{mapexec_reply, executing, self()}, State}; +do_map(Sender, QTerm, BKey, KeyData, #state{mrjobs=Jobs, mod=Mod, modstate=ModState, + mapcache=Cache}=State, VNode) -> + {Reply, NewState} = case do_map(QTerm, BKey, Mod, ModState, KeyData, Cache, VNode) of + {map_executing, BKey, CacheKey, ReqId} -> + J = #mrjob{reqid=ReqId, target=Sender, + bkey=BKey, cachekey=CacheKey}, + Jobs1 = dict:store(ReqId, J, Jobs), + {{mapexec_reply, executing, self()}, State#state{mrjobs=Jobs1}}; {ok, Retval} -> {{mapexec_reply, Retval, self()}, State}; {error, Error} -> @@ -410,7 +451,7 @@ do_map(Sender, QTerm, BKey, KeyData, #state{mod=Mod, modstate=ModState, mapcache end, {reply, Reply, NewState}. -do_map({erlang, {map, FunTerm, Arg, _Acc}}, BKey, Mod, ModState, KeyData, Cache, VNode, _Sender) -> +do_map({erlang, {map, FunTerm, Arg, _Acc}}, BKey, Mod, ModState, KeyData, Cache, VNode) -> CacheKey = build_key(FunTerm, Arg, KeyData), CacheVal = cache_fetch(BKey, CacheKey, Cache), case CacheVal of @@ -419,7 +460,7 @@ do_map({erlang, {map, FunTerm, Arg, _Acc}}, BKey, Mod, ModState, KeyData, Cache, CV -> {ok, CV} end; -do_map({javascript, {map, FunTerm, Arg, _}=QTerm}, BKey, Mod, ModState, KeyData, Cache, _VNode, Sender) -> +do_map({javascript, {map, FunTerm, Arg, _}=QTerm}, BKey, Mod, ModState, KeyData, Cache, _VNode) -> CacheKey = build_key(FunTerm, Arg, KeyData), CacheVal = cache_fetch(BKey, CacheKey, Cache), case CacheVal of @@ -427,9 +468,9 @@ do_map({javascript, {map, FunTerm, Arg, _}=QTerm}, BKey, Mod, ModState, KeyData, case Mod:get(ModState, BKey) of {ok, Binary} -> V = binary_to_term(Binary), - case riak_kv_js_manager:dispatch({Sender, QTerm, V, KeyData, BKey}, 10) of - {ok, _JobId} -> - map_executing; + case riak_kv_js_manager:dispatch({self(), QTerm, V, KeyData, BKey}, 10) of + {ok, JobId} -> + {map_executing, BKey, CacheKey, JobId}; Error -> Error end; @@ -444,6 +485,8 @@ build_key({modfun, CMod, CFun}, Arg, KeyData) -> {CMod, CFun, Arg, KeyData}; build_key({jsfun, FunName}, Arg, KeyData) -> {FunName, Arg, KeyData}; +build_key({jsanon, Src}, Arg, KeyData) -> + {erlang:phash2(Src), Arg, KeyData}; build_key(_, _, _) -> no_key. From 57683986919488612ab11595b585ac6b6cada8b3 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Sun, 1 Aug 2010 10:58:16 -0400 Subject: [PATCH 11/72] Depending on updated version of luke --- rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index 955cf7a083..dca22116ca 100644 --- a/rebar.config +++ b/rebar.config @@ -7,7 +7,7 @@ "tip"}}, {riakc, "0.2.0", {hg, "http://bitbucket.org/basho/riak-erlang-client", "tip"}}, - {luke, "\.*", {hg, "http://bitbucket.org/basho/luke", + {luke, "\.*", {hg, "http://bitbucket.org/kevsmith/luke-mr", "tip"}}, {erlang_js, "0\.4", {hg, "http://bitbucket.org/basho/erlang_js", "erlang_js-0.4"}}, From 08c2095e0a075de507da4ee045e6287a584308d4 Mon Sep 17 00:00:00 2001 From: Jon Meredith Date: Mon, 2 Aug 2010 10:18:16 -0600 Subject: [PATCH 12/72] Changed the startup order for the protocol buffers processes so it listens *after* starting the supervisor. bz://544 ticket #142 --- src/riak_kv_sup.erl | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/riak_kv_sup.erl b/src/riak_kv_sup.erl index 8a87e8ea22..5a422be0eb 100644 --- a/src/riak_kv_sup.erl +++ b/src/riak_kv_sup.erl @@ -45,12 +45,10 @@ init([]) -> {riak_core_vnode_master, start_link, [riak_kv_vnode, riak_kv_legacy_vnode]}, permanent, 5000, worker, [riak_core_vnode_master]}, - RiakPb = [{riak_kv_pb_listener, - {riak_kv_pb_listener, start_link, []}, - permanent, 5000, worker, [riak_kv_pb_listener]}, - {riak_kv_pb_socket_sup, - {riak_kv_pb_socket_sup, start_link, []}, - permanent, infinity, supervisor, [riak_kv_pb_socket_sup]} + RiakPb = [ {riak_kv_pb_socket_sup, {riak_kv_pb_socket_sup, start_link, []}, + permanent, infinity, supervisor, [riak_kv_pb_socket_sup]}, + {riak_kv_pb_listener, {riak_kv_pb_listener, start_link, []}, + permanent, 5000, worker, [riak_kv_pb_listener]} ], RiakStat = {riak_kv_stat, {riak_kv_stat, start_link, []}, From 9acb2749495b68867a4dd6d4ba815e559c9e9946 Mon Sep 17 00:00:00 2001 From: kevsmith Date: Mon, 2 Aug 2010 13:42:21 -0400 Subject: [PATCH 13/72] Turning on parallel reduce phases for JS;Adding rereduce toggling --- src/riak_kv_mapred_json.erl | 48 ++++++++++++++++---------- src/riak_kv_mapred_query.erl | 66 +++++++++++++++++------------------- src/riak_kv_reduce_phase.erl | 49 +++++++++++++++++++------- 3 files changed, 99 insertions(+), 64 deletions(-) diff --git a/src/riak_kv_mapred_json.erl b/src/riak_kv_mapred_json.erl index 2928c0fa49..b5725cb53d 100644 --- a/src/riak_kv_mapred_json.erl +++ b/src/riak_kv_mapred_json.erl @@ -116,28 +116,42 @@ parse_query([{struct, [{Type, {struct, StepDef}}]}|T], Accum) <<"link">> -> link end, Keep = proplists:get_value(<<"keep">>, StepDef, T==[]), - Step = case not(Keep =:= true orelse Keep =:= false) of + Rereduce = proplists:get_value(<<"rereduce">>, StepDef, true), + Step = case (StepType =:= reduce andalso (Rereduce /= true andalso Rereduce /= false)) of true -> - {error, ["The \"keep\" field was not a boolean value in:\n" + {error, ["The \"rereduce\" field was not a boolean value in:\n" " ",mochijson2:encode( {struct,[{Type,{struct,StepDef}}]}), "\n"]}; false -> - if StepType == link -> - case parse_link_step(StepDef) of - {ok, {Bucket, Tag}} -> - {ok, {link, Bucket, Tag, Keep}}; - LError -> - LError - end; - true -> % map or reduce - Lang = proplists:get_value(<<"language">>, StepDef), - case parse_step(Lang, StepDef) of - {ok, ParsedStep} -> - Arg = proplists:get_value(<<"arg">>, StepDef, none), - {ok, {StepType, ParsedStep, Arg, Keep}}; - QError -> - QError + case not(Keep =:= true orelse Keep =:= false) of + true -> + {error, ["The \"keep\" field was not a boolean value in:\n" + " ",mochijson2:encode( + {struct,[{Type,{struct,StepDef}}]}), + "\n"]}; + false -> + if StepType == link -> + case parse_link_step(StepDef) of + {ok, {Bucket, Tag}} -> + {ok, {link, Bucket, Tag, Keep}}; + LError -> + LError + end; + true -> % map or reduce + Lang = proplists:get_value(<<"language">>, StepDef), + case parse_step(Lang, StepDef) of + {ok, ParsedStep} -> + Arg = proplists:get_value(<<"arg">>, StepDef, none), + case StepType of + reduce -> + {ok, {StepType, ParsedStep, Arg, Rereduce, Keep}}; + _ -> + {ok, {StepType, ParsedStep, Arg, Keep}} + end; + QError -> + QError + end end end end, diff --git a/src/riak_kv_mapred_query.erl b/src/riak_kv_mapred_query.erl index 25ae1a5692..91d430f6ca 100644 --- a/src/riak_kv_mapred_query.erl +++ b/src/riak_kv_mapred_query.erl @@ -63,10 +63,9 @@ -export([start/6]). start(Node, Client, ReqId, Query0, ResultTransformer, Timeout) -> - EffectiveTimeout = erlang:trunc(Timeout * 1.1), case check_query_syntax(Query0) of {ok, Query} -> - luke:new_flow(Node, Client, ReqId, Query, ResultTransformer, EffectiveTimeout); + luke:new_flow(Node, Client, ReqId, Query, ResultTransformer, Timeout); {bad_qterm, QTerm} -> {stop, {bad_qterm, QTerm}} end. @@ -76,30 +75,32 @@ check_query_syntax(Query) -> check_query_syntax([], Accum) -> {ok, Accum}; -check_query_syntax([QTerm={QTermType, QueryFun, Misc, Acc}|Rest], Accum) when is_boolean(Acc) -> +check_query_syntax([QTerm|Rest], Accum) -> + io:format("QTerm: ~p~n", [QTerm]), + {QTermType, QueryFun, Misc, Rereduce, Acc} = parse_qterm(QTerm), PhaseDef = case QTermType of link -> - {phase_mod(link), phase_behavior(link, QueryFun, Acc), [{erlang, QTerm}]}; + {phase_mod(link), phase_behavior(link, QueryFun, Rereduce, Acc), [{erlang, QTerm}]}; T when T =:= map orelse T=:= reduce -> case QueryFun of {modfun, Mod, Fun} when is_atom(Mod), is_atom(Fun) -> - {phase_mod(T), phase_behavior(T, QueryFun, Acc), [{erlang, QTerm}]}; + {phase_mod(T), phase_behavior(T, QueryFun, Rereduce, Acc), [{erlang, QTerm}]}; {qfun, Fun} when is_function(Fun) -> - {phase_mod(T), phase_behavior(T, QueryFun, Acc), [{erlang, QTerm}]}; + {phase_mod(T), phase_behavior(T, QueryFun, Rereduce, Acc), [{erlang, QTerm}]}; {jsanon, JS} when is_binary(JS) -> - {phase_mod(T), phase_behavior(T, QueryFun, Acc), [{javascript, QTerm}]}; + {phase_mod(T), phase_behavior(T, QueryFun, Rereduce, Acc), [{javascript, QTerm}]}; {jsanon, {Bucket, Key}} when is_binary(Bucket), is_binary(Key) -> case fetch_js(Bucket, Key) of {ok, JS} -> - {phase_mod(T), phase_behavior(T, QueryFun, Acc), [{javascript, - {T, {jsanon, JS}, Misc, Acc}}]}; + {phase_mod(T), phase_behavior(T, QueryFun, Rereduce, Acc), + [{javascript, {T, {jsanon, JS}, Misc, Rereduce, Acc}}]}; _ -> {bad_qterm, QTerm} end; {jsfun, JS} when is_binary(JS) -> - {phase_mod(T), phase_behavior(T, QueryFun, Acc), [{javascript, QTerm}]}; + {phase_mod(T), phase_behavior(T, QueryFun, Rereduce, Acc), [{javascript, QTerm}]}; _ -> {bad_qterm, QTerm} end @@ -111,6 +112,11 @@ check_query_syntax([QTerm={QTermType, QueryFun, Misc, Acc}|Rest], Accum) when is check_query_syntax(Rest, [PhaseDef|Accum]) end. +parse_qterm({Type, QueryFun, Misc, Rereduce, Accum}) -> + {Type, QueryFun, Misc, Rereduce, Accum}; +parse_qterm({Type, QueryFun, Misc, Accum}) -> + {Type, QueryFun, Misc, false, Accum}. + phase_mod(link) -> riak_kv_map_phase; phase_mod(map) -> @@ -118,37 +124,27 @@ phase_mod(map) -> phase_mod(reduce) -> riak_kv_reduce_phase. -phase_behavior(link, _QueryFun, true) -> +phase_behavior(link, _QueryFun, _, true) -> [accumulate]; -phase_behavior(link, _QueryFun, false) -> +phase_behavior(link, _QueryFun, _, false) -> []; -phase_behavior(map, _QueryFun, true) -> +phase_behavior(map, _QueryFun, _, true) -> [accumulate]; -phase_behavior(map, _QueryFun, false) -> +phase_behavior(map, _QueryFun, _, false) -> []; -%% Turn off parallel converges for jsanon since -%% they take too long to execute and wind up -%% monopolizing the available JS VMs on a given node -phase_behavior(reduce, {FunType, _}, Accumulate) -> - CP = if - FunType =:= jsanon -> - 1; - true -> - 2 - end, - if - Accumulate =:= true -> - [{converge, CP}, accumulate]; - true -> - [{converge, CP}] - end; -phase_behavior(reduce, {modfun, _, _}, Accumulate) -> - if - Accumulate =:= true -> - [{converge, 2}, accumulate]; +phase_behavior(reduce, _QueryFun, Rereduce, Accumulate) -> + Props = build_props(Rereduce, Accumulate), + [{converge, 2}|Props]. + +build_props(Rereduce, Accumulate) -> + Props1 = [{rereduce, Rereduce}], + case Accumulate of true -> - [{converge, 2}] + [accumulate|Props1]; + false -> + Props1 end. + fetch_js(Bucket, Key) -> {ok, Client} = riak:local_client(), case Client:get(Bucket, Key, 1) of diff --git a/src/riak_kv_reduce_phase.erl b/src/riak_kv_reduce_phase.erl index d736445bdf..33c9b02bd2 100644 --- a/src/riak_kv_reduce_phase.erl +++ b/src/riak_kv_reduce_phase.erl @@ -29,19 +29,26 @@ -export([init/1, handle_input/3, handle_input_done/1, handle_event/2, handle_timeout/1, handle_info/2, terminate/2]). --record(state, {qterm, reduced=[], new_inputs=[]}). +-record(state, {qterm, rereduce, reduced=[], new_inputs=[]}). %% @private init([QTerm]) -> - {ok, #state{qterm=QTerm}}. + {_Lang, {reduce, _FunTerm, _Arg, Rereduce, _Acc}} = QTerm, + {ok, #state{qterm=QTerm, rereduce=Rereduce}}. -handle_input(Inputs, #state{reduced=Reduced0, qterm=QTerm, new_inputs=New0}=State0, _Timeout) -> +handle_input(Inputs, #state{rereduce=Rereduce, reduced=Reduced0, qterm=QTerm, + new_inputs=New0}=State0, _Timeout) -> New1 = New0 ++ Inputs, if length(New1) > 20 -> case perform_reduce(QTerm, New1) of {ok, Reduced} -> - {no_output, State0#state{reduced=Reduced0 ++ Reduced, new_inputs=[]}, 250}; + case Rereduce of + true -> + {no_output, State0#state{reduced=Reduced0 ++ Reduced, new_inputs=[]}, 250}; + false -> + {output, Reduced, State0#state{reduced=[]}} + end; Error -> {stop, Error, State0#state{reduced=[], new_inputs=[]}} end; @@ -49,15 +56,33 @@ handle_input(Inputs, #state{reduced=Reduced0, qterm=QTerm, new_inputs=New0}=Stat {no_output, State0#state{new_inputs=New1}, 250} end. -handle_input_done(#state{qterm=QTerm, reduced=Reduced0, new_inputs=New0}=State) -> - case perform_reduce(QTerm, Reduced0 ++ New0) of - {ok, Reduced} -> - luke_phase:complete(), - {output, Reduced, State#state{reduced=Reduced}}; - Error -> - {stop, Error, State#state{reduced=[]}} +handle_input_done(#state{qterm=QTerm, rereduce=Rereduce, reduced=Reduced0, new_inputs=New0}=State) -> + case Rereduce of + true -> + case perform_reduce(QTerm, Reduced0 ++ New0) of + {ok, Reduced} -> + luke_phase:complete(), + {output, Reduced, State#state{reduced=Reduced}}; + Error -> + {stop, Error, State#state{reduced=[]}} + end; + false -> + case length(New0) > 0 of + true -> + case perform_reduce(QTerm, Reduced0 ++ New0) of + {ok, Reduced} -> + luke_phase:complete(), + {output, Reduced, State#state{reduced=Reduced}}; + Error -> + {stop, Error, State#state{reduced=[]}} + end; + false -> + luke_phase:complete(), + {no_output, State} + end end. + handle_timeout(#state{qterm=QTerm, reduced=Reduced0, new_inputs=New0}=State) -> if length(New0) > 0 -> @@ -80,7 +105,7 @@ handle_info(_Info, State) -> terminate(_Reason, _State) -> ok. -perform_reduce({Lang,{reduce,FunTerm,Arg,_Acc}}, +perform_reduce({Lang,{reduce,FunTerm,Arg,_Rereduce, _Acc}}, Reduced) -> Key = erlang:phash2({FunTerm, Arg, Reduced}), case luke_phase:check_cache(Key) of From 6dbe894b43fc2da637327f588b907c2512a35f40 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Mon, 2 Aug 2010 13:53:52 -0400 Subject: [PATCH 14/72] Removing debug output --- src/riak_kv_mapred_query.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/riak_kv_mapred_query.erl b/src/riak_kv_mapred_query.erl index 91d430f6ca..add02fd8fd 100644 --- a/src/riak_kv_mapred_query.erl +++ b/src/riak_kv_mapred_query.erl @@ -76,7 +76,6 @@ check_query_syntax(Query) -> check_query_syntax([], Accum) -> {ok, Accum}; check_query_syntax([QTerm|Rest], Accum) -> - io:format("QTerm: ~p~n", [QTerm]), {QTermType, QueryFun, Misc, Rereduce, Acc} = parse_qterm(QTerm), PhaseDef = case QTermType of link -> From 631ef6785cb26befcc72c575902e032711233962 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Mon, 2 Aug 2010 13:58:53 -0400 Subject: [PATCH 15/72] Emptying new data buffer when reduce finishes (for rereduce=false case) --- src/riak_kv_reduce_phase.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/riak_kv_reduce_phase.erl b/src/riak_kv_reduce_phase.erl index 33c9b02bd2..a26912abcd 100644 --- a/src/riak_kv_reduce_phase.erl +++ b/src/riak_kv_reduce_phase.erl @@ -47,7 +47,7 @@ handle_input(Inputs, #state{rereduce=Rereduce, reduced=Reduced0, qterm=QTerm, true -> {no_output, State0#state{reduced=Reduced0 ++ Reduced, new_inputs=[]}, 250}; false -> - {output, Reduced, State0#state{reduced=[]}} + {output, Reduced, State0#state{reduced=[], new_inputs=[]}} end; Error -> {stop, Error, State0#state{reduced=[], new_inputs=[]}} From 259445e43d6927d2c6927f1a1d4ca7adf027d1a9 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Tue, 3 Aug 2010 06:45:04 -0400 Subject: [PATCH 16/72] Removing rereduce toggle --- src/riak_kv_mapred_json.erl | 48 ++++++++++---------------- src/riak_kv_mapred_query.erl | 66 +++++++++++++++++++----------------- src/riak_kv_reduce_phase.erl | 49 +++++++------------------- 3 files changed, 64 insertions(+), 99 deletions(-) diff --git a/src/riak_kv_mapred_json.erl b/src/riak_kv_mapred_json.erl index b5725cb53d..2928c0fa49 100644 --- a/src/riak_kv_mapred_json.erl +++ b/src/riak_kv_mapred_json.erl @@ -116,42 +116,28 @@ parse_query([{struct, [{Type, {struct, StepDef}}]}|T], Accum) <<"link">> -> link end, Keep = proplists:get_value(<<"keep">>, StepDef, T==[]), - Rereduce = proplists:get_value(<<"rereduce">>, StepDef, true), - Step = case (StepType =:= reduce andalso (Rereduce /= true andalso Rereduce /= false)) of + Step = case not(Keep =:= true orelse Keep =:= false) of true -> - {error, ["The \"rereduce\" field was not a boolean value in:\n" + {error, ["The \"keep\" field was not a boolean value in:\n" " ",mochijson2:encode( {struct,[{Type,{struct,StepDef}}]}), "\n"]}; false -> - case not(Keep =:= true orelse Keep =:= false) of - true -> - {error, ["The \"keep\" field was not a boolean value in:\n" - " ",mochijson2:encode( - {struct,[{Type,{struct,StepDef}}]}), - "\n"]}; - false -> - if StepType == link -> - case parse_link_step(StepDef) of - {ok, {Bucket, Tag}} -> - {ok, {link, Bucket, Tag, Keep}}; - LError -> - LError - end; - true -> % map or reduce - Lang = proplists:get_value(<<"language">>, StepDef), - case parse_step(Lang, StepDef) of - {ok, ParsedStep} -> - Arg = proplists:get_value(<<"arg">>, StepDef, none), - case StepType of - reduce -> - {ok, {StepType, ParsedStep, Arg, Rereduce, Keep}}; - _ -> - {ok, {StepType, ParsedStep, Arg, Keep}} - end; - QError -> - QError - end + if StepType == link -> + case parse_link_step(StepDef) of + {ok, {Bucket, Tag}} -> + {ok, {link, Bucket, Tag, Keep}}; + LError -> + LError + end; + true -> % map or reduce + Lang = proplists:get_value(<<"language">>, StepDef), + case parse_step(Lang, StepDef) of + {ok, ParsedStep} -> + Arg = proplists:get_value(<<"arg">>, StepDef, none), + {ok, {StepType, ParsedStep, Arg, Keep}}; + QError -> + QError end end end, diff --git a/src/riak_kv_mapred_query.erl b/src/riak_kv_mapred_query.erl index 91d430f6ca..25ae1a5692 100644 --- a/src/riak_kv_mapred_query.erl +++ b/src/riak_kv_mapred_query.erl @@ -63,9 +63,10 @@ -export([start/6]). start(Node, Client, ReqId, Query0, ResultTransformer, Timeout) -> + EffectiveTimeout = erlang:trunc(Timeout * 1.1), case check_query_syntax(Query0) of {ok, Query} -> - luke:new_flow(Node, Client, ReqId, Query, ResultTransformer, Timeout); + luke:new_flow(Node, Client, ReqId, Query, ResultTransformer, EffectiveTimeout); {bad_qterm, QTerm} -> {stop, {bad_qterm, QTerm}} end. @@ -75,32 +76,30 @@ check_query_syntax(Query) -> check_query_syntax([], Accum) -> {ok, Accum}; -check_query_syntax([QTerm|Rest], Accum) -> - io:format("QTerm: ~p~n", [QTerm]), - {QTermType, QueryFun, Misc, Rereduce, Acc} = parse_qterm(QTerm), +check_query_syntax([QTerm={QTermType, QueryFun, Misc, Acc}|Rest], Accum) when is_boolean(Acc) -> PhaseDef = case QTermType of link -> - {phase_mod(link), phase_behavior(link, QueryFun, Rereduce, Acc), [{erlang, QTerm}]}; + {phase_mod(link), phase_behavior(link, QueryFun, Acc), [{erlang, QTerm}]}; T when T =:= map orelse T=:= reduce -> case QueryFun of {modfun, Mod, Fun} when is_atom(Mod), is_atom(Fun) -> - {phase_mod(T), phase_behavior(T, QueryFun, Rereduce, Acc), [{erlang, QTerm}]}; + {phase_mod(T), phase_behavior(T, QueryFun, Acc), [{erlang, QTerm}]}; {qfun, Fun} when is_function(Fun) -> - {phase_mod(T), phase_behavior(T, QueryFun, Rereduce, Acc), [{erlang, QTerm}]}; + {phase_mod(T), phase_behavior(T, QueryFun, Acc), [{erlang, QTerm}]}; {jsanon, JS} when is_binary(JS) -> - {phase_mod(T), phase_behavior(T, QueryFun, Rereduce, Acc), [{javascript, QTerm}]}; + {phase_mod(T), phase_behavior(T, QueryFun, Acc), [{javascript, QTerm}]}; {jsanon, {Bucket, Key}} when is_binary(Bucket), is_binary(Key) -> case fetch_js(Bucket, Key) of {ok, JS} -> - {phase_mod(T), phase_behavior(T, QueryFun, Rereduce, Acc), - [{javascript, {T, {jsanon, JS}, Misc, Rereduce, Acc}}]}; + {phase_mod(T), phase_behavior(T, QueryFun, Acc), [{javascript, + {T, {jsanon, JS}, Misc, Acc}}]}; _ -> {bad_qterm, QTerm} end; {jsfun, JS} when is_binary(JS) -> - {phase_mod(T), phase_behavior(T, QueryFun, Rereduce, Acc), [{javascript, QTerm}]}; + {phase_mod(T), phase_behavior(T, QueryFun, Acc), [{javascript, QTerm}]}; _ -> {bad_qterm, QTerm} end @@ -112,11 +111,6 @@ check_query_syntax([QTerm|Rest], Accum) -> check_query_syntax(Rest, [PhaseDef|Accum]) end. -parse_qterm({Type, QueryFun, Misc, Rereduce, Accum}) -> - {Type, QueryFun, Misc, Rereduce, Accum}; -parse_qterm({Type, QueryFun, Misc, Accum}) -> - {Type, QueryFun, Misc, false, Accum}. - phase_mod(link) -> riak_kv_map_phase; phase_mod(map) -> @@ -124,27 +118,37 @@ phase_mod(map) -> phase_mod(reduce) -> riak_kv_reduce_phase. -phase_behavior(link, _QueryFun, _, true) -> +phase_behavior(link, _QueryFun, true) -> [accumulate]; -phase_behavior(link, _QueryFun, _, false) -> +phase_behavior(link, _QueryFun, false) -> []; -phase_behavior(map, _QueryFun, _, true) -> +phase_behavior(map, _QueryFun, true) -> [accumulate]; -phase_behavior(map, _QueryFun, _, false) -> +phase_behavior(map, _QueryFun, false) -> []; -phase_behavior(reduce, _QueryFun, Rereduce, Accumulate) -> - Props = build_props(Rereduce, Accumulate), - [{converge, 2}|Props]. - -build_props(Rereduce, Accumulate) -> - Props1 = [{rereduce, Rereduce}], - case Accumulate of +%% Turn off parallel converges for jsanon since +%% they take too long to execute and wind up +%% monopolizing the available JS VMs on a given node +phase_behavior(reduce, {FunType, _}, Accumulate) -> + CP = if + FunType =:= jsanon -> + 1; + true -> + 2 + end, + if + Accumulate =:= true -> + [{converge, CP}, accumulate]; true -> - [accumulate|Props1]; - false -> - Props1 + [{converge, CP}] + end; +phase_behavior(reduce, {modfun, _, _}, Accumulate) -> + if + Accumulate =:= true -> + [{converge, 2}, accumulate]; + true -> + [{converge, 2}] end. - fetch_js(Bucket, Key) -> {ok, Client} = riak:local_client(), case Client:get(Bucket, Key, 1) of diff --git a/src/riak_kv_reduce_phase.erl b/src/riak_kv_reduce_phase.erl index 33c9b02bd2..d736445bdf 100644 --- a/src/riak_kv_reduce_phase.erl +++ b/src/riak_kv_reduce_phase.erl @@ -29,26 +29,19 @@ -export([init/1, handle_input/3, handle_input_done/1, handle_event/2, handle_timeout/1, handle_info/2, terminate/2]). --record(state, {qterm, rereduce, reduced=[], new_inputs=[]}). +-record(state, {qterm, reduced=[], new_inputs=[]}). %% @private init([QTerm]) -> - {_Lang, {reduce, _FunTerm, _Arg, Rereduce, _Acc}} = QTerm, - {ok, #state{qterm=QTerm, rereduce=Rereduce}}. + {ok, #state{qterm=QTerm}}. -handle_input(Inputs, #state{rereduce=Rereduce, reduced=Reduced0, qterm=QTerm, - new_inputs=New0}=State0, _Timeout) -> +handle_input(Inputs, #state{reduced=Reduced0, qterm=QTerm, new_inputs=New0}=State0, _Timeout) -> New1 = New0 ++ Inputs, if length(New1) > 20 -> case perform_reduce(QTerm, New1) of {ok, Reduced} -> - case Rereduce of - true -> - {no_output, State0#state{reduced=Reduced0 ++ Reduced, new_inputs=[]}, 250}; - false -> - {output, Reduced, State0#state{reduced=[]}} - end; + {no_output, State0#state{reduced=Reduced0 ++ Reduced, new_inputs=[]}, 250}; Error -> {stop, Error, State0#state{reduced=[], new_inputs=[]}} end; @@ -56,33 +49,15 @@ handle_input(Inputs, #state{rereduce=Rereduce, reduced=Reduced0, qterm=QTerm, {no_output, State0#state{new_inputs=New1}, 250} end. -handle_input_done(#state{qterm=QTerm, rereduce=Rereduce, reduced=Reduced0, new_inputs=New0}=State) -> - case Rereduce of - true -> - case perform_reduce(QTerm, Reduced0 ++ New0) of - {ok, Reduced} -> - luke_phase:complete(), - {output, Reduced, State#state{reduced=Reduced}}; - Error -> - {stop, Error, State#state{reduced=[]}} - end; - false -> - case length(New0) > 0 of - true -> - case perform_reduce(QTerm, Reduced0 ++ New0) of - {ok, Reduced} -> - luke_phase:complete(), - {output, Reduced, State#state{reduced=Reduced}}; - Error -> - {stop, Error, State#state{reduced=[]}} - end; - false -> - luke_phase:complete(), - {no_output, State} - end +handle_input_done(#state{qterm=QTerm, reduced=Reduced0, new_inputs=New0}=State) -> + case perform_reduce(QTerm, Reduced0 ++ New0) of + {ok, Reduced} -> + luke_phase:complete(), + {output, Reduced, State#state{reduced=Reduced}}; + Error -> + {stop, Error, State#state{reduced=[]}} end. - handle_timeout(#state{qterm=QTerm, reduced=Reduced0, new_inputs=New0}=State) -> if length(New0) > 0 -> @@ -105,7 +80,7 @@ handle_info(_Info, State) -> terminate(_Reason, _State) -> ok. -perform_reduce({Lang,{reduce,FunTerm,Arg,_Rereduce, _Acc}}, +perform_reduce({Lang,{reduce,FunTerm,Arg,_Acc}}, Reduced) -> Key = erlang:phash2({FunTerm, Arg, Reduced}), case luke_phase:check_cache(Key) of From 8f95f852b4c037f749944e51148fb645b337f2b9 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Tue, 3 Aug 2010 09:21:31 -0400 Subject: [PATCH 17/72] Turn on multi-process reduce phases for jsnon functions; Linked keys_fsm to mapred process so they die together in case of timeouts --- src/riak_kv_keys_fsm.erl | 7 +++++++ src/riak_kv_mapred_query.erl | 27 ++++++--------------------- 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/src/riak_kv_keys_fsm.erl b/src/riak_kv_keys_fsm.erl index 7dacbfb7e2..bdb90c30a8 100644 --- a/src/riak_kv_keys_fsm.erl +++ b/src/riak_kv_keys_fsm.erl @@ -53,6 +53,13 @@ init([ReqId,Bucket,Timeout,ClientType,ErrorTolerance,Client]) -> {ok, Bloom} = ebloom:new(10000000,ErrorTolerance,ReqId), StateData = #state{client=Client, client_type=ClientType, timeout=Timeout, bloom=Bloom, req_id=ReqId, bucket=Bucket, ring=Ring}, + case ClientType of + %% Link to the mapred job so we die if the job dies + mapred -> + link(Client); + _ -> + ok + end, {ok,initialize,StateData,0}. %% @private diff --git a/src/riak_kv_mapred_query.erl b/src/riak_kv_mapred_query.erl index 25ae1a5692..760046c324 100644 --- a/src/riak_kv_mapred_query.erl +++ b/src/riak_kv_mapred_query.erl @@ -126,28 +126,13 @@ phase_behavior(map, _QueryFun, true) -> [accumulate]; phase_behavior(map, _QueryFun, false) -> []; -%% Turn off parallel converges for jsanon since -%% they take too long to execute and wind up -%% monopolizing the available JS VMs on a given node -phase_behavior(reduce, {FunType, _}, Accumulate) -> - CP = if - FunType =:= jsanon -> - 1; - true -> - 2 - end, - if - Accumulate =:= true -> - [{converge, CP}, accumulate]; +phase_behavior(reduce, {_, _}, Accumulate) -> + Behaviors0 = [{converge, 2}], + case Accumulate of true -> - [{converge, CP}] - end; -phase_behavior(reduce, {modfun, _, _}, Accumulate) -> - if - Accumulate =:= true -> - [{converge, 2}, accumulate]; - true -> - [{converge, 2}] + [accumulate|Behaviors0]; + false -> + Behaviors0 end. fetch_js(Bucket, Key) -> {ok, Client} = riak:local_client(), From 7a272302cf1ac9760af10f1cc1a2fb2a3e3ddd49 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Tue, 3 Aug 2010 14:08:35 -0400 Subject: [PATCH 18/72] Disabling reduce cache -- Too much memory consumption and hash collisions on phash2 were skewing results --- src/riak_kv_reduce_phase.erl | 51 +++++++++++++++--------------------- 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/src/riak_kv_reduce_phase.erl b/src/riak_kv_reduce_phase.erl index d736445bdf..22599eb36e 100644 --- a/src/riak_kv_reduce_phase.erl +++ b/src/riak_kv_reduce_phase.erl @@ -82,35 +82,26 @@ terminate(_Reason, _State) -> perform_reduce({Lang,{reduce,FunTerm,Arg,_Acc}}, Reduced) -> - Key = erlang:phash2({FunTerm, Arg, Reduced}), - case luke_phase:check_cache(Key) of - not_found -> - try - case {Lang, FunTerm} of - {erlang, {qfun,F}} -> - Value = F(Reduced,Arg), - luke_phase:cache_value(Key, Value), - {ok, Value}; - {erlang, {modfun,M,F}} -> - Value = M:F(Reduced,Arg), - luke_phase:cache_value(Key, Value), - {ok, Value}; - {javascript, _} -> - case riak_kv_js_manager:blocking_dispatch({FunTerm, - [riak_kv_mapred_json:jsonify_not_found(R) || R <- Reduced], - Arg}, 5) of - {ok, Data} when is_list(Data) -> - Data1 = [riak_kv_mapred_json:dejsonify_not_found(Datum) || Datum <- Data], - luke_phase:cache_value(Key, Data1), - {ok, Data1}; - Error -> - throw(Error) - end + try + case {Lang, FunTerm} of + {erlang, {qfun,F}} -> + Value = F(Reduced,Arg), + {ok, Value}; + {erlang, {modfun,M,F}} -> + Value = M:F(Reduced,Arg), + {ok, Value}; + {javascript, _} -> + case riak_kv_js_manager:blocking_dispatch({FunTerm, + [riak_kv_mapred_json:jsonify_not_found(R) || R <- Reduced], + Arg}, 5) of + {ok, Data} when is_list(Data) -> + Data1 = [riak_kv_mapred_json:dejsonify_not_found(Datum) || Datum <- Data], + {ok, Data1}; + Error -> + throw(Error) end - catch _:R -> - error_logger:error_msg("Failed reduce: ~p~n", [R]), - {error, failed_reduce} - end; - Value -> - {ok, Value} + end + catch _:R -> + error_logger:error_msg("Failed reduce: ~p~n", [R]), + {error, failed_reduce} end. From 2a71110646f23f0523c6b823d818025717173bcb Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Tue, 3 Aug 2010 15:51:35 -0400 Subject: [PATCH 19/72] Adding experimental support for a real LRU in riak_kv_vnode --- ebin/riak_kv.app | 1 + src/riak_kv_lru.erl | 126 ++++++++++++++++++++++++++++++++++++++++++ src/riak_kv_vnode.erl | 76 +++++++++---------------- 3 files changed, 152 insertions(+), 51 deletions(-) create mode 100644 src/riak_kv_lru.erl diff --git a/ebin/riak_kv.app b/ebin/riak_kv.app index a3435360ab..1348b35a2f 100644 --- a/ebin/riak_kv.app +++ b/ebin/riak_kv.app @@ -25,6 +25,7 @@ riak_kv_js_vm, riak_kv_keys_fsm, riak_kv_legacy_vnode, + riak_kv_lru, riak_kv_map_executor, riak_kv_map_localphase, riak_kv_map_phase, diff --git a/src/riak_kv_lru.erl b/src/riak_kv_lru.erl new file mode 100644 index 0000000000..4cab47c958 --- /dev/null +++ b/src/riak_kv_lru.erl @@ -0,0 +1,126 @@ +-module(riak_kv_lru). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. + +-export([new/1, + put/3, + remove/2, + fetch/2, + size/1, + max_size/1, + clear/1, + destroy/1]). + +-record(kv_lru, {max_size, + idx, + cache}). + +-record(kv_lru_entry, {ts, + key, + value}). + +new(Size) -> + IdxName = pid_to_list(self()) ++ "_cache_idx", + CacheName = pid_to_list(self()) ++ "_cache", + Idx = ets:new(list_to_atom(IdxName), [set, private]), + Cache = ets:new(list_to_atom(CacheName), [ordered_set, private, {keypos, 2}]), + #kv_lru{max_size=Size, idx=Idx, cache=Cache}. + +put(#kv_lru{max_size=MaxSize, idx=Idx, cache=Cache}, Key, Value) -> + remove_existing_if_needed(Idx, Cache, Key), + insert_value(Idx, Cache, Key, Value), + prune_oldest_if_needed(MaxSize, Idx, Cache). + +fetch(#kv_lru{idx=Idx, cache=Cache}=LRU, Key) -> + case fetch_value(Idx, Cache, Key) of + notfound -> + notfound; + Value -> + %% Do a put to update the timestamp in the cache + put(LRU, Key, Value), + Value + end. + +remove(#kv_lru{idx=Idx, cache=Cache}, Key) -> + remove_existing_if_needed(Idx, Cache, Key). + +size(#kv_lru{idx=Idx}) -> + ets:info(Idx, size). + +max_size(#kv_lru{max_size=MaxSize}) -> + MaxSize. + +clear(#kv_lru{idx=Idx, cache=Cache}) -> + ets:delete_all_objects(Idx), + ets:delete_all_objects(Cache). + +destroy(#kv_lru{idx=Idx, cache=Cache}) -> + ets:delete(Idx), + ets:delete(Cache). + +%% Internal functions +remove_existing_if_needed(Idx, Cache, Key) -> + case ets:lookup(Idx, Key) of + [] -> + ok; + [{Key, TS}] -> + ets:delete(Cache, TS), + ets:delete(Idx, Key) + end. + +insert_value(Idx, Cache, Key, Value) -> + TS = erlang:now(), + Entry = #kv_lru_entry{ts=TS, key=Key, value=Value}, + ets:insert_new(Cache, Entry), + ets:insert(Idx, {Key, TS}). + +prune_oldest_if_needed(MaxSize, Idx, Cache) -> + OverSize = MaxSize + 1, + case ets:info(Idx, size) of + OverSize -> + Key = ets:first(Cache), + [Entry] = ets:lookup(Cache, Key), + ets:delete(Cache, Entry#kv_lru_entry.ts), + ets:delete(Idx, Entry#kv_lru_entry.key), + ok; + _ -> + ok + end. + +fetch_value(Idx, Cache, Key) -> + case ets:lookup(Idx, Key) of + [] -> + notfound; + [{Key, TS}] -> + [Entry] = ets:lookup(Cache, TS), + Entry#kv_lru_entry.value + end. + +-ifdef(TEST). + +put_fetch_test() -> + C = riak_kv_lru:new(5), + riak_kv_lru:put(C, <<"hello">>, <<"world">>), + <<"world">> = riak_kv_lru:fetch(C, <<"hello">>), + riak_kv_lru:destroy(C). + +delete_test() -> + C = riak_kv_lru:new(5), + riak_kv_lru:put(C, "hello", "world"), + riak_kv_lru:remove(C, "hello"), + notfound = riak_kv_lru:fetch(C, "hello"), + riak_kv_lru:destroy(C). + +size_test() -> + C = riak_kv_lru:new(5), + [riak_kv_lru:put(C, X, X) || X <- lists:seq(1, 6)], + notfound = riak_kv_lru:fetch(C, 1), + 5 = riak_kv_lru:size(C), + 5 = riak_kv_lru:max_size(C), + 2 = riak_kv_lru:fetch(C, 2), + 6 = riak_kv_lru:fetch(C, 6), + riak_kv_lru:destroy(C). + +-endif. diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index beb425c7fa..1738b9cf0d 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -68,7 +68,6 @@ reqid :: non_neg_integer(), bprops :: maybe_improper_list(), prunetime :: non_neg_integer()}). --define(CLEAR_MAPCACHE_INTERVAL, 60000). %% TODO: add -specs to all public API funcs, this module seems fragile? @@ -143,10 +142,11 @@ get_vclocks(Preflist, BKeyList) -> init([Index]) -> Mod = app_helper:get_env(riak_kv, storage_backend), + CacheSize = app_helper:get_env(riak_kv, mapcache_size, 100), Configuration = app_helper:get_env(riak_kv), {ok, ModState} = Mod:start(Index, Configuration), - schedule_clear_mapcache(), - {ok, #state{idx=Index, mod=Mod, modstate=ModState, mapcache=orddict:new(), mrjobs=dict:new()}}. + + {ok, #state{idx=Index, mod=Mod, modstate=ModState, mapcache=riak_kv_lru:new(CacheSize), mrjobs=dict:new()}}. handle_command(?KV_PUT_REQ{bkey=BKey, object=Object, @@ -156,7 +156,8 @@ handle_command(?KV_PUT_REQ{bkey=BKey, Sender, State=#state{idx=Idx,mapcache=Cache}) -> riak_core_vnode:reply(Sender, {w, Idx, ReqId}), do_put(Sender, BKey, Object, ReqId, StartTime, Options, State), - {noreply, State#state{mapcache=orddict:erase(BKey,Cache)}}; + riak_kv_lru:remove(Cache, BKey), + {noreply, State}}; handle_command(?KV_GET_REQ{bkey=BKey,req_id=ReqId},Sender,State) -> do_get(Sender, BKey, ReqId, State); @@ -166,12 +167,12 @@ handle_command(?KV_LISTKEYS_REQ{bucket=Bucket, req_id=ReqId}, _Sender, handle_command(?KV_DELETE_REQ{bkey=BKey, req_id=ReqId}, _Sender, State=#state{mod=Mod, modstate=ModState, idx=Idx, mapcache=Cache}) -> - NewState = State#state{mapcache=orddict:erase(BKey,Cache)}, + riak_kv_lru:remove(BKey, Cache), case Mod:delete(ModState, BKey) of ok -> - {reply, {del, Idx, ReqId}, NewState}; + {reply, {del, Idx, ReqId}, State}; {error, _Reason} -> - {reply, {fail, Idx, ReqId}, NewState} + {reply, {fail, Idx, ReqId}, State} end; handle_command(?KV_MAP_REQ{bkey=BKey,qterm=QTerm,keydata=KeyData,from=From}, _Sender, State) -> @@ -188,25 +189,18 @@ handle_command({backend_callback, Ref, Msg}, _Sender, {noreply, State}; handle_command({mapcache, BKey,{FunName,Arg,KeyData}, MF_Res}, _Sender, State=#state{mapcache=Cache}) -> - KeyCache0 = case orddict:find(BKey, Cache) of - error -> orddict:new(); - {ok,CDict} -> CDict - end, - KeyCache = orddict:store({FunName,Arg,KeyData},MF_Res,KeyCache0), - {noreply, State#state{mapcache=orddict:store(BKey,KeyCache,Cache)}}; + riak_kv_lru:put(Cache, {BKey, {FunName,Arg,KeyData}}, MF_Res), + {noreply, State}; handle_command({mapcache, BKey,{M,F,Arg,KeyData},MF_Res}, _Sender, State=#state{mapcache=Cache}) -> - KeyCache0 = case orddict:find(BKey, Cache) of - error -> orddict:new(); - {ok,CDict} -> CDict - end, - KeyCache = orddict:store({M,F,Arg,KeyData},MF_Res,KeyCache0), - {noreply, State#state{mapcache=orddict:store(BKey,KeyCache,Cache)}}; -handle_command(purge_mapcache, _Sender, State) -> - {noreply, State#state{mapcache=orddict:new()}}; -handle_command(clear_mapcache, _Sender, State) -> - schedule_clear_mapcache(), - {noreply, State#state{mapcache=orddict:new()}}; + riak_kv_lru:put(Cache, {Bkey, {M,F,Arg,KeyData}}, MF_Res), + {noreply, State}; +handle_command(purge_mapcache, _Sender, #state{mapcache=Cache}=State) -> + riak_kv_lru:clear(Cache), + {noreply, State}; +handle_command(clear_mapcache, _Sender, #state{mapcache=Cache}=State) -> + riak_kv_lru:clear(Cache), + {noreply, State}; handle_command({mapexec_error_noretry, JobId, Err}, _Sender, #state{mrjobs=Jobs}=State) -> NewState = case dict:find(JobId, Jobs) of {ok, Job} -> @@ -224,16 +218,9 @@ handle_command({mapexec_reply, JobId, Result}, _Sender, #state{mrjobs=Jobs, {ok, Job} -> Jobs1 = dict:erase(JobId, Jobs), #mrjob{cachekey=CacheKey, target=Target, bkey=BKey} = Job, - Cache = case orddict:find(BKey, MapCache) of - error -> - orddict:new(); - {ok, C} -> - C - end, - Cache1 = orddict:store(CacheKey, Result, Cache), + riak_kv_lru:put({BKey, CacheKey}, Result), gen_fsm:send_event(Target, {mapexec_reply, Result, self()}), - MapCache1 = orddict:store(BKey, Cache1, MapCache), - State#state{mrjobs=Jobs1, mapcache=MapCache1}; + State#state{mrjobs=Jobs1}; error -> State end, @@ -453,9 +440,8 @@ do_map(Sender, QTerm, BKey, KeyData, #state{mrjobs=Jobs, mod=Mod, modstate=ModSt do_map({erlang, {map, FunTerm, Arg, _Acc}}, BKey, Mod, ModState, KeyData, Cache, VNode) -> CacheKey = build_key(FunTerm, Arg, KeyData), - CacheVal = cache_fetch(BKey, CacheKey, Cache), - case CacheVal of - not_cached -> + case riak_kv_lru:fetch(Cache, {BKey, CacheKey}) of + notfound -> uncached_map(BKey, Mod, ModState, FunTerm, Arg, KeyData, VNode); CV -> {ok, CV} @@ -463,8 +449,8 @@ do_map({erlang, {map, FunTerm, Arg, _Acc}}, BKey, Mod, ModState, KeyData, Cache, do_map({javascript, {map, FunTerm, Arg, _}=QTerm}, BKey, Mod, ModState, KeyData, Cache, _VNode) -> CacheKey = build_key(FunTerm, Arg, KeyData), CacheVal = cache_fetch(BKey, CacheKey, Cache), - case CacheVal of - not_cached -> + case riak_kv_lru:fetch(Cache, {BKey, CacheKey}) of + notfound -> case Mod:get(ModState, BKey) of {ok, Binary} -> V = binary_to_term(Binary), @@ -486,22 +472,10 @@ build_key({modfun, CMod, CFun}, Arg, KeyData) -> build_key({jsfun, FunName}, Arg, KeyData) -> {FunName, Arg, KeyData}; build_key({jsanon, Src}, Arg, KeyData) -> - {erlang:phash2(Src), Arg, KeyData}; + {mochihex:to_hex(crypto:sha(Src)), Arg, KeyData}; build_key(_, _, _) -> no_key. -cache_fetch(_BKey, no_key, _Cache) -> - not_cached; -cache_fetch(BKey, CacheKey, Cache) -> - case orddict:find(BKey, Cache) of - error -> not_cached; - {ok,CDict} -> - case orddict:find(CacheKey,CDict) of - error -> not_cached; - {ok,CVal} -> CVal - end - end. - uncached_map(BKey, Mod, ModState, FunTerm, Arg, KeyData, VNode) -> case Mod:get(ModState, BKey) of {ok, Binary} -> From 3995079935580af03b69fd4854b170fafea7575e Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Tue, 3 Aug 2010 15:59:10 -0400 Subject: [PATCH 20/72] Fixing compile errors --- src/riak_kv_vnode.erl | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index 1738b9cf0d..aa5df2b3a2 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -157,7 +157,7 @@ handle_command(?KV_PUT_REQ{bkey=BKey, riak_core_vnode:reply(Sender, {w, Idx, ReqId}), do_put(Sender, BKey, Object, ReqId, StartTime, Options, State), riak_kv_lru:remove(Cache, BKey), - {noreply, State}}; + {noreply, State}; handle_command(?KV_GET_REQ{bkey=BKey,req_id=ReqId},Sender,State) -> do_get(Sender, BKey, ReqId, State); @@ -193,7 +193,7 @@ handle_command({mapcache, BKey,{FunName,Arg,KeyData}, MF_Res}, _Sender, {noreply, State}; handle_command({mapcache, BKey,{M,F,Arg,KeyData},MF_Res}, _Sender, State=#state{mapcache=Cache}) -> - riak_kv_lru:put(Cache, {Bkey, {M,F,Arg,KeyData}}, MF_Res), + riak_kv_lru:put(Cache, {BKey, {M,F,Arg,KeyData}}, MF_Res), {noreply, State}; handle_command(purge_mapcache, _Sender, #state{mapcache=Cache}=State) -> riak_kv_lru:clear(Cache), @@ -218,7 +218,7 @@ handle_command({mapexec_reply, JobId, Result}, _Sender, #state{mrjobs=Jobs, {ok, Job} -> Jobs1 = dict:erase(JobId, Jobs), #mrjob{cachekey=CacheKey, target=Target, bkey=BKey} = Job, - riak_kv_lru:put({BKey, CacheKey}, Result), + riak_kv_lru:put(MapCache, {BKey, CacheKey}, Result), gen_fsm:send_event(Target, {mapexec_reply, Result, self()}), State#state{mrjobs=Jobs1}; error -> @@ -438,17 +438,16 @@ do_map(Sender, QTerm, BKey, KeyData, #state{mrjobs=Jobs, mod=Mod, modstate=ModSt end, {reply, Reply, NewState}. -do_map({erlang, {map, FunTerm, Arg, _Acc}}, BKey, Mod, ModState, KeyData, Cache, VNode) -> +do_map({erlang, {map, FunTerm, Arg, _Acc}}, BKey, Mod, ModState, KeyData, Cache, _VNode) -> CacheKey = build_key(FunTerm, Arg, KeyData), case riak_kv_lru:fetch(Cache, {BKey, CacheKey}) of notfound -> - uncached_map(BKey, Mod, ModState, FunTerm, Arg, KeyData, VNode); + uncached_map(BKey, Mod, ModState, FunTerm, Arg, KeyData); CV -> {ok, CV} end; do_map({javascript, {map, FunTerm, Arg, _}=QTerm}, BKey, Mod, ModState, KeyData, Cache, _VNode) -> CacheKey = build_key(FunTerm, Arg, KeyData), - CacheVal = cache_fetch(BKey, CacheKey, Cache), case riak_kv_lru:fetch(Cache, {BKey, CacheKey}) of notfound -> case Mod:get(ModState, BKey) of @@ -476,18 +475,18 @@ build_key({jsanon, Src}, Arg, KeyData) -> build_key(_, _, _) -> no_key. -uncached_map(BKey, Mod, ModState, FunTerm, Arg, KeyData, VNode) -> +uncached_map(BKey, Mod, ModState, FunTerm, Arg, KeyData) -> case Mod:get(ModState, BKey) of {ok, Binary} -> V = binary_to_term(Binary), - exec_map(V, FunTerm, Arg, BKey, KeyData, VNode); + exec_map(V, FunTerm, Arg, BKey, KeyData); {error, notfound} -> - exec_map({error, notfound}, FunTerm, Arg, BKey, KeyData, VNode); + exec_map({error, notfound}, FunTerm, Arg, BKey, KeyData); X -> {error, X} end. -exec_map(V, FunTerm, Arg, BKey, KeyData, _VNode) -> +exec_map(V, FunTerm, Arg, BKey, KeyData) -> try case FunTerm of {qfun, F} -> {ok, (F)(V,KeyData,Arg)}; @@ -501,10 +500,6 @@ exec_map(V, FunTerm, Arg, BKey, KeyData, _VNode) -> {error, Reason} end. -schedule_clear_mapcache() -> - riak_core_vnode:send_command_after(?CLEAR_MAPCACHE_INTERVAL, clear_mapcache). - - -ifdef(TEST). dummy_backend() -> From 938e09937a70508c5702b892b27d365ec777a4fe Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Tue, 3 Aug 2010 16:13:26 -0400 Subject: [PATCH 21/72] Removing unnecessary safe_fixtable calls --- src/riak_kv_js_manager.erl | 35 ++++++++++------------------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/src/riak_kv_js_manager.erl b/src/riak_kv_js_manager.erl index caae8e6944..35f0c8a81a 100644 --- a/src/riak_kv_js_manager.erl +++ b/src/riak_kv_js_manager.erl @@ -101,19 +101,14 @@ handle_call(reload_vms, _From, #state{master=Master, idle=Idle}=State) -> {reply, ok, State}; handle_call(reserve_vm, _From, #state{idle=Idle}=State) -> - try - ets:safe_fixtable(Idle, true), - Reply = case ets:first(Idle) of - '$end_of_table' -> - {error, no_vms}; - VM -> - ets:delete(Idle, VM), - {ok, VM} - end, - {reply, Reply, State} - after - ets:safe_fixtable(Idle, false) - end; + Reply = case ets:first(Idle) of + '$end_of_table' -> + {error, no_vms}; + VM -> + ets:delete(Idle, VM), + {ok, VM} + end, + {reply, Reply, State}; handle_call(pool_size, _From, #state{idle=Idle}=State) -> {reply, ets:info(Idle, size), State}; @@ -171,12 +166,7 @@ start_vms(Count) -> start_vms(Count - 1). reload_idle_vms(Tid) -> - try - ets:safe_fixtable(Tid, true), - reload_idle_vms(ets:first(Tid), Tid) - after - ets:safe_fixtable(Tid, false) - end. + reload_idle_vms(ets:first(Tid), Tid). reload_idle_vms('$end_of_table', _Tid) -> ok; @@ -185,12 +175,7 @@ reload_idle_vms(Current, Tid) -> reload_idle_vms(ets:next(Tid), Tid). mark_pending_reloads(Master, Idle) -> - try - ets:safe_fixtable(Master, true), - mark_pending_reloads(ets:first(Master), Master, Idle) - after - ets:safe_fixtable(Master, false) - end. + mark_pending_reloads(ets:first(Master), Master, Idle). mark_pending_reloads('$end_of_table', _Master, _Idle) -> ok; From 880895516283f457ef14119fad397f6c6ae2f94c Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Tue, 3 Aug 2010 16:13:45 -0400 Subject: [PATCH 22/72] Making vnode cache tunable param more "official sounding" --- src/riak_kv_vnode.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index aa5df2b3a2..9d131e1e6c 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -142,7 +142,8 @@ get_vclocks(Preflist, BKeyList) -> init([Index]) -> Mod = app_helper:get_env(riak_kv, storage_backend), - CacheSize = app_helper:get_env(riak_kv, mapcache_size, 100), + CacheSize = app_helper:get_env(riak_kv, vnode_cache_entries, 100), + io:format("CacheSize: ~p~n", [CacheSize]), Configuration = app_helper:get_env(riak_kv), {ok, ModState} = Mod:start(Index, Configuration), From 963cc621858145a00f09bd5df98b3f676f57558d Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Tue, 3 Aug 2010 17:17:07 -0400 Subject: [PATCH 23/72] Removing debug code from riak_kv_vnode;Fixing ETS usage to make LRU cache more efficient --- src/riak_kv_lru.erl | 56 +++++++++++++++++++++++++------------------ src/riak_kv_vnode.erl | 1 - 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/src/riak_kv_lru.erl b/src/riak_kv_lru.erl index 4cab47c958..00d64225b2 100644 --- a/src/riak_kv_lru.erl +++ b/src/riak_kv_lru.erl @@ -17,15 +17,15 @@ idx, cache}). --record(kv_lru_entry, {ts, - key, - value}). +-record(kv_lru_entry, {key, + value, + ts}). new(Size) -> IdxName = pid_to_list(self()) ++ "_cache_idx", CacheName = pid_to_list(self()) ++ "_cache", - Idx = ets:new(list_to_atom(IdxName), [set, private]), - Cache = ets:new(list_to_atom(CacheName), [ordered_set, private, {keypos, 2}]), + Idx = ets:new(list_to_atom(IdxName), [ordered_set, private]), + Cache = ets:new(list_to_atom(CacheName), [private, {keypos, 2}]), #kv_lru{max_size=Size, idx=Idx, cache=Cache}. put(#kv_lru{max_size=MaxSize, idx=Idx, cache=Cache}, Key, Value) -> @@ -33,13 +33,13 @@ put(#kv_lru{max_size=MaxSize, idx=Idx, cache=Cache}, Key, Value) -> insert_value(Idx, Cache, Key, Value), prune_oldest_if_needed(MaxSize, Idx, Cache). -fetch(#kv_lru{idx=Idx, cache=Cache}=LRU, Key) -> - case fetch_value(Idx, Cache, Key) of +fetch(#kv_lru{cache=Cache}=LRU, Key) -> + case fetch_value(Cache, Key) of notfound -> notfound; Value -> %% Do a put to update the timestamp in the cache - put(LRU, Key, Value), + riak_kv_lru:put(LRU, Key, Value), Value end. @@ -62,39 +62,39 @@ destroy(#kv_lru{idx=Idx, cache=Cache}) -> %% Internal functions remove_existing_if_needed(Idx, Cache, Key) -> - case ets:lookup(Idx, Key) of - [] -> + case ets:lookup(Cache, Key) of + [Entry] -> + ets:delete(Idx, Entry#kv_lru_entry.ts), + ets:delete(Cache, Key), ok; - [{Key, TS}] -> - ets:delete(Cache, TS), - ets:delete(Idx, Key) + [] -> + ok end. insert_value(Idx, Cache, Key, Value) -> TS = erlang:now(), - Entry = #kv_lru_entry{ts=TS, key=Key, value=Value}, + Entry = #kv_lru_entry{key=Key, value=Value, ts=TS}, ets:insert_new(Cache, Entry), - ets:insert(Idx, {Key, TS}). + ets:insert(Idx, {TS, Key}). prune_oldest_if_needed(MaxSize, Idx, Cache) -> OverSize = MaxSize + 1, case ets:info(Idx, size) of OverSize -> - Key = ets:first(Cache), - [Entry] = ets:lookup(Cache, Key), - ets:delete(Cache, Entry#kv_lru_entry.ts), - ets:delete(Idx, Entry#kv_lru_entry.key), + TS = ets:first(Idx), + [{TS, Key}] = ets:lookup(Idx, TS), + ets:delete(Idx, TS), + ets:delete(Cache, Key), ok; _ -> ok end. -fetch_value(Idx, Cache, Key) -> - case ets:lookup(Idx, Key) of +fetch_value(Cache, Key) -> + case ets:lookup(Cache, Key) of [] -> notfound; - [{Key, TS}] -> - [Entry] = ets:lookup(Cache, TS), + [Entry] -> Entry#kv_lru_entry.value end. @@ -123,4 +123,14 @@ size_test() -> 6 = riak_kv_lru:fetch(C, 6), riak_kv_lru:destroy(C). +age_test() -> + C = riak_kv_lru:new(3), + [riak_kv_lru:put(C, X, X) || X <- lists:seq(1, 3)], + timer:sleep(500), + 2 = riak_kv_lru:fetch(C, 2), + riak_kv_lru:put(C, 4, 4), + 2 = riak_kv_lru:fetch(C, 2), + 4 = riak_kv_lru:fetch(C, 4), + notfound = riak_kv_lru:fetch(C, 1), + riak_kv_lru:destroy(C). -endif. diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index 9d131e1e6c..8c839e08ba 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -143,7 +143,6 @@ get_vclocks(Preflist, BKeyList) -> init([Index]) -> Mod = app_helper:get_env(riak_kv, storage_backend), CacheSize = app_helper:get_env(riak_kv, vnode_cache_entries, 100), - io:format("CacheSize: ~p~n", [CacheSize]), Configuration = app_helper:get_env(riak_kv), {ok, ModState} = Mod:start(Index, Configuration), From 4cab456041b124efd86892ea1a8600571eea45dd Mon Sep 17 00:00:00 2001 From: justin Date: Tue, 3 Aug 2010 22:18:50 -0400 Subject: [PATCH 24/72] riak_object:new auto-update only when initial metadata, resolving 559 --HG-- extra : rebase_source : bc703b5feed80891ade882e92ef2e15c0efd33e4 --- src/riak_object.erl | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/riak_object.erl b/src/riak_object.erl index 6374937e61..0f6d700661 100644 --- a/src/riak_object.erl +++ b/src/riak_object.erl @@ -60,7 +60,7 @@ %% @doc Constructor for new riak objects. -spec new(Bucket::bucket(), Key::key(), Value::value()) -> riak_object(). new(B, K, V) when is_binary(B), is_binary(K) -> - new(B, K, V, dict:new()). + new(B, K, V, no_initial_metadata). %% @doc Constructor for new riak objects with an initial content-type. -spec new(Bucket::bucket(), Key::key(), Value::value(), string() | dict()) -> riak_object(). @@ -76,9 +76,16 @@ new(B, K, V, MD) when is_binary(B), is_binary(K) -> true -> throw({error,key_too_large}); false -> - Contents = [#r_content{metadata=MD, value=V}], - #r_object{bucket=B,key=K,updatemetadata=MD, - contents=Contents,vclock=vclock:fresh()} + case MD of + no_initial_metadata -> + Contents = [#r_content{metadata=dict:new(), value=V}], + #r_object{bucket=B,key=K, + contents=Contents,vclock=vclock:fresh()}; + _ -> + Contents = [#r_content{metadata=MD, value=V}], + #r_object{bucket=B,key=K,updatemetadata=MD, + contents=Contents,vclock=vclock:fresh()} + end end. -spec equal(riak_object(), riak_object()) -> true | false. From c22a5c3f613c13fbe848fab1206ca8d767cbc76a Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Wed, 4 Aug 2010 10:11:13 -0400 Subject: [PATCH 25/72] Making LRU bucket/key aware so we can do bucket/key level purges when data changes G: -- --- src/riak_kv_keys_fsm.erl | 7 ++- src/riak_kv_lru.erl | 128 +++++++++++++++++++++++++-------------- src/riak_kv_vnode.erl | 14 ++--- 3 files changed, 95 insertions(+), 54 deletions(-) diff --git a/src/riak_kv_keys_fsm.erl b/src/riak_kv_keys_fsm.erl index bdb90c30a8..f5897376c1 100644 --- a/src/riak_kv_keys_fsm.erl +++ b/src/riak_kv_keys_fsm.erl @@ -165,7 +165,12 @@ process_keys(Keys,Bucket,ClientType,Bloom,ReqId,Client) -> %% @private process_keys([],Bucket,ClientType,_Bloom,ReqId,Client,Acc) -> case ClientType of - mapred -> luke_flow:add_inputs(Client, [{Bucket,K} || K <- Acc]); + mapred -> + try + luke_flow:add_inputs(Client, [{Bucket,K} || K <- Acc]) + catch _Error -> + exit(self(), shutdown) + end; plain -> Client ! {ReqId, {keys, Acc}} end, ok; diff --git a/src/riak_kv_lru.erl b/src/riak_kv_lru.erl index 00d64225b2..c269483f91 100644 --- a/src/riak_kv_lru.erl +++ b/src/riak_kv_lru.erl @@ -5,16 +5,18 @@ -endif. -export([new/1, - put/3, - remove/2, - fetch/2, + put/4, + remove/3, + fetch/3, size/1, max_size/1, clear/1, + clear_bkey/2, destroy/1]). -record(kv_lru, {max_size, - idx, + bucket_idx, + age_idx, cache}). -record(kv_lru_entry, {key, @@ -22,76 +24,94 @@ ts}). new(Size) -> - IdxName = pid_to_list(self()) ++ "_cache_idx", + IdxName = pid_to_list(self()) ++ "_cache_age_idx", + BucketIdxName = pid_to_list(self()) ++ "_bucket_idx", CacheName = pid_to_list(self()) ++ "_cache", Idx = ets:new(list_to_atom(IdxName), [ordered_set, private]), + BucketIdx = ets:new(list_to_atom(BucketIdxName), [bag, private]), Cache = ets:new(list_to_atom(CacheName), [private, {keypos, 2}]), - #kv_lru{max_size=Size, idx=Idx, cache=Cache}. + #kv_lru{max_size=Size, age_idx=Idx, bucket_idx=BucketIdx, cache=Cache}. -put(#kv_lru{max_size=MaxSize, idx=Idx, cache=Cache}, Key, Value) -> - remove_existing_if_needed(Idx, Cache, Key), - insert_value(Idx, Cache, Key, Value), - prune_oldest_if_needed(MaxSize, Idx, Cache). +put(#kv_lru{max_size=MaxSize, age_idx=Idx, bucket_idx=BucketIdx, + cache=Cache}, BKey, Key, Value) -> + remove_existing(Idx, BucketIdx, Cache, BKey, Key), + insert_value(Idx, BucketIdx, Cache, BKey, Key, Value), + prune_oldest_if_needed(MaxSize, Idx, BucketIdx, Cache). -fetch(#kv_lru{cache=Cache}=LRU, Key) -> - case fetch_value(Cache, Key) of +fetch(#kv_lru{cache=Cache}=LRU, BKey, Key) -> + case fetch_value(Cache, BKey, Key) of notfound -> notfound; Value -> %% Do a put to update the timestamp in the cache - riak_kv_lru:put(LRU, Key, Value), + riak_kv_lru:put(LRU, BKey, Key, Value), Value end. -remove(#kv_lru{idx=Idx, cache=Cache}, Key) -> - remove_existing_if_needed(Idx, Cache, Key). +remove(#kv_lru{age_idx=Idx, bucket_idx=BucketIdx, cache=Cache}, BKey, Key) -> + remove_existing(Idx, BucketIdx, Cache, BKey, Key). -size(#kv_lru{idx=Idx}) -> +size(#kv_lru{age_idx=Idx}) -> ets:info(Idx, size). max_size(#kv_lru{max_size=MaxSize}) -> MaxSize. -clear(#kv_lru{idx=Idx, cache=Cache}) -> +clear(#kv_lru{age_idx=Idx, cache=Cache}) -> ets:delete_all_objects(Idx), ets:delete_all_objects(Cache). -destroy(#kv_lru{idx=Idx, cache=Cache}) -> +clear_bkey(#kv_lru{bucket_idx=BucketIdx}=LRU, BKey) -> + R = ets:match(BucketIdx, {BKey, '$1'}), + case R of + [] -> + ok; + Keys -> + [remove(LRU, BKey, Key) || [Key] <- Keys], + ok + end. + +destroy(#kv_lru{age_idx=Idx, bucket_idx=BucketIdx, cache=Cache}) -> ets:delete(Idx), + ets:delete(BucketIdx), ets:delete(Cache). %% Internal functions -remove_existing_if_needed(Idx, Cache, Key) -> - case ets:lookup(Cache, Key) of +remove_existing(Idx, BucketIdx, Cache, BKey, Key) -> + CacheKey = {BKey, Key}, + case ets:lookup(Cache, CacheKey) of [Entry] -> ets:delete(Idx, Entry#kv_lru_entry.ts), - ets:delete(Cache, Key), + ets:delete(BucketIdx, CacheKey), + ets:delete(Cache, CacheKey), ok; [] -> ok end. -insert_value(Idx, Cache, Key, Value) -> +insert_value(Idx, BucketIdx, Cache, BKey, Key, Value) -> + CacheKey = {BKey, Key}, TS = erlang:now(), - Entry = #kv_lru_entry{key=Key, value=Value, ts=TS}, + Entry = #kv_lru_entry{key=CacheKey, value=Value, ts=TS}, ets:insert_new(Cache, Entry), - ets:insert(Idx, {TS, Key}). + ets:insert_new(Idx, {TS, CacheKey}), + ets:insert(BucketIdx, CacheKey). -prune_oldest_if_needed(MaxSize, Idx, Cache) -> +prune_oldest_if_needed(MaxSize, Idx, BucketIdx, Cache) -> OverSize = MaxSize + 1, case ets:info(Idx, size) of OverSize -> TS = ets:first(Idx), - [{TS, Key}] = ets:lookup(Idx, TS), - ets:delete(Idx, TS), - ets:delete(Cache, Key), + [{TS, {BKey, Key}}] = ets:lookup(Idx, TS), + remove_existing(Idx, BucketIdx, Cache, BKey, Key), ok; _ -> ok end. -fetch_value(Cache, Key) -> - case ets:lookup(Cache, Key) of +fetch_value(Cache, BKey, Key) -> + CacheKey = {BKey, Key}, + case ets:lookup(Cache, CacheKey) of [] -> notfound; [Entry] -> @@ -99,38 +119,54 @@ fetch_value(Cache, Key) -> end. -ifdef(TEST). - put_fetch_test() -> + BKey = {<<"test">>, <<"foo">>}, C = riak_kv_lru:new(5), - riak_kv_lru:put(C, <<"hello">>, <<"world">>), - <<"world">> = riak_kv_lru:fetch(C, <<"hello">>), + riak_kv_lru:put(C, BKey, <<"hello">>, <<"world">>), + <<"world">> = riak_kv_lru:fetch(C, BKey, <<"hello">>), riak_kv_lru:destroy(C). delete_test() -> + BKey = {<<"test">>, <<"foo">>}, C = riak_kv_lru:new(5), - riak_kv_lru:put(C, "hello", "world"), - riak_kv_lru:remove(C, "hello"), - notfound = riak_kv_lru:fetch(C, "hello"), + riak_kv_lru:put(C, BKey, "hello", "world"), + riak_kv_lru:remove(C, BKey, "hello"), + notfound = riak_kv_lru:fetch(C, BKey, "hello"), riak_kv_lru:destroy(C). size_test() -> + BKey = {<<"test">>, <<"foo">>}, C = riak_kv_lru:new(5), - [riak_kv_lru:put(C, X, X) || X <- lists:seq(1, 6)], - notfound = riak_kv_lru:fetch(C, 1), + [riak_kv_lru:put(C, BKey, X, X) || X <- lists:seq(1, 6)], + notfound = riak_kv_lru:fetch(C, BKey, 1), 5 = riak_kv_lru:size(C), 5 = riak_kv_lru:max_size(C), - 2 = riak_kv_lru:fetch(C, 2), - 6 = riak_kv_lru:fetch(C, 6), + 2 = riak_kv_lru:fetch(C, BKey, 2), + 6 = riak_kv_lru:fetch(C, BKey, 6), riak_kv_lru:destroy(C). age_test() -> + BKey = {<<"test">>, <<"foo">>}, C = riak_kv_lru:new(3), - [riak_kv_lru:put(C, X, X) || X <- lists:seq(1, 3)], + [riak_kv_lru:put(C, BKey, X, X) || X <- lists:seq(1, 3)], timer:sleep(500), - 2 = riak_kv_lru:fetch(C, 2), - riak_kv_lru:put(C, 4, 4), - 2 = riak_kv_lru:fetch(C, 2), - 4 = riak_kv_lru:fetch(C, 4), - notfound = riak_kv_lru:fetch(C, 1), + 2 = riak_kv_lru:fetch(C, BKey, 2), + riak_kv_lru:put(C, BKey, 4, 4), + 2 = riak_kv_lru:fetch(C, BKey, 2), + 4 = riak_kv_lru:fetch(C, BKey, 4), + notfound = riak_kv_lru:fetch(C, BKey, 1), riak_kv_lru:destroy(C). + +clear_bkey_test() -> + BKey1 = {<<"test">>, <<"foo">>}, + BKey2 = {<<"test">>, <<"bar">>}, + C = riak_kv_lru:new(10), + F = fun(X) -> + riak_kv_lru:put(C, BKey1, X, X), + riak_kv_lru:put(C, BKey2, X, X) end, + [F(X) || X <- lists:seq(1, 5)], + riak_kv_lru:clear_bkey(C, BKey2), + notfound = riak_kv_lru:fetch(C, BKey1, 3), + 3 = riak_kv_lru:fetch(C, BKey2, 3). + -endif. diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index 8c839e08ba..a1ff5c612b 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -156,7 +156,7 @@ handle_command(?KV_PUT_REQ{bkey=BKey, Sender, State=#state{idx=Idx,mapcache=Cache}) -> riak_core_vnode:reply(Sender, {w, Idx, ReqId}), do_put(Sender, BKey, Object, ReqId, StartTime, Options, State), - riak_kv_lru:remove(Cache, BKey), + riak_kv_lru:clear_bkey(Cache, BKey), {noreply, State}; handle_command(?KV_GET_REQ{bkey=BKey,req_id=ReqId},Sender,State) -> @@ -167,7 +167,7 @@ handle_command(?KV_LISTKEYS_REQ{bucket=Bucket, req_id=ReqId}, _Sender, handle_command(?KV_DELETE_REQ{bkey=BKey, req_id=ReqId}, _Sender, State=#state{mod=Mod, modstate=ModState, idx=Idx, mapcache=Cache}) -> - riak_kv_lru:remove(BKey, Cache), + riak_kv_lru:clear_bkey(Cache, BKey), case Mod:delete(ModState, BKey) of ok -> {reply, {del, Idx, ReqId}, State}; @@ -189,11 +189,11 @@ handle_command({backend_callback, Ref, Msg}, _Sender, {noreply, State}; handle_command({mapcache, BKey,{FunName,Arg,KeyData}, MF_Res}, _Sender, State=#state{mapcache=Cache}) -> - riak_kv_lru:put(Cache, {BKey, {FunName,Arg,KeyData}}, MF_Res), + riak_kv_lru:put(Cache, BKey, {FunName,Arg,KeyData}, MF_Res), {noreply, State}; handle_command({mapcache, BKey,{M,F,Arg,KeyData},MF_Res}, _Sender, State=#state{mapcache=Cache}) -> - riak_kv_lru:put(Cache, {BKey, {M,F,Arg,KeyData}}, MF_Res), + riak_kv_lru:put(Cache, BKey, {M,F,Arg,KeyData}, MF_Res), {noreply, State}; handle_command(purge_mapcache, _Sender, #state{mapcache=Cache}=State) -> riak_kv_lru:clear(Cache), @@ -218,7 +218,7 @@ handle_command({mapexec_reply, JobId, Result}, _Sender, #state{mrjobs=Jobs, {ok, Job} -> Jobs1 = dict:erase(JobId, Jobs), #mrjob{cachekey=CacheKey, target=Target, bkey=BKey} = Job, - riak_kv_lru:put(MapCache, {BKey, CacheKey}, Result), + riak_kv_lru:put(MapCache, BKey, CacheKey, Result), gen_fsm:send_event(Target, {mapexec_reply, Result, self()}), State#state{mrjobs=Jobs1}; error -> @@ -440,7 +440,7 @@ do_map(Sender, QTerm, BKey, KeyData, #state{mrjobs=Jobs, mod=Mod, modstate=ModSt do_map({erlang, {map, FunTerm, Arg, _Acc}}, BKey, Mod, ModState, KeyData, Cache, _VNode) -> CacheKey = build_key(FunTerm, Arg, KeyData), - case riak_kv_lru:fetch(Cache, {BKey, CacheKey}) of + case riak_kv_lru:fetch(Cache, BKey, CacheKey) of notfound -> uncached_map(BKey, Mod, ModState, FunTerm, Arg, KeyData); CV -> @@ -448,7 +448,7 @@ do_map({erlang, {map, FunTerm, Arg, _Acc}}, BKey, Mod, ModState, KeyData, Cache, end; do_map({javascript, {map, FunTerm, Arg, _}=QTerm}, BKey, Mod, ModState, KeyData, Cache, _VNode) -> CacheKey = build_key(FunTerm, Arg, KeyData), - case riak_kv_lru:fetch(Cache, {BKey, CacheKey}) of + case riak_kv_lru:fetch(Cache, BKey, CacheKey) of notfound -> case Mod:get(ModState, BKey) of {ok, Binary} -> From 750ae5df9cdfe38251c107014d1111e8f5750561 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Wed, 4 Aug 2010 11:00:53 -0400 Subject: [PATCH 26/72] Disabling all cache ops when max_size=0 --- src/riak_kv_lru.erl | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/src/riak_kv_lru.erl b/src/riak_kv_lru.erl index c269483f91..8a88e1df47 100644 --- a/src/riak_kv_lru.erl +++ b/src/riak_kv_lru.erl @@ -23,6 +23,8 @@ value, ts}). +new(0) -> + #kv_lru{max_size=0}; new(Size) -> IdxName = pid_to_list(self()) ++ "_cache_age_idx", BucketIdxName = pid_to_list(self()) ++ "_bucket_idx", @@ -32,12 +34,16 @@ new(Size) -> Cache = ets:new(list_to_atom(CacheName), [private, {keypos, 2}]), #kv_lru{max_size=Size, age_idx=Idx, bucket_idx=BucketIdx, cache=Cache}. +put(#kv_lru{max_size=0}, _BKey, _Key, _Value) -> + ok; put(#kv_lru{max_size=MaxSize, age_idx=Idx, bucket_idx=BucketIdx, cache=Cache}, BKey, Key, Value) -> remove_existing(Idx, BucketIdx, Cache, BKey, Key), insert_value(Idx, BucketIdx, Cache, BKey, Key, Value), prune_oldest_if_needed(MaxSize, Idx, BucketIdx, Cache). +fetch(#kv_lru{max_size=0}, _BKey, _Key) -> + notfound; fetch(#kv_lru{cache=Cache}=LRU, BKey, Key) -> case fetch_value(Cache, BKey, Key) of notfound -> @@ -48,19 +54,29 @@ fetch(#kv_lru{cache=Cache}=LRU, BKey, Key) -> Value end. +remove(#kv_lru{max_size=0}, _BKey, _Key) -> + ok; remove(#kv_lru{age_idx=Idx, bucket_idx=BucketIdx, cache=Cache}, BKey, Key) -> - remove_existing(Idx, BucketIdx, Cache, BKey, Key). + remove_existing(Idx, BucketIdx, Cache, BKey, Key), + ok. +size(#kv_lru{max_size=0}) -> + 0; size(#kv_lru{age_idx=Idx}) -> ets:info(Idx, size). max_size(#kv_lru{max_size=MaxSize}) -> MaxSize. +clear(#kv_lru{max_size=0}) -> + ok; clear(#kv_lru{age_idx=Idx, cache=Cache}) -> ets:delete_all_objects(Idx), - ets:delete_all_objects(Cache). + ets:delete_all_objects(Cache), + ok. +clear_bkey(#kv_lru{max_size=0}, _BKey) -> + ok; clear_bkey(#kv_lru{bucket_idx=BucketIdx}=LRU, BKey) -> R = ets:match(BucketIdx, {BKey, '$1'}), case R of @@ -71,10 +87,13 @@ clear_bkey(#kv_lru{bucket_idx=BucketIdx}=LRU, BKey) -> ok end. +destroy(#kv_lru{max_size=0}) -> + ok; destroy(#kv_lru{age_idx=Idx, bucket_idx=BucketIdx, cache=Cache}) -> ets:delete(Idx), ets:delete(BucketIdx), - ets:delete(Cache). + ets:delete(Cache), + ok. %% Internal functions remove_existing(Idx, BucketIdx, Cache, BKey, Key) -> @@ -166,7 +185,16 @@ clear_bkey_test() -> riak_kv_lru:put(C, BKey2, X, X) end, [F(X) || X <- lists:seq(1, 5)], riak_kv_lru:clear_bkey(C, BKey2), - notfound = riak_kv_lru:fetch(C, BKey1, 3), - 3 = riak_kv_lru:fetch(C, BKey2, 3). + notfound = riak_kv_lru:fetch(C, BKey2, 3), + 3 = riak_kv_lru:fetch(C, BKey1, 3), + riak_kv_lru:destroy(C). + +zero_size_test() -> + BKey = {<<"test">>, <<"foo">>}, + C = riak_kv_lru:new(0), + ok = riak_kv_lru:put(C, BKey, 1, 1), + notfound = riak_kv_lru:fetch(C, BKey, 1), + 0 = riak_kv_lru:size(C), + riak_kv_lru:destroy(C). -endif. From f4f9c56e9033be6ed5bc7761d37ecc52b5f837ae Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Wed, 4 Aug 2010 15:12:35 -0400 Subject: [PATCH 27/72] Fixed bug in named JS fun dispatch;Reduced amount of console spew when a m/r job crashes --- src/riak_kv_js_vm.erl | 10 +++++----- src/riak_kv_map_phase.erl | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/riak_kv_js_vm.erl b/src/riak_kv_js_vm.erl index d6b7803fa5..cf15e14196 100644 --- a/src/riak_kv_js_vm.erl +++ b/src/riak_kv_js_vm.erl @@ -127,18 +127,18 @@ handle_cast({dispatch, _Requestor, JobId, {Sender, {map, {jsanon, JS}, Arg, _Acc {noreply, FinalState}; %% Map phase with named function -handle_cast({dispatch, Requestor, _JobId, {Sender, {map, {jsfun, JS}, Arg, _Acc}, +handle_cast({dispatch, _Requestor, JobId, {Sender, {map, {jsfun, JS}, Arg, _Acc}, Value, - KeyData, BKey}}, #state{ctx=Ctx}=State) -> + KeyData, _BKey}}, #state{ctx=Ctx}=State) -> JsonValue = riak_object:to_json(Value), JsonArg = jsonify_arg(Arg), case invoke_js(Ctx, JS, [JsonValue, KeyData, JsonArg]) of {ok, R} -> %% Requestor should be the dispatching vnode - riak_kv_vnode:mapcache(Requestor, BKey, {JS, Arg, KeyData}, R), - riak_core_vnode:reply(Sender, {mapexec_reply, R, Requestor}); + %%riak_kv_vnode:mapcache(Requestor, BKey, {JS, Arg, KeyData}, R), + riak_core_vnode:send_command(Sender, {mapexec_reply, JobId, R}); Error -> - riak_core_vnode:reply(Sender, {mapexec_error_noretry, Requestor, Error}) + riak_core_vnode:send_command(Sender, {mapexec_error_noretry, JobId, Error}) end, riak_kv_js_manager:mark_idle(), {noreply, State}; diff --git a/src/riak_kv_map_phase.erl b/src/riak_kv_map_phase.erl index acb8574593..aab29eebc7 100644 --- a/src/riak_kv_map_phase.erl +++ b/src/riak_kv_map_phase.erl @@ -58,7 +58,7 @@ handle_event({mapexec_reply, Reply, Executor}, #state{fsms=FSMs0}=State) -> FSMs = lists:delete(Executor, FSMs0), {output, Reply, State#state{fsms=FSMs}}; handle_event({mapexec_error, _Executor, Reply}, State) -> - {stop, Reply, State}; + {stop, Reply, State#state{ring=none, fsms=none, acc=none}}; handle_event(_Event, State) -> {no_output, State}. From d99b2e1cc333144053087f178e1559afb05575c4 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Wed, 4 Aug 2010 18:18:37 -0400 Subject: [PATCH 28/72] Removing debug output --- src/riak_kv_put_fsm.erl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/riak_kv_put_fsm.erl b/src/riak_kv_put_fsm.erl index 3be1e77a23..17d0d1c2bc 100644 --- a/src/riak_kv_put_fsm.erl +++ b/src/riak_kv_put_fsm.erl @@ -341,9 +341,7 @@ invoke_hook(precommit, undefined, undefined, JSName, RObj) -> {ok, [{<<"fail">>, Message}]} -> {fail, Message}; {ok, NewObj} -> - Obj = riak_object:from_json(NewObj), - io:format("Object: ~p~n", [Obj]), - Obj; + riak_object:from_json(NewObj); {error, Error} -> error_logger:error_msg("Error executing pre-commit hook: ~s", [Error]), From 22438da6779f1f627483e8a8cc04cb08563f6545 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Thu, 5 Aug 2010 07:58:57 -0400 Subject: [PATCH 29/72] Fixing error in riak_kv_mapred_query exposed by Java client integration tests --- src/riak_kv_mapred_query.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/riak_kv_mapred_query.erl b/src/riak_kv_mapred_query.erl index 760046c324..c6e55b8e4e 100644 --- a/src/riak_kv_mapred_query.erl +++ b/src/riak_kv_mapred_query.erl @@ -126,7 +126,7 @@ phase_behavior(map, _QueryFun, true) -> [accumulate]; phase_behavior(map, _QueryFun, false) -> []; -phase_behavior(reduce, {_, _}, Accumulate) -> +phase_behavior(reduce, _QueryFun, Accumulate) -> Behaviors0 = [{converge, 2}], case Accumulate of true -> @@ -134,6 +134,7 @@ phase_behavior(reduce, {_, _}, Accumulate) -> false -> Behaviors0 end. + fetch_js(Bucket, Key) -> {ok, Client} = riak:local_client(), case Client:get(Bucket, Key, 1) of From b79c094af4166cf874c5d82ccf823cf320fac1c1 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Fri, 13 Aug 2010 06:53:45 -0400 Subject: [PATCH 30/72] Adding riak_kv_keylister infrastructure to manage the list keys process --- ebin/riak_kv.app | 4 ++ include/riak_kv_vnode.hrl | 9 ++- src/lk.erl | 66 ++++++++++++++++++++++ src/riak_kv_keylister.erl | 74 ++++++++++++++++++++++++ src/riak_kv_keylister_master.erl | 50 ++++++++++++++++ src/riak_kv_keylister_sup.erl | 43 ++++++++++++++ src/riak_kv_keys_fsm.erl | 53 ++++++++++++----- src/riak_kv_sup.erl | 10 ++++ src/riak_kv_vnode.erl | 97 ++++++++++++++++++++++---------- 9 files changed, 358 insertions(+), 48 deletions(-) create mode 100644 src/lk.erl create mode 100644 src/riak_kv_keylister.erl create mode 100644 src/riak_kv_keylister_master.erl create mode 100644 src/riak_kv_keylister_sup.erl diff --git a/ebin/riak_kv.app b/ebin/riak_kv.app index a3435360ab..23bd54e4e7 100644 --- a/ebin/riak_kv.app +++ b/ebin/riak_kv.app @@ -5,6 +5,7 @@ {description, "Riak Key/Value Store"}, {vsn, "0.12.0"}, {modules, [ + lk, raw_link_walker, riak, riak_client, @@ -23,6 +24,9 @@ riak_kv_js_manager, riak_kv_js_sup, riak_kv_js_vm, + riak_kv_keylister, + riak_kv_keylister_master, + riak_kv_keylister_sup, riak_kv_keys_fsm, riak_kv_legacy_vnode, riak_kv_map_executor, diff --git a/include/riak_kv_vnode.hrl b/include/riak_kv_vnode.hrl index f5491b69ae..d58e4077f0 100644 --- a/include/riak_kv_vnode.hrl +++ b/include/riak_kv_vnode.hrl @@ -15,6 +15,11 @@ bucket :: binary(), req_id :: non_neg_integer()}). +-record(riak_kv_listkeys2_req_v1, { + bucket :: binary(), + req_id :: non_neg_integer(), + caller :: pid()}). + -record(riak_kv_delete_req_v1, { bkey :: {binary(), binary()}, req_id :: non_neg_integer()}). @@ -31,9 +36,7 @@ -define(KV_PUT_REQ, #riak_kv_put_req_v1). -define(KV_GET_REQ, #riak_kv_get_req_v1). -define(KV_LISTKEYS_REQ, #riak_kv_listkeys_req_v1). +-define(KV_LISTKEYS2_REQ, #riak_kv_listkeys2_req_v1). -define(KV_DELETE_REQ, #riak_kv_delete_req_v1). -define(KV_MAP_REQ, #riak_kv_map_req_v1). -define(KV_VCLOCK_REQ, #riak_kv_vclock_req_v1). - - - diff --git a/src/lk.erl b/src/lk.erl new file mode 100644 index 0000000000..691f9d0820 --- /dev/null +++ b/src/lk.erl @@ -0,0 +1,66 @@ +-module(lk). + +-export([fsm/1, pn/1]). + +fsm(Bucket) -> + ReqId = random:uniform(10000), + Start = erlang:now(), + riak_kv_keys_fsm:start(ReqId, Bucket, 60000, plain, 0.0001, self()), + {ok, Count} = gather_fsm_results(ReqId, 0), + End = erlang:now(), + Ms = erlang:round(timer:now_diff(End, Start) / 1000), + io:format("Found ~p keys in ~pms.~n", [Count, Ms]). + +pn(Bucket) -> + ReqId = random:uniform(10000), + {ok, Ring} = riak_core_ring_manager:get_my_ring(), + {ok, Bloom} = ebloom:new(10000000,0.0001,ReqId), + BucketProps = riak_core_bucket:get_bucket(Bucket, Ring), + N = proplists:get_value(n_val,BucketProps), + PLS = lists:flatten(riak_core_ring:all_preflists(Ring,N)), + Nodes = [node()|nodes()], + Start = erlang:now(), + start_listers(Nodes, ReqId, Bucket, PLS), + {ok, Count} = gather_pn_results(ReqId, Bloom, length(Nodes), 0), + End = erlang:now(), + Ms = erlang:round(timer:now_diff(End, Start) / 1000), + io:format("Found ~p keys in ~pms.~n", [Count, Ms]). + +gather_fsm_results(ReqId, Count) -> + receive + {ReqId, {keys, Keys}} -> + gather_fsm_results(ReqId, Count + length(Keys)); + {ReqId, done} -> + {ok, Count} + after 120000 -> + {error, timeout} + end. + +start_listers([], _ReqId, _Bucket, _VNodes) -> + ok; +start_listers([H|T], ReqId, Bucket, VNodes) -> + riak_kv_keylister_master:start_keylist(H, ReqId, self(), Bucket, VNodes, 60000), + start_listers(T, ReqId, Bucket, VNodes). + +gather_pn_results(_, BF, 0, Count) -> + ebloom:clear(BF), + {ok, Count}; +gather_pn_results(ReqId, BF, NodeCount, Count) -> + %%io:format("NodeCount: ~p, key count: ~p~n", [NodeCount, Count]), + receive + {ReqId, {kl, Keys0}} -> + F = fun(Key, Acc) -> + case ebloom:contains(BF, Key) of + false -> + ebloom:insert(BF, Key), + [Key|Acc]; + true -> + Acc + end end, + Keys = lists:foldl(F, [], Keys0), + gather_pn_results(ReqId, BF, NodeCount, Count + length(Keys)); + {ReqId, done} -> + gather_pn_results(ReqId, BF, NodeCount - 1, Count) + after 10000 -> + {error, timeout} + end. diff --git a/src/riak_kv_keylister.erl b/src/riak_kv_keylister.erl new file mode 100644 index 0000000000..7ba93d9c28 --- /dev/null +++ b/src/riak_kv_keylister.erl @@ -0,0 +1,74 @@ +-module(riak_kv_keylister). + +-behaviour(gen_fsm). + +%% API +-export([start_link/3, + list_keys/2]). + +%% States +-export([waiting/2]). + +%% gen_fsm callbacks +-export([init/1, state_name/2, state_name/3, handle_event/3, + handle_sync_event/4, handle_info/3, terminate/3, code_change/4]). + +-record(state, {reqid, + caller, + bucket, + bloom}). + +list_keys(ListerPid, VNode) -> + gen_fsm:send_event(ListerPid, {lk, VNode}). + +start_link(ReqId, Caller, Bucket) -> + gen_fsm:start_link({local, ?MODULE}, ?MODULE, [ReqId, Caller, Bucket], []). + +init([ReqId, Caller, Bucket]) -> + process_flag(trap_exit, true), + {ok, Bloom} = ebloom:new(10000000, 0.0001, crypto:rand_uniform(1, 5000)), + {ok, waiting, #state{reqid=ReqId, caller=Caller, bloom=Bloom, bucket=Bucket}}. + +waiting({lk, VNode}, #state{reqid=ReqId, bucket=Bucket}=State) -> + riak_kv_vnode:list_keys2(VNode, ReqId, self(), Bucket), + {next_state, waiting, State}. + +state_name(_Event, State) -> + {next_state, state_name, State}. + +state_name(_Event, _From, State) -> + Reply = ok, + {reply, Reply, state_name, State}. + +handle_event(_Event, StateName, State) -> + {next_state, StateName, State}. + +handle_sync_event(_Event, _From, StateName, State) -> + {reply, ignored, StateName, State}. + +handle_info({ReqId, {kl, Idx, Keys0}}, waiting, #state{reqid=ReqId, bloom=Bloom, + caller=Caller}=State) -> + F = fun(Key, Acc) -> + case ebloom:contains(Bloom, Key) of + true -> + Acc; + false -> + ebloom:insert(Bloom, Key), + [Key|Acc] + end end, + Keys = lists:foldl(F, [], Keys0), + gen_fsm:send_event(Caller, {ReqId, {kl, Idx, Keys}}), + {next_state, waiting, State}; +handle_info({ReqId, Idx, done}, waiting, #state{reqid=ReqId, caller=Caller}=State) -> + gen_fsm:send_event(Caller, {ReqId, Idx, done}), + {next_state, waiting, State}; +handle_info(_Info, StateName, State) -> + {next_state, StateName, State}. + +terminate(_Reason, _StateName, _State) -> + ok. + +code_change(_OldVsn, StateName, State, _Extra) -> + {ok, StateName, State}. + +%% Internal functions diff --git a/src/riak_kv_keylister_master.erl b/src/riak_kv_keylister_master.erl new file mode 100644 index 0000000000..7bd3ae9f4b --- /dev/null +++ b/src/riak_kv_keylister_master.erl @@ -0,0 +1,50 @@ +-module(riak_kv_keylister_master). + +-behaviour(gen_server). + +%% API +-export([start_link/0, + start_keylist/3]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-define(SERVER, ?MODULE). + +-record(state, {}). + +start_keylist(Node, ReqId, Bucket) -> + case gen_server:call({?SERVER, Node}, {start_kl, ReqId, self(), Bucket}) of + {ok, Pid} -> + %% Link processes so the keylister doesn't run forever + erlang:link(Pid), + {ok, Pid}; + Error -> + Error + end. + +start_link() -> + gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). + +init([]) -> + {ok, #state{}}. + +handle_call({start_kl, ReqId, Caller, Bucket}, _From, State) -> + Reply = riak_kv_keylister_sup:new_lister(ReqId, Caller, Bucket), + {reply, Reply, State}; + +handle_call(_Request, _From, State) -> + {reply, ignore, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. diff --git a/src/riak_kv_keylister_sup.erl b/src/riak_kv_keylister_sup.erl new file mode 100644 index 0000000000..9ea27f7c06 --- /dev/null +++ b/src/riak_kv_keylister_sup.erl @@ -0,0 +1,43 @@ +%% Copyright (c) 2010 Basho Technologies, Inc. All Rights Reserved. + +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at + +%% http://www.apache.org/licenses/LICENSE-2.0 + +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. + +-module(riak_kv_keylister_sup). + +-behaviour(supervisor). + +%% API +-export([start_link/0, + new_lister/3]). + +%% Supervisor callbacks +-export([init/1]). + +new_lister(ReqId, Bucket, Caller) -> + start_child([ReqId, Bucket, Caller]). + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + +init([]) -> + SupFlags = {simple_one_for_one, 0, 1}, + Process = {undefined, + {riak_kv_keylister, start_link, []}, + temporary, brutal_kill, worker, dynamic}, + {ok, {SupFlags, [Process]}}. + +%% Internal functions +start_child(Args) -> + supervisor:start_child(?MODULE, Args). diff --git a/src/riak_kv_keys_fsm.erl b/src/riak_kv_keys_fsm.erl index 7dacbfb7e2..f638064c6d 100644 --- a/src/riak_kv_keys_fsm.erl +++ b/src/riak_kv_keys_fsm.erl @@ -40,7 +40,8 @@ bucket :: riak_object:bucket(), timeout :: pos_integer(), req_id :: pos_integer(), - ring :: riak_core_ring:riak_core_ring() + ring :: riak_core_ring:riak_core_ring(), + listers :: [{atom(), pid()}] }). start(ReqId,Bucket,Timeout,ClientType,ErrorTolerance,From) -> @@ -51,8 +52,10 @@ start(ReqId,Bucket,Timeout,ClientType,ErrorTolerance,From) -> init([ReqId,Bucket,Timeout,ClientType,ErrorTolerance,Client]) -> {ok, Ring} = riak_core_ring_manager:get_my_ring(), {ok, Bloom} = ebloom:new(10000000,ErrorTolerance,ReqId), + Listers = start_listers(ReqId, Bucket), StateData = #state{client=Client, client_type=ClientType, timeout=Timeout, - bloom=Bloom, req_id=ReqId, bucket=Bucket, ring=Ring}, + bloom=Bloom, req_id=ReqId, bucket=Bucket, ring=Ring, + listers=Listers}, {ok,initialize,StateData,0}. %% @private @@ -68,11 +71,15 @@ initialize(timeout, StateData0=#state{bucket=Bucket, ring=Ring}) -> wait_pls=[],vns=sets:from_list([])}, reduce_pls(StateData). -waiting_kl({kl, Keys, Idx, ReqId}, - StateData0=#state{pls=PLS,vns=VNS0,wait_pls=WPL0,bloom=Bloom, +waiting_kl({ReqId, {kl, _Idx, Keys}}, + StateData=#state{bloom=Bloom, req_id=ReqId,client=Client,timeout=Timeout, bucket=Bucket,client_type=ClientType}) -> process_keys(Keys,Bucket,ClientType,Bloom,ReqId,Client), + {next_state, waiting_kl, StateData, Timeout}; + +waiting_kl({ReqId, Idx, done}, StateData0=#state{wait_pls=WPL0,vns=VNS0,pls=PLS, + req_id=ReqId,timeout=Timeout}) -> WPL = [{W_Idx,W_Node,W_PL} || {W_Idx,W_Node,W_PL} <- WPL0, W_Idx /= Idx], WNs = [W_Node || {W_Idx,W_Node,_W_PL} <- WPL0, W_Idx =:= Idx], Node = case WNs of @@ -90,6 +97,7 @@ waiting_kl({kl, Keys, Idx, ReqId}, _ -> reduce_pls(StateData) end; + waiting_kl(timeout, StateData=#state{pls=PLS,wait_pls=WPL}) -> NewPLS = lists:append(PLS, [W_PL || {_W_Idx,_W_Node,W_PL} <- WPL]), reduce_pls(StateData#state{pls=NewPLS,wait_pls=[]}). @@ -100,9 +108,9 @@ finish(StateData=#state{req_id=ReqId,client=Client,client_type=ClientType}) -> plain -> Client ! {ReqId, done} end, {stop,normal,StateData}. - -reduce_pls(StateData0=#state{timeout=Timeout, req_id=ReqId,wait_pls=WPL, - simul_pls=Simul_PLS, bucket=Bucket}) -> + +reduce_pls(StateData0=#state{timeout=Timeout, wait_pls=WPL, + listers=Listers, simul_pls=Simul_PLS}) -> case find_free_pl(StateData0) of {none_free,NewPLS} -> StateData = StateData0#state{pls=NewPLS}, @@ -111,9 +119,15 @@ reduce_pls(StateData0=#state{timeout=Timeout, req_id=ReqId,wait_pls=WPL, false -> {next_state, waiting_kl, StateData, Timeout} end; {[{Idx,Node}|RestPL],PLS} -> - case net_adm:ping(Node) of - pong -> - riak_kv_vnode:list_keys({Idx,Node},Bucket,ReqId), + case riak_core_node_watcher:services(Node) of + [] -> + reduce_pls(StateData0#state{pls=[RestPL|PLS]}); + _ -> + %% Look up keylister for that node + LPid = proplists:get_value(Node, Listers), + %% Send the keylist request to the lister + riak_kv_keylister:list_keys(LPid, {Idx, Node}), + %% riak_kv_vnode:list_keys({Idx,Node},Bucket,ReqId), WaitPLS = [{Idx,Node,RestPL}|WPL], StateData = StateData0#state{pls=PLS, wait_pls=WaitPLS}, case length(WaitPLS) > Simul_PLS of @@ -121,10 +135,8 @@ reduce_pls(StateData0=#state{timeout=Timeout, req_id=ReqId,wait_pls=WPL, {next_state, waiting_kl, StateData, Timeout}; false -> reduce_pls(StateData) - end; - pang -> - reduce_pls(StateData0#state{pls=[RestPL|PLS]}) - end + end + end end. find_free_pl(StateData) -> find_free_pl1(StateData, []). @@ -164,7 +176,7 @@ process_keys([],Bucket,ClientType,_Bloom,ReqId,Client,Acc) -> ok; process_keys([K|Rest],Bucket,ClientType,Bloom,ReqId,Client,Acc) -> case ebloom:contains(Bloom,K) of - true -> + true -> process_keys(Rest,Bucket,ClientType, Bloom,ReqId,Client,Acc); false -> @@ -192,3 +204,14 @@ terminate(Reason, _StateName, _State) -> %% @private code_change(_OldVsn, StateName, State, _Extra) -> {ok, StateName, State}. + +%% @private +start_listers(ReqId, Bucket) -> + Nodes = [node()|nodes()], + start_listers(Nodes, ReqId, Bucket, []). + +start_listers([], _ReqId, _Bucket, Accum) -> + Accum; +start_listers([H|T], ReqId, Bucket, Accum) -> + {ok, Pid} = riak_kv_keylister_master:start_keylist(H, ReqId, Bucket), + start_listers(T, ReqId, Bucket, [{H, Pid}|Accum]). diff --git a/src/riak_kv_sup.erl b/src/riak_kv_sup.erl index 5a422be0eb..678a32df95 100644 --- a/src/riak_kv_sup.erl +++ b/src/riak_kv_sup.erl @@ -60,6 +60,14 @@ init([]) -> RiakJsSup = {riak_kv_js_sup, {riak_kv_js_sup, start_link, []}, permanent, infinity, supervisor, [riak_kv_js_sup]}, + KLMaster = {riak_kv_keylister_master, + {riak_kv_keylister_master, start_link, []}, + permanent, 30000, worker, [riak_kv_keylister_master]}, + KLSup = {riak_kv_keylister_sup, + {riak_kv_keylister_sup, start_link, []}, + permanent, infinity, supervisor, [riak_kv_keylister_sup]}, + + % Figure out which processes we should run... IsPbConfigured = (app_helper:get_env(riak_kv, pb_ip) /= undefined) andalso (app_helper:get_env(riak_kv, pb_port) /= undefined), @@ -71,6 +79,8 @@ init([]) -> ?IF(HasStorageBackend, VMaster, []), ?IF(IsPbConfigured, RiakPb, []), ?IF(IsStatEnabled, RiakStat, []), + KLSup, + KLMaster, RiakJsSup, RiakJsMgr ]), diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index 525c241a90..1a00af4897 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -17,13 +17,14 @@ -module(riak_kv_vnode). -behaviour(riak_core_vnode). %% API --export([start_vnode/1, - del/3, - put/6, - readrepair/6, +-export([start_vnode/1, + del/3, + put/6, + readrepair/6, list_keys/3, - map/5, - fold/3, + list_keys2/4, + map/5, + fold/3, get_vclocks/2, mapcache/4, purge_mapcaches/0]). @@ -49,7 +50,7 @@ -export([map_test/3]). -endif. --record(state, {idx :: partition(), +-record(state, {idx :: partition(), mod :: module(), modstate :: term(), mapcache :: term(), @@ -59,7 +60,7 @@ lww :: boolean(), bkey :: {binary(), binary()}, robj :: term(), - reqid :: non_neg_integer(), + reqid :: non_neg_integer(), bprops :: maybe_improper_list(), prunetime :: non_neg_integer()}). -define(CLEAR_MAPCACHE_INTERVAL, 60000). @@ -81,7 +82,7 @@ del(Preflist, BKey, ReqId) -> put(Preflist, BKey, Obj, ReqId, StartTime, Options) when is_integer(StartTime) -> put(Preflist, BKey, Obj, ReqId, StartTime, Options, {fsm, undefined, self()}). -put(Preflist, BKey, Obj, ReqId, StartTime, Options, Sender) +put(Preflist, BKey, Obj, ReqId, StartTime, Options, Sender) when is_integer(StartTime) -> riak_core_vnode_master:command(Preflist, ?KV_PUT_REQ{ @@ -94,7 +95,7 @@ put(Preflist, BKey, Obj, ReqId, StartTime, Options, Sender) riak_kv_vnode_master). %% Do a put without sending any replies -readrepair(Preflist, BKey, Obj, ReqId, StartTime, Options) -> +readrepair(Preflist, BKey, Obj, ReqId, StartTime, Options) -> put(Preflist, BKey, Obj, ReqId, StartTime, Options, ignore). list_keys(Preflist, Bucket, ReqId) -> @@ -105,6 +106,15 @@ list_keys(Preflist, Bucket, ReqId) -> {fsm, undefined, self()}, riak_kv_vnode_master). +list_keys2(Preflist, ReqId, Caller, Bucket) -> + riak_core_vnode_master:command(Preflist, + ?KV_LISTKEYS2_REQ{ + bucket=Bucket, + req_id=ReqId, + caller=Caller}, + ignore, + riak_kv_vnode_master). + map(Preflist, ClientPid, QTerm, BKey, KeyData) -> riak_core_vnode_master:command(Preflist, ?KV_MAP_REQ{ @@ -124,14 +134,14 @@ fold(Preflist, Fun, Acc0) -> purge_mapcaches() -> VNodes = riak_core_vnode_master:all_nodes(?MODULE), lists:foreach(fun(VNode) -> riak_core_vnode:send_command(VNode, purge_mapcache) end, VNodes). - + mapcache(Pid, BKey, What, R) -> riak_core_vnode:send_command(Pid, {mapcache, BKey, What, R}). - -get_vclocks(Preflist, BKeyList) -> + +get_vclocks(Preflist, BKeyList) -> riak_core_vnode_master:sync_spawn_command(Preflist, ?KV_VCLOCK_REQ{bkeys=BKeyList}, - riak_kv_vnode_master). + riak_kv_vnode_master). %% VNode callbacks @@ -154,11 +164,16 @@ handle_command(?KV_PUT_REQ{bkey=BKey, handle_command(?KV_GET_REQ{bkey=BKey,req_id=ReqId},Sender,State) -> do_get(Sender, BKey, ReqId, State); -handle_command(?KV_LISTKEYS_REQ{bucket=Bucket, req_id=ReqId}, _Sender, +handle_command(?KV_LISTKEYS_REQ{bucket=Bucket, req_id=ReqId}, _Sender, State=#state{mod=Mod, modstate=ModState, idx=Idx}) -> do_list_bucket(ReqId,Bucket,Mod,ModState,Idx,State); -handle_command(?KV_DELETE_REQ{bkey=BKey, req_id=ReqId}, _Sender, - State=#state{mod=Mod, modstate=ModState, +handle_command(?KV_LISTKEYS2_REQ{bucket=Bucket, req_id=ReqId, caller=Caller}, _Sender, + State=#state{mod=Mod, modstate=ModState, idx=Idx}) -> + do_list_keys2(Caller,ReqId,Bucket,Idx,Mod,ModState), + {noreply, State}; + +handle_command(?KV_DELETE_REQ{bkey=BKey, req_id=ReqId}, _Sender, + State=#state{mod=Mod, modstate=ModState, idx=Idx, mapcache=Cache}) -> NewState = State#state{mapcache=orddict:erase(BKey,Cache)}, case Mod:delete(ModState, BKey) of @@ -176,7 +191,7 @@ handle_command(?FOLD_REQ{foldfun=Fun, acc0=Acc},_Sender,State) -> Reply = do_fold(Fun, Acc, State), {reply, Reply, State}; %% Commands originating from inside this vnode -handle_command({backend_callback, Ref, Msg}, _Sender, +handle_command({backend_callback, Ref, Msg}, _Sender, State=#state{mod=Mod, modstate=ModState}) -> Mod:callback(ModState, Ref, Msg), {noreply, State}; @@ -188,7 +203,7 @@ handle_command({mapcache, BKey,{FunName,Arg,KeyData}, MF_Res}, _Sender, end, KeyCache = orddict:store({FunName,Arg,KeyData},MF_Res,KeyCache0), {noreply, State#state{mapcache=orddict:store(BKey,KeyCache,Cache)}}; -handle_command({mapcache, BKey,{M,F,Arg,KeyData},MF_Res}, _Sender, +handle_command({mapcache, BKey,{M,F,Arg,KeyData},MF_Res}, _Sender, State=#state{mapcache=Cache}) -> KeyCache0 = case orddict:find(BKey, Cache) of error -> orddict:new(); @@ -202,7 +217,7 @@ handle_command(clear_mapcache, _Sender, State) -> schedule_clear_mapcache(), {noreply, State#state{mapcache=orddict:new()}}. -handle_handoff_command(Req=?FOLD_REQ{}, Sender, State) -> +handle_handoff_command(Req=?FOLD_REQ{}, Sender, State) -> handle_command(Req, Sender, State); handle_handoff_command(Req={backend_callback, _Ref, _Msg}, Sender, State) -> handle_command(Req, Sender, State); @@ -269,7 +284,7 @@ do_put(Sender, {Bucket,_Key}=BKey, RObj, ReqID, PruneTime, Options, State) -> riak_core_vnode:reply(Sender, Reply), riak_kv_stat:update(vnode_put). -prepare_put(#state{}, #putargs{lww=true, robj=RObj}) -> +prepare_put(#state{}, #putargs{lww=true, robj=RObj}) -> {true, RObj}; prepare_put(#state{mod=Mod,modstate=ModState}, #putargs{bkey=BKey, robj=RObj, @@ -366,6 +381,28 @@ do_list_bucket(ReqID,Bucket,Mod,ModState,Idx,State) -> RetVal = Mod:list_bucket(ModState,Bucket), {reply, {kl, RetVal, Idx, ReqID}, State}. +do_list_keys2(Caller,ReqId,Bucket,Idx,Mod,ModState) -> + F = fun(BKey, _, Acc) -> + process_keys(Caller, ReqId, Idx, Bucket, BKey, Acc) end, + case Mod:fold(ModState, F, []) of + [] -> + ok; + Remainder -> + Caller ! {ReqId, {kl, Idx, Remainder}} + end, + Caller ! {ReqId, Idx, done}. + +process_keys(Caller, ReqId, Idx, Bucket, {Bucket, K}, Acc0) -> + Acc = [K|Acc0], + case length(Acc) >= 100 of + true -> + Caller ! {ReqId, {kl, Idx, Acc}}, + []; + false -> + Acc + end; +process_keys(_Caller, _ReqId, _Idx, _Bucket, {_B, _K}, Acc) -> + Acc. %% @private do_fold(Fun, Acc0, _State=#state{mod=Mod, modstate=ModState}) -> Mod:fold(ModState, Fun, Acc0). @@ -491,7 +528,7 @@ dummy_backend() -> riak_core_ring_manager:set_ring_global(Ring), application:set_env(riak_kv, storage_backend, riak_kv_ets_backend), application:set_env(riak_core, default_bucket_props, []). - + %% Make sure the mapcache gets cleared when the bkey is updated mapcache_put_test() -> @@ -559,7 +596,7 @@ purge_mapcaches_test() -> %% Prove nothing there FunTerm = {modfun, ?MODULE, map_test}, - Arg = arg, + Arg = arg, QTerm = {erlang, {map, FunTerm, Arg, acc}}, KeyData = keydata, CacheKey = build_key(FunTerm, Arg, KeyData), @@ -581,14 +618,14 @@ purge_mapcaches_test() -> riak_core_node_watcher:service_down(riak_kv), cleanup_servers(). - + cleanup_servers() -> riak_kv_test_util:stop_process(riak_core_node_watcher), riak_kv_test_util:stop_process(riak_core_node_watcher_events), riak_kv_test_util:stop_process(riak_core_ring_events), riak_kv_test_util:stop_process(riak_core_vnode_sup), riak_kv_test_util:stop_process(riak_kv_vnode_master). - + check_mapcache(Index, QTerm, BKey, KeyData, Expect) -> map({Index,node()}, self(), QTerm, BKey, KeyData), @@ -600,12 +637,12 @@ check_mapcache(Index, QTerm, BKey, KeyData, Expect) -> 100 -> ?assert(false) end. - -%% Map identity function - returns what you give it + +%% Map identity function - returns what you give it map_test(Obj, _KeyData, _Arg) -> Obj. -flush_msgs() -> +flush_msgs() -> receive _Msg -> flush_msgs() @@ -613,7 +650,7 @@ flush_msgs() -> 0 -> ok end. - - + + -endif. % TEST From 654d7ac92cbdddedc0e8dce825756e931cd25b4e Mon Sep 17 00:00:00 2001 From: Rusty Klophaus Date: Fri, 13 Aug 2010 09:29:07 -0400 Subject: [PATCH 31/72] Update mapred to accept more generalized inputs (currently in modfun format). --- src/riak_client.erl | 17 +++++++++++++++-- src/riak_kv_js_vm.erl | 3 ++- src/riak_kv_wm_mapred.erl | 29 +++++++++++++++++++---------- 3 files changed, 36 insertions(+), 13 deletions(-) diff --git a/src/riak_client.erl b/src/riak_client.erl index a564964d3c..f61ea4aba9 100644 --- a/src/riak_client.erl +++ b/src/riak_client.erl @@ -29,6 +29,7 @@ -export([mapred_stream/2,mapred_stream/3,mapred_stream/4]). -export([mapred_bucket/2,mapred_bucket/3,mapred_bucket/4]). -export([mapred_bucket_stream/3,mapred_bucket_stream/4,mapred_bucket_stream/6]). +-export([mapred_dynamic_inputs_stream/3]). -export([get/2, get/3,get/4]). -export([put/1, put/2,put/3,put/4,put/5]). -export([delete/2,delete/3,delete/4]). @@ -163,9 +164,21 @@ mapred_bucket(Bucket, Query, ResultTransformer, Timeout, ErrorTolerance) -> ResultTransformer, Timeout, ErrorTolerance), luke_flow:collect_output(MR_ReqId, Timeout). -%% -%% +-define(PRINT(Var), io:format("DEBUG: ~p:~p - ~p~n~n ~p~n~n", [?MODULE, ?LINE, ??Var, Var])). + +%% An InputDef defines a Module and Function to call to generate +%% inputs for a map/reduce job. Should return {ok, +%% LukeReqID}. Ideally, we'd combine both the other input types (BKeys +%% and Bucket) into this approach, but postponing until after a code +%% review of Map/Reduce. +mapred_dynamic_inputs_stream(FSMPid, InputDef, Timeout) -> + case InputDef of + {modfun, Mod, Fun, Options} -> + Mod:Fun(FSMPid, Options, Timeout); + _ -> + throw({invalid_inputdef, InputDef}) + end. %% @spec get(riak_object:bucket(), riak_object:key()) -> %% {ok, riak_object:riak_object()} | diff --git a/src/riak_kv_js_vm.erl b/src/riak_kv_js_vm.erl index cf15e14196..677fda5e73 100644 --- a/src/riak_kv_js_vm.erl +++ b/src/riak_kv_js_vm.erl @@ -90,7 +90,8 @@ handle_call({dispatch, _JobId, {{jsfun, JS}, Obj}}, _From, #state{ctx=Ctx}=State Reply = invoke_js(Ctx, JS, [riak_object:to_json(Obj)]), riak_kv_js_manager:mark_idle(), {reply, Reply, State}; -handle_call(_Request, _From, State) -> +handle_call(Request, _From, State) -> + io:format("Request: ~p~n", [Request]), {reply, ignore, State}. handle_cast(reload, #state{ctx=Ctx}=State) -> diff --git a/src/riak_kv_wm_mapred.erl b/src/riak_kv_wm_mapred.erl index 5c462168db..986e153c81 100644 --- a/src/riak_kv_wm_mapred.erl +++ b/src/riak_kv_wm_mapred.erl @@ -76,19 +76,22 @@ nop(RD, State) -> process_post(RD, #state{inputs=Inputs, mrquery=Query, timeout=Timeout}=State) -> Me = self(), {ok, Client} = riak:local_client(), + ResultTransformer = fun riak_kv_mapred_json:jsonify_not_found/1, case wrq:get_qs_value("chunked", RD) of "true" -> {ok, ReqId} = if is_list(Inputs) -> - {ok, {RId, FSM}} = Client:mapred_stream(Query, Me, - fun riak_kv_mapred_json:jsonify_not_found/1, - Timeout), + {ok, {RId, FSM}} = Client:mapred_stream(Query, Me, ResultTransformer, Timeout), luke_flow:add_inputs(FSM, Inputs), luke_flow:finish_inputs(FSM), {ok, RId}; is_binary(Inputs) -> - Client:mapred_bucket_stream(Inputs, Query, Me, - Timeout) + Client:mapred_bucket_stream(Inputs, Query, Me, Timeout); + is_tuple(Inputs) -> + {ok, {RId, FSM}} = Client:mapred_stream(Query, Me, ResultTransformer, Timeout), + Client:mapred_dynamic_inputs_stream(FSM, Inputs, Timeout), + luke_flow:finish_inputs(FSM), + {ok, RId} end, Boundary = riak_core_util:unique_id_62(), RD1 = wrq:set_resp_header("Content-Type", "multipart/mixed;boundary=" ++ Boundary, RD), @@ -97,12 +100,18 @@ process_post(RD, #state{inputs=Inputs, mrquery=Query, timeout=Timeout}=State) -> Param when Param =:= "false"; Param =:= undefined -> Results = if is_list(Inputs) -> - Client:mapred(Inputs, Query, - fun riak_kv_mapred_json:jsonify_not_found/1, - Timeout); + Client:mapred(Inputs, Query, ResultTransformer, Timeout); is_binary(Inputs) -> - Client:mapred_bucket(Inputs, Query, fun riak_kv_mapred_json:jsonify_not_found/1, - Timeout) + Client:mapred_bucket(Inputs, Query, ResultTransformer, Timeout); + is_tuple(Inputs) -> + case Client:mapred_stream(Query,Me,ResultTransformer,Timeout) of + {ok, {ReqId, FlowPid}} -> + Client:mapred_dynamic_inputs_stream(FlowPid, Inputs, Timeout), + luke_flow:finish_inputs(FlowPid), + luke_flow:collect_output(ReqId, Timeout); + Error -> + Error + end end, RD1 = wrq:set_resp_header("Content-Type", "application/json", RD), case Results of From 6963cd2610f1807754a831779d7f3acddcb603be Mon Sep 17 00:00:00 2001 From: Jon Meredith Date: Fri, 13 Aug 2010 15:08:17 -0600 Subject: [PATCH 32/72] Added a clause to submit a named/anonymous javascript function with a list of arguments. --- src/riak_kv_js_vm.erl | 57 +++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/src/riak_kv_js_vm.erl b/src/riak_kv_js_vm.erl index 677fda5e73..8f751819fb 100644 --- a/src/riak_kv_js_vm.erl +++ b/src/riak_kv_js_vm.erl @@ -66,18 +66,8 @@ init([Manager]) -> end. %% Reduce phase with anonymous function -handle_call({dispatch, _JobId, {{jsanon, JS}, Reduced, Arg}}, _From, #state{ctx=Ctx}=State) -> - {Reply, UpdatedState} = case define_anon_js(JS, State) of - {ok, FunName, NewState} -> - case invoke_js(Ctx, FunName, [Reduced, Arg]) of - {ok, R} -> - {{ok, R}, NewState}; - Error -> - {Error, State} - end; - {Error, undefined, NewState} -> - {Error, NewState} - end, +handle_call({dispatch, _JobId, {{jsanon, JS}, Reduced, Arg}}, _From, State) -> + {Reply, UpdatedState} = define_invoke_anon_js(JS, [Reduced, Arg], State), riak_kv_js_manager:mark_idle(), {reply, Reply, UpdatedState}; %% Reduce phase with named function @@ -85,6 +75,18 @@ handle_call({dispatch, _JobId, {{jsfun, JS}, Reduced, Arg}}, _From, #state{ctx=C Reply = invoke_js(Ctx, JS, [Reduced, Arg]), riak_kv_js_manager:mark_idle(), {reply, Reply, State}; +%% General dispatch function for anonymous function with variable number of arguments +handle_call({dispatch, _JobId, {{jsanon, Source}, Args}}, _From, + State) when is_list(Args) -> + {Reply, UpdatedState} = define_invoke_anon_js(Source, Args, State), + riak_kv_js_manager:mark_idle(), + {reply, Reply, UpdatedState}; +%% General dispatch function for named function with variable number of arguments +handle_call({dispatch, _JobId, {{jsfun, JS}, Args}}, _From, + #state{ctx=Ctx}=State) when is_list(Args) -> + Reply = invoke_js(Ctx, JS, Args), + riak_kv_js_manager:mark_idle(), + {reply, Reply, State}; %% Pre-commit hook with named function handle_call({dispatch, _JobId, {{jsfun, JS}, Obj}}, _From, #state{ctx=Ctx}=State) -> Reply = invoke_js(Ctx, JS, [riak_object:to_json(Obj)]), @@ -102,20 +104,10 @@ handle_cast(reload, #state{ctx=Ctx}=State) -> %% Map phase with anonymous function handle_cast({dispatch, _Requestor, JobId, {Sender, {map, {jsanon, JS}, Arg, _Acc}, Value, - KeyData, _BKey}}, #state{ctx=Ctx}=State) -> - {Result, UpdatedState} = case define_anon_js(JS, State) of - {ok, FunName, NewState} -> - JsonValue = riak_object:to_json(Value), - JsonArg = jsonify_arg(Arg), - case invoke_js(Ctx, FunName, [JsonValue, KeyData, JsonArg]) of - {ok, R} -> - {{ok, R}, NewState}; - Error -> - {Error, State} - end; - {Error, undefined, NewState} -> - {Error, NewState} - end, + KeyData, _BKey}}, State) -> + JsonValue = riak_object:to_json(Value), + JsonArg = jsonify_arg(Arg), + {Result, UpdatedState} = define_invoke_anon_js(JS, [JsonValue, KeyData, JsonArg], State), FinalState = case Result of {ok, ReturnValue} -> riak_core_vnode:send_command(Sender, {mapexec_reply, JobId, ReturnValue}), @@ -160,6 +152,19 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. %% Internal functions +define_invoke_anon_js(JS, Args, #state{ctx=Ctx}=State) -> + case define_anon_js(JS, State) of + {ok, FunName, NewState} -> + case invoke_js(Ctx, FunName, Args) of + {ok, R} -> + {{ok, R}, NewState}; + Error -> + {Error, State} + end; + {Error, undefined, NewState} -> + {Error, NewState} + end. + invoke_js(Ctx, Js, Args) -> try case js:call(Ctx, Js, Args) of From 287e48706653791a5d52f90e6e172006eab2e95c Mon Sep 17 00:00:00 2001 From: Rusty Klophaus Date: Sat, 14 Aug 2010 12:54:27 -0400 Subject: [PATCH 33/72] Update format of mapred input. --- src/riak_kv_mapred_json.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/riak_kv_mapred_json.erl b/src/riak_kv_mapred_json.erl index 2928c0fa49..d961d06430 100644 --- a/src/riak_kv_mapred_json.erl +++ b/src/riak_kv_mapred_json.erl @@ -66,6 +66,8 @@ parse_request(Req) -> {error, not_json} end. +parse_inputs([<<"modfun">>,Mod, Fun, Options]) -> + {ok, {modfun, binary_to_atom(Mod, utf8), binary_to_atom(Fun, utf8), Options}}; parse_inputs(Bucket) when is_binary(Bucket) -> {ok, Bucket}; parse_inputs(Targets) when is_list(Targets) -> From 863869a3bd1f070f0312ca0b5a6872cb7f037aca Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Tue, 17 Aug 2010 18:15:11 -0400 Subject: [PATCH 34/72] Turning on bitcask key list snapshots --- rebar.config | 2 +- src/riak_kv_bitcask_backend.erl | 5 +++++ src/riak_kv_vnode.erl | 22 ++++++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index 955cf7a083..d6e0f8284e 100644 --- a/rebar.config +++ b/rebar.config @@ -11,7 +11,7 @@ "tip"}}, {erlang_js, "0\.4", {hg, "http://bitbucket.org/basho/erlang_js", "erlang_js-0.4"}}, - {bitcask, "1.0.3", {hg, "http://bitbucket.org/basho/bitcask", + {bitcask, "1.0.3", {hg, "http://bitbucket.org/kevsmith/bitcask-fk", "tip"}}, {ebloom, "1.0.1", {hg, "http://bitbucket.org/basho/ebloom", "ebloom-1.0.1"}} diff --git a/src/riak_kv_bitcask_backend.erl b/src/riak_kv_bitcask_backend.erl index 448d187a60..2df25d86f4 100644 --- a/src/riak_kv_bitcask_backend.erl +++ b/src/riak_kv_bitcask_backend.erl @@ -35,6 +35,7 @@ fold/3, drop/1, is_empty/1, + snapshot_keys/2, callback/3]). @@ -200,6 +201,10 @@ schedule_sync(Ref, SyncIntervalMs) when is_reference(Ref) -> schedule_merge(Ref) when is_reference(Ref) -> riak_kv_backend:callback_after(?MERGE_CHECK_INTERVAL, Ref, merge_check). +snapshot_keys(Ref, FilterFun) when is_reference(Ref), + is_fun(FilterFun) -> + bitcask_snapshot:snapshot_keys(Ref, FilterFun). + %% =================================================================== %% EUnit tests diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index 1a00af4897..90f1c0cd9c 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -381,6 +381,14 @@ do_list_bucket(ReqID,Bucket,Mod,ModState,Idx,State) -> RetVal = Mod:list_bucket(ModState,Bucket), {reply, {kl, RetVal, Idx, ReqID}, State}. +do_list_keys2(Caller,ReqId,Bucket,Idx,Mod,ModState) when Mod =:= riak_kv_bitcask_backend -> + F = fun(BKey) -> + {B,K} = binary_to_term(BKey), + B =:= Bucket end, + Snapshot = Mod:snapshot_keys(ModState, F), + Snapshot1 = bitcask_snapshot:open_snapshot(ModState, Snapshot), + spawn(fun() -> stream_keys(Snapshot1, Caller, ReqId, Idx) end), + ok; do_list_keys2(Caller,ReqId,Bucket,Idx,Mod,ModState) -> F = fun(BKey, _, Acc) -> process_keys(Caller, ReqId, Idx, Bucket, BKey, Acc) end, @@ -392,6 +400,20 @@ do_list_keys2(Caller,ReqId,Bucket,Idx,Mod,ModState) -> end, Caller ! {ReqId, Idx, done}. +stream_keys(Snapshot, Caller, ReqId, Idx) -> + try + case bitcask_snapshot:read_snapshot_records(Snapshot) of + {ok, R0} -> + R = [binary_to_term(Rec) || Rec <- R0], + Caller ! {ReqId, {kl, Idx, R}}, + stream_keys(Snapshot, Caller, ReqId, Idx); + eof -> + Caller ! {ReqId, Idx, done} + end + after + bitcask_snapshot:close_snapshot(Snapshot) + end. + process_keys(Caller, ReqId, Idx, Bucket, {Bucket, K}, Acc0) -> Acc = [K|Acc0], case length(Acc) >= 100 of From a046f49229c2449986bbfbe04a852bae844d40a3 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Wed, 18 Aug 2010 11:29:09 -0400 Subject: [PATCH 35/72] Fixing compile error --- src/riak_kv_bitcask_backend.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/riak_kv_bitcask_backend.erl b/src/riak_kv_bitcask_backend.erl index 2df25d86f4..20fb114833 100644 --- a/src/riak_kv_bitcask_backend.erl +++ b/src/riak_kv_bitcask_backend.erl @@ -202,7 +202,7 @@ schedule_merge(Ref) when is_reference(Ref) -> riak_kv_backend:callback_after(?MERGE_CHECK_INTERVAL, Ref, merge_check). snapshot_keys(Ref, FilterFun) when is_reference(Ref), - is_fun(FilterFun) -> + is_function(FilterFun) -> bitcask_snapshot:snapshot_keys(Ref, FilterFun). From 3136fdb494d8b2cf9e25341a55e554751c1d93d1 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Wed, 18 Aug 2010 15:59:17 -0400 Subject: [PATCH 36/72] Reverting key snapshotting --- src/riak_kv_vnode.erl | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index 90f1c0cd9c..470964f721 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -381,14 +381,6 @@ do_list_bucket(ReqID,Bucket,Mod,ModState,Idx,State) -> RetVal = Mod:list_bucket(ModState,Bucket), {reply, {kl, RetVal, Idx, ReqID}, State}. -do_list_keys2(Caller,ReqId,Bucket,Idx,Mod,ModState) when Mod =:= riak_kv_bitcask_backend -> - F = fun(BKey) -> - {B,K} = binary_to_term(BKey), - B =:= Bucket end, - Snapshot = Mod:snapshot_keys(ModState, F), - Snapshot1 = bitcask_snapshot:open_snapshot(ModState, Snapshot), - spawn(fun() -> stream_keys(Snapshot1, Caller, ReqId, Idx) end), - ok; do_list_keys2(Caller,ReqId,Bucket,Idx,Mod,ModState) -> F = fun(BKey, _, Acc) -> process_keys(Caller, ReqId, Idx, Bucket, BKey, Acc) end, From 8c31ee249af8521c34481b96e98bf311a58aaacb Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Wed, 18 Aug 2010 16:04:20 -0400 Subject: [PATCH 37/72] Reverting --- src/riak_kv_bitcask_backend.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/riak_kv_bitcask_backend.erl b/src/riak_kv_bitcask_backend.erl index 448d187a60..ac7c8067a9 100644 --- a/src/riak_kv_bitcask_backend.erl +++ b/src/riak_kv_bitcask_backend.erl @@ -200,6 +200,8 @@ schedule_sync(Ref, SyncIntervalMs) when is_reference(Ref) -> schedule_merge(Ref) when is_reference(Ref) -> riak_kv_backend:callback_after(?MERGE_CHECK_INTERVAL, Ref, merge_check). +snapshot_keys({Ref, _}, FilterFun) when is_reference(Ref) -> + bitcask_snapshot:snapshot_keys(Ref, FilterFun). %% =================================================================== %% EUnit tests From 6eab98fecd48c1e4f7687d6bf3d425b9974456ce Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Thu, 19 Aug 2010 07:48:30 -0400 Subject: [PATCH 38/72] Removing snapshot keys --- src/riak_kv_bitcask_backend.erl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/riak_kv_bitcask_backend.erl b/src/riak_kv_bitcask_backend.erl index b6d8a4399e..60aa9e8682 100644 --- a/src/riak_kv_bitcask_backend.erl +++ b/src/riak_kv_bitcask_backend.erl @@ -200,10 +200,6 @@ schedule_sync(Ref, SyncIntervalMs) when is_reference(Ref) -> schedule_merge(Ref) when is_reference(Ref) -> riak_kv_backend:callback_after(?MERGE_CHECK_INTERVAL, Ref, merge_check). -snapshot_keys({Ref, _}, FilterFun) when is_reference(Ref) -> - bitcask_snapshot:snapshot_keys(Ref, FilterFun). - - %% =================================================================== %% EUnit tests %% =================================================================== From 0462fc0513c9c4ab7915a11eba64d4c33c91653f Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Thu, 19 Aug 2010 08:04:01 -0400 Subject: [PATCH 39/72] Removing stream keys for now --- src/riak_kv_vnode.erl | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index 470964f721..244f0f26d9 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -392,19 +392,19 @@ do_list_keys2(Caller,ReqId,Bucket,Idx,Mod,ModState) -> end, Caller ! {ReqId, Idx, done}. -stream_keys(Snapshot, Caller, ReqId, Idx) -> - try - case bitcask_snapshot:read_snapshot_records(Snapshot) of - {ok, R0} -> - R = [binary_to_term(Rec) || Rec <- R0], - Caller ! {ReqId, {kl, Idx, R}}, - stream_keys(Snapshot, Caller, ReqId, Idx); - eof -> - Caller ! {ReqId, Idx, done} - end - after - bitcask_snapshot:close_snapshot(Snapshot) - end. +%% stream_keys(Snapshot, Caller, ReqId, Idx) -> +%% try +%% case bitcask_snapshot:read_snapshot_records(Snapshot) of +%% {ok, R0} -> +%% R = [binary_to_term(Rec) || Rec <- R0], +%% Caller ! {ReqId, {kl, Idx, R}}, +%% stream_keys(Snapshot, Caller, ReqId, Idx); +%% eof -> +%% Caller ! {ReqId, Idx, done} +%% end +%% after +%% bitcask_snapshot:close_snapshot(Snapshot) +%% end. process_keys(Caller, ReqId, Idx, Bucket, {Bucket, K}, Acc0) -> Acc = [K|Acc0], From 6d2ced2ad2451b583a4bee3155823e3b37e65972 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Thu, 19 Aug 2010 14:50:08 -0400 Subject: [PATCH 40/72] Using new fold over in-memory key list for bitcask --- src/lk.erl | 2 ++ src/riak_kv_bitcask_backend.erl | 8 ++++++++ src/riak_kv_vnode.erl | 36 +++++++++++++++++++++------------ 3 files changed, 33 insertions(+), 13 deletions(-) diff --git a/src/lk.erl b/src/lk.erl index 691f9d0820..1541e66b5f 100644 --- a/src/lk.erl +++ b/src/lk.erl @@ -7,6 +7,7 @@ fsm(Bucket) -> Start = erlang:now(), riak_kv_keys_fsm:start(ReqId, Bucket, 60000, plain, 0.0001, self()), {ok, Count} = gather_fsm_results(ReqId, 0), + io:format("~n"), End = erlang:now(), Ms = erlang:round(timer:now_diff(End, Start) / 1000), io:format("Found ~p keys in ~pms.~n", [Count, Ms]). @@ -29,6 +30,7 @@ pn(Bucket) -> gather_fsm_results(ReqId, Count) -> receive {ReqId, {keys, Keys}} -> + io:format("."), gather_fsm_results(ReqId, Count + length(Keys)); {ReqId, done} -> {ok, Count} diff --git a/src/riak_kv_bitcask_backend.erl b/src/riak_kv_bitcask_backend.erl index 60aa9e8682..358579004e 100644 --- a/src/riak_kv_bitcask_backend.erl +++ b/src/riak_kv_bitcask_backend.erl @@ -33,6 +33,7 @@ list/1, list_bucket/2, fold/3, + fold_keys/3, drop/1, is_empty/1, callback/3]). @@ -42,6 +43,8 @@ -include_lib("eunit/include/eunit.hrl"). -endif. +-include_lib("bitcask/include/bitcask.hrl"). + -define(MERGE_CHECK_INTERVAL, timer:minutes(3)). start(Partition, _Config) -> @@ -135,6 +138,11 @@ fold({Ref, _}, Fun0, Acc0) -> end, Acc0). +fold_keys({Ref, _}, Fun, Acc) -> + F = fun(#bitcask_entry{key=K}, Acc1) -> + Fun(binary_to_term(K), Acc1) end, + bitcask:fold_keys(Ref, F, Acc). + drop({Ref, BitcaskRoot}) -> %% todo: once bitcask has a more friendly drop function %% of its own, use that instead. diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index 244f0f26d9..817f958c44 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -381,6 +381,16 @@ do_list_bucket(ReqID,Bucket,Mod,ModState,Idx,State) -> RetVal = Mod:list_bucket(ModState,Bucket), {reply, {kl, RetVal, Idx, ReqID}, State}. +do_list_keys2(Caller,ReqId,Bucket,Idx,Mod,ModState) when Mod =:= riak_kv_bitcask_backend -> + F = fun(BKey, Acc) -> + process_keys(Caller, ReqId, Idx, Bucket, BKey, Acc) end, + case Mod:fold_keys(ModState, F, []) of + [] -> + ok; + Remainder -> + Caller ! {ReqId, {kl, Idx, Remainder}} + end, + Caller ! {ReqId, Idx, done}; do_list_keys2(Caller,ReqId,Bucket,Idx,Mod,ModState) -> F = fun(BKey, _, Acc) -> process_keys(Caller, ReqId, Idx, Bucket, BKey, Acc) end, @@ -392,19 +402,19 @@ do_list_keys2(Caller,ReqId,Bucket,Idx,Mod,ModState) -> end, Caller ! {ReqId, Idx, done}. -%% stream_keys(Snapshot, Caller, ReqId, Idx) -> -%% try -%% case bitcask_snapshot:read_snapshot_records(Snapshot) of -%% {ok, R0} -> -%% R = [binary_to_term(Rec) || Rec <- R0], -%% Caller ! {ReqId, {kl, Idx, R}}, -%% stream_keys(Snapshot, Caller, ReqId, Idx); -%% eof -> -%% Caller ! {ReqId, Idx, done} -%% end -%% after -%% bitcask_snapshot:close_snapshot(Snapshot) -%% end. +stream_keys(Snapshot, Caller, ReqId, Idx) -> + try + case bitcask_snapshot:read_snapshot_records(Snapshot) of + {ok, R0} -> + R = [binary_to_term(Rec) || Rec <- R0], + Caller ! {ReqId, {kl, Idx, R}}, + stream_keys(Snapshot, Caller, ReqId, Idx); + eof -> + Caller ! {ReqId, Idx, done} + end + after + bitcask_snapshot:close_snapshot(Snapshot) + end. process_keys(Caller, ReqId, Idx, Bucket, {Bucket, K}, Acc0) -> Acc = [K|Acc0], From da28c1237125195110a6df5e16bdf2f0b8001b1e Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Thu, 19 Aug 2010 15:47:00 -0400 Subject: [PATCH 41/72] Making key listers anonymous processes --- src/riak_kv_keylister.erl | 2 +- src/riak_kv_keys_fsm.erl | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/riak_kv_keylister.erl b/src/riak_kv_keylister.erl index 7ba93d9c28..30b6498598 100644 --- a/src/riak_kv_keylister.erl +++ b/src/riak_kv_keylister.erl @@ -22,7 +22,7 @@ list_keys(ListerPid, VNode) -> gen_fsm:send_event(ListerPid, {lk, VNode}). start_link(ReqId, Caller, Bucket) -> - gen_fsm:start_link({local, ?MODULE}, ?MODULE, [ReqId, Caller, Bucket], []). + gen_fsm:start_link(?MODULE, [ReqId, Caller, Bucket], []). init([ReqId, Caller, Bucket]) -> process_flag(trap_exit, true), diff --git a/src/riak_kv_keys_fsm.erl b/src/riak_kv_keys_fsm.erl index f638064c6d..cc2ea6c602 100644 --- a/src/riak_kv_keys_fsm.erl +++ b/src/riak_kv_keys_fsm.erl @@ -52,22 +52,23 @@ start(ReqId,Bucket,Timeout,ClientType,ErrorTolerance,From) -> init([ReqId,Bucket,Timeout,ClientType,ErrorTolerance,Client]) -> {ok, Ring} = riak_core_ring_manager:get_my_ring(), {ok, Bloom} = ebloom:new(10000000,ErrorTolerance,ReqId), - Listers = start_listers(ReqId, Bucket), StateData = #state{client=Client, client_type=ClientType, timeout=Timeout, - bloom=Bloom, req_id=ReqId, bucket=Bucket, ring=Ring, - listers=Listers}, + bloom=Bloom, req_id=ReqId, bucket=Bucket, ring=Ring}, {ok,initialize,StateData,0}. %% @private -initialize(timeout, StateData0=#state{bucket=Bucket, ring=Ring}) -> +initialize(timeout, StateData0=#state{bucket=Bucket, ring=Ring, req_id=ReqId}) -> BucketProps = riak_core_bucket:get_bucket(Bucket, Ring), N = proplists:get_value(n_val,BucketProps), PLS0 = riak_core_ring:all_preflists(Ring,N), - {LA1, LA2} = lists:partition(fun({A,_B}) -> A rem N == 0 end, - lists:zip(lists:seq(0,(length(PLS0)-1)), PLS0)), - {_, PLS} = lists:unzip(lists:append(LA1,LA2)), + {LA1, LA2} = lists:partition(fun({A,_B}) -> + A rem N == 0 orelse A rem (N + 1) == 0 + end, + lists:zip(lists:seq(0,(length(PLS0)-1)), PLS0)), + {_, PLS} = lists:unzip(LA1 ++ LA2), Simul_PLS = trunc(length(PLS) / N), - StateData = StateData0#state{pls=PLS,simul_pls=Simul_PLS, + Listers = start_listers(ReqId, Bucket), + StateData = StateData0#state{pls=PLS,simul_pls=Simul_PLS, listers=Listers, wait_pls=[],vns=sets:from_list([])}, reduce_pls(StateData). @@ -207,7 +208,7 @@ code_change(_OldVsn, StateName, State, _Extra) -> %% @private start_listers(ReqId, Bucket) -> - Nodes = [node()|nodes()], + Nodes = riak_core_node_watcher:nodes(riak_kv), start_listers(Nodes, ReqId, Bucket, []). start_listers([], _ReqId, _Bucket, Accum) -> From 4d39d1540901f459991834e187eabf80b5d25be7 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Thu, 19 Aug 2010 15:54:48 -0400 Subject: [PATCH 42/72] Disabling key streaming from disk --- src/riak_kv_vnode.erl | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index 817f958c44..0e1d6a294e 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -402,19 +402,19 @@ do_list_keys2(Caller,ReqId,Bucket,Idx,Mod,ModState) -> end, Caller ! {ReqId, Idx, done}. -stream_keys(Snapshot, Caller, ReqId, Idx) -> - try - case bitcask_snapshot:read_snapshot_records(Snapshot) of - {ok, R0} -> - R = [binary_to_term(Rec) || Rec <- R0], - Caller ! {ReqId, {kl, Idx, R}}, - stream_keys(Snapshot, Caller, ReqId, Idx); - eof -> - Caller ! {ReqId, Idx, done} - end - after - bitcask_snapshot:close_snapshot(Snapshot) - end. +%% stream_keys(Snapshot, Caller, ReqId, Idx) -> +%% try +%% case bitcask_snapshot:read_snapshot_records(Snapshot) of +%% {ok, R0} -> +%% R = [binary_to_term(Rec) || Rec <- R0], +%% Caller ! {ReqId, {kl, Idx, R}}, +%% stream_keys(Snapshot, Caller, ReqId, Idx); +%% eof -> +%% Caller ! {ReqId, Idx, done} +%% end +%% after +%% bitcask_snapshot:close_snapshot(Snapshot) +%% end. process_keys(Caller, ReqId, Idx, Bucket, {Bucket, K}, Acc0) -> Acc = [K|Acc0], From b3b2c32ae207206285e12cd6c4c87b592d2a9c93 Mon Sep 17 00:00:00 2001 From: justin Date: Sun, 22 Aug 2010 14:39:26 -0400 Subject: [PATCH 43/72] use new bitcask fold_keys for bucket listing --- src/riak_kv_bitcask_backend.erl | 42 ++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/src/riak_kv_bitcask_backend.erl b/src/riak_kv_bitcask_backend.erl index 448d187a60..dcc4a53fb3 100644 --- a/src/riak_kv_bitcask_backend.erl +++ b/src/riak_kv_bitcask_backend.erl @@ -37,6 +37,7 @@ is_empty/1, callback/3]). +-include("deps/bitcask/include/bitcask.hrl"). -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). @@ -115,15 +116,38 @@ list({Ref, _}) -> Other end. -list_bucket(State, {filter, Bucket, Fun}) -> - [K || {B, K} <- ?MODULE:list(State), - B =:= Bucket, - Fun(K)]; -list_bucket(State, '_') -> - [B || {B, _K} <- ?MODULE:list(State)]; -list_bucket(State, Bucket) -> - [K || {B, K} <- ?MODULE:list(State), B =:= Bucket]. - +list_bucket({Ref, _}, {filter, Bucket, Fun}) -> + bitcask:fold_keys(Ref, + fun(#bitcask_entry{key=BK},Acc) -> + {B,K} = binary_to_term(BK), + case B of + Bucket -> + case Fun(K) of + true -> [K|Acc]; + false -> Acc + end; + _ -> + Acc + end + end, []); +list_bucket({Ref, _}, '_') -> + bitcask:fold_keys(Ref, + fun(#bitcask_entry{key=BK},Acc) -> + {B,_K} = binary_to_term(BK), + case lists:member(B,Acc) of + true -> Acc; + false -> [B|Acc] + end + end, []); +list_bucket({Ref, _}, Bucket) -> + bitcask:fold_keys(Ref, + fun(#bitcask_entry{key=BK},Acc) -> + {B,K} = binary_to_term(BK), + case B of + Bucket -> [K|Acc]; + _ -> Acc + end + end, []). fold({Ref, _}, Fun0, Acc0) -> %% When folding across the bitcask, the bucket/key tuple must From 27bf81aec3e52f5ead2e6b8c1857177d41afe768 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Mon, 23 Aug 2010 11:23:39 -0400 Subject: [PATCH 44/72] Another round of merging/testing before landing in tip --- include/riak_kv_vnode.hrl | 5 +-- rebar.config | 2 +- src/riak_kv_bitcask_backend.erl | 2 - src/riak_kv_keylister.erl | 47 +++++++++++++++++++----- src/riak_kv_keylister_master.erl | 23 +++++++++++- src/riak_kv_keylister_sup.erl | 15 +++++--- src/riak_kv_keys_fsm.erl | 35 ++++++++++++------ src/riak_kv_vnode.erl | 63 ++++++++++++-------------------- 8 files changed, 118 insertions(+), 74 deletions(-) diff --git a/include/riak_kv_vnode.hrl b/include/riak_kv_vnode.hrl index f1f73a9d40..f08e71fd30 100644 --- a/include/riak_kv_vnode.hrl +++ b/include/riak_kv_vnode.hrl @@ -15,7 +15,7 @@ bucket :: binary(), req_id :: non_neg_integer()}). --record(riak_kv_listkeys2_req_v1, { +-record(riak_kv_listkeys_req_v2, { bucket :: binary(), req_id :: non_neg_integer(), caller :: pid()}). @@ -36,8 +36,7 @@ -define(KV_PUT_REQ, #riak_kv_put_req_v1). -define(KV_GET_REQ, #riak_kv_get_req_v1). --define(KV_LISTKEYS_REQ, #riak_kv_listkeys_req_v1). --define(KV_LISTKEYS2_REQ, #riak_kv_listkeys2_req_v1). +-define(KV_LISTKEYS_REQ, #riak_kv_listkeys_req_v2). -define(KV_DELETE_REQ, #riak_kv_delete_req_v1). -define(KV_MAP_REQ, #riak_kv_map_req_v1). -define(KV_VCLOCK_REQ, #riak_kv_vclock_req_v1). diff --git a/rebar.config b/rebar.config index 593297eaac..dca22116ca 100644 --- a/rebar.config +++ b/rebar.config @@ -11,7 +11,7 @@ "tip"}}, {erlang_js, "0\.4", {hg, "http://bitbucket.org/basho/erlang_js", "erlang_js-0.4"}}, - {bitcask, "1.0.3", {hg, "http://bitbucket.org/kevsmith/bitcask-fk", + {bitcask, "1.0.3", {hg, "http://bitbucket.org/basho/bitcask", "tip"}}, {ebloom, "1.0.1", {hg, "http://bitbucket.org/basho/ebloom", "ebloom-1.0.1"}} diff --git a/src/riak_kv_bitcask_backend.erl b/src/riak_kv_bitcask_backend.erl index 638c05eb3b..8165fecf45 100644 --- a/src/riak_kv_bitcask_backend.erl +++ b/src/riak_kv_bitcask_backend.erl @@ -38,8 +38,6 @@ is_empty/1, callback/3]). --include("deps/bitcask/include/bitcask.hrl"). - -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). -endif. diff --git a/src/riak_kv_keylister.erl b/src/riak_kv_keylister.erl index 30b6498598..7ebcb9b9b9 100644 --- a/src/riak_kv_keylister.erl +++ b/src/riak_kv_keylister.erl @@ -1,3 +1,26 @@ +%% ------------------------------------------------------------------- +%% +%% riak_kv_keylister: Manage streaming keys for a bucket from a +%% cluster node +%% +%% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + -module(riak_kv_keylister). -behaviour(gen_fsm). @@ -25,20 +48,19 @@ start_link(ReqId, Caller, Bucket) -> gen_fsm:start_link(?MODULE, [ReqId, Caller, Bucket], []). init([ReqId, Caller, Bucket]) -> - process_flag(trap_exit, true), + erlang:monitor(process, Caller), {ok, Bloom} = ebloom:new(10000000, 0.0001, crypto:rand_uniform(1, 5000)), {ok, waiting, #state{reqid=ReqId, caller=Caller, bloom=Bloom, bucket=Bucket}}. waiting({lk, VNode}, #state{reqid=ReqId, bucket=Bucket}=State) -> - riak_kv_vnode:list_keys2(VNode, ReqId, self(), Bucket), + riak_kv_vnode:list_keys(VNode, ReqId, self(), Bucket), {next_state, waiting, State}. state_name(_Event, State) -> - {next_state, state_name, State}. + {next_state, waiting, State}. state_name(_Event, _From, State) -> - Reply = ok, - {reply, Reply, state_name, State}. + {reply, ignored, state_name, State}. handle_event(_Event, StateName, State) -> {next_state, StateName, State}. @@ -47,7 +69,7 @@ handle_sync_event(_Event, _From, StateName, State) -> {reply, ignored, StateName, State}. handle_info({ReqId, {kl, Idx, Keys0}}, waiting, #state{reqid=ReqId, bloom=Bloom, - caller=Caller}=State) -> + caller=Caller}=State) -> F = fun(Key, Acc) -> case ebloom:contains(Bloom, Key) of true -> @@ -56,16 +78,23 @@ handle_info({ReqId, {kl, Idx, Keys0}}, waiting, #state{reqid=ReqId, bloom=Bloom, ebloom:insert(Bloom, Key), [Key|Acc] end end, - Keys = lists:foldl(F, [], Keys0), - gen_fsm:send_event(Caller, {ReqId, {kl, Idx, Keys}}), + case lists:foldl(F, [], Keys0) of + [] -> + ok; + Keys -> + gen_fsm:send_event(Caller, {ReqId, {kl, Idx, Keys}}) + end, {next_state, waiting, State}; handle_info({ReqId, Idx, done}, waiting, #state{reqid=ReqId, caller=Caller}=State) -> gen_fsm:send_event(Caller, {ReqId, Idx, done}), {next_state, waiting, State}; +handle_info({'DOWN', _MRef, _Type, Caller, _Info}, waiting, #state{caller=Caller}=State) -> + {stop, normal, State}; handle_info(_Info, StateName, State) -> {next_state, StateName, State}. -terminate(_Reason, _StateName, _State) -> +terminate(_Reason, _StateName, #state{bloom=Bloom}) -> + ebloom:clear(Bloom), ok. code_change(_OldVsn, StateName, State, _Extra) -> diff --git a/src/riak_kv_keylister_master.erl b/src/riak_kv_keylister_master.erl index 7bd3ae9f4b..6f585f9947 100644 --- a/src/riak_kv_keylister_master.erl +++ b/src/riak_kv_keylister_master.erl @@ -1,3 +1,24 @@ +%% ------------------------------------------------------------------- +%% +%% riak_kv_keylister_master: Starts keylister processes on demand +%% +%% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- -module(riak_kv_keylister_master). -behaviour(gen_server). @@ -17,8 +38,6 @@ start_keylist(Node, ReqId, Bucket) -> case gen_server:call({?SERVER, Node}, {start_kl, ReqId, self(), Bucket}) of {ok, Pid} -> - %% Link processes so the keylister doesn't run forever - erlang:link(Pid), {ok, Pid}; Error -> Error diff --git a/src/riak_kv_keylister_sup.erl b/src/riak_kv_keylister_sup.erl index 9ea27f7c06..5089af11df 100644 --- a/src/riak_kv_keylister_sup.erl +++ b/src/riak_kv_keylister_sup.erl @@ -1,19 +1,24 @@ -%% Copyright (c) 2010 Basho Technologies, Inc. All Rights Reserved. - +%% ------------------------------------------------------------------- +%% +%% riak_kv_keylister_sup: Supervisor for starting keylister processes +%% +%% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved. +%% %% This file is provided to you under the Apache License, %% Version 2.0 (the "License"); you may not use this file %% except in compliance with the License. You may obtain %% a copy of the License at - +%% %% http://www.apache.org/licenses/LICENSE-2.0 - +%% %% Unless required by applicable law or agreed to in writing, %% software distributed under the License is distributed on an %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY %% KIND, either express or implied. See the License for the %% specific language governing permissions and limitations %% under the License. - +%% +%% ------------------------------------------------------------------- -module(riak_kv_keylister_sup). -behaviour(supervisor). diff --git a/src/riak_kv_keys_fsm.erl b/src/riak_kv_keys_fsm.erl index f118acf5eb..45e2c4e354 100644 --- a/src/riak_kv_keys_fsm.erl +++ b/src/riak_kv_keys_fsm.erl @@ -50,6 +50,7 @@ start(ReqId,Bucket,Timeout,ClientType,ErrorTolerance,From) -> %% @private init([ReqId,Bucket,Timeout,ClientType,ErrorTolerance,Client]) -> + process_flag(trap_exit, true), {ok, Ring} = riak_core_ring_manager:get_my_ring(), {ok, Bloom} = ebloom:new(10000000,ErrorTolerance,ReqId), StateData = #state{client=Client, client_type=ClientType, timeout=Timeout, @@ -132,17 +133,26 @@ reduce_pls(StateData0=#state{timeout=Timeout, wait_pls=WPL, reduce_pls(StateData0#state{pls=[RestPL|PLS]}); _ -> %% Look up keylister for that node - LPid = proplists:get_value(Node, Listers), - %% Send the keylist request to the lister - riak_kv_keylister:list_keys(LPid, {Idx, Node}), - %% riak_kv_vnode:list_keys({Idx,Node},Bucket,ReqId), - WaitPLS = [{Idx,Node,RestPL}|WPL], - StateData = StateData0#state{pls=PLS, wait_pls=WaitPLS}, - case length(WaitPLS) > Simul_PLS of - true -> - {next_state, waiting_kl, StateData, Timeout}; - false -> - reduce_pls(StateData) + case proplists:get_value(Node, Listers) of + undefined -> + %% Node is down or hasn't been removed from preflists yet + %% Log a warning, skip the node and continue sending + %% out key list requests + error_logger:warning_msg("Skipping keylist request for unknown node: ~p~n", [Node]), + WaitPLS = [{Idx,Node,RestPL}|WPL], + StateData = StateData0#state{pls=PLS, wait_pls=WaitPLS}, + reduce_pls(StateData); + LPid -> + %% Send the keylist request to the lister + riak_kv_keylister:list_keys(LPid, {Idx, Node}), + WaitPLS = [{Idx,Node,RestPL}|WPL], + StateData = StateData0#state{pls=PLS, wait_pls=WaitPLS}, + case length(WaitPLS) > Simul_PLS of + true -> + {next_state, waiting_kl, StateData, Timeout}; + false -> + reduce_pls(StateData) + end end end end. @@ -211,7 +221,8 @@ handle_info(_Info, _StateName, StateData) -> {stop,badmsg,StateData}. %% @private -terminate(Reason, _StateName, _State) -> +terminate(Reason, _StateName, #state{bloom=Bloom}) -> + ebloom:clear(Bloom), Reason. %% @private diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index 40743ee395..af3adfdfe0 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -21,8 +21,7 @@ del/3, put/6, readrepair/6, - list_keys/3, - list_keys2/4, + list_keys/4, map/5, fold/3, get_vclocks/2, @@ -55,7 +54,7 @@ reqid :: term(), target :: pid()}). --record(state, {idx :: partition(), +-record(state, {idx :: partition(), mod :: module(), modstate :: term(), mapcache :: term(), @@ -103,22 +102,14 @@ put(Preflist, BKey, Obj, ReqId, StartTime, Options, Sender) readrepair(Preflist, BKey, Obj, ReqId, StartTime, Options) -> put(Preflist, BKey, Obj, ReqId, StartTime, Options, ignore). -list_keys(Preflist, Bucket, ReqId) -> - riak_core_vnode_master:command(Preflist, - ?KV_LISTKEYS_REQ{ - bucket=Bucket, - req_id=ReqId}, - {fsm, undefined, self()}, - riak_kv_vnode_master). - -list_keys2(Preflist, ReqId, Caller, Bucket) -> - riak_core_vnode_master:command(Preflist, - ?KV_LISTKEYS2_REQ{ - bucket=Bucket, - req_id=ReqId, - caller=Caller}, - ignore, - riak_kv_vnode_master). +list_keys(Preflist, ReqId, Caller, Bucket) -> + riak_core_vnode_master:command(Preflist, + ?KV_LISTKEYS_REQ{ + bucket=Bucket, + req_id=ReqId, + caller=Caller}, + ignore, + riak_kv_vnode_master). map(Preflist, ClientPid, QTerm, BKey, KeyData) -> riak_core_vnode_master:sync_spawn_command(Preflist, @@ -155,7 +146,7 @@ init([Index]) -> CacheSize = app_helper:get_env(riak_kv, vnode_cache_entries, 100), Configuration = app_helper:get_env(riak_kv), {ok, ModState} = Mod:start(Index, Configuration), - + {ok, #state{idx=Index, mod=Mod, modstate=ModState, mapcache=riak_kv_lru:new(CacheSize), mrjobs=dict:new()}}. handle_command(?KV_PUT_REQ{bkey=BKey, @@ -171,12 +162,12 @@ handle_command(?KV_PUT_REQ{bkey=BKey, handle_command(?KV_GET_REQ{bkey=BKey,req_id=ReqId},Sender,State) -> do_get(Sender, BKey, ReqId, State); -handle_command(?KV_LISTKEYS_REQ{bucket=Bucket, req_id=ReqId}, _Sender, - State=#state{mod=Mod, modstate=ModState, idx=Idx}) -> +handle_command(#riak_kv_listkeys_req_v1{bucket=Bucket, req_id=ReqId}, _Sender, + State=#state{mod=Mod, modstate=ModState, idx=Idx}) -> do_list_bucket(ReqId,Bucket,Mod,ModState,Idx,State); -handle_command(?KV_LISTKEYS2_REQ{bucket=Bucket, req_id=ReqId, caller=Caller}, _Sender, +handle_command(?KV_LISTKEYS_REQ{bucket=Bucket, req_id=ReqId, caller=Caller}, _Sender, State=#state{mod=Mod, modstate=ModState, idx=Idx}) -> - do_list_keys2(Caller,ReqId,Bucket,Idx,Mod,ModState), + do_list_keys(Caller,ReqId,Bucket,Idx,Mod,ModState), {noreply, State}; handle_command(?KV_DELETE_REQ{bkey=BKey, req_id=ReqId}, _Sender, @@ -409,7 +400,10 @@ do_list_bucket(ReqID,Bucket,Mod,ModState,Idx,State) -> RetVal = Mod:list_bucket(ModState,Bucket), {reply, {kl, RetVal, Idx, ReqID}, State}. -do_list_keys2(Caller,ReqId,Bucket,Idx,Mod,ModState) when Mod =:= riak_kv_bitcask_backend -> +%% Use in-memory key list for bitcask backend +%% @private +do_list_keys(Caller,ReqId,Bucket,Idx,Mod,ModState) + when Mod =:= riak_kv_bitcask_backend -> F = fun(BKey, Acc) -> process_keys(Caller, ReqId, Idx, Bucket, BKey, Acc) end, case Mod:fold_keys(ModState, F, []) of @@ -419,7 +413,8 @@ do_list_keys2(Caller,ReqId,Bucket,Idx,Mod,ModState) when Mod =:= riak_kv_bitcask Caller ! {ReqId, {kl, Idx, Remainder}} end, Caller ! {ReqId, Idx, done}; -do_list_keys2(Caller,ReqId,Bucket,Idx,Mod,ModState) -> +%% @private +do_list_keys(Caller,ReqId,Bucket,Idx,Mod,ModState) -> F = fun(BKey, _, Acc) -> process_keys(Caller, ReqId, Idx, Bucket, BKey, Acc) end, case Mod:fold(ModState, F, []) of @@ -430,20 +425,7 @@ do_list_keys2(Caller,ReqId,Bucket,Idx,Mod,ModState) -> end, Caller ! {ReqId, Idx, done}. -%% stream_keys(Snapshot, Caller, ReqId, Idx) -> -%% try -%% case bitcask_snapshot:read_snapshot_records(Snapshot) of -%% {ok, R0} -> -%% R = [binary_to_term(Rec) || Rec <- R0], -%% Caller ! {ReqId, {kl, Idx, R}}, -%% stream_keys(Snapshot, Caller, ReqId, Idx); -%% eof -> -%% Caller ! {ReqId, Idx, done} -%% end -%% after -%% bitcask_snapshot:close_snapshot(Snapshot) -%% end. - +%% @private process_keys(Caller, ReqId, Idx, Bucket, {Bucket, K}, Acc0) -> Acc = [K|Acc0], case length(Acc) >= 100 of @@ -455,6 +437,7 @@ process_keys(Caller, ReqId, Idx, Bucket, {Bucket, K}, Acc0) -> end; process_keys(_Caller, _ReqId, _Idx, _Bucket, {_B, _K}, Acc) -> Acc. + %% @private do_fold(Fun, Acc0, _State=#state{mod=Mod, modstate=ModState}) -> Mod:fold(ModState, Fun, Acc0). From 696552cb7e140c704c1b5fa62ae40a751bbc4ddd Mon Sep 17 00:00:00 2001 From: Bryan Fink Date: Tue, 24 Aug 2010 18:32:38 -0400 Subject: [PATCH 45/72] support list_buckets in the new list-keys-by-folding structure (bz://658) the "list_buckets" function in riak_client exploits the fact that all bucket names are binaries, by passing the atom '_' to list_keys to signal that buckets, and not keys, should be listed --- src/riak_kv_vnode.erl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index af3adfdfe0..b8d72216e0 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -426,6 +426,16 @@ do_list_keys(Caller,ReqId,Bucket,Idx,Mod,ModState) -> Caller ! {ReqId, Idx, done}. %% @private +process_keys(Caller, ReqId, Idx, '_', {Bucket, _K}, Acc0) -> + %% Bucket='_' means "list buckets" instead of "list keys" + Acc = [Bucket|Acc0], + case length(Acc) >= 100 of + true -> + Caller ! {ReqId, {kl, Idx, Acc}}, + []; + false -> + Acc + end; process_keys(Caller, ReqId, Idx, Bucket, {Bucket, K}, Acc0) -> Acc = [K|Acc0], case length(Acc) >= 100 of From 0927bd17e03b135c75713f1d7f2aef9d2593465e Mon Sep 17 00:00:00 2001 From: Bryan Fink Date: Tue, 24 Aug 2010 18:47:36 -0400 Subject: [PATCH 46/72] support filter_keys in the new list-keys-by-folding structure (related to bz://658) much like list_buckets, filter_keys exploits the fact that bucket names are always binaries, by passing the tuple {filter, Bucket, Fun} to list_keys, which signals that only keys in the given Bucket, for which Fun(Key) returns true, should be included in the result --- src/riak_kv_vnode.erl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index b8d72216e0..af20eb7ea1 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -436,6 +436,22 @@ process_keys(Caller, ReqId, Idx, '_', {Bucket, _K}, Acc0) -> false -> Acc end; +process_keys(Caller, ReqId, Idx, {filter, Bucket, Fun}, {Bucket, K}, Acc0) -> + %% Bucket={filter,Bucket,Fun} means "only include keys + %% in Bucket that make Fun(K) return 'true'" + case Fun(K) of + true -> + Acc = [K|Acc0], + case length(Acc) >= 100 of + true -> + Caller ! {ReqId, {kl, Idx, Acc}}, + []; + false -> + Acc + end; + false -> + Acc0 + end; process_keys(Caller, ReqId, Idx, Bucket, {Bucket, K}, Acc0) -> Acc = [K|Acc0], case length(Acc) >= 100 of From 0b5f920d515f141a42f171e3e302d0cae9c6b9b2 Mon Sep 17 00:00:00 2001 From: Bryan Fink Date: Tue, 24 Aug 2010 18:52:05 -0400 Subject: [PATCH 47/72] factor out the shared key-list buffering code in process_keys/6 --- src/riak_kv_vnode.erl | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index af20eb7ea1..7935529364 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -426,43 +426,31 @@ do_list_keys(Caller,ReqId,Bucket,Idx,Mod,ModState) -> Caller ! {ReqId, Idx, done}. %% @private -process_keys(Caller, ReqId, Idx, '_', {Bucket, _K}, Acc0) -> +process_keys(Caller, ReqId, Idx, '_', {Bucket, _K}, Acc) -> %% Bucket='_' means "list buckets" instead of "list keys" - Acc = [Bucket|Acc0], - case length(Acc) >= 100 of - true -> - Caller ! {ReqId, {kl, Idx, Acc}}, - []; - false -> - Acc - end; -process_keys(Caller, ReqId, Idx, {filter, Bucket, Fun}, {Bucket, K}, Acc0) -> + buffer_key_result(Caller, ReqId, Idx, [Bucket|Acc]); +process_keys(Caller, ReqId, Idx, {filter, Bucket, Fun}, {Bucket, K}, Acc) -> %% Bucket={filter,Bucket,Fun} means "only include keys %% in Bucket that make Fun(K) return 'true'" case Fun(K) of true -> - Acc = [K|Acc0], - case length(Acc) >= 100 of - true -> - Caller ! {ReqId, {kl, Idx, Acc}}, - []; - false -> - Acc - end; + buffer_key_result(Caller, ReqId, Idx, [K|Acc]); false -> - Acc0 + Acc end; -process_keys(Caller, ReqId, Idx, Bucket, {Bucket, K}, Acc0) -> - Acc = [K|Acc0], +process_keys(Caller, ReqId, Idx, Bucket, {Bucket, K}, Acc) -> + buffer_key_result(Caller, ReqId, Idx, [K|Acc]); +process_keys(_Caller, _ReqId, _Idx, _Bucket, {_B, _K}, Acc) -> + Acc. + +buffer_key_result(Caller, ReqId, Idx, Acc) -> case length(Acc) >= 100 of true -> Caller ! {ReqId, {kl, Idx, Acc}}, []; false -> Acc - end; -process_keys(_Caller, _ReqId, _Idx, _Bucket, {_B, _K}, Acc) -> - Acc. + end. %% @private do_fold(Fun, Acc0, _State=#state{mod=Mod, modstate=ModState}) -> From 82519a24a0bd48ff9717338c2454527f58ef7c3a Mon Sep 17 00:00:00 2001 From: Bryan Fink Date: Tue, 24 Aug 2010 20:06:53 -0400 Subject: [PATCH 48/72] tests for bz://658 makes sure that list_keys returns expected results for bucket='_' and bucket={filter,Bucket,Fun} --- src/riak_kv_vnode.erl | 84 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index 7935529364..2bbffe562a 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -570,6 +570,90 @@ dummy_backend() -> application:set_env(riak_kv, storage_backend, riak_kv_ets_backend), application:set_env(riak_core, default_bucket_props, []). +backend_with_known_key() -> + dummy_backend(), + {ok, S1} = init([0]), + B = <<"f">>, + K = <<"b">>, + O = riak_object:new(B, K, <<"z">>), + {noreply, S2} = handle_command(?KV_PUT_REQ{bkey={B,K}, + object=O, + req_id=123, + start_time=riak_core_util:moment(), + options=[]}, + {raw, 456, self()}, + S1), + {S2, B, K}. + +list_buckets_test() -> + {S, B, _K} = backend_with_known_key(), + Caller = new_result_listener(), + handle_command(?KV_LISTKEYS_REQ{bucket='_', + req_id=124, + caller=Caller}, + {raw, 456, self()}, S), + ?assertEqual({ok, [B]}, results_from_listener(Caller)), + flush_msgs(). + +filter_keys_test() -> + {S, B, K} = backend_with_known_key(), + + Caller1 = new_result_listener(), + handle_command(?KV_LISTKEYS_REQ{ + bucket={filter,B,fun(_) -> true end}, + req_id=124, + caller=Caller1}, + {raw, 456, self()}, S), + ?assertEqual({ok, [K]}, results_from_listener(Caller1)), + + Caller2 = new_result_listener(), + handle_command(?KV_LISTKEYS_REQ{ + bucket={filter,B,fun(_) -> false end}, + req_id=125, + caller=Caller2}, + {raw, 456, self()}, S), + ?assertEqual({ok, []}, results_from_listener(Caller2)), + + Caller3 = new_result_listener(), + handle_command(?KV_LISTKEYS_REQ{ + bucket={filter,<<"g">>,fun(_) -> true end}, + req_id=126, + caller=Caller3}, + {raw, 456, self()}, S), + ?assertEqual({ok, []}, results_from_listener(Caller3)), + + flush_msgs(). + +new_result_listener() -> + spawn(fun result_listener/0). + +result_listener() -> + result_listener_keys([]). + +result_listener_keys(Acc) -> + receive + {_,{kl,_,Keys}} -> + result_listener_keys(Keys++Acc); + {_, _, done} -> + result_listener_done(Acc) + after 5000 -> + result_listener_done({timeout, Acc}) + end. + +result_listener_done(Result) -> + receive + {get_results, Pid} -> + Pid ! {listener_results, Result} + end. + +results_from_listener(Listener) -> + Listener ! {get_results, self()}, + receive + {listener_results, Result} -> + {ok, Result} + after 5000 -> + {error, listener_timeout} + end. %% Make sure the mapcache gets cleared when the bkey is updated mapcache_put_test() -> From 00126314c9c2fa72cec253523b526335d440fc30 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Wed, 25 Aug 2010 10:45:12 -0400 Subject: [PATCH 49/72] Fixed borked unit tests in kv_vnode and legacy_vnode --- src/riak_kv_legacy_vnode.erl | 4 ++-- src/riak_kv_vnode.erl | 23 ++++++++++------------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/src/riak_kv_legacy_vnode.erl b/src/riak_kv_legacy_vnode.erl index 126d46dec5..0120761b93 100644 --- a/src/riak_kv_legacy_vnode.erl +++ b/src/riak_kv_legacy_vnode.erl @@ -71,8 +71,8 @@ rewrite_cast({vnode_get, {Partition,_Node}, rewrite_cast({vnode_list_bucket, {Partition,_Node}, {FSM_pid, Bucket, ReqID}}) -> Req = riak_core_vnode_master:make_request( - ?KV_LISTKEYS_REQ{ - bucket=Bucket, + #riak_kv_listkeys_req_v1{ + bucket=Bucket, req_id=ReqID}, {fsm, undefined, FSM_pid}, Partition), diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index af3adfdfe0..0e39e3f722 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -563,10 +563,10 @@ mapcache_put_test() -> BKey = {<<"b">>,<<"k">>}, CacheKey = {mod,func,arg,keydata}, {ok, S1} = init([0]), - ?assertEqual(not_cached, cache_fetch(BKey, CacheKey, S1#state.mapcache)), + ?assertEqual(notfound, riak_kv_lru:fetch(S1#state.mapcache, BKey, CacheKey)), {noreply, S2} = handle_command({mapcache, BKey, CacheKey, result}, noreply, S1), - ?assertEqual(result, cache_fetch(BKey, CacheKey, S2#state.mapcache)), + ?assertEqual(result, riak_kv_lru:fetch(S2#state.mapcache, BKey, CacheKey)), O = riak_object:new(<<"b">>,<<"k">>,<<"v">>), {noreply, S3} = handle_command(?KV_PUT_REQ{bkey=BKey, @@ -575,7 +575,7 @@ mapcache_put_test() -> start_time=riak_core_util:moment(), options=[]}, {raw, 456, self()}, S2), - ?assertEqual(not_cached, cache_fetch(BKey, CacheKey, S3#state.mapcache)), + ?assertEqual(notfound, riak_kv_lru:fetch(S3#state.mapcache, BKey, CacheKey)), %% The put request generates a {w,...} and {dw,...} event flush_msgs(). @@ -585,15 +585,15 @@ mapcache_delete_test() -> BKey = {<<"b">>,<<"k">>}, CacheKey = {mod,func,arg,keydata}, {ok, S1} = init([0]), - ?assertEqual(not_cached, cache_fetch(BKey, CacheKey, S1#state.mapcache)), + ?assertEqual(notfound, riak_kv_lru:fetch(S1#state.mapcache, BKey, CacheKey)), {noreply, S2} = handle_command({mapcache, BKey, CacheKey, result}, noreply, S1), - ?assertEqual(result, cache_fetch(BKey, CacheKey, S2#state.mapcache)), + ?assertEqual(result, riak_kv_lru:fetch(S2#state.mapcache, BKey, CacheKey)), {reply, {del, 0, 123}, S3} = handle_command(?KV_DELETE_REQ{bkey=BKey, req_id=123}, {raw, 456, self()}, S2), - ?assertEqual(not_cached, cache_fetch(BKey, CacheKey, S3#state.mapcache)), + ?assertEqual(notfound, riak_kv_lru:fetch(S3#state.mapcache, BKey, CacheKey)), %% The put request generates a {w,...} and {dw,...} event flush_msgs(). @@ -655,13 +655,10 @@ cleanup_servers() -> check_mapcache(Index, QTerm, BKey, KeyData, Expect) -> - map({Index,node()}, self(), QTerm, BKey, KeyData), - receive - Msg -> - {'$gen_event',{mapexec_reply,Result,_Pid}} = Msg, - ?assertEqual(Expect, Result) - after - 100 -> + case map({Index,node()}, self(), QTerm, BKey, KeyData) of + {mapexec_reply, Result, _Pid} -> + ?assertMatch(Expect, Result); + _ -> ?assert(false) end. From 18092c236c16687c27a169f35cbeec78ac1a22cd Mon Sep 17 00:00:00 2001 From: Jon Meredith Date: Thu, 26 Aug 2010 13:02:57 -0600 Subject: [PATCH 50/72] Updated riakc version --- rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index dca22116ca..651f595c55 100644 --- a/rebar.config +++ b/rebar.config @@ -5,7 +5,7 @@ {deps, [ {riak_core, "0.12.0", {hg, "http://bitbucket.org/basho/riak_core", "tip"}}, - {riakc, "0.2.0", {hg, "http://bitbucket.org/basho/riak-erlang-client", + {riakc, "1.0.0", {hg, "http://bitbucket.org/basho/riak-erlang-client", "tip"}}, {luke, "\.*", {hg, "http://bitbucket.org/kevsmith/luke-mr", "tip"}}, From 2eb14d92204902e6f3b9eabcf5b4b12312208ba2 Mon Sep 17 00:00:00 2001 From: Bryan Fink Date: Wed, 1 Sep 2010 08:54:14 -0400 Subject: [PATCH 51/72] reload_all is in riak_core_util, not riak_kv_util --- src/riak_client.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/riak_client.erl b/src/riak_client.erl index f61ea4aba9..d7708498e6 100644 --- a/src/riak_client.erl +++ b/src/riak_client.erl @@ -444,7 +444,7 @@ get_bucket(BucketName) -> %% @spec reload_all(Module :: atom()) -> term() %% @doc Force all Riak nodes to reload Module. %% This is used when loading new modules for map/reduce functionality. -reload_all(Module) -> rpc:call(Node,riak_kv_util,reload_all,[Module]). +reload_all(Module) -> rpc:call(Node,riak_core_util,reload_all,[Module]). %% @spec remove_from_cluster(ExitingNode :: atom()) -> term() %% @doc Cause all partitions owned by ExitingNode to be taken over From 0e49f090a5eff72fdc728275ce0752559b690926 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Wed, 1 Sep 2010 11:36:34 -0400 Subject: [PATCH 52/72] Removing dep luke-mr repo since that work has been merged into mainline --- rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index dca22116ca..d80bb91938 100644 --- a/rebar.config +++ b/rebar.config @@ -7,7 +7,7 @@ "tip"}}, {riakc, "0.2.0", {hg, "http://bitbucket.org/basho/riak-erlang-client", "tip"}}, - {luke, "\.*", {hg, "http://bitbucket.org/kevsmith/luke-mr", + {luke, "0.2.0", {hg, "http://bitbucket.org/basho/luke", "tip"}}, {erlang_js, "0\.4", {hg, "http://bitbucket.org/basho/erlang_js", "erlang_js-0.4"}}, From 76e4f1cd0eaa3f227bcb2a6cd4f87913890b7882 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Thu, 2 Sep 2010 10:17:28 -0400 Subject: [PATCH 53/72] Fix for bz #685 --- src/riak_kv_keys_fsm.erl | 4 ++-- src/riak_kv_reduce_phase.erl | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/riak_kv_keys_fsm.erl b/src/riak_kv_keys_fsm.erl index 45e2c4e354..e1a27c7397 100644 --- a/src/riak_kv_keys_fsm.erl +++ b/src/riak_kv_keys_fsm.erl @@ -191,8 +191,8 @@ process_keys([],Bucket,ClientType,_Bloom,ReqId,Client,Acc) -> mapred -> try luke_flow:add_inputs(Client, [{Bucket,K} || K <- Acc]) - catch _Error -> - exit(self(), shutdown) + catch _:_ -> + exit(self(), normal) end; plain -> Client ! {ReqId, {keys, Acc}} end, diff --git a/src/riak_kv_reduce_phase.erl b/src/riak_kv_reduce_phase.erl index 22599eb36e..d6f926dc73 100644 --- a/src/riak_kv_reduce_phase.erl +++ b/src/riak_kv_reduce_phase.erl @@ -102,6 +102,5 @@ perform_reduce({Lang,{reduce,FunTerm,Arg,_Acc}}, end end catch _:R -> - error_logger:error_msg("Failed reduce: ~p~n", [R]), - {error, failed_reduce} + R end. From 857c15fb65f7fd568fed84fc395f49a98256754d Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Thu, 2 Sep 2010 12:52:49 -0400 Subject: [PATCH 54/72] Bumping luke vsn --- rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index eea592a980..9ab4356c5a 100644 --- a/rebar.config +++ b/rebar.config @@ -7,7 +7,7 @@ "tip"}}, {riakc, "1.0.0", {hg, "http://bitbucket.org/basho/riak-erlang-client", "tip"}}, - {luke, "0.2.0", {hg, "http://bitbucket.org/basho/luke", + {luke, "0.2.1", {hg, "http://bitbucket.org/basho/luke", "tip"}}, {erlang_js, "0\.4", {hg, "http://bitbucket.org/basho/erlang_js", "erlang_js-0.4"}}, From 41ba35c5ca8b932c38bf65eaf7329bd72bfcc446 Mon Sep 17 00:00:00 2001 From: Andy Gross Date: Tue, 7 Sep 2010 17:03:50 -0400 Subject: [PATCH 55/72] update erlang_js dependency to tip --- rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index 9ab4356c5a..b4e3e7c6ad 100644 --- a/rebar.config +++ b/rebar.config @@ -10,7 +10,7 @@ {luke, "0.2.1", {hg, "http://bitbucket.org/basho/luke", "tip"}}, {erlang_js, "0\.4", {hg, "http://bitbucket.org/basho/erlang_js", - "erlang_js-0.4"}}, + "tip"}}, {bitcask, "1.0.3", {hg, "http://bitbucket.org/basho/bitcask", "tip"}}, {ebloom, "1.0.1", {hg, "http://bitbucket.org/basho/ebloom", From fe2f8ed9e845164cfe1f97f08f43e725a44af053 Mon Sep 17 00:00:00 2001 From: Jon Meredith Date: Wed, 8 Sep 2010 15:08:47 -0600 Subject: [PATCH 56/72] Fixed comment. --- src/riak_kv_app.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/riak_kv_app.erl b/src/riak_kv_app.erl index 5014629a33..f0ad452d95 100644 --- a/src/riak_kv_app.erl +++ b/src/riak_kv_app.erl @@ -74,7 +74,7 @@ start(_Type, _StartArgs) -> case riak_kv_sup:start_link() of {ok, Pid} -> %% Go ahead and mark the riak_kv service as up in the node watcher. - %% The riak_kv_ring_handler blocks until all vnodes have been started + %% The riak_core_ring_handler blocks until all vnodes have been started %% synchronously. riak_core:register_vnode_module(riak_kv_vnode), riak_core_node_watcher:service_up(riak_kv, self()), From 005392f892850ce44ac3e8703106567f500664fe Mon Sep 17 00:00:00 2001 From: Jon Meredith Date: Mon, 13 Sep 2010 16:23:30 -0600 Subject: [PATCH 57/72] Added ringready and transfers commands to riak-admin --- src/riak_kv_console.erl | 113 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 112 insertions(+), 1 deletion(-) diff --git a/src/riak_kv_console.erl b/src/riak_kv_console.erl index 1e9feafcde..1ac4cde9da 100644 --- a/src/riak_kv_console.erl +++ b/src/riak_kv_console.erl @@ -24,7 +24,7 @@ -module(riak_kv_console). --export([join/1, leave/1, status/1, reip/1]). +-export([join/1, leave/1, status/1, reip/1, ringready/1, transfers/1]). join([NodeStr]) -> case riak:join(NodeStr) of @@ -73,6 +73,62 @@ reip([OldNode, NewNode]) -> io:format("New ring file written to ~p~n", [element(2, riak_core_ring_manager:find_latest_ringfile())]). +%% Check if all nodes in the cluster agree on the partition assignment +ringready([]) -> + case get_rings() of + {[], Rings} -> + {N1,R1}=hd(Rings), + case rings_match(hash_ring(R1), tl(Rings)) of + true -> + Nodes = [N || {N,_} <- Rings], + io:format("TRUE All nodes agree on the ring ~p\n", [Nodes]); + {false, N2} -> + io:format("FALSE Node ~p and ~p list different partition owners\n", [N1, N2]), + false % make nodetool exit 1 + end; + {Down, _Rings} -> + io:format("FALSE ~p down. All nodes need to be up to check.\n", [Down]), + false % make nodetool exit 1 + end. + +%% Provide a list of nodes with pending partition transfers (i.e. any secondary vnodes) +%% and list any owned vnodes that are *not* running +transfers([]) -> + {Down, Rings} = get_rings(), + case Down of + [] -> + ok; + _ -> + io:format("Nodes ~p are currently down.\n", [Down]) + end, + + %% Work out which vnodes are running and which partitions they claim + F = fun({N,R}, Acc) -> + {_Pri, Sec, Stopped} = partitions(N, R), + case Sec of + [] -> + []; + _ -> + io:format("~p waiting to handoff ~p partitions\n", [N, length(Sec)]), + [{waiting_to_handoff, N, length(Sec)}] + end ++ + case Stopped of + [] -> + []; + _ -> + io:format("~p does not have ~p primary partitions running\n", + [N, length(Stopped)]), + [{stopped, N}] + end ++ + Acc + end, + case lists:foldl(F, [], Rings) of + [] -> + io:format("No transfers active\n"); + _ -> + ok + end. + format_stats([], Acc) -> lists:reverse(Acc); @@ -80,4 +136,59 @@ format_stats([{vnode_gets, V}|T], Acc) -> format_stats(T, [io_lib:format("vnode gets : ~p~n", [V])|Acc]); format_stats([{Stat, V}|T], Acc) -> format_stats(T, [io_lib:format("~p : ~p~n", [Stat, V])|Acc]). + +%% Retrieve the rings for all other nodes by RPC +get_rings() -> + {ok, MyRing} = riak_core_ring_manager:get_my_ring(), + Nodes = riak_core_ring:all_members(MyRing), + {RawRings, Down} = rpc:multicall(Nodes, riak_core_ring_manager, get_my_ring, [], 30000), + Rings = orddict:from_list([{riak_core_ring:owner_node(R), R} || {ok, R} <- RawRings]), + {lists:sort(Down), Rings}. + +%% Produce a hash of the 'chash' portion of the ring +hash_ring(R) -> + erlang:phash2(riak_core_ring:all_owners(R)). + +%% Check if all rings match given a hash and a list of [{N,P}] to check +rings_match(_, []) -> + true; +rings_match(R1hash, [{N2, R2} | Rest]) -> + case hash_ring(R2) of + R1hash -> + rings_match(R1hash, Rest); + _ -> + {false, N2} + end. + +%% Get a list of active partition numbers - regardless of vnode type +active_partitions(Node) -> + lists:foldl(fun({_,P}, Ps) -> + ordsets:add_element(P, Ps) + end, [], running_vnodes(Node)). + + +%% Get a list of running vnodes for a node +running_vnodes(Node) -> + Pids = vnode_pids(Node), + [rpc:call(Node, riak_core_vnode, get_mod_index, [Pid], 30000) || Pid <- Pids]. + +%% Get a list of vnode pids for a node +vnode_pids(Node) -> + [Pid || {_,Pid,_,_} <- supervisor:which_children({riak_core_vnode_sup, Node})]. + +%% Return a list of active primary partitions, active secondary partitions (to be handed off) +%% and stopped partitions that should be started +partitions(Node, Ring) -> + Owners = riak_core_ring:all_owners(Ring), + Owned = ordsets:from_list(owned_partitions(Owners, Node)), + Active = ordsets:from_list(active_partitions(Node)), + Stopped = ordsets:subtract(Owned, Active), + Secondary = ordsets:subtract(Active, Owned), + Primary = ordsets:subtract(Active, Secondary), + {Primary, Secondary, Stopped}. + +%% Return the list of partitions owned by a node +owned_partitions(Owners, Node) -> + [P || {P, Owner} <- Owners, Owner =:= Node]. + From f452380e10015dbb00483c4938944eaf5fb8dfbc Mon Sep 17 00:00:00 2001 From: Bryan Fink Date: Tue, 14 Sep 2010 09:08:29 -0400 Subject: [PATCH 58/72] change JSON spec for modfun mapred inputs (bz://184-related) to use the {modfun, Module, Function, Options} mapred input, use the JSON form: { "inputs":{ "module":"my_module", "function":"my_function", "arg":["my","arguments"] }, ...query follows... } this provides more flexibility than the list format previously used --- src/riak_kv_mapred_json.erl | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/riak_kv_mapred_json.erl b/src/riak_kv_mapred_json.erl index d961d06430..c8a10c8ee9 100644 --- a/src/riak_kv_mapred_json.erl +++ b/src/riak_kv_mapred_json.erl @@ -66,18 +66,36 @@ parse_request(Req) -> {error, not_json} end. -parse_inputs([<<"modfun">>,Mod, Fun, Options]) -> - {ok, {modfun, binary_to_atom(Mod, utf8), binary_to_atom(Fun, utf8), Options}}; parse_inputs(Bucket) when is_binary(Bucket) -> {ok, Bucket}; parse_inputs(Targets) when is_list(Targets) -> parse_inputs(Targets, []); +parse_inputs({struct, ModFun}) -> + case {proplists:lookup(<<"module">>, ModFun), + proplists:lookup(<<"function">>, ModFun), + proplists:lookup(<<"arg">>, ModFun)} of + {{_, Module}, {_, Function}, {_, Options}} -> + {ok, {modfun, + binary_to_atom(Module, utf8), + binary_to_atom(Function, utf8), + Options}}; + _ -> + {error, ["Missing fields in modfun input specification.\n" + "Required fields are:\n" + " - module : string name of a module\n", + " - function : string name of a function in module\n", + " - arg : argument to pass function\n"]} + end; parse_inputs(Invalid) -> {error, ["Unrecognized format of \"inputs\" field:", " ",mochijson2:encode(Invalid), "\n\nValid formats are:\n" " - a bucket name, as a string\n" - " - a list of bucket/key pairs\n"]}. + " - a list of bucket/key pairs\n", + " - an object naming a module and function to run, ex:\n", + " {\"module\":\"my_module\",\n", + " \"function\":\"my_function\",\n", + " \"arg\":[\"my\",\"arguments\"]}\n"]}. parse_inputs([], Accum) -> if From 4a7260a14bc26c24912c64342722ebdd4927706f Mon Sep 17 00:00:00 2001 From: Bryan Fink Date: Tue, 14 Sep 2010 11:33:13 -0400 Subject: [PATCH 59/72] support new modfun mapred input tuple on protobuff interface The client already encodes the {modfun, _, _, _} tuple just fine. Just needed to teach the server side what to do with it. --- src/riak_kv_mapred_term.erl | 5 ++++- src/riak_kv_pb_socket.erl | 17 +++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/riak_kv_mapred_term.erl b/src/riak_kv_mapred_term.erl index fb6199deab..179a7ce005 100644 --- a/src/riak_kv_mapred_term.erl +++ b/src/riak_kv_mapred_term.erl @@ -60,8 +60,11 @@ valid_inputs(Bucket) when is_binary(Bucket) -> ok; valid_inputs(Targets) when is_list(Targets) -> valid_input_targets(Targets); +valid_inputs({modfun, Module, Function, _Options}) + when is_atom(Module), is_atom(Function) -> + ok; valid_inputs(Invalid) -> - {error, {"Inputs must be a binary bucket or a list of target tuples:", Invalid}}. + {error, {"Inputs must be a binary bucket, a list of target tuples, or a modfun tuple:", Invalid}}. valid_input_targets([]) -> ok; diff --git a/src/riak_kv_pb_socket.erl b/src/riak_kv_pb_socket.erl index c3e1ff4042..83937c9873 100644 --- a/src/riak_kv_pb_socket.erl +++ b/src/riak_kv_pb_socket.erl @@ -274,6 +274,23 @@ process_message(#rpbmapredreq{request=MrReq, content_type=ContentType}=Req, {ok, ReqId} -> {pause, State#state{req = Req, req_ctx = ReqId}} + end; + is_tuple(Inputs), size(Inputs)==4, + element(1, Inputs) == modfun, + is_atom(element(2, Inputs)), + is_atom(element(3, Inputs)) -> + case C:mapred_stream(Query, self(), Timeout) of + {stop, Error} -> + send_error("~p", [Error], State); + + {ok, {ReqId, FSM}} -> + C:mapred_dynamic_inputs_stream( + FSM, Inputs, Timeout), + luke_flow:finish_inputs(FSM), + %% Pause incoming packets - map/reduce results + %% will be processed by handle_info, it will + %% set socket active again on completion of streaming. + {pause, State#state{req = Req, req_ctx = ReqId}} end end end. From 64c3954620732b9c824299d535231957714f98e3 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Tue, 14 Sep 2010 14:58:47 -0400 Subject: [PATCH 60/72] Fix for bz #708 --- src/riak_kv_keys_fsm.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/riak_kv_keys_fsm.erl b/src/riak_kv_keys_fsm.erl index e1a27c7397..2e89c13189 100644 --- a/src/riak_kv_keys_fsm.erl +++ b/src/riak_kv_keys_fsm.erl @@ -217,6 +217,8 @@ handle_sync_event(_Event, _From, _StateName, StateData) -> {stop,badmsg,StateData}. %% @private +handle_info({'EXIT', Pid, normal}, _StateName, #state{client=Pid}=StateData) -> + {stop,normal,StateData}; handle_info(_Info, _StateName, StateData) -> {stop,badmsg,StateData}. From d6e8ac7e8c24d776d8e7906b7cd3aa203bc07ca4 Mon Sep 17 00:00:00 2001 From: Dave Smith Date: Fri, 17 Sep 2010 15:27:07 -0600 Subject: [PATCH 61/72] Bumping to 0.13.0pre --- ebin/riak_kv.app | 2 +- rebar.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ebin/riak_kv.app b/ebin/riak_kv.app index ba5792eea2..f59455aa30 100644 --- a/ebin/riak_kv.app +++ b/ebin/riak_kv.app @@ -3,7 +3,7 @@ {application, riak_kv, [ {description, "Riak Key/Value Store"}, - {vsn, "0.12.0"}, + {vsn, "0.13.0pre"}, {modules, [ lk, raw_link_walker, diff --git a/rebar.config b/rebar.config index b4e3e7c6ad..ae6a435d20 100644 --- a/rebar.config +++ b/rebar.config @@ -3,7 +3,7 @@ {erl_opts, [debug_info, fail_on_warning]}. {deps, [ - {riak_core, "0.12.0", {hg, "http://bitbucket.org/basho/riak_core", + {riak_core, "0.13.0pre", {hg, "http://bitbucket.org/basho/riak_core", "tip"}}, {riakc, "1.0.0", {hg, "http://bitbucket.org/basho/riak-erlang-client", "tip"}}, From 9b8da66d1a2b049e311c09c90ca39ab669edca99 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Tue, 21 Sep 2010 14:42:34 -0400 Subject: [PATCH 62/72] Bumping erlang_js vsn dep --- rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index b4e3e7c6ad..c08b741572 100644 --- a/rebar.config +++ b/rebar.config @@ -9,7 +9,7 @@ "tip"}}, {luke, "0.2.1", {hg, "http://bitbucket.org/basho/luke", "tip"}}, - {erlang_js, "0\.4", {hg, "http://bitbucket.org/basho/erlang_js", + {erlang_js, "0.4.1", {hg, "http://bitbucket.org/basho/erlang_js", "tip"}}, {bitcask, "1.0.3", {hg, "http://bitbucket.org/basho/bitcask", "tip"}}, From 21c00208a680e5c1c58c879d59a1ac6e900f0f72 Mon Sep 17 00:00:00 2001 From: Dave Smith Date: Tue, 21 Sep 2010 15:32:42 -0600 Subject: [PATCH 63/72] Use bitcask:fold_keys in lieu of bitcask:fold to avoid issues with partition transfer; the early exit function was leaking file handles --- src/riak_kv_bitcask_backend.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/riak_kv_bitcask_backend.erl b/src/riak_kv_bitcask_backend.erl index 8165fecf45..d8ed1343ea 100644 --- a/src/riak_kv_bitcask_backend.erl +++ b/src/riak_kv_bitcask_backend.erl @@ -176,12 +176,12 @@ drop({Ref, BitcaskRoot}) -> is_empty({Ref, _}) -> %% Determining if a bitcask is empty requires us to find at least - %% one value that is NOT a tombstone. Accomplish this by doing a fold - %% that forcibly bails on the very first k/v encountered. - F = fun(_K, _V, _Acc0) -> + %% one value that is NOT a tombstone. Accomplish this by doing a fold_keys + %% that forcibly bails on the very first key encountered. + F = fun(_K, _Acc0) -> throw(found_one_value) end, - case catch(bitcask:fold(Ref, F, undefined)) of + case catch(bitcask:fold_keys(Ref, F, undefined)) of found_one_value -> false; _ -> From 62137ff3007ad3bca8dc4950ca5f9c9414aff7a9 Mon Sep 17 00:00:00 2001 From: Dave Smith Date: Wed, 22 Sep 2010 10:27:31 -0600 Subject: [PATCH 64/72] Fixing bjorked/missing license headers --- src/lk.erl | 21 +++++++++++++++++++++ src/raw_link_walker.erl | 15 ++++++++++----- src/riak_kv_backend.erl | 22 ++++++++++++++++++++++ src/riak_kv_lru.erl | 23 ++++++++++++++++++++++- src/riak_kv_vnode.erl | 11 ++++++++--- src/riak_kv_wm_mapred.erl | 2 +- src/riak_kv_wm_ping.erl | 2 +- 7 files changed, 85 insertions(+), 11 deletions(-) diff --git a/src/lk.erl b/src/lk.erl index 1541e66b5f..000b63a515 100644 --- a/src/lk.erl +++ b/src/lk.erl @@ -1,3 +1,24 @@ +%% ------------------------------------------------------------------- +%% +%% lk: Helper functions for list keys +%% +%% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- -module(lk). -export([fsm/1, pn/1]). diff --git a/src/raw_link_walker.erl b/src/raw_link_walker.erl index ec7ba38193..7b83a53743 100644 --- a/src/raw_link_walker.erl +++ b/src/raw_link_walker.erl @@ -1,19 +1,24 @@ +%% ------------------------------------------------------------------- +%% +%% raw_link_walker: Backwards compatibility module for link traversal +%% +%% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved. +%% %% This file is provided to you under the Apache License, %% Version 2.0 (the "License"); you may not use this file %% except in compliance with the License. You may obtain %% a copy of the License at - +%% %% http://www.apache.org/licenses/LICENSE-2.0 - +%% %% Unless required by applicable law or agreed to in writing, %% software distributed under the License is distributed on an %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY %% KIND, either express or implied. See the License for the %% specific language governing permissions and limitations %% under the License. - -%% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved. - +%% +%% ------------------------------------------------------------------- -module(raw_link_walker). -export([mapreduce_linkfun/3]). diff --git a/src/riak_kv_backend.erl b/src/riak_kv_backend.erl index df32771dda..e9c6b3d7a1 100644 --- a/src/riak_kv_backend.erl +++ b/src/riak_kv_backend.erl @@ -1,3 +1,25 @@ +%% ------------------------------------------------------------------- +%% +%% riak_kv_backend: Riak backend behaviour +%% +%% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + -module(riak_kv_backend). -export([behaviour_info/1]). -export([callback_after/3]). diff --git a/src/riak_kv_lru.erl b/src/riak_kv_lru.erl index 8a88e1df47..339d611d70 100644 --- a/src/riak_kv_lru.erl +++ b/src/riak_kv_lru.erl @@ -1,3 +1,24 @@ +%% ------------------------------------------------------------------- +%% +%% riak_kv_lru: ETS-based LRU cache +%% +%% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- -module(riak_kv_lru). -ifdef(TEST). @@ -79,7 +100,7 @@ clear_bkey(#kv_lru{max_size=0}, _BKey) -> ok; clear_bkey(#kv_lru{bucket_idx=BucketIdx}=LRU, BKey) -> R = ets:match(BucketIdx, {BKey, '$1'}), - case R of + case R of [] -> ok; Keys -> diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index 65a55effed..5f7560722e 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -1,3 +1,9 @@ +%% ------------------------------------------------------------------- +%% +%% riak_kv_vnode: VNode Implementation +%% +%% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved. +%% %% This file is provided to you under the Apache License, %% Version 2.0 (the "License"); you may not use this file %% except in compliance with the License. You may obtain @@ -11,9 +17,8 @@ %% KIND, either express or implied. See the License for the %% specific language governing permissions and limitations %% under the License. - -%% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved. - +%% +%% ------------------------------------------------------------------- -module(riak_kv_vnode). -behaviour(riak_core_vnode). %% API diff --git a/src/riak_kv_wm_mapred.erl b/src/riak_kv_wm_mapred.erl index 986e153c81..7d1814691d 100644 --- a/src/riak_kv_wm_mapred.erl +++ b/src/riak_kv_wm_mapred.erl @@ -1,6 +1,6 @@ %% ------------------------------------------------------------------- %% -%% mapred_resource: webmachine resource for mapreduce requests +%% riak_kv_wm_mapred: webmachine resource for mapreduce requests %% %% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved. %% diff --git a/src/riak_kv_wm_ping.erl b/src/riak_kv_wm_ping.erl index 7238f06ea2..5cfd3e4196 100644 --- a/src/riak_kv_wm_ping.erl +++ b/src/riak_kv_wm_ping.erl @@ -1,6 +1,6 @@ %% ------------------------------------------------------------------- %% -%% ping_http_resource: simple Webmachine resource for availability test +%% riak_kv_wm_ping: simple Webmachine resource for availability test %% %% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved. %% From 5b9275a1cdebf01799544a0025830d6b070b9c54 Mon Sep 17 00:00:00 2001 From: Andy Gross Date: Wed, 22 Sep 2010 15:02:23 -0700 Subject: [PATCH 65/72] update riak_kv dependencies to initial 0.13.0 revisions --- rebar.config | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/rebar.config b/rebar.config index 3c36ab471f..c58f7c3e94 100644 --- a/rebar.config +++ b/rebar.config @@ -3,16 +3,16 @@ {erl_opts, [debug_info, fail_on_warning]}. {deps, [ - {riak_core, "0.13.0pre", {hg, "http://bitbucket.org/basho/riak_core", - "tip"}}, + {riak_core, "0.13.0rc1", {hg, "http://bitbucket.org/basho/riak_core", + "riak_core-0.13.0rc1"}}, {riakc, "1.0.0", {hg, "http://bitbucket.org/basho/riak-erlang-client", - "tip"}}, + "riakc-1.0.0"}}, {luke, "0.2.1", {hg, "http://bitbucket.org/basho/luke", - "tip"}}, + "luke-0.2.1"}}, {erlang_js, "0.4.1", {hg, "http://bitbucket.org/basho/erlang_js", - "tip"}}, - {bitcask, "1.0.3", {hg, "http://bitbucket.org/basho/bitcask", - "tip"}}, - {ebloom, "1.0.1", {hg, "http://bitbucket.org/basho/ebloom", - "ebloom-1.0.1"}} + "90"}}, + {bitcask, "1.1", {hg, "http://bitbucket.org/basho/bitcask", + "bitcask-1.1"}}, + {ebloom, "1.0.2", {hg, "http://bitbucket.org/basho/ebloom", + "ebloom-1.0.2"}} ]}. From 006c1deec58c2ecc84eda488eeea68d4d814d19c Mon Sep 17 00:00:00 2001 From: Andy Gross Date: Wed, 22 Sep 2010 15:02:56 -0700 Subject: [PATCH 66/72] update app version to 0.13.0rc1 --- ebin/riak_kv.app | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ebin/riak_kv.app b/ebin/riak_kv.app index f59455aa30..4a546ab171 100644 --- a/ebin/riak_kv.app +++ b/ebin/riak_kv.app @@ -3,7 +3,7 @@ {application, riak_kv, [ {description, "Riak Key/Value Store"}, - {vsn, "0.13.0pre"}, + {vsn, "0.13.0rc1"}, {modules, [ lk, raw_link_walker, From ad49e19885ff613de5daf6c7ffeb4695241ad4e7 Mon Sep 17 00:00:00 2001 From: Andy Gross Date: Wed, 22 Sep 2010 15:03:12 -0700 Subject: [PATCH 67/72] Added tag riak_kv-0.13.0rc1 for changeset e716ebd150ff --- .hgtags | 1 + 1 file changed, 1 insertion(+) create mode 100644 .hgtags diff --git a/.hgtags b/.hgtags new file mode 100644 index 0000000000..689667ecc6 --- /dev/null +++ b/.hgtags @@ -0,0 +1 @@ +e716ebd150ff8698a89a1ae28bc868385a164497 riak_kv-0.13.0rc1 From d1b8340eb52bbe4ca6a51c3d14069e16f098dd59 Mon Sep 17 00:00:00 2001 From: Dave Smith Date: Fri, 24 Sep 2010 06:41:50 -0600 Subject: [PATCH 68/72] Bumping to 0.13.0rc2 --- ebin/riak_kv.app | 2 +- rebar.config | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ebin/riak_kv.app b/ebin/riak_kv.app index 4a546ab171..3d4eecb049 100644 --- a/ebin/riak_kv.app +++ b/ebin/riak_kv.app @@ -3,7 +3,7 @@ {application, riak_kv, [ {description, "Riak Key/Value Store"}, - {vsn, "0.13.0rc1"}, + {vsn, "0.13.0rc2"}, {modules, [ lk, raw_link_walker, diff --git a/rebar.config b/rebar.config index c58f7c3e94..eaca403feb 100644 --- a/rebar.config +++ b/rebar.config @@ -3,8 +3,8 @@ {erl_opts, [debug_info, fail_on_warning]}. {deps, [ - {riak_core, "0.13.0rc1", {hg, "http://bitbucket.org/basho/riak_core", - "riak_core-0.13.0rc1"}}, + {riak_core, "0.13.0rc2", {hg, "http://bitbucket.org/basho/riak_core", + "riak_core-0.13.0rc2"}}, {riakc, "1.0.0", {hg, "http://bitbucket.org/basho/riak-erlang-client", "riakc-1.0.0"}}, {luke, "0.2.1", {hg, "http://bitbucket.org/basho/luke", From dc3015ede49f8c35b0ee202d03d7a858bd906e0d Mon Sep 17 00:00:00 2001 From: Dave Smith Date: Fri, 24 Sep 2010 06:42:00 -0600 Subject: [PATCH 69/72] Added tag riak_kv-0.13.0rc2 for changeset 04f5cfd0b8ca --- .hgtags | 1 + 1 file changed, 1 insertion(+) diff --git a/.hgtags b/.hgtags index 689667ecc6..9ca68f91aa 100644 --- a/.hgtags +++ b/.hgtags @@ -1 +1,2 @@ e716ebd150ff8698a89a1ae28bc868385a164497 riak_kv-0.13.0rc1 +04f5cfd0b8ca7c195e67658367afa5625c186218 riak_kv-0.13.0rc2 From eed4a0cc44f4c6be68e9e69d2d030851460724a1 Mon Sep 17 00:00:00 2001 From: Dave Smith Date: Fri, 24 Sep 2010 14:00:57 -0600 Subject: [PATCH 70/72] Updating bitcask dep to 1.1.1 to incorporate read_ahead patches --- rebar.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rebar.config b/rebar.config index eaca403feb..9229c2601a 100644 --- a/rebar.config +++ b/rebar.config @@ -11,8 +11,8 @@ "luke-0.2.1"}}, {erlang_js, "0.4.1", {hg, "http://bitbucket.org/basho/erlang_js", "90"}}, - {bitcask, "1.1", {hg, "http://bitbucket.org/basho/bitcask", - "bitcask-1.1"}}, + {bitcask, "1.1.1", {hg, "http://bitbucket.org/basho/bitcask", + "bitcask-1.1.1"}}, {ebloom, "1.0.2", {hg, "http://bitbucket.org/basho/ebloom", "ebloom-1.0.2"}} ]}. From cb088309f51a5f7426b838bb985fbb726993b27a Mon Sep 17 00:00:00 2001 From: Dave Smith Date: Fri, 24 Sep 2010 14:53:22 -0600 Subject: [PATCH 71/72] Bumping to 0.13.0rc3 --- ebin/riak_kv.app | 2 +- rebar.config | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ebin/riak_kv.app b/ebin/riak_kv.app index 3d4eecb049..635a20118f 100644 --- a/ebin/riak_kv.app +++ b/ebin/riak_kv.app @@ -3,7 +3,7 @@ {application, riak_kv, [ {description, "Riak Key/Value Store"}, - {vsn, "0.13.0rc2"}, + {vsn, "0.13.0rc3"}, {modules, [ lk, raw_link_walker, diff --git a/rebar.config b/rebar.config index 9229c2601a..9d3fbc9811 100644 --- a/rebar.config +++ b/rebar.config @@ -3,8 +3,8 @@ {erl_opts, [debug_info, fail_on_warning]}. {deps, [ - {riak_core, "0.13.0rc2", {hg, "http://bitbucket.org/basho/riak_core", - "riak_core-0.13.0rc2"}}, + {riak_core, "0.13.0rc3", {hg, "http://bitbucket.org/basho/riak_core", + "riak_core-0.13.0rc3"}}, {riakc, "1.0.0", {hg, "http://bitbucket.org/basho/riak-erlang-client", "riakc-1.0.0"}}, {luke, "0.2.1", {hg, "http://bitbucket.org/basho/luke", From 0e0e78e42de1b01fea84ccb7d593a5bb2717ff45 Mon Sep 17 00:00:00 2001 From: Dave Smith Date: Fri, 24 Sep 2010 14:53:39 -0600 Subject: [PATCH 72/72] Added tag riak_kv-0.13.0rc3 for changeset a5e0a7b843b5 --- .hgtags | 1 + 1 file changed, 1 insertion(+) diff --git a/.hgtags b/.hgtags index 9ca68f91aa..c3e7f9825f 100644 --- a/.hgtags +++ b/.hgtags @@ -1,2 +1,3 @@ e716ebd150ff8698a89a1ae28bc868385a164497 riak_kv-0.13.0rc1 04f5cfd0b8ca7c195e67658367afa5625c186218 riak_kv-0.13.0rc2 +a5e0a7b843b52fe846b8006543d1484b548b9a18 riak_kv-0.13.0rc3