Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Don't run extraction on tombstones
Riak KV has a 2-phase delete.

1. Write a tombstone value.

2. Reap the tombstones and delete data at backend level.

Yokozuna must honor both phases because the tombstone value could live
for an unspecified amount of time due to either configuration or
failure.  Since active anti-entropy and other sub-systems may consider
a tombstone an object, Yokozuna must know about it even if there is no
content to index.  Since the tombstone object is void of content, no
extraction needs to be performed.  Only the special _yz fields need to
be written.
  • Loading branch information
rzezeski committed Sep 26, 2012
1 parent e6a894d commit 78fb0d7
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 9 deletions.
1 change: 1 addition & 0 deletions include/yokozuna.hrl
Expand Up @@ -119,6 +119,7 @@
%%%===================================================================

%% A Riak object, as defined by the riak_object module.
-type obj() :: riak_object:riak_object().
%% Metadata attached to a Riak object, held as a dict.
-type obj_metadata() :: dict().

%%%===================================================================
%%% Docs
Expand Down
23 changes: 14 additions & 9 deletions src/yz_doc.erl
Expand Up @@ -49,15 +49,20 @@ make_doc(O, FPN, Partition) ->

%% @doc Extract the fields to index from the object `O'.
%%
%% A tombstone is void of content, so no extractor is run for it and
%% an empty field list is returned.  For any other object the
%% extractor definition is looked up by the object's content-type and
%% run against the first value of the object.  If the extractor
%% returns `{error, Reason}' the failure is logged and the error tuple
%% is returned unchanged.
-spec extract_fields(obj()) -> fields() | {error, any()}.
extract_fields(O) ->
    case yz_kv:is_tombstone(O) of
        false ->
            CT = yz_kv:get_obj_ct(O),
            Value = hd(riak_object:get_values(O)),
            ExtractorDef = yz_extractor:get_def(CT, [check_default]),
            case yz_extractor:run(Value, ExtractorDef) of
                {error, Reason} ->
                    %% Reason is any term, so format it with ~p; ~s
                    %% only accepts chardata and fails on other terms.
                    ?ERROR("failed to index with reason ~p~nValue: ~s", [Reason, Value]),
                    {error, Reason};
                Fields ->
                    Fields
            end;
        true ->
            %% Nothing to extract from a tombstone.
            []
    end.

%%%===================================================================
Expand Down
13 changes: 13 additions & 0 deletions src/yz_kv.erl
Expand Up @@ -57,6 +57,19 @@ get(C, Bucket, Key) ->
get_obj_ct(Obj) ->
    %% Read the content-type entry out of the object's metadata dict.
    MD = riak_object:get_metadata(Obj),
    dict:fetch(<<"content-type">>, MD).

%% @doc Report whether `Obj' is a tombstone, judged by the
%% `X-Riak-Deleted' entry in its metadata.
-spec is_tombstone(obj()) -> boolean().
is_tombstone(Obj) ->
    MD = metadata(Obj),
    %% When the key is absent the lookup yields the default `false'.
    Deleted = yz_misc:dict_get(<<"X-Riak-Deleted">>, MD, false),
    case Deleted of
        false -> false;
        "true" -> true
    end.

%% @doc Return the metadata stored on `Obj'.
-spec metadata(obj()) -> obj_metadata().
metadata(Obj) ->
    MD = riak_object:get_metadata(Obj),
    MD.

%% @doc An object modified hook to create indexes as object data is
%% written or modified.
%%
Expand Down
9 changes: 9 additions & 0 deletions src/yz_misc.erl
Expand Up @@ -57,6 +57,15 @@ delta(Old, New) ->
Same = ordsets:intersection(New, Old),
{Removed, Added, Same}.

%% @doc Look up `Key' in `Dict', returning its value when present and
%% `Default' when the key is absent.
-spec dict_get(term(), dict(), term()) -> term().
dict_get(Key, Dict, Default) ->
    case dict:is_key(Key, Dict) of
        true -> dict:fetch(Key, Dict);
        false -> Default
    end.

%% @doc Get either the `raw' or `transformed' ring. The raw ring is
%% what is stored on disk. The transformed ring is the raw ring
%% with processing done to it such as bucket fixups.
Expand Down

0 comments on commit 78fb0d7

Please sign in to comment.