Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Add tagging support

Tagging is the process of indexing Riak Object metadata.
  • Loading branch information...
commit 12c5fc35a9985116e94006a030d51bbaca1293c1 1 parent a8deb84
@rzezeski authored
View
52 docs/TAGGING.md
@@ -0,0 +1,52 @@
+Tagging
+==========
+
+Data stored in Riak is opaque to Riak. It doesn't know anything about
+the structure of the data stored in it. Whether the data be JSON or
+JPEG, it's all the same to Riak.
+
+On the contrary, the application storing the data often has intimate
+knowledge of the data. The application may want to tag it with
+attributes that give additional context. For example, tagging a
+picture with information such as who uploaded it and when it was
+taken.
+
+This is called _tagging_ in Yokozuna. It provides the ability to
+create additional index entries based on the object's metadata.
+
+HTTP
+----------
+
+**NOTE: This is subject to change in the 0.2 release. The current
+ implementation of metadata in Riak requires all tags to be
+ prefixed with `x-riak-meta`.**
+
+Tags can be added via custom HTTP headers. The `x-riak-meta-yz-tags`
+header tells Yokozuna which headers to use as tags. Its value is a
+comma-separated list of header names.
+
+ x-riak-meta-yz-tags: x-riak-meta-user_s, x-riak-meta-description_t
+
+ x-riak-meta-user_s: rzezeski
+ x-riak-meta-description_t: Federal Hill at dusk.
+
+Yokozuna strips the `x-riak-meta-` prefix and lowercases tag names
+before indexing. In this case the tags will be `{<<"user_s">>,
+<<"rzezeski">>}` and `{<<"description_t">>, <<"Federal Hill at
+dusk.">>}`.
+
+A query against the description tag looks like this:
+
+ q=description_t:dusk
+
+### Multi-Valued Fields
+
+The tag values are passed verbatim to Solr. If you want a tag to be
+treated as a multi-valued field then you'll have to configure Solr to
+do so. This should be possible via Solr update processing but
+probably requires a custom processor.
+
+TODO: Create a custom processor and show example of creating a
+ multi-valued field via tagging.
+
+ x-riak-meta-keywords_ss: baltimore, dusk, landscape
+
View
10 misc/bench/src/yz_driver.erl
@@ -4,15 +4,7 @@
%% Callbacks
-export([new/1,
run/4]).
-
-%% Key Gens
--export([always/2,
- fruit_key_val_gen/1,
- fruit_key_val_gen/2,
- valgen/4,
- valgen_i/1,
- mfa_valgen/3,
- mfa_valgen_i/1]).
+-compile(export_all).
-include_lib("basho_bench/include/basho_bench.hrl").
-record(state, {pb_conns, index, iurls, surls}).
View
50 riak_test/yokozuna_essential.erl
@@ -10,6 +10,7 @@
confirm() ->
YZBenchDir = rt:get_os_env("YZ_BENCH_DIR"),
+ code:add_path(filename:join([YZBenchDir, "ebin"])),
random:seed(now()),
Nodes = rt:deploy_nodes(4),
Cluster = join_three(Nodes),
@@ -19,10 +20,53 @@ confirm() ->
Ref = async_query(Cluster, YZBenchDir),
Cluster2 = join_rest(Cluster, Nodes),
check_status(wait_for(Ref)),
+ ok = test_tagging(Cluster),
KeysDeleted = delete_some_data(Cluster2, reap_sleep()),
verify_deletes(Cluster2, KeysDeleted, YZBenchDir),
pass.
+%% @doc Write a tagged object over HTTP to the first host of the
+%% cluster, then check that each tag can be found by a query.
+test_tagging(Cluster) ->
+    lager:info("Test tagging"),
+    HostPort = hd(host_entries(rt:connection_info(Cluster))),
+    ok = write_with_tag(HostPort),
+    %% TODO: the test fails if this sleep isn't here
+    timer:sleep(5000),
+    Queries = [{"user_s", "rzezeski"},
+               {"desc_t", "description"}],
+    lists:foreach(fun({Name, Term}) ->
+                          ok = query_tag(HostPort, Name, Term)
+                  end,
+                  Queries).
+
+%% @doc PUT the object `tagging/test' over HTTP with the `user_s' and
+%% `desc_t' tag headers set. Crashes with a badmatch unless the
+%% response status is "204".
+write_with_tag({Host, Port}) ->
+    lager:info("Tag the object tagging/test"),
+    Fmt = "http://~s:~s/riak/tagging/test",
+    URL = lists:flatten(io_lib:format(Fmt, [Host, integer_to_list(Port)])),
+    TagHeaders = [{"x-riak-meta-yz-tags", "x-riak-meta-user_s, x-riak-meta-desc_t"},
+                  {"x-riak-meta-user_s", "rzezeski"},
+                  {"x-riak-meta-desc_t", "This is a description"}],
+    Headers = [{"content-type", "text/plain"} | TagHeaders],
+    {ok, "204", _, _} =
+        ibrowse:send_req(URL, Headers, put, <<"testing tagging">>, []),
+    ok.
+
+%% @doc Run the query `Name:Term' against the `tagging' search
+%% resource and verify that exactly one document is found. Returns
+%% `ok' on a "200" response, otherwise `{bad_response, Response}'.
+query_tag({Host, Port}, Name, Term) ->
+    Fmt = "http://~s:~s/search/tagging?q=~s:~s&wt=json",
+    Args = [Host, integer_to_list(Port), Name, Term],
+    URL = lists:flatten(io_lib:format(Fmt, Args)),
+    lager:info("Run query ~s", [URL]),
+    case ibrowse:send_req(URL, [], get, [], [{response_format, binary}]) of
+        {ok, "200", _Headers, Body} ->
+            lager:info("Query resp ~p", [Body]),
+            verify_count(1, Body),
+            ok;
+        Unexpected ->
+            {bad_response, Unexpected}
+    end.
+
+%% @doc Decode the JSON response and assert that the value found at
+%% `response' / `numFound' equals `Expected'.
+verify_count(Expected, Resp) ->
+    Path = [<<"response">>, <<"numFound">>],
+    NumFound = yz_driver:get_path(mochijson2:decode(Resp), Path),
+    ?assertEqual(Expected, NumFound).
+
async_query(Cluster, YZBenchDir) ->
lager:info("Run async query against cluster ~p", [Cluster]),
Hosts = host_entries(rt:connection_info(Cluster)),
@@ -46,6 +90,10 @@ async_query(Cluster, YZBenchDir) ->
check_status({Status,_}) ->
?assertEqual(?SUCCESS, Status).
+%% @doc Create `Index' on `Node' by calling `yz_index:create/1' over
+%% RPC. The result of the RPC call is returned as-is.
+create_index(Node, Index) ->
+    lager:info("Creating index ~s [~p]", [Index, Node]),
+    Args = [Index],
+    rpc:call(Node, yz_index, create, Args).
+
%% @doc Create `Index' with schema `SchemaName' on `Node' by calling
%% `yz_index:create/2' over RPC. The result of the RPC call is
%% returned as-is.
create_index(Node, Index, SchemaName) ->
    lager:info("Creating index ~s [~p]", [Index, Node]),
    Args = [Index, SchemaName],
    rpc:call(Node, yz_index, create, Args).
@@ -130,6 +178,8 @@ setup_indexing(Cluster, YZBenchDir) ->
ok = store_schema(Node, ?FRUIT_SCHEMA_NAME, RawSchema),
ok = create_index(Node, ?INDEX_S, ?FRUIT_SCHEMA_NAME),
ok = install_hook(Node, ?INDEX_B),
+ ok = create_index(Node, "tagging"),
+ ok = install_hook(Node, <<"tagging">>),
%% Give Solr time to build index
timer:sleep(5000).
View
77 src/yz_doc.erl
@@ -39,13 +39,14 @@ doc_id(O, Partition) ->
-spec make_doc(riak_object:riak_object(), binary(), binary()) -> doc().
make_doc(O, FPN, Partition) ->
ExtractedFields = extract_fields(O),
+ Tags = extract_tags(O),
Fields = [{id, doc_id(O, Partition)},
{?YZ_ED_FIELD, gen_vc(O)},
{?YZ_FPN_FIELD, FPN},
{?YZ_NODE_FIELD, ?ATOM_TO_BIN(node())},
{?YZ_PN_FIELD, Partition},
{?YZ_RK_FIELD, riak_key(O)}],
- {doc, lists:append([ExtractedFields, Fields])}.
+ {doc, lists:append([Tags, ExtractedFields, Fields])}.
-spec extract_fields(obj()) -> fields() | {error, any()}.
extract_fields(O) ->
@@ -65,6 +66,80 @@ extract_fields(O) ->
[]
end.
+%% @private
+%%
+%% @doc Build index fields from the tags listed in the object's user
+%% metadata. Each tag that is present and carries the expected
+%% prefix yields one `{Name, Value}' field.
+-spec extract_tags(obj()) -> fields().
+extract_tags(O) ->
+    UserMeta = get_user_meta(yz_kv:metadata(O)),
+    Names = get_tag_names(UserMeta),
+    AddTag = get_tag(UserMeta),
+    lists:foldl(AddTag, [], Names).
+
+%% @private
+%%
+%% @doc Get the user metadata from the Riak Object metadata. An
+%% empty dict is returned when the object has no user metadata.
+%%
+%% NOTE: This function should return the same type as the top-level
+%%       Riak Object metadata so that `yz_kv:get_md_entry' may be
+%%       used. This way when KV is altered to store user meta at the
+%%       top-level the migration will be easier.
+-spec get_user_meta(dict()) -> dict().
+get_user_meta(MD) ->
+    case yz_kv:get_md_entry(MD, <<"X-Riak-Meta">>) of
+        none ->
+            dict:new();
+        Entries ->
+            %% NOTE: Header names are lower-cased because
+            %%       `erlang:decode_packet', which is used by
+            %%       mochiweb, will modify the case of certain
+            %%       header names.
+            Pairs = [{list_to_binary(string:to_lower(Name)),
+                      list_to_binary(Value)}
+                     || {Name, Value} <- Entries],
+            dict:from_list(Pairs)
+    end.
+
+%% @private
+%%
+%% @doc Return a fold function over tag names for the user metadata
+%% `MD'. For each tag name the function looks up the value in `MD'
+%% and, if it is present and the name carries the `x-riak-meta-'
+%% prefix, prepends `{StrippedName, Value}' to the accumulated
+%% fields. Missing tags and names without the prefix leave the
+%% accumulator untouched.
+%%
+%% NOTE: `MD' is the dict built by `get_user_meta/1', not a list.
+-spec get_tag(dict()) -> fun((binary(), fields()) -> fields()).
+get_tag(MD) ->
+    fun(TagName, Fields) ->
+        case {yz_kv:get_md_entry(MD, TagName), strip_prefix(TagName)} of
+            {none, _} ->
+                %% tag listed but no such metadata entry
+                Fields;
+            {_, ignore} ->
+                %% bad tag name, silently discard
+                Fields;
+            {Value, TagName2} ->
+                [{TagName2, Value}|Fields]
+        end
+    end.
+
+%% @private
+%%
+%% @doc Remove the leading `x-riak-meta-' from a tag name. A name
+%% without that prefix is a bad tag and yields `ignore' so that the
+%% caller can silently discard it.
+-spec strip_prefix(binary()) -> binary() | ignore.
+strip_prefix(TagName) ->
+    case TagName of
+        <<"x-riak-meta-", Tag/binary>> -> Tag;
+        _ -> ignore
+    end.
+
+
+%% @private
+%%
+%% @doc Get the tag names listed under the `x-riak-meta-yz-tags'
+%% entry of the user metadata, or the empty list when there is no
+%% such entry.
+-spec get_tag_names(dict()) -> list().
+get_tag_names(MD) ->
+    Listed = yz_kv:get_md_entry(MD, <<"x-riak-meta-yz-tags">>),
+    if
+        Listed =:= none -> [];
+        true -> split_tag_names(Listed)
+    end.
+
+%% @private
+%%
+%% @doc Split the tag names. Input is assumed to be CSV. Whitespace
+%% (spaces and horizontal tabs) is stripped and tag names are
+%% converted to lower-case. Empty entries, e.g. from a trailing
+%% comma or an empty header value, are dropped so that no empty tag
+%% name is ever returned.
+-spec split_tag_names(binary()) -> [binary()].
+split_tag_names(TagNames) ->
+    %% Remove both space and tab, not just space.
+    NoSpace = binary:replace(TagNames, [<<" ">>, <<"\t">>], <<>>, [global]),
+    Lower = list_to_binary(string:to_lower(binary_to_list(NoSpace))),
+    [Name || Name <- binary:split(Lower, <<",">>, [global]), Name =/= <<>>].
+
%%%===================================================================
%%% Private
%%%===================================================================
View
3  src/yz_kv.erl
@@ -70,6 +70,9 @@ is_tombstone(Obj) ->
metadata(Obj) ->
riak_object:get_metadata(Obj).
+%% @doc Look up `Key' in the metadata `MD' via `yz_misc:dict_get/3',
+%% passing the atom `none' as the default value.
+get_md_entry(MD, Key) ->
+    Default = none,
+    yz_misc:dict_get(Key, MD, Default).
+
%% @doc An object modified hook to create indexes as object data is
%% written or modified.
%%
Please sign in to comment.
Something went wrong with that request. Please try again.