Permalink
Browse files

Initial commit.

  • Loading branch information...
0 parents commit c090887b817c6e5f41d96ff2e3913bad5980980c @knutin committed Aug 2, 2011
Showing with 963 additions and 0 deletions.
  1. +3 −0 .gitignore
  2. +55 −0 README.md
  3. BIN rebar
  4. +5 −0 rebar.config
  5. +12 −0 src/kprof.app.src
  6. +73 −0 src/kprof.erl
  7. +184 −0 src/kprof_aggregator.erl
  8. +17 −0 src/kprof_app.erl
  9. +26 −0 src/kprof_couchdb.erl
  10. +298 −0 src/kprof_server.erl
  11. +32 −0 src/kprof_sup.erl
  12. +90 −0 src/kprof_token_server.erl
  13. +117 −0 src/kprof_tracer.erl
  14. +51 −0 src/sample_app.erl
@@ -0,0 +1,3 @@
+*~
+ebin
+deps
@@ -0,0 +1,55 @@
+# kprof
+
+kprof allows you to profile the path a request takes from entry point
+all the way through your code, even across processes.
+
+The trace is broken down into "tiers", so you can see exactly how much
+time is spent in the different layers of your system. The VM traces
+only these tiers, which makes the performance impact much lower. A
+full trace is also possible.
+
+kprof is lightweight and allows tracing thousands of requests per
+second. The results are aggregated on the fly, to save space and make
+statistical analysis easier.
+
+The entry points you want traced need to be patched to set and remove
+a token that is used to identify the request. kprof provides helpers
+to do this, for custom code and for popular webservers.
+
+## Usage
+
+## Tiers
+
+
+## Code-reload
+
+If a module with a traced function is reloaded, the trace BIFs no
+longer trace that function. In this case, the trace has to be stopped
+and restarted.
+
+## Full trace
+
+It is possible to trace all functions invoked during a request which
+is very helpful in exploration. However, a full trace has a much
+higher performance impact than tiered tracing.
+
+Aggregation is done per {Module, Function, Arity}, not on the
+position of the call within the call trace. This might change in the
+future.
+
+## Identity
+
+## Overload protection
+
+To avoid overload of the process receiving trace messages, it will
+drop messages that it considers to be old. Each trace message is
+timestamped by the VM, and if a message older than 1 second (a
+threshold pulled out of thin air) is encountered, tracing is aborted.
+
+## TODO
+ * overload protection in kprof_server, disable tracing, look at timestamp of trace messages
+ * sampling based on frequency, sample 10% of all calls
+ * full tracing of all calls in a seq trace (fabian), "slow calls" in rpm
+ * run the aggregation externally to the node using {tracer, Port}
+ * count number of requests to the token server
+ * should be able to specify tiers using match specs
+ * stop tracing gc when a request is finished
BIN rebar
Binary file not shown.
%% rebar dependency configuration.
%%
%% NOTE: the ibrowse entry previously declared version "2.1.3" while
%% pinning git tag v2.2.0; the version regex now matches the tag.
{deps, [
 {basho_stats, "1.0.1", {git, "git://github.com/basho/basho_stats.git", "master"}},
 {ibrowse, "2.2.0", {git, "git://github.com/cmullaparthi/ibrowse.git", {tag, "v2.2.0"}}},
 {jiffy, "", {git, "https://github.com/davisp/jiffy.git", "master"}}
]}.
%% Application resource file for the kprof profiler.
{application, kprof,
 [
  {description, "Knuts aggregating profiler"},
  {vsn, "1"},
  %% Registered process names are left empty here; processes register
  %% themselves at runtime (e.g. kprof_token_server) -- TODO confirm
  %% against the full source tree.
  {registered, []},
  %% Applications that must be started before kprof.
  {applications, [
                  kernel,
                  stdlib
                 ]},
  %% Application callback module, see kprof_app:start/2.
  {mod, { kprof_app, []}},
  {env, []}
 ]}.
%% @author Knut Nesheim <knutin@gmail.com>
%% @copyright 2011 Knut Nesheim
%%
%% @doc Public API for kprof: starting/stopping the application and
%% the trace, plus entry-point wrappers that set and clear the
%% seq_trace token used to identify a request.
-module(kprof).

-export([start/0, stop/0]).

-export([start_trace/1, stop_trace/0]).

-export([do_apply/3,
         misultin_req_loop/2, handle_misultin_req/3]).

%%
%% API
%%

%% @doc: Starts the kprof application and its supervision tree.
start() -> application:start(kprof).

%% @doc: Stops the kprof application.
stop() -> application:stop(kprof).

%% @doc Start tracing with the given tier configuration. IdentityFun
%% must take a MFA tuple and return the identity of this sequence.
start_trace(Options) ->
    kprof_server:start_trace(Options).

stop_trace() ->
    kprof_server:stop_trace().

%% @doc: Wrapper around erlang:apply/3. Any code executed is traced.
%% The token is set before and cleared after the call; the result of
%% the call is returned unchanged.
do_apply(M, F, A) ->
    ok = set_token(),
    Result = erlang:apply(M, F, A),
    ok = clear_token(),
    Result.

%% @doc: Returns a fun suitable for use with misultin. Any code
%% executed is traced. The fully-qualified ?MODULE call makes the
%% wrapper survive code reloads of this module.
misultin_req_loop(M, F) ->
    fun (R) ->
            ?MODULE:handle_misultin_req(M, F, R)
    end.

handle_misultin_req(M, F, R) ->
    ok = set_token(),
    Res = M:F(R),
    ok = clear_token(),
    Res.


%%
%% INTERNAL HELPERS
%%

%% @doc: Clears the seq_trace token for the calling process.
clear_token() ->
    seq_trace:set_token([]),
    ok.

%% @doc: Tries to get a trace token, if the token server is
%% running. Does a gen_server call with timeout of 1 millisecond to
%% avoid tracing at all if the system is loaded.
set_token() ->
    case whereis(kprof_token_server) of
        undefined ->
            ok;
        _Pid ->
            %% try/catch instead of the original old-style
            %% "catch Expr", which conflates throws, errors and exits
            %% and loses the stacktrace. Behaviour is the same:
            %% best-effort -- if the 1 ms call times out or fails we
            %% simply proceed untraced.
            try kprof_token_server:get_token(1) of
                {ok, Label} when is_integer(Label) ->
                    seq_trace:set_token(label, Label),
                    ok;
                _ ->
                    ok
            catch
                _:_ ->
                    ok
            end
    end.
%% @author Knut Nesheim <knutin@gmail.com>
%% @copyright 2011 Knut Nesheim
%%
%% @doc Aggregates and sends results to the kprof viewer
%%
%% For every tier and id a histogram is updated when a new timing
%% comes in. At every reporting interval, the data is shipped to the
%% kprof viewer and the histograms are reset.

-module(kprof_aggregator).

-behaviour(gen_server).

%% API
-export([start_link/0, tier_timings/2, clear/0, set_dumper/1]).

%% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
         terminate/2, code_change/3]).

-define(SERVER, ?MODULE).
%% Histogram over [0, 1000000] microseconds.
-define(NEW_HISTOGRAM, basho_stats_histogram:new(0, 1000000, 1000000)).
%% Seconds between reports to the configured dumper.
-define(REPORT_INTERVAL, 60).

%% dumper: false (accumulate forever), {file, Path} or {couchdb, Opts}.
-record(state, {last_write_time, timer, dumper = false}).

%%%===================================================================
%%% API
%%%===================================================================

start_link() ->
    gen_server:start_link({local, ?SERVER}, ?MODULE, [], []).

%% @doc: Records a list of {Tier, ElapsedUs} timings for the given
%% request identity. Asynchronous.
tier_timings(Tiers, Id) ->
    gen_server:cast(?SERVER, {tier_timings, Tiers, Id}).

%% @doc: Resets all accumulated histograms.
clear() ->
    gen_server:call(?SERVER, clear_stats).

%% @doc: Configures where aggregated stats are shipped (see #state).
set_dumper(Dumper) ->
    gen_server:call(?SERVER, {set_dumper, Dumper}).


%%%===================================================================
%%% gen_server callbacks
%%%===================================================================

init([]) ->
    ReportInterval = timer:seconds(?REPORT_INTERVAL),
    TimerRef = erlang:send_after(ReportInterval, self(), report),
    {ok, #state{timer = TimerRef}}.

handle_call(clear_stats, _From, State) ->
    %% Histograms live in this process' dictionary; replace every
    %% tier_timing entry with a fresh histogram, leave other keys alone.
    ClearF = fun ({{tier_timing, _} = Key, {hist, _}}) ->
                     erlang:put(Key, {hist, ?NEW_HISTOGRAM});
                 (_) ->
                     ok
             end,
    lists:foreach(ClearF, get()),
    {reply, ok, State};

handle_call({set_dumper, Dumper}, _From, State) ->
    {reply, ok, State#state{dumper = Dumper}}.


handle_cast({tier_timing, {Tier, Id, ElapsedUs}}, State) ->
    ok = update_histogram({tier_timing, {Tier, Id}}, ElapsedUs),
    ok = update_histogram({tier_timing, {Tier, '_total'}}, ElapsedUs),
    {noreply, State};

handle_cast({tier_timings, Tiers, Id}, State) ->
    UpdateF =
        fun ({Tier, ElapsedUs}) ->
                ok = update_histogram({tier_timing, {Tier, Id}}, ElapsedUs),
                ok = update_histogram({tier_timing, {Tier, '_total'}}, ElapsedUs)
        end,
    lists:foreach(UpdateF, Tiers),
    {noreply, State};

handle_cast(_Msg, State) ->
    {noreply, State}.

handle_info(report, #state{timer = OldTimerRef} = State) ->
    erlang:cancel_timer(OldTimerRef),
    Now = now_to_seconds(),
    send_stats(State#state.dumper),

    ReportInterval = timer:seconds(?REPORT_INTERVAL),
    TimerRef = erlang:send_after(ReportInterval, self(), report),
    {noreply, State#state{last_write_time = Now, timer = TimerRef}};

%% Drain unexpected messages instead of crashing; the original had no
%% catch-all clause, so any stray message killed the aggregator.
handle_info(_Info, State) ->
    {noreply, State}.

terminate(_Reason, #state{timer = TimerRef}) ->
    erlang:cancel_timer(TimerRef),
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%%===================================================================
%%% Internal functions
%%%===================================================================

get_or_create_histogram(Key) ->
    case get(Key) of
        undefined -> ?NEW_HISTOGRAM;
        {hist, Value} -> Value;
        _ ->
            throw({kingdom, unexpected_value_in_stats_server_pdict, []})
    end.

%% @doc: Updates the histogram stored under the given key in the
%% process dictionary with the given value.
update_histogram(Key, Value) ->
    Hist = get_or_create_histogram(Key),
    NewHist = basho_stats_histogram:update(Value, Hist),
    erlang:put(Key, {hist, NewHist}),
    ok.

%% @doc: Pulls statistics out from the histograms and sends it off to
%% disk or couchdb. If no handler is specified, stats are accumulated
%% in the process forever.
send_stats(false) ->
    ok;

send_stats({file, Path}) ->
    case get_tier_timings() of
        [] -> ok;
        TierTimings ->
            Data = {[{tier_timings, TierTimings}]},
            %% jiffy:encode/1 returns the encoded iodata directly and
            %% throws on error; the original "{ok, Json} = ..." match
            %% would always fail with badmatch.
            Json = jiffy:encode(Data),
            ok = file:write_file(Path, Json, [append]),
            ok
    end;

send_stats({couchdb, _CouchdbOptions}) ->
    case get_tier_timings() of
        [] -> ok;
        Timings ->
            kprof_couchdb:put_tier_timings(Timings)
    end.


%% @doc: Extra data that will be appended to the json document
extra_data() ->
    [{node, list_to_binary(atom_to_list(node()))}].


%% @doc: Returns all tier timing histograms that actually contain an
%% observation, resetting each one so the next interval starts fresh.
get_tier_timings() ->
    Ts = lists:filter(fun ({{tier_timing, _}, {hist, Hist}}) ->
                              basho_stats_histogram:observations(Hist) =/= 0;
                          (_) ->
                              false
                      end, get()),
    [erlang:put(Key, {hist, ?NEW_HISTOGRAM}) || {Key, _Value} <- Ts],
    lists:map(fun (H) -> format_timing(H, json) end, Ts).

format_timing({{tier_timing, {Tier, Id}}, {hist, Hist}}, json) ->
    {Min, Mean, Max, _, SD} =
        basho_stats_histogram:summary_stats(Hist),

    {[{timestamp, now_to_seconds()},
      {key, list_to_binary(io_lib:format("~w.~s", [Tier, id2str(Id)]))},
      {observations, basho_stats_histogram:observations(Hist)},
      {min, Min},
      {max, Max},
      {mean, Mean},
      {sd, SD},
      {quantile_25, basho_stats_histogram:quantile(0.250, Hist)},
      {quantile_75, basho_stats_histogram:quantile(0.750, Hist)},
      {quantile_99, basho_stats_histogram:quantile(0.990, Hist)},
      {quantile_999, basho_stats_histogram:quantile(0.999, Hist)} |
      extra_data()]}.

%% @doc: Current wall-clock time in whole seconds. os:timestamp/0
%% replaces the deprecated now/0; same {Mega, Sec, Micro} shape.
now_to_seconds() ->
    {MegaSeconds, Seconds, _} = os:timestamp(),
    MegaSeconds * 1000000 + Seconds.


%% @doc: Flattens a request identity into a string for the json key.
id2str({Resource, Action}) when is_atom(Resource) andalso is_atom(Action) ->
    lists:flatten(io_lib:format("~w_~w", [Resource, Action]));
id2str(Key) ->
    Key.
%% @doc OTP application behaviour callbacks for kprof.
-module(kprof_app).

-behaviour(application).

%% Application callbacks
-export([start/2, stop/1]).

%% ===================================================================
%% Application callbacks
%% ===================================================================

%% @doc: Application start callback: makes sure the ibrowse HTTP
%% client application is running, then boots the kprof supervision
%% tree. Returns the supervisor's start_link result.
start(_StartType, _StartArgs) ->
    application:start(ibrowse),
    kprof_sup:start_link().

%% @doc: Application stop callback; nothing to clean up.
stop(_State) ->
    ok.
%% @doc Ships aggregated tier timings to a local CouchDB instance via
%% the _bulk_docs API. Delivery is best-effort: failures are logged
%% and ok is returned either way.
-module(kprof_couchdb).

-export([put_tier_timings/1]).

-define(HEADERS, [{"Content-Type", "application/json"}]).

%% @doc: Encodes the given timing documents as json and POSTs them to
%% CouchDB in one bulk request.
put_tier_timings(Timings) ->
    Body = jiffy:encode({[{docs, Timings}]}),
    do_post(url(), Body).


%% @doc: POSTs Body to Url, logging the outcome. Always returns ok.
do_post(Url, Body) ->
    Options = [{response_format, binary}],
    case ibrowse:send_req(Url, ?HEADERS, post, Body, Options) of
        {ok, _Status, _ResponseHeaders, ResponseBody} ->
            Response = jiffy:decode(ResponseBody),
            error_logger:info_msg("Response: ~p~n", [Response]),
            ok;
        {error, Reason} ->
            %% Log at error severity (was info_msg), so failed
            %% deliveries show up in standard error reports.
            error_logger:error_msg("kprof_couchdb post failed: ~p~n", [Reason]),
            ok
    end.


url() ->
    lists:flatten(["http://localhost:5984", "/kprof_couchapp/_bulk_docs"]).
Oops, something went wrong.

0 comments on commit c090887

Please sign in to comment.