Skip to content

Commit

Permalink
Merge 956f680 into 2b2f5e7
Browse files Browse the repository at this point in the history
  • Loading branch information
WoelkiM committed Mar 9, 2020
2 parents 2b2f5e7 + 956f680 commit 76b675f
Show file tree
Hide file tree
Showing 49 changed files with 1,908 additions and 1,497 deletions.
51 changes: 51 additions & 0 deletions elvis.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
%% Elvis linting and style rules.
%% The config list below holds three rule sets: one for Erlang sources,
%% one for the Makefile and one for rebar.config.
[{elvis,
[{config,
%% Rule set 1: every "*.erl" file under apps/*/src and src.
[#{dirs => ["apps/*/src", "src"],
filter => "*.erl",
%% line_length: limit is 100 and comment lines are not skipped.
rules => [{elvis_style, line_length,
#{ignore => [],
limit => 100,
skip_comments => false}},
{elvis_style, no_tabs},
{elvis_style, no_trailing_whitespace},
{elvis_style, macro_names, #{ignore => []}},
{elvis_style, macro_module_names},
%% operator_spaces: spacing checks around ",", "++" and "--".
%% NOTE(review): see the elvis rule docs for the exact meaning of left/right.
{elvis_style, operator_spaces, #{rules => [{right, ","},
{right, "++"},
{left, "++"},
{right, "--"},
{left, "--"}]}},
%% The god_modules rule is currently disabled (kept commented out):
%{elvis_style, god_modules,
%#{limit => 40,
% ignore => []}},
{elvis_style, used_ignored_variable},
{elvis_style, no_behavior_info},
%% Module names: start with a lowercase letter, then only lowercase letters,
%% digits and underscores; an optional "_SUITE" suffix is accepted.
{
elvis_style,
module_naming_convention,
#{regex => "^[a-z]([a-z0-9]*_?)*(_SUITE)?$",
ignore => []}
},
%% Function names: same pattern as module names, without the "_SUITE" suffix.
{
elvis_style,
function_naming_convention,
#{regex => "^[a-z]([a-z0-9]*_?)*$"} %base: ^([a-z][a-z0-9]*_?)*$
},
{elvis_style, state_record_and_type},
{elvis_style, no_spec_with_records}
]
},
%% Rule set 2: project-level dependency checks on the Makefile in ".".
#{dirs => ["."],
filter => "Makefile",
rules => [{elvis_project, no_deps_master_erlang_mk, #{ignore => []}},
{elvis_project, protocol_for_deps_erlang_mk, #{ignore => []}}]
},
%% Rule set 3: project-level dependency checks on rebar.config in ".".
#{dirs => ["."],
filter => "rebar.config",
rules => [{elvis_project, no_deps_master_rebar, #{ignore => []}},
{elvis_project, protocol_for_deps_rebar, #{ignore => []}}]
}
]
}]
}].
63 changes: 34 additions & 29 deletions src/basho_stats_histogram.erl
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,17 @@
-module(basho_stats_histogram).

-export([new/3,
update/2, update_all/2,
update/2,
update_all/2,
quantile/2,
counts/1,
observations/1,
summary_stats/1]).

-ifdef(EQC).
-export([prop_count/0, prop_quantile/0]).
-export([prop_count/0,
prop_quantile/0
]).
-endif.

-include("stats.hrl").
Expand Down Expand Up @@ -81,8 +84,6 @@ update(Value, Hist) ->
%% Fold every element of Values into Hist, left to right, by applying
%% update/2 to each value and the histogram accumulated so far.
%% Returns the histogram produced by the final update/2 call
%% (Hist itself when Values is empty).
update_all(Values, Hist) ->
    lists:foldl(fun update/2, Hist, Values).



%%
%% Estimate the quantile from the histogram. Quantile should be a value
Expand All @@ -92,7 +93,8 @@ quantile(_Quantile, #hist { n = 0 }) ->
'NaN';
quantile(Quantile, Hist)
when Quantile > 0; Quantile < 1 ->
%% Sort out how many complete samples we need to satisfy the requested quantile
%% Sort out how many complete samples we need to satisfy the
%% requested quantile
MaxSamples = Quantile * Hist#hist.n,

%% Now iterate over the bins, until we have gathered enough samples
Expand Down Expand Up @@ -147,7 +149,6 @@ which_bin(Value, Hist) ->
Bin
end.


quantile_itr(none, _Samples, _MaxSamples) ->
max;
quantile_itr({Bin, Counter, Itr2}, Samples, MaxSamples) ->
Expand All @@ -171,7 +172,6 @@ bin_count(Bin, Hist) ->
none ->
0
end.

%% ===================================================================
%% Unit Tests
%% ===================================================================
Expand All @@ -180,16 +180,16 @@ bin_count(Bin, Hist) ->

simple_test() ->
%% Pre-calculated tests
[7,0] = counts(update_all([10,10,10,10,10,10,14], new(10,18,2))).
[7, 0] = counts(update_all([10, 10, 10, 10, 10, 10, 14], new(10, 18, 2))).

-ifdef(EQC).


qc_count_check(Min, Max, Bins, Xs) ->
LCounts = counts(update_all(Xs, new(Min, Max, Bins))),
RCounts = basho_stats_utils:r_run(Xs,
?FMT("hist(x, seq(~w,~w,length.out=~w), plot=FALSE)$counts",
[Min, Max, Bins+1])),
?FMT("hist(x, seq(~w,~w,length.out=~w), plot=FALSE)$counts",
[Min, Max, Bins+1])),
case LCounts == RCounts of
true ->
true;
Expand All @@ -205,13 +205,14 @@ prop_count() ->
?LET(Xs, vector(Xlen, choose(Min, Max)),
?WHENFAIL(
begin
io:format("Min ~p, Max ~p, Bins ~p, Xs ~w~n",
[Min, Max, Bins, Xs]),
Command = ?FMT("hist(x, seq(~w,~w,length.out=~w), plot=FALSE)$counts",
[Min, Max, Bins+1]),
InputStr = [integer_to_list(I) || I <- Xs],
io:format(?FMT("x <- c(~s)\n", [string:join(InputStr, ",")])),
io:format(?FMT("write(~s, ncolumns=1, file=stdout())\n", [Command]))
io:format("Min ~p, Max ~p, Bins ~p, Xs ~w~n",
[Min, Max, Bins, Xs]),
Command = ?FMT("hist(x, seq(~w,~w,length.out=~w), plot=FALSE)$counts",
[Min, Max, Bins+1]),
InputStr = [integer_to_list(I) || I <- Xs],
io:format(?FMT("x <- c(~s)\n",
[string:join(InputStr, ",")])),
io:format(?FMT("write(~s, ncolumns=1,file=stdout())\n", [Command]))
end,
qc_count_check(Min, Max, Bins, Xs))))).

Expand All @@ -222,7 +223,8 @@ qc_quantile_check(Q, Min, Max, Bins, Xs) ->
Hist = new(Min, Max, Bins),
LCounts = counts(update_all(Xs, Hist)),
Lq = quantile(Q * 0.01, update_all(Xs, Hist)),
[Rq] = basho_stats_utils:r_run(Xs, ?FMT("quantile(x, ~4.2f, type=4)", [Q * 0.01])),
[Rq] = basho_stats_utils:r_run(Xs,
?FMT("quantile(x, ~4.2f, type=4)", [Q * 0.01])),
case abs(Lq - Rq) < 1 of
true ->
true;
Expand All @@ -235,33 +237,36 @@ qc_quantile_check(Q, Min, Max, Bins, Xs) ->
end.

prop_quantile() ->
%% Loosey-goosey checking of the quantile estimation against R's more precise method.
%% Loosey-goosey checking of the quantile estimation
%% against R's more precise method.
%%
%% To ensure a minimal level of accuracy, we ensure that we have between 50-200 bins
%% To ensure a minimal level of accuracy,
%% we ensure that we have between 50-200 bins
%% and between 100-500 data points.
%%
%% TODO: Need to nail down the exact error bounds
%%
%% XXX since we try to generate the quantile from the histogram, not the
%% original data, our results and R's don't always agree and this means the
%% test will occasionally fail. There's not an easy way to fix this.
?FORALL({Min, Bins, Xlen, Q}, {choose(1, 99), choose(50, 200), choose(100, 500),
choose(0,100)},
?FORALL({Min, Bins, Xlen, Q}, {choose(1, 99), choose(50, 200),
choose(100, 500), choose(0, 100)},
?LET(Max, choose(Min+1, 100),
?LET(Xs, vector(Xlen, choose(Min, Max)),
?WHENFAIL(
begin
io:format("Min ~p, Max ~p, Bins ~p, Q ~p, Xs ~w~n",
[Min, Max, Bins, Q, Xs]),
Command = ?FMT("quantile(x, ~4.2f, type=4)", [Q * 0.01]),
InputStr = [integer_to_list(I) || I <- Xs],
io:format(?FMT("x <- c(~s)\n", [string:join(InputStr, ",")])),
io:format(?FMT("write(~s, ncolumns=1, file=stdout())\n", [Command]))
io:format("Min ~p, Max ~p, Bins ~p, Q ~p, Xs ~w~n",
[Min, Max, Bins, Q, Xs]),
Command = ?FMT("quantile(x, ~4.2f, type=4)", [Q * 0.01]),
InputStr = [integer_to_list(I) || I <- Xs],
io:format(?FMT("x <- c(~s)\n",
[string:join(InputStr, ",")])),
io:format(?FMT("write(~s, ncolumns=1,file=stdout())\n", [Command]))
end,
qc_quantile_check(Q, Min, Max, Bins, Xs))))).

%% EUnit entry point: runs the prop_quantile/0 property through
%% eqc:quickcheck/1 and crashes with badmatch unless it returns true.
qc_quantile_test() ->
    Property = prop_quantile(),
    true = eqc:quickcheck(Property).

-endif.
-endif.
-endif.
28 changes: 17 additions & 11 deletions src/basho_stats_sample.erl
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,14 @@
-module(basho_stats_sample).

-export([new/0,
update/2, update_all/2,
update/2,
update_all/2,
count/1,
min/1, mean/1, max/1,
variance/1, sdev/1,
min/1,
mean/1,
max/1,
variance/1,
sdev/1,
summary/1]).

-include("stats.hrl").
Expand All @@ -47,7 +51,7 @@

%% Create a fresh sample accumulator: a #state{} record with every field
%% left at the default declared in the record definition.
new() ->
#state{}.

update(Value, State) ->
State#state {
n = State#state.n + 1,
Expand Down Expand Up @@ -92,7 +96,6 @@ sdev(State) ->

%% Collect the five summary statistics of State into a single tuple, in the
%% order {Min, Mean, Max, Variance, StdDev}. Each element is whatever the
%% corresponding accessor (min/1, mean/1, max/1, variance/1, sdev/1) returns.
summary(State) ->
    Lowest = min(State),
    Average = mean(State),
    Highest = max(State),
    Variance = variance(State),
    StdDev = sdev(State),
    {Lowest, Average, Highest, Variance, StdDev}.


%% ===================================================================
%% Internal functions
Expand All @@ -105,7 +108,7 @@ nan_min(V1, V2) -> erlang:min(V1, V2).
%% Maximum of two values where the atom 'NaN' stands for "no value".
%% If either argument is 'NaN' the other argument is returned (so two
%% 'NaN's give 'NaN'); otherwise the result is erlang:max/2 of the pair.
nan_max(Left, Right) ->
    case {Left, Right} of
        {_, 'NaN'} -> Left;
        {'NaN', _} -> Right;
        {_, _} -> erlang:max(Left, Right)
    end.


%% ===================================================================
%% Unit Tests
Expand All @@ -114,12 +117,14 @@ nan_max(V1, V2) -> erlang:max(V1, V2).
-ifdef(EUNIT).

simple_test() ->
%% A few hand-checked values
{1,3.0,5,2.5,1.5811388300841898} = summary(update_all([1,2,3,4,5], new())),
{1,5.5,10,15.0,3.872983346207417} = summary(update_all(lists:seq(1,10,3), new())).
%% A few hand-checked values
{1, 3.0, 5, 2.5, 1.5811388300841898} = summary(
update_all([1, 2, 3, 4, 5], new())),
{1, 5.5, 10, 15.0, 3.872983346207417} = summary(
update_all(lists:seq(1, 10, 3), new())).

empty_test() ->
{'NaN','NaN','NaN','NaN','NaN'} = summary(new()).
{'NaN', 'NaN', 'NaN', 'NaN', 'NaN'} = summary(new()).


-ifdef(EQC).
Expand All @@ -137,7 +142,8 @@ lists_equal([V1 | R1], [V2 | R2]) ->
prop_main() ->
?FORALL(Xlen, choose(2, 100),
?LET(Xs, vector(Xlen, int()),
lists_equal(basho_stats_utils:r_run(Xs,"c(min(x), mean(x), max(x), var(x), sd(x))"),
lists_equal(basho_stats_utils:r_run(Xs, "c(min(x), mean(x), max(x),
var(x), sd(x))"),
tuple_to_list(summary(update_all(Xs, new())))))).

qc_test() ->
Expand Down
6 changes: 4 additions & 2 deletions src/basho_stats_utils.erl
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,10 @@ r_run(Input, Command) ->
case r_port() of
{ok, Port} ->
InputStr = [integer_to_list(I) || I <- Input],
port_command(Port, ?FMT("x <- c(~s)\n", [string:join(InputStr, ",")])),
port_command(Port, ?FMT("write(~s, ncolumns=1, file=stdout())\n", [Command])),
port_command(Port, ?FMT("x <- c(~s)\n",
[string:join(InputStr, ",")])),
port_command(Port, ?FMT("write(~s, ncolumns=1, file=stdout())\n",
[Command])),
port_command(Port, "write('', file=stdout())\n"),
r_simple_read_loop(Port, []);
{error, Reason} ->
Expand Down
35 changes: 22 additions & 13 deletions src/bloom.erl
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,28 @@
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved via the world wide web at http://www.erlang.org/.
%%
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%%
-module(bloom).
-author("Paulo Sergio Almeida <psa@di.uminho.pt>").
-export([sbf/1, sbf/2, sbf/3, sbf/4,
bloom/1, bloom/2,
member/2, add/2,
size/1, capacity/1]).
-export([is_element/2, add_element/2]). % alternative names
-export([sbf/1,
sbf/2,
sbf/3,
sbf/4,
bloom/1,
bloom/2,
member/2,
add/2,
size/1,
capacity/1]).

-export([is_element/2,
add_element/2]). % alternative names

-import(math, [log/1, pow/2]).

%% Alternative name for member/2: forwards both arguments unchanged and
%% returns whatever member/2 returns.
is_element(Elem, Filter) ->
    member(Elem, Filter).
Expand All @@ -25,7 +34,7 @@ add_element(E, B) -> add(E, B).
%% Scalable Bloom Filters
%% Paulo Sérgio Almeida, Carlos Baquero, Nuno Preguiça, David Hutchison
%% Information Processing Letters
%% Volume 101, Issue 6, 31 March 2007, Pages 255-261
%% Volume 101, Issue 6, 31 March 2007, Pages 255-261
%%
%% Provides scalable bloom filters that can grow indefinitely while
%% ensuring a desired maximum false positive probability. Also provides
Expand Down Expand Up @@ -204,9 +213,9 @@ fixed_case_test_() ->
fixed_case(Bloom, Size, FalseRate) ->
?assert(bloom:capacity(Bloom) > Size),
?assertEqual(0, bloom:size(Bloom)),
RandomList = simple_shuffle(lists:seq(1,100*Size), Size),
RandomList = simple_shuffle(lists:seq(1, 100*Size), Size),
[?assertEqual(false, bloom:is_element(E, Bloom)) || E <- RandomList],
Bloom2 =
Bloom2 =
lists:foldl(fun(E, Bloom0) ->
bloom:add_element(E, Bloom0)
end, Bloom, RandomList),
Expand All @@ -218,9 +227,9 @@ fixed_case(Bloom, Size, FalseRate) ->
scalable_case(Bloom, Size, FalseRate) ->
?assertEqual(infinity, bloom:capacity(Bloom)),
?assertEqual(0, bloom:size(Bloom)),
RandomList = simple_shuffle(lists:seq(1,100*Size), 10*Size),
RandomList = simple_shuffle(lists:seq(1, 100*Size), 10*Size),
[?assertEqual(false, bloom:is_element(E, Bloom)) || E <- RandomList],
Bloom2 =
Bloom2 =
lists:foldl(fun(E, Bloom0) ->
bloom:add_element(E, Bloom0)
end, Bloom, RandomList),
Expand All @@ -231,5 +240,5 @@ scalable_case(Bloom, Size, FalseRate) ->
%% EUnit test: builds a filter with sbf(1000, 0.2) and runs scalable_case/3
%% against it with size 1000 and false rate 0.2. Returns ok if no assertion
%% inside scalable_case/3 fails.
bloom_test() ->
    Size = 1000,
    FalseRate = 0.2,
    Filter = sbf(Size, FalseRate),
    scalable_case(Filter, Size, FalseRate),
    ok.

-endif.
Loading

0 comments on commit 76b675f

Please sign in to comment.