Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Implements a first draft of a visitor timeline. Small fixes after running dialyzer.
  • Loading branch information...
commit f8a5cc01ac22f4043f96e12ac8f7c7008a7d492b 1 parent 156442c
@m2w authored
View
2  priv/scripts/parse_eval.py
@@ -38,7 +38,7 @@ def handle_path(self, path):
countries.add(gi.country_code_by_addr(ip))
ips.add(ip)
total_visits += 1
- return list(countries), list(ips), total_visits
+ return list(countries), list(ips), total_visits, log_timestamp
if __name__ == "__main__":
View
11 src/erli.hrl
@@ -1,16 +1,19 @@
-define(FLAG_LIMIT, 5). % number of reports before Target URLs are banned
-define(STAT_COLLECT_INTERVAL, 3600000). % time in ms between calls to the parser script
-define(MAX_CONFLICTS, 100). % the maximal number of attempts to generate a new URL path
--define(REQ_LIMIT, 5). % The number of requests allowed during a set timespan
+-define(REQ_LIMIT, 100000). % The number of requests allowed during a set timespan
-define(THROTTLE_TIME_SPAN, "hour"). % The interval to which throttling
% rules are applied
% (e.g. X req / THROTTLE_TIME_SPAN)
-define(SCRIPT_NAME, "parse_eval.py").
--record(target, {target, paths=[], reported = 0, rep_num = 0}).
--record(path, {path, total_clicks = 0, unique_clicks = 0, country_lst = []}).
--record(visitor_ip,{visitor_ip, paths=[]}).
+-record(target, {target, paths=[], reported=0, rep_num=0}).
+-record(timeslots, {timeslots, night=0, morning=0, afternoon=0, evening=0}).
+-record(path, {path, total_clicks=0, unique_clicks=0, country_lst=[],
+ timeslot_visits=#timeslots{}}).
+-record(visitor_ip, {visitor_ip, paths=[]}).
+
View
4 src/erli_stats.erl
@@ -70,8 +70,8 @@ grab_path_stats(Port, [Path | RemPaths]) ->
{reschedule, Time} ->
grab_path_stats(Port, []),
{reschedule, Time};
- {Countries, UniqueIPs, ClickCount} ->
- erli_storage:update_path_stats(Path, Countries, UniqueIPs, ClickCount),
+ {Countries, UniqueIPs, ClickCount, TimeStamp} ->
+ erli_storage:update_path_stats(Path, Countries, UniqueIPs, ClickCount, TimeStamp),
grab_path_stats(Port, RemPaths)
end
after
View
24 src/erli_storage.erl
@@ -11,7 +11,7 @@
read/1,
delete/1,
path_list/0,
- update_path_stats/4]).
+ update_path_stats/5]).
-include_lib("stdlib/include/qlc.hrl").
-include("erli.hrl").
@@ -136,11 +136,11 @@ path_list() ->
%% @doc Update a shortened URL's visit statistics
%% @end
%%------------------------------------------------------------------------------
-update_path_stats(Path, Countries, UniqueIPs, ClickCount) ->
+update_path_stats(Path, Countries, UniqueIPs, ClickCount, {_Date, {H, _M, _S}}) ->
% grab the target record
{ok, #target{paths=Paths, _=_} = Target} = read(Path#path.path),
% filter out the path of interest (not doing this in the read call keeps the read 'cheap')
- {[#path{country_lst=CL, total_clicks=TC, unique_clicks=UC, _=_}=ThePath],
+ {[#path{country_lst=CL, total_clicks=TC, unique_clicks=UC, timeslot_visits=TSV, _=_}=ThePath],
OtherPaths} =
lists:partition(fun(P) ->
Path =:= P
@@ -157,10 +157,12 @@ update_path_stats(Path, Countries, UniqueIPs, ClickCount) ->
sets:from_list(Countries)
)
),
+
% update the path stats
NewPath = ThePath#path{country_lst = CountryUnion,
unique_clicks = UC + UniqueClicks,
- total_clicks = TC + ClickCount},
+ total_clicks = TC + ClickCount,
+ timeslot_visits = classify_timeslot(H, ClickCount, TSV)},
NewTarget = Target#target{paths = [NewPath | OtherPaths]},
% flush to mnesia
mnesia:dirty_write(NewTarget).
@@ -168,6 +170,19 @@ update_path_stats(Path, Countries, UniqueIPs, ClickCount) ->
%%%=============================================================================
%%% Internal functions
%%%=============================================================================
+%%------------------------------------------------------------------------------
+%% @private
+%% @doc Add Clicks to the counter of the time slot that Hour falls into and
+%% return the updated #timeslots{} record.
+%%
+%% Slot boundaries: night =< 6, morning 7..12, afternoon 13..18, evening > 18.
+%%
+%% The whole record is returned (not just the new counter value) because the
+%% caller stores the result in #path.timeslot_visits; returning a bare integer
+%% would overwrite the record and make the next update fail with badrecord.
+%% @end
+%%------------------------------------------------------------------------------
+classify_timeslot(Hour, Clicks, #timeslots{night=Night}=TSV) when Hour =< 6 ->
+    TSV#timeslots{night = Night + Clicks};
+classify_timeslot(Hour, Clicks, #timeslots{morning=Morning}=TSV)
+  when Hour > 6, Hour =< 12 ->
+    TSV#timeslots{morning = Morning + Clicks};
+classify_timeslot(Hour, Clicks, #timeslots{afternoon=Afternoon}=TSV)
+  when Hour > 12, Hour =< 18 ->
+    TSV#timeslots{afternoon = Afternoon + Clicks};
+classify_timeslot(Hour, Clicks, #timeslots{evening=Evening}=TSV) when Hour > 18 ->
+    TSV#timeslots{evening = Evening + Clicks}.
+
+
is_unique_for_path(Path, IP) ->
case mnesia:dirty_read(visitor_ip, #visitor_ip{visitor_ip=IP, _='_'}) of
[] ->
@@ -237,7 +252,6 @@ make_target(TargetUrl, MatchingTarget) ->
%% the target URL.
%% @end
%%------------------------------------------------------------------------------
-
make_unique_path(TargetUrl) ->
make_unique_path(TargetUrl, 0).
make_unique_path(TargetUrl, NrOfHashConflicts) ->
View
2  src/path_resource.erl
@@ -150,7 +150,7 @@ from_json(RD, Ctx) ->
{true, RD, Target};
conflict ->
{{halt, 409}, RD, Ctx};
- {target_banned, _Target} ->
+ target_banned ->
{{halt, 410}, RD, Ctx}
end
end;
View
2  src/root_resource.erl
@@ -87,7 +87,7 @@ maybe_store(RD, Ctx) ->
case erli_storage:put(Ctx#target.target) of
error ->
{error, RD, Ctx}; % storage errors return a 500
- {target_banned, _Target} ->
+ target_banned ->
{{halt, 410}, RD, Ctx}; % banned target urls return a 410
{ok, Path} ->
NRD = wrq:set_resp_header("Location", Path#path.path, RD),
Please sign in to comment.
Something went wrong with that request. Please try again.