Skip to content

Commit

Permalink
testing
Browse files Browse the repository at this point in the history
  • Loading branch information
jprovidence committed Nov 14, 2011
1 parent b49155a commit e92c33d
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 21 deletions.
17 changes: 8 additions & 9 deletions src/httpu.erl
Expand Up @@ -22,12 +22,11 @@ get_http(Url) ->

%% -----------------------------------------------------------------------------------------

%% retrieves the content-type from http response headers

%% Scans an HTTP response header proplist ([{Name, Value}]) and returns the
%% value of the "content-type" header, or [] when no such header is present.
%% NOTE(review): header names must already be lowercase ("content-type") —
%% the comparison is exact; if several content-type entries exist, the last
%% one in the list wins (fold order).
get_content_type(Headers) ->
    lists:foldl(fun({Type, Val}, Acc) ->
                        case Type of
                            "content-type" -> Val;
                            _ -> Acc
                        end
                end, [], Headers).
%% http post

%% Encodes Data as JSON with mochijson2 and POSTs it to Url.
%% Returns whatever http:request/4 returns ({ok, Result} | {error, Reason}).
post_http(Url, Data) ->
    Body = mochijson2:encode(Data),
    ReqHeaders = [{"User-Agent", ?UA}],
    Request = {Url, ReqHeaders, "text/json", Body},
    http:request(post, Request, [], []).



40 changes: 40 additions & 0 deletions src/interface.erl
@@ -0,0 +1,40 @@
-module(interface).
-export([start/1, wait/0]).

%% -----------------------------------------------------------------------------------------

%% starts the specified number of interfaces

%% Starts Number interface processes and returns their pids.
%% Fix: the original unconditionally spawned one interface and then recursed,
%% so start(0) (or a negative Number) never terminated — spawning forever.
%% Non-positive Number now yields an empty pid list.
start(Number) when Number > 0 ->
    start(Number, []);
start(_) ->
    [].

%% Accumulator loop: spawns one interface per step, prepending each pid to L.
%% Fix: the original guard (N =/= 0) spun forever for negative N; matching
%% 0 as the base case and requiring N > 0 makes termination explicit
%% (anything else is a deliberate function_clause crash).
start(0, L) ->
    L;
start(N, L) when N > 0 ->
    Pid = spawn(?MODULE, wait, []),
    start(N - 1, [Pid | L]).


%% -----------------------------------------------------------------------------------------

%% waits to provide an interface to the Simple REST api

%% Interface process loop: services one REST request per message against the
%% Simple REST api, then recurses to await the next message.
%% Fixes:
%%  * the original handled a single message and then exited, although
%%    master's distributor hands the same interface pid out repeatedly —
%%    the loop now tail-recurses;
%%  * `{Response} = httpu:post_http(...)` matched a 1-tuple against the
%%    {ok, _}/{error, _} result of http:request and always crashed with
%%    badmatch — the result is now ignored explicitly (best-effort store);
%%  * unused bindings (Headers, From in the store clauses) are underscored
%%    to silence compiler warnings.
wait() ->
    receive
        {From, large_batch_unvisited_urls} ->
            {_Headers, Body} = httpu:get_http("http://localhost:3000/detritus?size=large"),
            Links = mochijson2:decode(list_to_binary(Body)),
            From ! {self(), {large_batch_unvisited_urls, Links}};
        {From, std_batch_unvisited_urls} ->
            {_Headers, Body} = httpu:get_http("http://localhost:3000/detritus?size=standard"),
            Links = mochijson2:decode(list_to_binary(Body)),
            From ! {self(), {std_batch_unvisited_urls, Links}};
        {_From, {store_feeds, Xml}} ->
            _Response = httpu:post_http("http://localhost:3000/arborage", Xml);
        {_From, {store_unvisited, Urls}} ->
            _Response = httpu:post_http("http://localhost:3000/rake", Urls)
    end,
    wait().
45 changes: 33 additions & 12 deletions src/master.erl
@@ -1,4 +1,5 @@
-module(master).
-export([start_system/1, wait/0, interface_distribute/0, pid_distribute/0]).

%% -----------------------------------------------------------------------------------------

Expand All @@ -16,7 +17,7 @@ start_system(NumCrawlers) ->

%% spawns a process which distributes individual interface pids in a cycle

%% Spawns a process that cycles through the interface pids in Ints,
%% seeds it with the pid list, and returns the distributor's pid.
%% Fix: the scrape retained both the old malformed head
%% (`spawn_interface_distributer(Ints),`) and the corrected one — the
%% duplicate line was a syntax error and is removed.
spawn_interface_distributer(Ints) ->
    Dist = spawn(?MODULE, interface_distribute, []),
    Dist ! {self(), Ints},
    Dist.
Expand All @@ -26,7 +27,7 @@ spawn_interface_distributer(Ints),

%% spawns a process which distributes individual crawler pids in a cycle

%% Spawns a process that cycles through the crawler pids in Pids,
%% seeds it with the pid list, and returns the distributor's pid.
%% Fix: removed the stale pre-image line `spawn_pid_distributer(Pids),`
%% (a diff artifact) that duplicated the head and broke the syntax.
spawn_pid_distributer(Pids) ->
    Dist = spawn(?MODULE, pid_distribute, []),
    Dist ! {self(), Pids},
    Dist.
Expand Down Expand Up @@ -54,7 +55,7 @@ interface_distribute(Ints, Index) ->
_ ->
interface_distribute(Ints, Index)
end,
ListLen = length(Ints)
ListLen = length(Ints),
case (ListLen - Index) of
0 -> interface_distribute(Ints, 1);
_ -> interface_distribute(Ints, (Index + 1))
Expand Down Expand Up @@ -98,7 +99,7 @@ pid_distribute(Pids, Index) ->
%% spawns the master

%% Spawns the master listener process and sends it the two distributor pids
%% as its init message. Returns the init message tuple (the value of `!`).
%% Fix: the scrape kept both the old line spawning `listen/0` and the new
%% one spawning `wait/0`; only wait/0 exists (see the module export list),
%% so the stale duplicate assignment is removed.
spawn_listener(PidDist, IntDist) ->
    Listener = spawn(?MODULE, wait, []),
    Listener ! {self(), {init, {PidDist, IntDist}}}.


Expand All @@ -122,14 +123,14 @@ spawn_interfaces(Num) ->

%% oversees the crawler system

%% Master bootstrap loop: blocks until the init message carrying the two
%% distributor pids arrives, self-sends `init` to trigger crawler setup,
%% then enters the main wait/2 loop.
%% Fix: the scrape interleaved the old `listen`-named lines with the renamed
%% `wait` ones; the stale pre-image lines are removed. The unused sender pid
%% is underscored to avoid a compiler warning.
wait() ->
    receive
        {_From, {init, {PidDist, IntDist}}} ->
            self() ! init,
            wait(PidDist, IntDist)
    end.

listen(PidDist, IntDist) ->
wait(PidDist, IntDist) ->
receive
%% initialization of all crawlers
init ->
Expand All @@ -149,7 +150,27 @@ listen(PidDist, IntDist) ->
submit_to_crawler(self(), PidDist, Urls);

%% store feeds

{From, {crawled, {NewLinks, Xml}}} ->
Str = spawn(?MODULE, store_crawl_results, []),
Str ! {self, {IntDist, NewLinks, Xml}}
end,
wait(PidDist, IntDist).


%% -----------------------------------------------------------------------------------------

%% handle storing results of web crawler

%% One-shot worker: waits for crawl results, requests an interface pid from
%% the distributor, and forwards the feeds and newly-found urls to it.
%% Fix: the inner receive reused the already-bound From, so it only matched
%% replies sent by the ORIGINAL requester — but the {requested_int, _} reply
%% comes from the distributor process, so the worker hung forever. The
%% sender position is now a wildcard, mirroring the same fix applied to the
%% `{_, {length_pids, L}}` receive in entask_crawlers.
store_crawl_results() ->
    receive
        {_From, {IntDist, NewLinks, Xml}} ->
            IntDist ! {self(), int_request},
            receive
                {_, {requested_int, Int}} ->
                    Int ! {self(), {store_feeds, Xml}},
                    Int ! {self(), {store_unvisited, NewLinks}}
            end
    end.


%% -----------------------------------------------------------------------------------------
Expand All @@ -163,7 +184,7 @@ entask_crawlers() ->
Respondant = From
end,
receive
{From, {length_pids, L}} ->
{_, {length_pids, L}} ->
LengthPids = L
end,
LengthUrls = length(Urls),
Expand All @@ -177,16 +198,16 @@ entask_crawlers() ->
submit_to_crawler(Respondant, PidDist, [X|List]),
{1, []};
_ ->
{(Count + 1), [X|List]};
end.
{(Count + 1), [X|List]}
end
end, {1, []}, Urls).


%% -----------------------------------------------------------------------------------------

%% submits a list of urls to a crawler, given a crawler distributer

submit_to_crawler(Respondant, PidDist, List)
submit_to_crawler(Respondant, PidDist, List) ->
PidDist ! {self(), pid_request},
receive
{From, {requested_pid, Pid}} ->
Expand Down

0 comments on commit e92c33d

Please sign in to comment.