Permalink
Browse files

Avoid crashing ddfs_master when calls to it from linked processes time out. Should fix issue #312.
  • Loading branch information...
1 parent fb5fb31 commit 4f2eeb29943f7e67240ab51c16d6fb2b0ea24d23 @pmundkur committed Apr 24, 2012
Showing with 32 additions and 8 deletions.
  1. +32 −8 master/src/ddfs/ddfs_master.erl
@@ -397,21 +397,45 @@ do_get_tags(gc, Nodes) ->
end
end.
+% Timeouts in get_read_nodes() called from the processes below can crash ddfs_master itself,
+% since those processes are linked to it; this wrapper logs the failure and returns 'error'.
+-spec safe_get_read_nodes() -> {ok, [node()], non_neg_integer()} | error.
+safe_get_read_nodes() ->
+ try get_read_nodes() of  % only this call is covered by the catch clause below
+ {ok, _ReadableNodes, _RBSize} = RN ->  % expected reply: hand it back unchanged
+ RN;
+ E ->  % any other return value is logged and reported as 'error'
+ lager:error("unexpected response retrieving readable nodes: ~p", [E]),
+ error
+ catch
+ K:E ->  % K is the exception class, E the reason; every class is caught
+ lager:error("error retrieving readable nodes: ~p:~p", [K, E]),
+ error
+ end.
+
-spec monitor_diskspace() -> no_return().
monitor_diskspace() ->
- {ok, ReadableNodes, _RBSize} = get_read_nodes(),
- {Space, _F} = gen_server:multi_call(ReadableNodes,
- ddfs_node,
- get_diskspace,
- ?NODE_TIMEOUT),
- update_nodestats(gb_trees:from_orddict(lists:keysort(1, Space))),
+ case safe_get_read_nodes() of  % one polling round; a failed lookup no longer raises here
+ {ok, ReadableNodes, _RBSize} ->
+ {Space, _F} = gen_server:multi_call(ReadableNodes,  % _F: nodes that gave no reply, ignored
+ ddfs_node,
+ get_diskspace,
+ ?NODE_TIMEOUT),
+ update_nodestats(gb_trees:from_orddict(lists:keysort(1, Space)));  % replies sorted on element 1
+ error ->  % safe_get_read_nodes/0 has already logged the failure
+ ok  % skip this round and fall through to the sleep
+ end,
timer:sleep(?DISKSPACE_INTERVAL),
monitor_diskspace().
-spec refresh_tag_cache_proc() -> no_return().
refresh_tag_cache_proc() ->
- {ok, ReadableNodes, RBSize} = get_read_nodes(),
- refresh_tag_cache(ReadableNodes, RBSize),
+ case safe_get_read_nodes() of
+ {ok, ReadableNodes, RBSize} ->
+ refresh_tag_cache(ReadableNodes, RBSize);
+ error ->
+ ok
+ end,
receive
refresh ->
ok

0 comments on commit 4f2eeb2

Please sign in to comment.