Skip to content

Commit

Permalink
Improve RADIUS fail-over mechanism
Browse files Browse the repository at this point in the history
When the RADIUS client process is started, it collects all secondary RADIUS
servers from the RADIUS server pools into an ets table to use them for fail-over.

This commit also adds primary servers to this ets table. We need this
to avoid the following situation:

A primary RADIUS server was used, but the eradius client failed to send requests
to it for some reason. Such a RADIUS server is marked as inactive for
a configurable period of time, and a secondary RADIUS server is used instead.
The issue is that the eradius client could be called again with the primary server
while it is still in the inactive state.

This commit adds all primary and secondary servers to an ets table that stores
the endpoints of the RADIUS servers that could be used. So if the eradius client
is called with a currently inactive RADIUS server, that server will not be used
if another active RADIUS server exists.

Backward compatibility is preserved and new functionality will be used only
in a case when fail-over is enabled.
  • Loading branch information
0xAX committed Jul 26, 2021
1 parent 919bda6 commit acafb5b
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 13 deletions.
60 changes: 53 additions & 7 deletions src/eradius_client.erl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
-export([start_link/0, send_request/2, send_request/3, send_remote_request/3, send_remote_request/4]).
%% internal
-export([reconfigure/0, send_remote_request_loop/8, find_suitable_peer/1,
restore_upstream_server/1]).
restore_upstream_server/1, store_radius_server_from_pool/3]).

-behaviour(gen_server).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]).
Expand Down Expand Up @@ -80,13 +80,35 @@ send_request({IP, Port, Secret}, Request, Options) when ?GOOD_CMD(Request) andal
TS1 = eradius_lib:timestamp(milli_seconds),
ServerName = proplists:get_value(server_name, Options, undefined),
MetricsInfo = make_metrics_info(Options, {IP, Port}),
update_client_requests(MetricsInfo),
Retries = proplists:get_value(retries, Options, ?DEFAULT_RETRIES),
Timeout = proplists:get_value(timeout, Options, ?DEFAULT_TIMEOUT),
Peer = {ServerName, {IP, Port}},
{Socket, ReqId} = gen_server:call(?SERVER, {wanna_send, Peer, MetricsInfo}),
Response = send_request_loop(Socket, ReqId, Peer, Request#radius_request{reqid = ReqId, secret = Secret}, Retries, Timeout, MetricsInfo),
proceed_response(Request, Response, Peer, TS1, MetricsInfo, Options);
SendReqFn = fun () ->
Peer = {ServerName, {IP, Port}},
update_client_requests(MetricsInfo),
{Socket, ReqId} = gen_server:call(?SERVER, {wanna_send, Peer, MetricsInfo}),
Response = send_request_loop(Socket, ReqId, Peer,
Request#radius_request{reqid = ReqId, secret = Secret},
Retries, Timeout, MetricsInfo),
proceed_response(Request, Response, Peer, TS1, MetricsInfo, Options)
end,
% If we have other RADIUS upstream servers check current one,
% maybe it is already marked as inactive and try to find another
% one
case proplists:get_value(failover, Options, []) of
[] ->
SendReqFn();
UpstreamServers ->
case find_suitable_peer([{IP, Port, Secret} | UpstreamServers]) of
[] ->
no_active_servers;
{{IP, Port, Secret}, _NewPool} ->
SendReqFn();
{NewPeer, NewPool} ->
% current server is not in list of active servers, so use another one
NewOptions = lists:keyreplace(failover, 1, Options, {failover, NewPool}),
send_request(NewPeer, Request, NewOptions)
end
end;
send_request({_IP, _Port, _Secret}, _Request, _Options) ->
error(badarg).

Expand Down Expand Up @@ -385,7 +407,9 @@ configure(State) ->
%% private
prepare_pools() ->
ets:new(?MODULE, [ordered_set, public, named_table, {keypos, 1}, {write_concurrency,true}]),
lists:foreach(fun({_PoolName, Servers}) -> prepare_pool(Servers) end, application:get_env(eradius, servers_pool, [])).
lists:foreach(fun({_PoolName, Servers}) -> prepare_pool(Servers) end, application:get_env(eradius, servers_pool, [])),
lists:foreach(fun(Server) -> store_upstream_servers(Server) end, application:get_env(eradius, servers, [])).

prepare_pool([]) -> ok;
prepare_pool([{Addr, Port, _, Opts} | Servers]) ->
Retries = proplists:get_value(retries, Opts, ?DEFAULT_RETRIES),
Expand All @@ -395,6 +419,28 @@ prepare_pool([{Addr, Port, _} | Servers]) ->
store_radius_server_from_pool(Addr, Port, ?DEFAULT_RETRIES),
prepare_pool(Servers).

%% private
%% Records the upstream RADIUS servers referenced by the handlers of one
%% configured server in the client's ets table.
%%
%% The argument may be a bare configuration key or a 2-/3-tuple whose first
%% element is that key; the tuple wrappers are peeled off first. The key is
%% then looked up in the `eradius' application environment (defaulting to [])
%% and every handler definition shaped like {{_, Opts}, _} or
%% {{_, _, Opts}, _} is inspected.
%%
%% Only non-empty list options are handed over to eradius_proxy. Handler
%% options are arbitrary terms, and eradius_proxy:get_routes_info/1 runs
%% lists:keyfind/3 over them, which raises badarg for anything that is not a
%% list - so options of any other shape are skipped instead of crashing.
%%
%% Returns ok.
store_upstream_servers({Server, _}) ->
    store_upstream_servers(Server);
store_upstream_servers({Server, _, _}) ->
    store_upstream_servers(Server);
store_upstream_servers(Server) ->
    UpdatePoolFn = fun (HandlerOpts) ->
                           {DefaultRoute, Routes, Retries} = eradius_proxy:get_routes_info(HandlerOpts),
                           eradius_proxy:put_default_route_to_pool(DefaultRoute, Retries),
                           eradius_proxy:put_routes_to_pool(Routes, Retries)
                   end,
    lists:foreach(fun ({{_, [_ | _] = HandlerOpts}, _}) -> UpdatePoolFn(HandlerOpts);
                      ({{_, _, [_ | _] = HandlerOpts}, _}) -> UpdatePoolFn(HandlerOpts);
                      %% empty options, non-list options, or an unknown shape
                      (_HandlerDefinition) -> ok
                  end,
                  application:get_env(eradius, Server, [])).

%% private
store_radius_server_from_pool(Addr, Port, Retries) when is_tuple(Addr) and is_integer(Port) and is_integer(Retries) ->
ets:insert(?MODULE, {{Addr, Port}, Retries, Retries});
Expand Down
41 changes: 40 additions & 1 deletion src/eradius_proxy.erl
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
-module(eradius_proxy).

-behaviour(eradius_server).
-export([radius_request/3, validate_arguments/1]).
-export([radius_request/3, validate_arguments/1, get_routes_info/1,
put_default_route_to_pool/2, put_routes_to_pool/2]).

-ifdef(TEST).
-export([resolve_routes/4, validate_options/1, new_request/3,
Expand All @@ -43,6 +44,7 @@
-define(DEFAULT_SEPARATOR, "@").
-define(DEFAULT_TIMEOUT, 5000).
-define(DEFAULT_RETRIES, 1).
-define(DEFAULT_CLIENT_RETRIES, 3).

-define(DEFAULT_OPTIONS, [{type, ?DEFAULT_TYPE},
{strip, ?DEFAULT_STRIP},
Expand Down Expand Up @@ -113,6 +115,10 @@ send_to_server(#radius_request{reqid = ReqID} = Request, {{Server, Port, Secret}
case eradius_client:send_request({Server, Port, Secret}, Request, [{failover, UpstreamServers} | Options]) of
{ok, Result, Auth} ->
decode_request(Result, ReqID, Secret, Auth);
no_active_servers ->
% If all RADIUS servers are currently marked as inactive, skip the
% fail-over mechanism and use the originally given Peer
send_to_server(Request, {Server, Port, Secret}, Options);
Error ->
?LOG(error, "~p: error during send_request (~p)", [?MODULE, Error]),
Error
Expand Down Expand Up @@ -243,6 +249,39 @@ strip(Username, prefix, true, Separator) ->
%% Normalizes a route definition to its three-element form.
%% A three-element route is returned unchanged; a two-element route gets
%% `undefined' appended as its third (pool) element.
route({_Name, _Relay, _Pool} = FullRoute) ->
    FullRoute;
route({Name, Relay}) ->
    {Name, Relay, undefined}.

%% Extracts the routing-related entries from a handler option list.
%%
%% Returns {DefaultRoute, Routes, Retries} where DefaultRoute and Routes are
%% the raw results of lists:keyfind/3 for the `default_route' and `routes'
%% keys (the matching tuple, or `false' when the key is absent), and Retries
%% is the `retries' value of the `options' entry, falling back to
%% ?DEFAULT_CLIENT_RETRIES when either the entry or the value is missing.
get_routes_info(HandlerOpts) ->
    DefaultRouteEntry = lists:keyfind(default_route, 1, HandlerOpts),
    RoutesEntry = lists:keyfind(routes, 1, HandlerOpts),
    ClientRetries =
        case lists:keyfind(options, 1, HandlerOpts) of
            {options, ClientOpts} ->
                proplists:get_value(retries, ClientOpts, ?DEFAULT_CLIENT_RETRIES);
            false ->
                ?DEFAULT_CLIENT_RETRIES
        end,
    {DefaultRouteEntry, RoutesEntry, ClientRetries}.

%% Stores the address and port of a default route in the eradius_client
%% server table, together with the given retry count.
%%
%% The first argument is the result of looking up `default_route' in the
%% handler options: a {default_route, {Host, Port, Secret}} tuple, the same
%% tuple with a trailing pool name, or anything else (including `false' for
%% a missing entry), in which case nothing is stored and `ok' is returned.
put_default_route_to_pool({default_route, {Addr, Port, _}}, Retries) ->
    eradius_client:store_radius_server_from_pool(Addr, Port, Retries);
put_default_route_to_pool({default_route, {Addr, Port, _}, _}, Retries) ->
    eradius_client:store_radius_server_from_pool(Addr, Port, Retries);
%% `false' and every unrecognized shape end up here
put_default_route_to_pool(_NoUsableRoute, _Retries) ->
    ok.

%% Stores the address and port of every recognizable route in the
%% eradius_client server table, together with the given retry count.
%%
%% The first argument is the result of looking up `routes' in the handler
%% options: `false' when there is no such entry, otherwise {routes, Routes}.
%% Routes shaped like {Name, {Host, Port, Secret}},
%% {Name, {Host, Port, Secret}, Pool} or {Host, Port, Secret, Opts} are
%% stored; any other element is ignored. Returns ok.
put_routes_to_pool(false, _Retries) ->
    ok;
put_routes_to_pool({routes, RouteList}, Retries) ->
    Store = fun (Addr, Port) ->
                    eradius_client:store_radius_server_from_pool(Addr, Port, Retries)
            end,
    lists:foreach(fun ({_Name, {Addr, Port, _Secret}}) -> Store(Addr, Port);
                      ({_Name, {Addr, Port, _Secret}, _Pool}) -> Store(Addr, Port);
                      ({Addr, Port, _Secret, _Opts}) -> Store(Addr, Port);
                      (_Unrecognized) -> ok
                  end,
                  RouteList).

get_proxy_opt(_, [], Default) -> Default;
get_proxy_opt(OptName, [{OptName, AddrOrRoutes} | _], _) -> AddrOrRoutes;
get_proxy_opt(OptName, [{OptName, Addr, Pool} | _], _) -> {Addr, Pool};
Expand Down
35 changes: 32 additions & 3 deletions test/eradius_client_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,27 @@
-include("test/eradius_test.hrl").

-define(BAD_SERVER_IP, {eradius_test_handler:localhost(ip), 1820, "secret"}).
-define(BAD_SERVER_TUPLE, {{eradius_test_handler:localhost(tuple), 1820}, 3, 3}).
-define(BAD_SERVER_INITIAL_RETRIES, 3).
-define(BAD_SERVER_TUPLE_INITIAL, {{eradius_test_handler:localhost(tuple), 1820},
?BAD_SERVER_INITIAL_RETRIES,
?BAD_SERVER_INITIAL_RETRIES}).
-define(BAD_SERVER_TUPLE, {{eradius_test_handler:localhost(tuple), 1820},
?BAD_SERVER_INITIAL_RETRIES - 1,
?BAD_SERVER_INITIAL_RETRIES}).
-define(BAD_SERVER_IP_ETS_KEY, {eradius_test_handler:localhost(tuple), 1820}).

-define(GOOD_SERVER_INITIAL_RETRIES, 3).
-define(GOOD_SERVER_TUPLE, {{eradius_test_handler:localhost(tuple), 1812},
?GOOD_SERVER_INITIAL_RETRIES,
?GOOD_SERVER_INITIAL_RETRIES}).
-define(GOOD_SERVER_2_TUPLE, {{{127, 0, 0, 2}, 1813},
?GOOD_SERVER_INITIAL_RETRIES,
?GOOD_SERVER_INITIAL_RETRIES}).

-define(RADIUS_SERVERS, [?GOOD_SERVER_TUPLE,
?BAD_SERVER_TUPLE_INITIAL,
?GOOD_SERVER_2_TUPLE]).

all() -> [
send_request,
wanna_send,
Expand All @@ -36,7 +54,8 @@ all() -> [
wanna_send,
reconf_ports_10,
wanna_send,
send_request_failover
send_request_failover,
check_upstream_servers
].

init_per_suite(Config) ->
Expand All @@ -57,6 +76,10 @@ init_per_testcase(send_request_failover, Config) ->
application:stop(eradius),
eradius_test_handler:start(),
Config;
init_per_testcase(check_upstream_servers, Config) ->
application:stop(eradius),
eradius_test_handler:start(),
Config;
init_per_testcase(_Test, Config) ->
Config.

Expand All @@ -66,6 +89,9 @@ end_per_testcase(send_request, Config) ->
end_per_testcase(send_request_failover, Config) ->
eradius_test_handler:stop(),
Config;
end_per_testcase(check_upstream_servers, Config) ->
eradius_test_handler:stop(),
Config;
end_per_testcase(_Test, Config) ->
Config.

Expand Down Expand Up @@ -218,8 +244,11 @@ reconf_ports_10(_Config) ->

send_request_failover(_Config) ->
?equal(accept, eradius_test_handler:send_request_failover(?BAD_SERVER_IP)),
?equal([], ets:lookup(eradius_client, ?BAD_SERVER_IP_ETS_KEY)),
{ok, Timeout} = application:get_env(eradius, unreachable_timeout),
timer:sleep(Timeout * 1000),
?equal([?BAD_SERVER_TUPLE], ets:lookup(eradius_client, ?BAD_SERVER_IP_ETS_KEY)),
ok.

%% Test case: compares the complete content of the eradius_client ets table,
%% as returned by ets:tab2list/1, with the ?RADIUS_SERVERS list.
%% NOTE(review): ?equal comes from a header that is not visible here -
%% presumably it asserts exact equality, in which case the order of the
%% entries matters as well; confirm against eradius_test.hrl.
check_upstream_servers(_Config) ->
?equal(?RADIUS_SERVERS, ets:tab2list(eradius_client)),
ok.
9 changes: 7 additions & 2 deletions test/eradius_test_handler.erl
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,14 @@ start() ->
application:set_env(eradius, client_ip, localhost(tuple)),
application:set_env(eradius, session_nodes, local),
application:set_env(eradius, one, [{{"ONE", []}, [{localhost(ip), "secret"}]}]),
application:set_env(eradius, servers, [{one, {localhost(ip), [1812]}}]),
application:set_env(eradius, two, [{{"TWO", [{default_route, {{127, 0, 0, 2}, 1813, <<"secret">>}}]},
[{localhost(ip), "secret"}]}]),
application:set_env(eradius, servers, [{one, {localhost(ip), [1812]}},
{two, {localhost(ip), [1813]}}]),
application:set_env(eradius, unreachable_timeout, 2),
application:set_env(eradius, servers_pool, [{test_pool, [{localhost(tuple), 1812, "secret"}]}]),
application:set_env(eradius, servers_pool, [{test_pool, [{localhost(tuple), 1812, "secret"},
% fake upstream server for fail-over
{localhost(tuple), 1820, "secret"}]}]),
application:ensure_all_started(eradius),
eradius:modules_ready([?MODULE]).

Expand Down

0 comments on commit acafb5b

Please sign in to comment.