Permalink
Browse files

Optimize cowboy_protocol

* #state{} changes are avoided where possible
* #state{} is now smaller and use less memory
* the Req object is created only after the whole request is parsed
* parsing makes use of a single binary match context
* external calls are avoided in the critical path
* URL fragment is now extracted properly (retrieval API next commit)
* argument orders to local functions modified to avoid extra operations
* dispatching waits as long as possible before tokenizing host/path
* handler opts are no longer shown in the error messages except in init

The code may not look as beautiful as it was before. But it really
is, for parsing code. The parsing section of the file may be skipped
if your eyes start to burn.
  • Loading branch information...
1 parent bfab8d4 commit b2243aa54438dbea4137960c9dc6f54ac746f429 @essen essen committed Sep 26, 2012
Showing with 517 additions and 573 deletions.
  1. +2 −2 Makefile
  2. +117 −158 src/cowboy_dispatcher.erl
  3. +0 −51 src/cowboy_http.erl
  4. +368 −217 src/cowboy_protocol.erl
  5. +29 −37 src/cowboy_req.erl
  6. +0 −70 test/dispatcher_prop.erl
  7. +1 −1 test/http_SUITE.erl
  8. +0 −37 test/proper_SUITE.erl
View
@@ -43,10 +43,10 @@ eunit:
@$(REBAR) -C rebar.tests.config eunit skip_deps=true
ct:
- @$(REBAR) -C rebar.tests.config ct skip_deps=true suites=http,proper,ws
+ @$(REBAR) -C rebar.tests.config ct skip_deps=true suites=http,ws
intct:
- @$(REBAR) -C rebar.tests.config ct skip_deps=true suites=http,proper,ws,autobahn
+ @$(REBAR) -C rebar.tests.config ct skip_deps=true suites=http,ws,autobahn
# Dialyzer.
View
@@ -17,16 +17,14 @@
-module(cowboy_dispatcher).
%% API.
--export([split_host/1]).
--export([split_path/2]).
--export([match/3]).
+-export([match/4]).
--type bindings() :: list({atom(), binary()}).
--type tokens() :: list(binary()).
--type match_rule() :: '_' | '*' | list(binary() | '_' | '...' | atom()).
--type dispatch_path() :: list({match_rule(), module(), any()}).
+-type bindings() :: [{atom(), binary()}].
+-type tokens() :: [binary()].
+-type match_rule() :: '_' | '*' | [binary() | '_' | '...' | atom()].
+-type dispatch_path() :: [{match_rule(), module(), any()}].
-type dispatch_rule() :: {Host::match_rule(), Path::dispatch_path()}.
--type dispatch_rules() :: list(dispatch_rule()).
+-type dispatch_rules() :: [dispatch_rule()].
-export_type([bindings/0]).
-export_type([tokens/0]).
@@ -38,60 +36,6 @@
%% API.
-%% @doc Split a hostname into a list of tokens.
--spec split_host(binary())
- -> {tokens(), binary(), undefined | inet:port_number()}.
-split_host(Host) ->
- case binary:match(Host, <<":">>) of
- nomatch ->
- {do_split_host(Host, []), Host, undefined};
- {Pos, _} ->
- << Host2:Pos/binary, _:8, Port/bits >> = Host,
- {do_split_host(Host2, []), Host2,
- list_to_integer(binary_to_list(Port))}
- end.
-
-do_split_host(Host, Acc) ->
- case binary:match(Host, <<".">>) of
- nomatch when Host =:= <<>> ->
- Acc;
- nomatch ->
- [Host|Acc];
- {Pos, _} ->
- << Segment:Pos/binary, _:8, Rest/bits >> = Host,
- false = byte_size(Segment) == 0,
- do_split_host(Rest, [Segment|Acc])
- end.
-
-%% @doc Split a path into a list of path segments.
-%%
-%% Following RFC2396, this function may return path segments containing any
-%% character, including <em>/</em> if, and only if, a <em>/</em> was escaped
-%% and part of a path segment.
--spec split_path(binary(), fun((binary()) -> binary())) ->
- {tokens(), binary(), binary()}.
-split_path(Path, URLDec) ->
- case binary:match(Path, <<"?">>) of
- nomatch ->
- {do_split_path(Path, URLDec), Path, <<>>};
- {Pos, _} ->
- << Path2:Pos/binary, _:8, Qs/bits >> = Path,
- {do_split_path(Path2, URLDec), Path2, Qs}
- end.
-
-do_split_path(<< "/", Path/bits >>, URLDec) ->
- do_split_path(Path, URLDec, []).
-do_split_path(Path, URLDec, Acc) ->
- case binary:match(Path, <<"/">>) of
- nomatch when Path =:= <<>> ->
- lists:reverse([URLDec(S) || S <- Acc]);
- nomatch ->
- lists:reverse([URLDec(S) || S <- [Path|Acc]]);
- {Pos, _} ->
- << Segment:Pos/binary, _:8, Rest/bits >> = Path,
- do_split_path(Rest, URLDec, [Segment|Acc])
- end.
-
%% @doc Match hostname tokens and path tokens against dispatch rules.
%%
%% It is typically used for matching tokens for the hostname and path of
@@ -119,47 +63,93 @@ do_split_path(Path, URLDec, Acc) ->
%% options found in the dispatch list, a key-value list of bindings and
%% the tokens that were matched by the <em>'...'</em> atom for both the
%% hostname and path.
--spec match(Host::tokens(), Path::tokens(), dispatch_rules())
+-spec match(dispatch_rules(), fun((binary()) -> binary()),
+ Host::binary() | tokens(), Path::binary())
-> {ok, module(), any(), bindings(),
HostInfo::undefined | tokens(),
PathInfo::undefined | tokens()}
| {error, notfound, host} | {error, notfound, path}.
-match(_Host, _Path, []) ->
+match([], _, _, _) ->
{error, notfound, host};
-match(_Host, Path, [{'_', PathMatchs}|_Tail]) ->
- match_path(Path, PathMatchs, [], undefined);
-match(Host, Path, [{HostMatch, PathMatchs}|Tail]) ->
- case list_match(Host, lists:reverse(HostMatch), []) of
+match([{'_', PathMatchs}|_Tail], URLDecode, _, Path) ->
+ match_path(PathMatchs, URLDecode, undefined, Path, []);
+match([{HostMatch, PathMatchs}|Tail], URLDecode, Tokens, Path)
+ when is_list(Tokens) ->
+ case list_match(Tokens, lists:reverse(HostMatch), []) of
false ->
- match(Host, Path, Tail);
- {true, HostBinds, undefined} ->
- match_path(Path, PathMatchs, HostBinds, undefined);
- {true, HostBinds, HostInfo} ->
- match_path(Path, PathMatchs, HostBinds, lists:reverse(HostInfo))
- end.
+ match(Tail, URLDecode, Tokens, Path);
+ {true, Bindings, undefined} ->
+ match_path(PathMatchs, URLDecode, undefined, Path, Bindings);
+ {true, Bindings, HostInfo} ->
+ match_path(PathMatchs, URLDecode, lists:reverse(HostInfo),
+ Path, Bindings)
+ end;
+match(Dispatch, URLDecode, Host, Path) ->
+ match(Dispatch, URLDecode, split_host(Host), Path).
--spec match_path(tokens(), dispatch_path(), bindings(),
- HostInfo::undefined | tokens())
+-spec match_path(dispatch_path(), fun((binary()) -> binary()),
+ HostInfo::undefined | tokens(), binary() | tokens(), bindings())
-> {ok, module(), any(), bindings(),
HostInfo::undefined | tokens(),
PathInfo::undefined | tokens()}
| {error, notfound, path}.
-match_path(_Path, [], _HostBinds, _HostInfo) ->
+match_path([], _, _, _, _) ->
{error, notfound, path};
-match_path(_Path, [{'_', Handler, Opts}|_Tail], HostBinds, HostInfo) ->
- {ok, Handler, Opts, HostBinds, HostInfo, undefined};
-match_path('*', [{'*', Handler, Opts}|_Tail], HostBinds, HostInfo) ->
- {ok, Handler, Opts, HostBinds, HostInfo, undefined};
-match_path(Path, [{PathMatch, Handler, Opts}|Tail], HostBinds, HostInfo) ->
- case list_match(Path, PathMatch, []) of
+match_path([{'_', Handler, Opts}|_Tail], _, HostInfo, _, Bindings) ->
+ {ok, Handler, Opts, Bindings, HostInfo, undefined};
+match_path([{'*', Handler, Opts}|_Tail], _, HostInfo, '*', Bindings) ->
+ {ok, Handler, Opts, Bindings, HostInfo, undefined};
+match_path([{PathMatch, Handler, Opts}|Tail], URLDecode, HostInfo, Tokens,
+ Bindings) when is_list(Tokens) ->
+ case list_match(Tokens, PathMatch, []) of
false ->
- match_path(Path, Tail, HostBinds, HostInfo);
+ match_path(Tail, URLDecode, HostInfo, Tokens, Bindings);
{true, PathBinds, PathInfo} ->
- {ok, Handler, Opts, HostBinds ++ PathBinds, HostInfo, PathInfo}
- end.
+ {ok, Handler, Opts, Bindings ++ PathBinds, HostInfo, PathInfo}
+ end;
+match_path(Dispatch, URLDecode, HostInfo, Path, Bindings) ->
+ match_path(Dispatch, URLDecode, HostInfo,
+ split_path(Path, URLDecode), Bindings).
%% Internal.
+%% @doc Split a hostname into a list of tokens.
+-spec split_host(binary()) -> tokens().
+split_host(Host) ->
+ split_host(Host, []).
+
+split_host(Host, Acc) ->
+ case binary:match(Host, <<".">>) of
+ nomatch when Host =:= <<>> ->
+ Acc;
+ nomatch ->
+ [Host|Acc];
+ {Pos, _} ->
+ << Segment:Pos/binary, _:8, Rest/bits >> = Host,
+ false = byte_size(Segment) == 0,
+ split_host(Rest, [Segment|Acc])
+ end.
+
+%% @doc Split a path into a list of path segments.
+%%
+%% Following RFC2396, this function may return path segments containing any
+%% character, including <em>/</em> if, and only if, a <em>/</em> was escaped
+%% and part of a path segment.
+-spec split_path(binary(), fun((binary()) -> binary())) -> tokens().
+split_path(<< $/, Path/bits >>, URLDec) ->
+ split_path(Path, URLDec, []).
+
+split_path(Path, URLDec, Acc) ->
+ case binary:match(Path, <<"/">>) of
+ nomatch when Path =:= <<>> ->
+ lists:reverse([URLDec(S) || S <- Acc]);
+ nomatch ->
+ lists:reverse([URLDec(S) || S <- [Path|Acc]]);
+ {Pos, _} ->
+ << Segment:Pos/binary, _:8, Rest/bits >> = Path,
+ split_path(Rest, URLDec, [Segment|Acc])
+ end.
+
-spec list_match(tokens(), match_rule(), bindings())
-> {true, bindings(), undefined | tokens()} | false.
%% Atom '...' matches any trailing path, stop right now.
@@ -188,64 +178,32 @@ list_match(_List, _Match, _Binds) ->
split_host_test_() ->
%% {Host, Result}
Tests = [
- {<<"">>, {[], <<"">>, undefined}},
- {<<"*">>, {[<<"*">>], <<"*">>, undefined}},
+ {<<"">>, []},
+ {<<"*">>, [<<"*">>]},
{<<"cowboy.ninenines.eu">>,
- {[<<"eu">>, <<"ninenines">>, <<"cowboy">>],
- <<"cowboy.ninenines.eu">>, undefined}},
+ [<<"eu">>, <<"ninenines">>, <<"cowboy">>]},
{<<"ninenines.eu">>,
- {[<<"eu">>, <<"ninenines">>], <<"ninenines.eu">>, undefined}},
- {<<"ninenines.eu:8080">>,
- {[<<"eu">>, <<"ninenines">>], <<"ninenines.eu">>, 8080}},
+ [<<"eu">>, <<"ninenines">>]},
{<<"a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z">>,
- {[<<"z">>, <<"y">>, <<"x">>, <<"w">>, <<"v">>, <<"u">>, <<"t">>,
- <<"s">>, <<"r">>, <<"q">>, <<"p">>, <<"o">>, <<"n">>, <<"m">>,
- <<"l">>, <<"k">>, <<"j">>, <<"i">>, <<"h">>, <<"g">>, <<"f">>,
- <<"e">>, <<"d">>, <<"c">>, <<"b">>, <<"a">>],
- <<"a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z">>,
- undefined}}
+ [<<"z">>, <<"y">>, <<"x">>, <<"w">>, <<"v">>, <<"u">>, <<"t">>,
+ <<"s">>, <<"r">>, <<"q">>, <<"p">>, <<"o">>, <<"n">>, <<"m">>,
+ <<"l">>, <<"k">>, <<"j">>, <<"i">>, <<"h">>, <<"g">>, <<"f">>,
+ <<"e">>, <<"d">>, <<"c">>, <<"b">>, <<"a">>]}
],
[{H, fun() -> R = split_host(H) end} || {H, R} <- Tests].
-split_host_fail_test_() ->
- Tests = [
- <<".........">>,
- <<"ninenines..eu">>,
- <<"ninenines.eu:owns">>,
- <<"ninenines.eu: owns">>,
- <<"ninenines.eu:42fun">>,
- <<"ninenines.eu: 42fun">>,
- <<"ninenines.eu:42 fun">>,
- <<"ninenines.eu:fun 42">>,
- <<"ninenines.eu: 42">>,
- <<":owns">>,
- <<":42 fun">>
- ],
- [{H, fun() -> case catch split_host(H) of
- {'EXIT', _Reason} -> ok
- end end} || H <- Tests].
-
split_path_test_() ->
%% {Path, Result, QueryString}
Tests = [
- {<<"/?">>, [], <<"/">>, <<"">>},
- {<<"/???">>, [], <<"/">>, <<"??">>},
- {<<"/">>, [], <<"/">>, <<"">>},
- {<<"/extend//cowboy">>, [<<"extend">>, <<>>, <<"cowboy">>],
- <<"/extend//cowboy">>, <<>>},
- {<<"/users">>, [<<"users">>], <<"/users">>, <<"">>},
- {<<"/users?">>, [<<"users">>], <<"/users">>, <<"">>},
- {<<"/users?a">>, [<<"users">>], <<"/users">>, <<"a">>},
- {<<"/users/42/friends?a=b&c=d&e=notsure?whatever">>,
- [<<"users">>, <<"42">>, <<"friends">>],
- <<"/users/42/friends">>, <<"a=b&c=d&e=notsure?whatever">>},
- {<<"/users/a+b/c%21d?e+f=g+h">>,
- [<<"users">>, <<"a b">>, <<"c!d">>],
- <<"/users/a+b/c%21d">>, <<"e+f=g+h">>}
+ {<<"/">>, []},
+ {<<"/extend//cowboy">>, [<<"extend">>, <<>>, <<"cowboy">>]},
+ {<<"/users">>, [<<"users">>]},
+ {<<"/users/42/friends">>, [<<"users">>, <<"42">>, <<"friends">>]},
+ {<<"/users/a+b/c%21d">>, [<<"users">>, <<"a b">>, <<"c!d">>]}
],
URLDecode = fun(Bin) -> cowboy_http:urldecode(Bin, crash) end,
- [{P, fun() -> {R, RawP, Qs} = split_path(P, URLDecode) end}
- || {P, R, RawP, Qs} <- Tests].
+ [{P, fun() -> R = split_path(P, URLDecode) end}
+ || {P, R} <- Tests].
match_test_() ->
Dispatch = [
@@ -270,28 +228,31 @@ match_test_() ->
],
%% {Host, Path, Result}
Tests = [
- {[<<"any">>], [], {ok, match_any, [], []}},
- {[<<"eu">>, <<"ninenines">>, <<"any">>, <<"www">>],
- [<<"users">>, <<"42">>, <<"mails">>],
+ {<<"any">>, <<"/">>, {ok, match_any, [], []}},
+ {<<"www.any.ninenines.eu">>, <<"/users/42/mails">>,
{ok, match_any_subdomain_users, [], []}},
- {[<<"eu">>, <<"ninenines">>, <<"www">>],
- [<<"users">>, <<"42">>, <<"mails">>], {ok, match_any, [], []}},
- {[<<"eu">>, <<"ninenines">>, <<"www">>], [], {ok, match_any, [], []}},
- {[<<"eu">>, <<"ninenines">>, <<"any">>, <<"www">>],
- [<<"not_users">>, <<"42">>, <<"mails">>], {error, notfound, path}},
- {[<<"eu">>, <<"ninenines">>], [], {ok, match_extend, [], []}},
- {[<<"eu">>, <<"ninenines">>], [<<"users">>, <<"42">>, <<"friends">>],
+ {<<"www.ninenines.eu">>, <<"/users/42/mails">>,
+ {ok, match_any, [], []}},
+ {<<"www.ninenines.eu">>, <<"/">>,
+ {ok, match_any, [], []}},
+ {<<"www.any.ninenines.eu">>, <<"/not_users/42/mails">>,
+ {error, notfound, path}},
+ {<<"ninenines.eu">>, <<"/">>,
+ {ok, match_extend, [], []}},
+ {<<"ninenines.eu">>, <<"/users/42/friends">>,
{ok, match_extend_users_friends, [], [{id, <<"42">>}]}},
- {[<<"fr">>, <<"erlang">>], '_',
+ {<<"erlang.fr">>, '_',
{ok, match_erlang_ext, [], [{ext, <<"fr">>}]}},
- {[<<"any">>], [<<"users">>, <<"444">>, <<"friends">>],
+ {<<"any">>, <<"/users/444/friends">>,
{ok, match_users_friends, [], [{id, <<"444">>}]}},
- {[<<"fr">>, <<"ninenines">>], [<<"threads">>, <<"987">>],
+ {<<"ninenines.fr">>, <<"/threads/987">>,
{ok, match_duplicate_vars, [we, {expect, two}, var, here],
- [{var, <<"fr">>}, {var, <<"987">>}]}}
+ [{var, <<"fr">>}, {var, <<"987">>}]}}
],
+ URLDecode = fun(Bin) -> cowboy_http:urldecode(Bin, crash) end,
[{lists:flatten(io_lib:format("~p, ~p", [H, P])), fun() ->
- {ok, Handler, Opts, Binds, undefined, undefined} = match(H, P, Dispatch)
+ {ok, Handler, Opts, Binds, undefined, undefined}
+ = match(Dispatch, URLDecode, H, P)
end} || {H, P, {ok, Handler, Opts, Binds}} <- Tests].
match_info_test_() ->
@@ -304,24 +265,22 @@ match_info_test_() ->
]}
],
Tests = [
- {[<<"eu">>, <<"ninenines">>], [],
+ {<<"ninenines.eu">>, <<"/">>,
{ok, match_any, [], [], [], undefined}},
- {[<<"eu">>, <<"ninenines">>, <<"bugs">>], [],
+ {<<"bugs.ninenines.eu">>, <<"/">>,
{ok, match_any, [], [], [<<"bugs">>], undefined}},
- {[<<"eu">>, <<"ninenines">>, <<"bugs">>, <<"cowboy">>], [],
+ {<<"cowboy.bugs.ninenines.eu">>, <<"/">>,
{ok, match_any, [], [], [<<"cowboy">>, <<"bugs">>], undefined}},
- {[<<"eu">>, <<"ninenines">>, <<"www">>],
- [<<"pathinfo">>, <<"is">>, <<"next">>],
+ {<<"www.ninenines.eu">>, <<"/pathinfo/is/next">>,
{ok, match_path, [], [], undefined, []}},
- {[<<"eu">>, <<"ninenines">>, <<"www">>],
- [<<"pathinfo">>, <<"is">>, <<"next">>, <<"path_info">>],
+ {<<"www.ninenines.eu">>, <<"/pathinfo/is/next/path_info">>,
{ok, match_path, [], [], undefined, [<<"path_info">>]}},
- {[<<"eu">>, <<"ninenines">>, <<"www">>],
- [<<"pathinfo">>, <<"is">>, <<"next">>, <<"foo">>, <<"bar">>],
+ {<<"www.ninenines.eu">>, <<"/pathinfo/is/next/foo/bar">>,
{ok, match_path, [], [], undefined, [<<"foo">>, <<"bar">>]}}
],
+ URLDecode = fun(Bin) -> cowboy_http:urldecode(Bin, crash) end,
[{lists:flatten(io_lib:format("~p, ~p", [H, P])), fun() ->
- R = match(H, P, Dispatch)
+ R = match(Dispatch, URLDecode, H, P)
end} || {H, P, R} <- Tests].
-endif.
Oops, something went wrong.

0 comments on commit b2243aa

Please sign in to comment.