Browse files

Add cowboy_bstr:capitalize_token/1

For optional header name capitalization. See the guide section about it.
  • Loading branch information...
1 parent cd68070 commit 1b996794eedbfee87997cde8d05d8fae9548094a @essen essen committed Jan 17, 2013
Showing with 71 additions and 3 deletions.
  1. +24 −0 guide/internals.md
  2. +1 −0 guide/toc.md
  3. +46 −3 src/cowboy_bstr.erl
View
24 guide/internals.md
@@ -6,6 +6,30 @@ Architecture
@todo Describe.
+Lowercase header names
+----------------------
+
+For consistency, Cowboy converts all header names to lowercase binary
+strings. This prevents the programmer from making case mistakes, and
+is possible because header names are case insensitive.
+
+This works fine for the large majority of clients. However, some badly
+implemented clients, especially ones found in corporate code or closed
+source products, may not handle header names in a case insensitive manner.
+This means that when Cowboy returns lowercase header names, these clients
+will not find the headers they are looking for.
+
+A simple way to solve this is to create an `onresponse` hook that will
+format the header names with the expected case.
+
+``` erlang
+capitalize_hook(Status, Headers, Body, Req) ->
+ %% Rewrite every header name into its capitalized form; values are untouched.
+ Headers2 = [{cowboy_bstr:capitalize_token(N), V}
+ || {N, V} <- Headers],
+ %% Reply with the original status and body, but with the re-cased headers.
+ {ok, Req2} = cowboy_req:reply(Status, Headers2, Body, Req),
+ Req2.
+```
+
Efficiency considerations
-------------------------
View
1 guide/toc.md
@@ -51,4 +51,5 @@ Cowboy User Guide
* Handler middleware
* [Internals](internals.md)
* Architecture
+ * Lowercase header names
* Efficiency considerations
View
49 src/cowboy_bstr.erl
@@ -16,16 +16,40 @@
-module(cowboy_bstr).
%% Binary strings.
+-export([capitalize_token/1]).
-export([to_lower/1]).
%% Characters.
-export([char_to_lower/1]).
-export([char_to_upper/1]).
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+-endif.
+
+%% @doc Capitalize a token.
+%%
+%% The first character, and every character directly following a dash,
+%% is converted to uppercase; every other character is converted to
+%% lowercase. This is the form used for header names in the HTTP/1.1
+%% RFC and others. Header names are case insensitive, so this form is
+%% never required; it is provided only for use with badly implemented
+%% clients that expect it.
+-spec capitalize_token(B) -> B when B::binary().
+capitalize_token(Token) ->
+ capitalize_token(Token, true, <<>>).
+
+%% The second argument is true when the next non-dash character must be
+%% uppercased (start of the token, or right after a dash).
+capitalize_token(<< $-, Tail/bits >>, _, Done) ->
+ capitalize_token(Tail, true, << Done/binary, $- >>);
+capitalize_token(<< Char, Tail/bits >>, true, Done) ->
+ Upper = char_to_upper(Char),
+ capitalize_token(Tail, false, << Done/binary, Upper >>);
+capitalize_token(<< Char, Tail/bits >>, false, Done) ->
+ Lower = char_to_lower(Char),
+ capitalize_token(Tail, false, << Done/binary, Lower >>);
+capitalize_token(<<>>, _, Done) ->
+ Done.
+
%% @doc Convert a binary string to lowercase.
--spec to_lower(binary()) -> binary().
-to_lower(L) ->
- << << (char_to_lower(C)) >> || << C >> <= L >>.

JFYI:

to_lower(Bin) -> << <<(if C >= $A andalso C =< $Z -> C bor 2#00100000; true -> C end)/integer>> || <<C>> <= Bin >>.

@essen
Nine Nines member
essen added a note Jan 23, 2013

Quick testing shows it to be about the same or slightly slower. The optimization the current code performs is based on the observation that C >= $A andalso C =< $Z is slow and direct pattern match in a function clause is faster. So you may avoid the function call but at the same time the check is slower.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
+-spec to_lower(B) -> B when B::binary().
+to_lower(Bin) ->
+ %% Run each byte through char_to_lower/1 and reassemble the binary.
+ << << (char_to_lower(Byte)) >> || << Byte >> <= Bin >>.
%% @doc Convert [A-Z] characters to lowercase.
%% @end
@@ -88,3 +112,22 @@ char_to_upper($x) -> $X;
char_to_upper($y) -> $Y;
char_to_upper($z) -> $Z;
char_to_upper(Ch) -> Ch.
+
+%% Tests.
+
+-ifdef(TEST).
+
+capitalize_token_test_() ->
+ %% Each case pairs an input token with its expected capitalized form.
+ Cases = [
+ {<<"heLLo-woRld">>, <<"Hello-World">>},
+ {<<"Sec-Websocket-Version">>, <<"Sec-Websocket-Version">>},
+ {<<"Sec-WebSocket-Version">>, <<"Sec-Websocket-Version">>},
+ {<<"sec-websocket-version">>, <<"Sec-Websocket-Version">>},
+ {<<"SEC-WEBSOCKET-VERSION">>, <<"Sec-Websocket-Version">>},
+ {<<"Sec-WebSocket--Version">>, <<"Sec-Websocket--Version">>},
+ {<<"Sec-WebSocket---Version">>, <<"Sec-Websocket---Version">>}
+ ],
+ %% One test per case, titled with its input; matching against the
+ %% already-bound Expected makes a wrong result fail the test.
+ [{Input, fun() -> Expected = capitalize_token(Input) end}
+ || {Input, Expected} <- Cases].
+
+-endif.

1 comment on commit 1b99679

@rambocoder

+1 I like

Please sign in to comment.