Skip to content

Commit

Permalink
create <html> if we see an HTML5 doctype
Browse files Browse the repository at this point in the history
  • Loading branch information
etrepum committed May 6, 2013
1 parent 13f9316 commit fa0b40e
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 11 deletions.
32 changes: 21 additions & 11 deletions src/mochiweb_html.erl
Expand Up @@ -5,13 +5,14 @@
-module(mochiweb_html).
-export([tokens/1, parse/1, parse_tokens/1, to_tokens/1, escape/1,
escape_attr/1, to_html/1]).
-compile([export_all]).
-ifdef(TEST).
-export([destack/1, destack/2, is_singleton/1]).
-endif.

%% Character macros for the double and single quote. They exist to
%% placate syntax highlighters: the trailing comment on each line repeats
%% the quote character so that highlighters which do not understand the
%% `$\"' character literal still see balanced quotes.
%% Each macro is defined exactly once; a second -define of the same name
%% is rejected by the preprocessor as a redefinition.
-define(QUOTE, $\"). %% $\"
-define(SQUOTE, $\'). %% $\'
%% ADV_COL(S, N): returns the #decoder{} record S with N added to both
%% its `column' and `offset' fields; all other fields are left as they are.
-define(ADV_COL(S, N),
S#decoder{column=N+S#decoder.column,
offset=N+S#decoder.offset}).
Expand Down Expand Up @@ -66,18 +67,25 @@ parse(Input) ->
%% @doc Transform the output of tokens(Doc) into a HTML tree.
%%
%% Tokens that precede the first non-self-closing start tag (doctype,
%% processing instructions, ...) are discarded by find_document/2. When a
%% `{doctype, [<<"html">>]}' token was seen and the first start tag is not
%% `<html>', find_document/2 returns the tokens with a synthesized
%% `<html>' start tag in front, which then becomes the root of the tree.
%%
%% Fails with `badmatch' if Tokens contains no non-self-closing start tag,
%% because find_document/2 returns `[]' in that case.
parse_tokens(Tokens) when is_list(Tokens) ->
    %% Skip over doctype, processing instructions
    [{start_tag, Tag, Attrs, false} | Rest] = find_document(Tokens, normal),
    {Tree, _} = tree(Rest, [norm({Tag, Attrs})]),
    Tree.

%% Locate the start of the document in a token list.
%%
%% Walks the tokens, discarding everything up to the first
%% non-self-closing start tag. Mode starts out as whatever the caller
%% passes and switches to `html5' once a `{doctype, [<<"html">>]}' token
%% has been consumed. On reaching the first start tag, the remaining
%% tokens (start tag included) are handed to maybe_add_html_tag/2 together
%% with the current mode. Returns `[]' when no such start tag exists.
find_document([], _Mode) ->
    [];
find_document([{doctype, [<<"html">>]} | Tail], _Mode) ->
    %% HTML5 doctype seen: remember it for the rest of the scan.
    find_document(Tail, html5);
find_document([{start_tag, _, _, false} | _] = FromStartTag, Mode) ->
    maybe_add_html_tag(FromStartTag, Mode);
find_document([_Skipped | Tail], Mode) ->
    find_document(Tail, Mode).

%% Prepend an implicit `<html>' start tag when required.
%%
%% In `html5' mode, if the token list begins with a non-self-closing start
%% tag other than `<html>', a `{start_tag, <<"html">>, [], false}' token
%% is put in front of it. In every other case the tokens are returned
%% unchanged.
maybe_add_html_tag(Tokens, Mode) ->
    case {Mode, Tokens} of
        {html5, [{start_tag, Tag, _Attrs, false} | _]} when Tag =/= <<"html">> ->
            [{start_tag, <<"html">>, [], false} | Tokens];
        _ ->
            Tokens
    end.

%% @spec tokens(StringOrBinary) -> [html_token()]
%% @doc Transform the input UTF-8 HTML into a token stream.
tokens(Input) ->
Expand Down Expand Up @@ -302,6 +310,8 @@ tokenize(B, S=#decoder{offset=O}) ->
case B of
<<_:O/binary, "<!--", _/binary>> ->
tokenize_comment(B, ?ADV_COL(S, 4));
<<_:O/binary, "<!doctype", _/binary>> ->
tokenize_doctype(B, ?ADV_COL(S, 10));
<<_:O/binary, "<!DOCTYPE", _/binary>> ->
tokenize_doctype(B, ?ADV_COL(S, 10));
<<_:O/binary, "<![CDATA[", _/binary>> ->
Expand Down
9 changes: 9 additions & 0 deletions test/mochiweb_html_tests.erl
Expand Up @@ -571,6 +571,15 @@ parse_unescaped_lt_test() ->
[<<"Back">>]}]},
mochiweb_html:parse(D2)).

%% A lower-case `<!doctype html>' declaration must be tokenized as a
%% doctype token, followed by the start/end tags of the elements after it.
html5_doctype_test() ->
    Input = "<!doctype html><head></head><body></body>",
    Expected = [{doctype,[<<"html">>]},
                {start_tag,<<"head">>,[],false},
                {end_tag,<<"head">>},
                {start_tag,<<"body">>,[],false},
                {end_tag,<<"body">>}],
    ?assertEqual(Expected, mochiweb_html:tokens(Input)).

implicit_html_test() ->
%% https://github.com/mochi/mochiweb/issues/110
?assertEqual(
Expand Down

0 comments on commit fa0b40e

Please sign in to comment.