From 46b7688503364e4b49f9b03dc59d94cb334e6bbe Mon Sep 17 00:00:00 2001
From: Seth Falcon <seth@opscode.com>
Date: Sat, 2 Feb 2013 21:19:25 -0800
Subject: [PATCH] Handle explicit code blocks separately from edoc generated
 blocks

Explicit code blocks (e.g. ```CODE''' in edoc) use an intermediate
'pre' tag and are now rendered using Github Flavored Markdown "fenced"
code block style. Using the markdown code block as opposed to 'pre'
tags avoids needing to escape content in the block and slightly
improves the readability of the generated markdown.

The edoc generated function and type spec descriptions use a 'pre_pre'
tag. The code now assumes that the pre_pre elements contain code with
anchor tags for linking, but no other HTML tags. Special processing is
added to wrap the 'pre_pre' blocks in '<pre><code>' tags and to
translate angle brackets in the body of such tags to the appropriate
HTML entity (&gt;, &lt;).

This patch also adjusts newline behavior for generated markdown.
Paragraphs now begin/end with a single \n. As a result, the formatting
of ol and ul lists in markdown is improved.
---
 src/edown_xmerl.erl | 58 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 47 insertions(+), 11 deletions(-)
diff --git a/src/edown_xmerl.erl b/src/edown_xmerl.erl
index 4661978..6db416e 100644
--- a/src/edown_xmerl.erl
+++ b/src/edown_xmerl.erl
@@ -72,13 +72,6 @@ brstrip(Str) -> re:replace(Str, "\\s+\\s\$", "", [global, multiline]).
 %% for an example. (By default, we always generate the end tag, to make
 %% sure that the scope of a markup is not extended by mistake.)
 
-'#element#'('pre_pre', Data, Attrs, Parents, E) ->
-    case re:run(Data, "<a href=", []) of
-	{match, _} ->
-	    '#element#'(pre, Data, Attrs, Parents, E);
-	nomatch ->
-	    '#element#'(pre, escape_pre(Data), Attrs, Parents, E)
-    end;
 '#element#'('esc_tt', Data, Attrs, Parents, E) ->
     case within_html(Parents) of
 	true ->
@@ -86,8 +79,6 @@ brstrip(Str) -> re:replace(Str, "\\s+\\s\$", "", [global, multiline]).
 	false ->
 	    '#element#'(tt, Data, Attrs, Parents, E)
     end;
-'#element#'('pre', Data, Attrs, Parents, E) ->
-    xmerl_html:'#element#'('pre', Data, Attrs, Parents, E);
 '#element#'('div', Data, _, _Parents, _E) ->
     %% special case - we use 'div' to enforce html encoding
     Data;
@@ -120,12 +111,44 @@ elem(Tag, Data, Attrs, Parents, E) ->
 escape_pre(Data) ->
     re:replace(re:replace(Data, "<", "\\&lt;", [global]), ">", "\\&gt;", [global]).
 
+%% Given content of a pre tag in `Data', entity escape angle brackets
+%% but leave anchor tags alone. This is less than pretty, but is
+%% useful for processing function descriptions with embedded links for
+%% type definitions.
+escape_pre_with_a(Data) ->
+    escape_pre_with_a(lists:flatten(Data), [], out).
+
+escape_pre_with_a([$<, $a | Rest], Acc, out) ->
+    escape_pre_with_a(Rest, ["<a" | Acc], in);
+escape_pre_with_a([$<, $/, $a, $> | Rest], Acc, in) ->
+    escape_pre_with_a(Rest, ["</a>"|Acc], out);
+escape_pre_with_a([C|Rest], Acc, in) ->
+    escape_pre_with_a(Rest, [C|Acc], in);
+escape_pre_with_a([$<|Rest], Acc, out) ->
+    escape_pre_with_a(Rest, ["&lt;"|Acc], out);
+escape_pre_with_a([$> | Rest], Acc, out) ->
+    escape_pre_with_a(Rest, ["&gt;"|Acc], out);
+escape_pre_with_a([C|Rest], Acc, out) ->
+    escape_pre_with_a(Rest, [C|Acc], out);
+escape_pre_with_a([], Acc, _) ->
+    lists:reverse(Acc).
+
 alias_for(Tag, Data, Attrs, Parents, E) ->
     xmerl_html:'#element#'(Tag, Data, Attrs, Parents, E#xmlElement{name = Tag}).
 
 html_elem(Tag, Data, Attrs, Parents, E) ->
     HTML = fun() ->
-		   xmerl_html:'#element#'(Tag, Data, Attrs, Parents, E)
+                   {Tag1, Data1} = case Tag of
+                                       pre_pre ->
+                                           %% If pre_pre is
+                                           %% encountered within other
+                                           %% HTML markup, just use
+                                           %% code, no pre.
+                                           {code, escape_pre_with_a(Data)};
+                                       T ->
+                                           {T, Data}
+                                   end,
+		   xmerl_html:'#element#'(Tag1, Data1, Attrs, Parents, E)
 	   end,
     case within_html(Parents) of
 	true ->
@@ -169,7 +192,7 @@ md_elem(Tag, Data, Attrs, Parents, E) ->
 	'div' -> Data;
 	ul    -> Data;
 	ol    -> Data;
-	p     -> ["\n\n", Data];
+	p     -> ["\n", Data, "\n"];
 	b     -> ["__", no_nl(Data), "__"];
 	em    -> ["_", no_nl(Data), "_"];
 	i     -> ["_", no_nl(Data), "_"];
@@ -195,6 +218,19 @@ md_elem(Tag, Data, Attrs, Parents, E) ->
 	h4 -> ["\n\n#### ", no_nl(Data), " ####\n"];
 	hr -> "---------\n";
 	head -> [];
+        pre_pre ->
+            %% markdown expects inline block-level HTML elements to be
+            %% separated from content by blank lines see
+            %% http://daringfireball.net/projects/markdown/syntax.
+            ["\n<pre><code>\n", escape_pre_with_a(Data), "\n</code></pre>\n"];
+        pre ->
+            %% github flavored markdown "fenced" code block. The
+            %% advantage of rendering literal blocks this way is that
+            %% the resulting markdown will be closer to how a human
+            %% would have written it and markdown rendering will take
+            %% care of entity escaping so that a code block describing
+            %% XML or HTML will get rendered properly.
+            ["\n```\n", Data, "\n```\n"];
 	_ ->
 		    ["\n",
 		     xmerl_lib:start_tag(Tag,Attrs),