Skip to content

Commit

Permalink
[Text::Markdown] added module
Browse files Browse the repository at this point in the history
  • Loading branch information
Carl Masak committed Apr 21, 2012
0 parents commit f269131
Showing 1 changed file with 154 additions and 0 deletions.
154 changes: 154 additions & 0 deletions lib/Text/Markdown.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
module Text::Markdown;

class Text {
has $.text;
has @.children;
}

class TSpan {
has $.text;
has $.font-style = '';
has $.font-weight = '';
has $.font-family = '';
}

my %g_escape_table = <
\\ 28d397e87306b8631f3ed80d858d35f0
] 0fbd1776e1ad22c59a7080d35c7fd4db
{ f95b70fdc3088560732a5ac135644506
# 01abfc750a0c942167651c40d088531d
\> cedf8da05466bb54708268b3c694a78f
* 3389dae361af79b04c9c8e7057f60cc6
_ b14a7b8059d9c055954c92674ce60032
+ 26b17225b626fb9238849fd60eabdf60
- 336d5ebc5436534e61d16e63ddfca327
) 9371d7a2e3ae86a00aab4771e39d255d
. 5058f1af8388633f609cadb75a75dc9d
[ 815417267f76f6f460a4a61f9db75fdb
( 84c40473414caf2ed4a7b1283e48bbf4
` 833344d5e1432da82ef02e1301477ce8
} cbb184dd8e05c9709e5dcaedaa0495cf
! 9033e0e305f247c0c3c80d0c7848c8b3
>;

sub _EncodeCode($_ is copy) {
#
# Encode/escape certain characters inside Markdown code runs.
# The point is that in code, these characters are literals,
# and lose their special Markdown meanings.
#
# Encode all ampersands; HTML entities are not
# entities within a Markdown code span.
s:g['&'] = '&amp';

# Do the angle bracket song and dance:
s:g['<'] = '&lt;';
s:g['>'] = '&gt;';

# Now, escape characters that are magic in Markdown:
s:g[ '*' ] = %g_escape_table<*>;
s:g[ '_' ] = %g_escape_table<_>;
s:g[ '{' ] = %g_escape_table<{>;
s:g[ '}' ] = %g_escape_table<}>;
s:g[ '[' ] = %g_escape_table<[>;
s:g[ ']' ] = %g_escape_table<]>;
s:g[ '\\' ] = %g_escape_table<\\>;

return $_;
}

sub _DoCodeSpans($text is copy) {
#
# * Backtick quotes are used for <code></code> spans.
#
# * You can use multiple backticks as the delimiters if you want to
# include literal backticks in the code span. So, this input:
#
# Just type ``foo `bar` baz`` at the prompt.
#
# Will translate to:
#
# <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
#
# There's no arbitrary limit to the number of backticks you
# can use as delimters. If you need three consecutive backticks
# in your code, use four for delimiters, etc.
#
# * You can use spaces to get literal backticks at the edges:
#
# ... type `` `bar` `` ...
#
# Turns to:
#
# ... type <code>`bar`</code> ...
#

$text ~~ s:g[ ('`'+) (.+?) <!after '`'> $0 <!before '`'> ] = (
my $c = $1.trim;
$c = _EncodeCode($c);
"<code>{$c}</code>"
);

return $text;
}

sub _DoItalicsAndBold($text is copy) {
# <strong> must go first:
$text ~~ s:g[ ('**'||'__') <?before \S> (.+?<[*_]>*) <?after \S> $0 ]
= "<strong>{$1}</strong>";

$text ~~ s:g[ ('*'||'_') <?before \S> (.+?) <?after \S> $0 ]
= "<em>{$1}</em>";

return $text;
}

sub _UnescapeSpecialChars($text) {
#
# Swap back in all the special characters we've hidden.
#
return $text.trans( [%g_escape_table.values] => [%g_escape_table.keys] );
}

sub extract_tspans($text) {
# XXX the below regex wouldn't work for e.g. <b><em><b>foo</b></em></b>
gather for $text.split(/'<'(\w+)'>'.*?'</'$0'>'/, :all) -> $normal, $taggy? {
take TSpan.new(:text($normal));
if $taggy {
# XXX highly specialized but works for our immediate purposes
$taggy.Str ~~ /^ ['<'(\w+)'>']+ (.*?) ['</'\w+'>']+ $/;
my @tags = $0».Str;
my $contents = $1;
my %attrs;
if any(@tags) eq 'em' { %attrs<font-style> = 'italics' }
if any(@tags) eq 'strong' { %attrs<font-weight> = 'bold' }
if any(@tags) eq 'code' { %attrs<font-family> = 'monospace' }
take TSpan.new(:text($contents), |%attrs);
}
}
}

grammar Markdown {
token TOP {
^ <paragraph>* % [\n\n+] $
{ make $<paragraph>».ast }
}

token paragraph {
[<!before \n\n> .]+
{
my $text = ~$/;
$text = _DoCodeSpans($text);
$text = _DoItalicsAndBold($text);
$text = _UnescapeSpecialChars($text);

my @children = extract_tspans($text);

make Text.new(:$text, :@children);
}
}
}

sub parse-markdown(Cool $text) is export {
Markdown.parse($text).ast.list;
}

0 comments on commit f269131

Please sign in to comment.