Skip to content

Commit

Permalink
Safer handling of code block languages
Browse files Browse the repository at this point in the history
  • Loading branch information
vmg committed Apr 22, 2011
1 parent e328f33 commit 3fe7a2c
Show file tree
Hide file tree
Showing 8 changed files with 90 additions and 29 deletions.
15 changes: 13 additions & 2 deletions ext/markdown.c
Expand Up @@ -24,6 +24,7 @@
#include <string.h>
#include <strings.h> /* for strncasecmp */
#include <ctype.h>
#include <stdio.h>

#define TEXT_UNIT 64 /* unit for the copy of the input buffer */
#define WORK_UNIT 64 /* block-level working buffer */
Expand Down Expand Up @@ -717,7 +718,8 @@ char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t
link_b = i;

/* looking for link end: ' " ) */
while (i < size && data[i] != '\'' && data[i] != '"' && data[i] != ')')
while (i < size && data[i] != '\'' && data[i] != '"' &&
(data[i] != ')' || data[i - 1] == '\\'))
i++;

if (i >= size) goto cleanup;
Expand All @@ -728,7 +730,7 @@ char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t
i++;
title_b = i;

while (i < size && data[i] != ')') i++;
while (i < size && (data[i] != ')' || data[i - 1] == '\\')) i++;
if (i >= size) goto cleanup;

/* skipping whitespaces after title */
Expand Down Expand Up @@ -958,6 +960,15 @@ is_codefence(char *data, size_t size, struct buf *syntax)
if (i == size || data[i] != '}')
return 0;

/* strip all whitespace at the beginning and the end
* of the {} block */
while (syn > 0 && isspace(syntax->data[0])) {
syntax->data++; syn--;
}

while (syn > 0 && isspace(syntax->data[syn - 1]))
syn--;

i++;
} else {
while (i < size && !isspace(data[i])) {
Expand Down
3 changes: 3 additions & 0 deletions ext/redcarpet.c
Expand Up @@ -49,6 +49,9 @@ static void rb_redcarpet__get_flags(VALUE ruby_obj,
if (rb_funcall(ruby_obj, rb_intern("hard_wrap"), 0) == Qtrue)
render_flags |= XHTML_HARD_WRAP;

if (rb_funcall(ruby_obj, rb_intern("gh_blockcode"), 0) == Qtrue)
render_flags |= XHTML_GITHUB_BLOCKCODE;

/**
* Markdown extensions -- all disabled by default
*/
Expand Down
77 changes: 54 additions & 23 deletions ext/xhtml.c
Expand Up @@ -136,38 +136,66 @@ rndr_autolink(struct buf *ob, struct buf *link, enum mkd_autolink type, void *op
static void
rndr_blockcode(struct buf *ob, struct buf *text, struct buf *lang, void *opaque)
{
static char *sh_lang = "bash";
struct buf lang_shebang = {0, 0, 0, 0, 0};

if (ob->size) bufputc(ob, '\n');

/*
* Try to guess the language based on the shebang
*/
if (lang == NULL && text != NULL && text->size > 2) {
if (bufprefix(text, "#!/usr/bin/env ") == 0) {
size_t i = STRLEN("#!/usr/bin/env ");
if (lang && lang->size) {
size_t i = 0;
BUFPUTSL(ob, "<pre><code class=\"");

lang_shebang.data = text->data + i;
while (i < text->size && !isspace(text->data[i])) {
i++; lang_shebang.size++;
}
for (i = 0; i < lang->size; ++i) {
if (lang->data[i] == '.' && (i == 0 || isspace(lang->data[i - 1])))
continue;

lang = &lang_shebang;
} else if (bufprefix(text, "#!/bin/sh") == 0 && isspace(text->data[STRLEN("#!/bin/sh")])) {
lang_shebang.data = sh_lang;
lang_shebang.size = strlen(sh_lang);
lang = &lang_shebang;
bufputc(ob, lang->data[i]);
}
}

BUFPUTSL(ob, "\">");
} else
BUFPUTSL(ob, "<pre><code>");

if (text)
lus_attr_escape(ob, text->data, text->size);

BUFPUTSL(ob, "</code></pre>\n");
}

/*
* GitHub style code block:
*
* <pre lang="LANG"><code>
* ...
* </code></pre>
*
* Unlike other parsers, we store the language identifier in the <pre>,
* and don't let the user generate custom classes.
*
* The language identifier in the <pre> block gets postprocessed and all
* the code inside gets syntax highlighted with Pygments. This is much safer
* than letting the user specify a CSS class for highlighting.
*
* Note that we only generate HTML for the first specifier.
* E.g.
* ~~~~ {.python .numbered} => <pre lang="python"><code>
*/
static void
rndr_blockcode_github(struct buf *ob, struct buf *text, struct buf *lang, void *opaque)
{
if (ob->size) bufputc(ob, '\n');

if (lang && lang->size) {
BUFPUTSL(ob, "<pre><code class=\"");
size_t i = 0;
BUFPUTSL(ob, "<pre lang=\"");

for (; i < lang->size; ++i)
if (isspace(lang->data[i]))
break;

if (lang->data[0] == '.')
bufput(ob, lang->data + 1, lang->size - 1);
bufput(ob, lang->data + 1, i - 1);
else
bufput(ob, lang->data, lang->size);
BUFPUTSL(ob, "\">");
bufput(ob, lang->data, i);

BUFPUTSL(ob, "\"><code>");
} else
BUFPUTSL(ob, "<pre><code>");

Expand Down Expand Up @@ -745,6 +773,9 @@ ups_xhtml_renderer(struct mkd_renderer *renderer, unsigned int render_flags)

if (render_flags & XHTML_SMARTYPANTS)
renderer->normal_text = rndr_smartypants;

if (render_flags & XHTML_GITHUB_BLOCKCODE)
renderer->blockcode = rndr_blockcode_github;
}

void
Expand Down
1 change: 1 addition & 0 deletions ext/xhtml.h
Expand Up @@ -27,6 +27,7 @@ typedef enum {
XHTML_SAFELINK = (1 << 7),
XHTML_TOC = (1 << 8),
XHTML_HARD_WRAP = (1 << 9),
XHTML_GITHUB_BLOCKCODE = (1 << 10),
} render_mode;

extern void
Expand Down
5 changes: 4 additions & 1 deletion lib/redcarpet.rb
Expand Up @@ -26,7 +26,7 @@
# end
#
class Redcarpet
VERSION = '1.10.1'
VERSION = '1.11.0'

# Original Markdown formatted text.
attr_reader :text
Expand All @@ -52,6 +52,9 @@ class Redcarpet
# Disable superscript and relaxed emphasis processing.
attr_accessor :strict

# Generate safer HTML for code blocks (no custom CSS classes)
attr_accessor :gh_blockcode

# Don't make hyperlinks from <tt>[][]</tt> links that have unknown URL types.
attr_accessor :safelink

Expand Down
4 changes: 2 additions & 2 deletions redcarpet.gemspec
@@ -1,9 +1,9 @@
Gem::Specification.new do |s|
s.name = 'redcarpet'
s.version = '1.10.1'
s.version = '1.11.0'
s.summary = "Ruby bindings for libupskirt"
s.description = 'A fast and safe Markdown to (X)HTML parser'
s.date = '2011-04-22'
s.date = '2011-04-23'
s.email = 'vicent@github.com'
s.homepage = 'http://github.com/tanoku/redcarpet'
s.has_rdoc = true
Expand Down
12 changes: 12 additions & 0 deletions test/redcarpet_test.rb
Expand Up @@ -186,6 +186,18 @@ def test_that_fenced_flag_works
assert Redcarpet.new(text, :fenced_code).to_html =~ /<code/
end

# Checks the :gh_blockcode option against a fenced code block whose
# info string carries two specifiers ({.python .numbered}).
def test_that_gh_blockcode_works
text = <<fenced
~~~~~ {.python .numbered}
This is some unsafe code block
with custom CSS classes
~~~~~
fenced

# Without :gh_blockcode the rendered HTML is expected to contain a
# `<code class` attribute; with :gh_blockcode it must not.
assert Redcarpet.new(text, :fenced_code).to_html =~ /<code class/
assert Redcarpet.new(text, :fenced_code, :gh_blockcode).to_html !~ /<code class/
end

def test_that_compat_is_working
rd = RedcarpetCompat.new(<<EOS)
aaa | bbbb
Expand Down
2 changes: 1 addition & 1 deletion upskirt
Submodule upskirt updated from f3fcab to ba88cb

0 comments on commit 3fe7a2c

Please sign in to comment.