Skip to content

Commit

Permalink
Suppress prefixes when rendering malicious CommonMark
Browse files Browse the repository at this point in the history
  • Loading branch information
nwellnhof committed Sep 17, 2021
1 parent 6fcf869 commit 86bbd43
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 39 deletions.
9 changes: 5 additions & 4 deletions src/commonmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@

#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
#define LIT(s) renderer->out(renderer, s, false, LITERAL)
#define CR() renderer->cr(renderer)
#define CR() renderer->cr(renderer, false)
#define CR_OPT_PREFIX() renderer->cr(renderer, true)
#define BLANKLINE() renderer->blankline(renderer)
#define ENCODED_SIZE 20
#define LISTMARKER_SIZE 20
Expand Down Expand Up @@ -338,17 +339,17 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
if (!(CMARK_OPT_HARDBREAKS & options)) {
LIT(" ");
}
CR();
CR_OPT_PREFIX();
break;

case CMARK_NODE_SOFTBREAK:
if (CMARK_OPT_HARDBREAKS & options) {
LIT(" ");
CR();
CR_OPT_PREFIX();
} else if (!renderer->no_linebreaks && renderer->width == 0 &&
!(CMARK_OPT_HARDBREAKS & options) &&
!(CMARK_OPT_NOBREAKS & options)) {
CR();
CR_OPT_PREFIX();
} else {
OUT(" ", allow_wrap, LITERAL);
}
Expand Down
2 changes: 1 addition & 1 deletion src/latex.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
#define LIT(s) renderer->out(renderer, s, false, LITERAL)
#define CR() renderer->cr(renderer)
#define CR() renderer->cr(renderer, false)
#define BLANKLINE() renderer->blankline(renderer)
#define LIST_NUMBER_STRING_SIZE 20

Expand Down
6 changes: 3 additions & 3 deletions src/man.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
#define LIT(s) renderer->out(renderer, s, false, LITERAL)
#define CR() renderer->cr(renderer)
#define CR() renderer->cr(renderer, false)
#define BLANKLINE() renderer->blankline(renderer)
#define LIST_NUMBER_SIZE 20

Expand All @@ -28,14 +28,14 @@ static void S_outc(cmark_renderer *renderer, cmark_escaping escape, int32_t c,

switch (c) {
case 46:
if (renderer->begin_line) {
if (renderer->column == 0) {
cmark_render_ascii(renderer, "\\&.");
} else {
cmark_render_code_point(renderer, c);
}
break;
case 39:
if (renderer->begin_line) {
if (renderer->column == 0) {
cmark_render_ascii(renderer, "\\&'");
} else {
cmark_render_code_point(renderer, c);
Expand Down
62 changes: 39 additions & 23 deletions src/render.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,43 @@
#include "node.h"
#include "cmark_ctype.h"

static CMARK_INLINE void S_cr(cmark_renderer *renderer) {
static CMARK_INLINE void S_cr(cmark_renderer *renderer, bool optional_prefix) {
if (renderer->need_cr < 1) {
renderer->need_cr = 1;
renderer->optional_prefix = optional_prefix;
}
}

// Request a blank line before the next output: raise the pending
// line-break count to 2 unless it is already at least that high, and
// mark the prefix that follows as not optional.
static CMARK_INLINE void S_blankline(cmark_renderer *renderer) {
  if (renderer->need_cr >= 2) {
    return;
  }

  renderer->need_cr = 2;
  renderer->optional_prefix = false;
}

// Write the current line prefix to the output buffer and set the column
// to the prefix length. If prefix suppression is active and the pending
// prefix is optional, nothing is written and the column is reset to 0.
// Also keeps a running total of emitted prefix bytes and turns on
// suppression once that total becomes excessive.
static CMARK_INLINE void S_prefix(cmark_renderer *renderer) {
  bufsize_t prefix_len;

  if (renderer->optional_prefix && renderer->suppress_prefixes) {
    renderer->column = 0;
    return;
  }

  prefix_len = renderer->prefix->size;
  cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr, prefix_len);
  // note: this assumes prefix is ascii:
  renderer->column = prefix_len;
  renderer->total_prefix_size += prefix_len;

  // Nothing more to do until a meaningful amount of prefix data has
  // been written.
  if (renderer->total_prefix_size <= 100000) {
    return;
  }

  // Suppress prefixes once the running prefix total exceeds 75% of the
  // total output size, so that output size stays linearly bounded.
  // This should only happen with malicious or fuzzed input, never with
  // real-world data.
  if (renderer->total_prefix_size > renderer->buffer->size / 4 * 3) {
    renderer->suppress_prefixes = true;
  }
}

static void S_out(cmark_renderer *renderer, const char *source, bool wrap,
cmark_escaping escape) {
int length = strlen(source);
Expand All @@ -39,23 +64,18 @@ static void S_out(cmark_renderer *renderer, const char *source, bool wrap,
} else {
cmark_strbuf_putc(renderer->buffer, '\n');
if (renderer->need_cr > 1) {
cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
renderer->prefix->size);
S_prefix(renderer);
}
}
renderer->column = 0;
renderer->last_breakable = 0;
renderer->begin_line = true;
renderer->begin_content = true;
renderer->need_cr -= 1;
}

while (i < length) {
if (renderer->begin_line) {
cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
renderer->prefix->size);
// note: this assumes prefix is ascii:
renderer->column = renderer->prefix->size;
if (renderer->column == 0) {
S_prefix(renderer);
}

len = cmark_utf8proc_iterate((const uint8_t *)source + i, length - i, &c);
Expand All @@ -64,11 +84,10 @@ static void S_out(cmark_renderer *renderer, const char *source, bool wrap,
}
nextc = source[i + len];
if (c == 32 && wrap) {
if (!renderer->begin_line) {
if (renderer->column > 0) {
last_nonspace = renderer->buffer->size;
cmark_strbuf_putc(renderer->buffer, ' ');
renderer->column += 1;
renderer->begin_line = false;
renderer->begin_content = false;
// skip following spaces
while (source[i + 1] == ' ') {
Expand All @@ -85,12 +104,10 @@ static void S_out(cmark_renderer *renderer, const char *source, bool wrap,
if (c == 10) {
cmark_strbuf_putc(renderer->buffer, '\n');
renderer->column = 0;
renderer->begin_line = true;
renderer->begin_content = true;
renderer->last_breakable = 0;
} else {
cmark_render_code_point(renderer, c);
renderer->begin_line = false;
// we don't set 'begin_content' to false til we've
// finished parsing a digit. Reason: in commonmark
// we need to escape a potential list marker after
Expand All @@ -100,15 +117,14 @@ static void S_out(cmark_renderer *renderer, const char *source, bool wrap,
}
} else {
(renderer->outc)(renderer, escape, c, nextc);
renderer->begin_line = false;
renderer->begin_content =
renderer->begin_content && cmark_isdigit(c) == 1;
}

// If adding the character went beyond width, look for an
// earlier place where the line could be broken:
if (renderer->width > 0 && renderer->column > renderer->width &&
!renderer->begin_line && renderer->last_breakable > 0) {
renderer->last_breakable > 0) {

// copy from last_breakable to remainder
unsigned char *src = renderer->buffer->ptr +
Expand All @@ -122,13 +138,12 @@ static void S_out(cmark_renderer *renderer, const char *source, bool wrap,
cmark_strbuf_truncate(renderer->buffer, renderer->last_breakable);
// add newline, prefix, and remainder
cmark_strbuf_putc(renderer->buffer, '\n');
cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
renderer->prefix->size);
renderer->optional_prefix = true;
S_prefix(renderer);
cmark_strbuf_put(renderer->buffer, remainder, remainder_len);
renderer->column = renderer->prefix->size + remainder_len;
renderer->column += remainder_len;
renderer->mem->free(remainder);
renderer->last_breakable = 0;
renderer->begin_line = false;
renderer->begin_content = false;
}

Expand Down Expand Up @@ -163,10 +178,11 @@ char *cmark_render(cmark_node *root, int options, int width,
cmark_iter *iter = cmark_iter_new(root);

cmark_renderer renderer = {options,
mem, &buf, &pref, 0, width,
0, 0, true, true, false,
false, NULL,
outc, S_cr, S_blankline, S_out};
mem, &buf, &pref,
0, width, 0, 0, 0,
true, false, false, false, false,
NULL, outc,
S_cr, S_blankline, S_out};

while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
cur = cmark_iter_get_node(iter);
Expand Down
6 changes: 4 additions & 2 deletions src/render.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@ struct cmark_renderer {
int width;
int need_cr;
bufsize_t last_breakable;
bool begin_line;
bufsize_t total_prefix_size;
bool begin_content;
bool no_linebreaks;
bool in_tight_list_item;
bool suppress_prefixes;
bool optional_prefix;
struct block_number *block_number_in_list_item;
void (*outc)(struct cmark_renderer *, cmark_escaping, int32_t, unsigned char);
void (*cr)(struct cmark_renderer *);
void (*cr)(struct cmark_renderer *, bool optional_prefix);
void (*blankline)(struct cmark_renderer *);
void (*out)(struct cmark_renderer *, const char *, bool, cmark_escaping);
};
Expand Down
25 changes: 19 additions & 6 deletions test/pathological_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,18 +109,31 @@ def badhash(ref):
# re.compile("(\[0\] ){4999}"))
}

# Pathological inputs that are rendered with cmark.to_commonmark() instead
# of cmark.to_html(). Each entry maps a description to a tuple of
# (input text, compiled regex); the regex is presumably what the rendered
# output is expected to match -- confirm against run_pathological_test.
pathological_cmark = {
# 20000 nested list markers followed by 20000 lines of "a".
"lazy list":
("- " * 20000 + "a\n" * 20000,
re.compile("(( - )*a\n)+")),
# 40000 blockquote markers followed by 20000 lines of "a".
"lazy blockquote":
(">" * 40000 + "a\n" * 20000,
re.compile("((> )*a\n)+")),
}

# NOTE(review): '/s+/' looks like a Perl-style regex literal. As written it
# matches a literal '/', one or more 's' characters, then another '/', not
# runs of whitespace. If whitespace was intended the pattern would be
# r'\s+' -- confirm against the call sites before changing it.
whitespace_re = re.compile('/s+/')

results = {'passed': [], 'errored': [], 'failed': [], 'ignored': []}

def run_pathological_test(description, results):
(inp, regex) = pathological[description]
[rc, actual, err] = cmark.to_html(inp)
if description in pathological:
(inp, regex) = pathological[description]
[rc, actual, err] = cmark.to_html(inp)
else:
(inp, regex) = pathological_cmark[description]
[rc, actual, err] = cmark.to_commonmark(inp)
extra = ""
if rc != 0:
print(description, '[ERRORED (return code %d)]' %rc)
print(err)
if allowed_failures[description]:
if description in allowed_failures:
results['ignored'].append(description)
else:
results['errored'].append(description)
Expand All @@ -130,14 +143,14 @@ def run_pathological_test(description, results):
else:
print(description, '[FAILED]')
print(repr(actual))
if allowed_failures[description]:
if description in allowed_failures:
results['ignored'].append(description)
else:
results['failed'].append(description)

def run_tests():
print("Testing pathological cases:")
for description in pathological:
for description in (*pathological, *pathological_cmark):
p = multiprocessing.Process(target=run_pathological_test,
args=(description, results,))
p.start()
Expand All @@ -146,7 +159,7 @@ def run_tests():
# kill it if still active
if p.is_alive():
print(description, '[TIMEOUT]')
if allowed_failures[description]:
if description in allowed_failures:
results['ignored'].append(description)
else:
results['errored'].append(description)
Expand Down

0 comments on commit 86bbd43

Please sign in to comment.