Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce an overall limit on link reference definition instantiations. #239

Merged
merged 2 commits into from
Feb 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ Fixes:
Fix quadratic time behavior caused by one-by-one walking over block lines
instead of calling `md_lookup_line()`.

- [#238](https://github.com/mity/md4c/issues/238):
Fix quadratic time and output size behavior caused by malicious misuse of
link reference definitions.


## Version 0.5.2

Expand Down
23 changes: 21 additions & 2 deletions src/md4c.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "md4c.h"

#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Expand Down Expand Up @@ -143,6 +144,9 @@
#define SZ MD_SIZE
#define OFF MD_OFFSET

/* Upper bounds for values of the SZ and OFF types. Each macro selects
 * UINT64_MAX when the corresponding type is 8 bytes wide and falls back to
 * UINT32_MAX for any other width. */
#define SZ_MAX (sizeof(SZ) == 8 ? UINT64_MAX : UINT32_MAX)
#define OFF_MAX (sizeof(OFF) == 8 ? UINT64_MAX : UINT32_MAX)

typedef struct MD_MARK_tag MD_MARK;
typedef struct MD_BLOCK_tag MD_BLOCK;
typedef struct MD_CONTAINER_tag MD_CONTAINER;
Expand Down Expand Up @@ -180,6 +184,7 @@ struct MD_CTX_tag {
int alloc_ref_defs;
void** ref_def_hashtable;
int ref_def_hashtable_size;
SZ max_ref_def_output;

/* Stack of inline/span markers.
* This is only used for parsing a single block contents but by storing it
Expand Down Expand Up @@ -2283,11 +2288,14 @@ md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
int is_multiline;
CHAR* label;
SZ label_size;
int ret;
int ret = FALSE;

MD_ASSERT(CH(beg) == _T('[') || CH(beg) == _T('!'));
MD_ASSERT(CH(end-1) == _T(']'));

if(ctx->max_ref_def_output == 0)
return FALSE;

beg += (CH(beg) == _T('!') ? 2 : 1);
end--;

Expand Down Expand Up @@ -2315,7 +2323,17 @@ md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
if(is_multiline)
free(label);

ret = (def != NULL);
if(def != NULL) {
/* See https://github.com/mity/md4c/issues/238 */
MD_SIZE output_size_estimation = def->label_size + def->title_size + def->dest_end - def->dest_beg;
if(output_size_estimation < ctx->max_ref_def_output) {
ctx->max_ref_def_output -= output_size_estimation;
ret = TRUE;
} else {
MD_LOG("Too many link reference definition instantiations.");
ctx->max_ref_def_output = 0;
}
}

abort:
return ret;
Expand Down Expand Up @@ -6470,6 +6488,7 @@ md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userd
ctx.code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? (OFF)(-1) : 4;
md_build_mark_char_map(&ctx);
ctx.doc_ends_with_newline = (size > 0 && ISNEWLINE_(text[size-1]));
ctx.max_ref_def_output = MIN(MIN(16 * (uint64_t)size, (uint64_t)(1024 * 1024)), (uint64_t)SZ_MAX);

/* Reset all mark stacks and lists. */
for(i = 0; i < (int) SIZEOF_ARRAY(ctx.opener_stacks); i++)
Expand Down
5 changes: 4 additions & 1 deletion test/pathological-tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,10 @@
"--ftables"),
"many broken links":
(("]([\n" * 50000),
re.compile("<p>(\]\(\[\r?\n){49999}\]\(\[</p>"))
re.compile("<p>(\]\(\[\r?\n){49999}\]\(\[</p>")),
"many link ref. def. instantiations":
(("[x]: " + "x" * 50000 + "\n[x]" * 50000),
re.compile(""))
}

# NOTE(review): as written, this pattern matches a literal '/', followed by
# one or more 's' characters, followed by another '/'; it does not match
# whitespace. r'\s+' may have been intended -- confirm against the usages of
# whitespace_re before changing it.
whitespace_re = re.compile('/s+/')
Expand Down