Skip to content

Commit

Permalink
Introduce an overall limit on link ref. def. instantiations.
Browse files Browse the repository at this point in the history
This is to prevent an explosion of processing time and output size for
input patterns such as the one generated by this:

    $ python -c 'N=1000; print("[x]: " + "x" * N + "\n[x]" * N)'

We roughly allow link reference definitions to blow up the input size of
the document 16 times, or up to 1 MB, whichever is smaller. When the
threshold is reached, any following link references are left unresolved
and sent to the output as plain text.

Fixes #238.
  • Loading branch information
mity committed Feb 7, 2024
1 parent ad8d411 commit 787d4b7
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 3 deletions.
23 changes: 21 additions & 2 deletions src/md4c.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "md4c.h"

#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Expand Down Expand Up @@ -143,6 +144,9 @@
#define SZ MD_SIZE
#define OFF MD_OFFSET

/* Upper bounds for the SZ and OFF types, selected by the width of the type:
 * UINT64_MAX when the type is 8 bytes wide, UINT32_MAX otherwise.
 * NOTE(review): this presumes MD_SIZE/MD_OFFSET are unsigned and either
 * 4 or 8 bytes wide -- confirm against their definitions in md4c.h. */
#define SZ_MAX (sizeof(SZ) == 8 ? UINT64_MAX : UINT32_MAX)
#define OFF_MAX (sizeof(OFF) == 8 ? UINT64_MAX : UINT32_MAX)

typedef struct MD_MARK_tag MD_MARK;
typedef struct MD_BLOCK_tag MD_BLOCK;
typedef struct MD_CONTAINER_tag MD_CONTAINER;
Expand Down Expand Up @@ -180,6 +184,7 @@ struct MD_CTX_tag {
int alloc_ref_defs;
void** ref_def_hashtable;
int ref_def_hashtable_size;
SZ max_ref_def_output;

/* Stack of inline/span markers.
* This is only used for parsing a single block contents but by storing it
Expand Down Expand Up @@ -2283,11 +2288,14 @@ md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
int is_multiline;
CHAR* label;
SZ label_size;
int ret;
int ret = FALSE;

MD_ASSERT(CH(beg) == _T('[') || CH(beg) == _T('!'));
MD_ASSERT(CH(end-1) == _T(']'));

if(ctx->max_ref_def_output == 0)
return FALSE;

beg += (CH(beg) == _T('!') ? 2 : 1);
end--;

Expand Down Expand Up @@ -2315,7 +2323,17 @@ md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
if(is_multiline)
free(label);

ret = (def != NULL);
if(def != NULL) {
/* See https://github.com/mity/md4c/issues/238 */
MD_SIZE output_size_estimation = def->label_size + def->title_size + def->dest_end - def->dest_beg;
if(output_size_estimation < ctx->max_ref_def_output) {
ctx->max_ref_def_output -= output_size_estimation;
ret = TRUE;
} else {
MD_LOG("Too many link reference definition instantiations.");
ctx->max_ref_def_output = 0;
}
}

abort:
return ret;
Expand Down Expand Up @@ -6470,6 +6488,7 @@ md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userd
ctx.code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? (OFF)(-1) : 4;
md_build_mark_char_map(&ctx);
ctx.doc_ends_with_newline = (size > 0 && ISNEWLINE_(text[size-1]));
ctx.max_ref_def_output = MIN(MIN(16 * (uint64_t)size, (uint64_t)(1024 * 1024)), (uint64_t)SZ_MAX);

/* Reset all mark stacks and lists. */
for(i = 0; i < (int) SIZEOF_ARRAY(ctx.opener_stacks); i++)
Expand Down
5 changes: 4 additions & 1 deletion test/pathological-tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,10 @@
"--ftables"),
"many broken links":
(("]([\n" * 50000),
re.compile("<p>(\]\(\[\r?\n){49999}\]\(\[</p>"))
re.compile("<p>(\]\(\[\r?\n){49999}\]\(\[</p>")),
"many link ref. def. instantiations":
(("[x]: " + "x" * 50000 + "\n[x]" * 50000),
re.compile(""))
}

whitespace_re = re.compile('/s+/')
Expand Down

0 comments on commit 787d4b7

Please sign in to comment.