Skip to content

Commit

Permalink
Set a limit of parser recursion (Fix #68)
Browse files Browse the repository at this point in the history
Our parser is recursive, so parsing deeply nested capture groups can
overflow the stack. E.g.:

  x2("(" * 32767 + "a" + ")" * 32767, "a", 0, 1)

Set a limit for this.
The default value is defined in regint.h:
* DEFAULT_PARSE_DEPTH_LIMIT (Currently 4096)

Also add two APIs to support this:
* onig_get_parse_depth_limit
* onig_set_parse_depth_limit
  • Loading branch information
k-takata committed Nov 28, 2016
1 parent 1678485 commit e7a614b
Show file tree
Hide file tree
Showing 9 changed files with 89 additions and 2 deletions.
16 changes: 15 additions & 1 deletion doc/API
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Onigmo (Oniguruma-mod) API Version 6.0.0 2016/11/14
Onigmo (Oniguruma-mod) API Version 6.0.0 2016/11/28

#include <onigmo.h>

Expand Down Expand Up @@ -673,6 +673,20 @@ Onigmo (Oniguruma-mod) API Version 6.0.0 2016/11/14
normal return: ONIG_NORMAL


# unsigned int onig_get_parse_depth_limit(void)

Return the maximum depth of parser recursion.
(default: DEFAULT_PARSE_DEPTH_LIMIT defined in regint.h. Currently 4096.)


# int onig_set_parse_depth_limit(unsigned int depth)

Set the maximum depth of parser recursion.
(depth = 0: Set to the default value defined in regint.h.)

normal return: ONIG_NORMAL


# int onig_end(void)

The use of this library is finished.
Expand Down
16 changes: 15 additions & 1 deletion doc/API.ja
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Onigmo インターフェース Version 6.0.0 2016/11/14
Onigmo インターフェース Version 6.0.0 2016/11/28

#include <onigmo.h>

Expand Down Expand Up @@ -681,6 +681,20 @@ Onigmo インターフェース Version 6.0.0 2016/11/14
正常終了戻り値: ONIG_NORMAL


# unsigned int onig_get_parse_depth_limit(void)

再帰パース処理の最大深さを返す。
(デフォルト: regint.h で定義されている DEFAULT_PARSE_DEPTH_LIMIT。現在は 4096)


# int onig_set_parse_depth_limit(unsigned int depth)

再帰パース処理の最大深さを指定する。
(depth = 0: regint.h で定義されたデフォルト値に設定する。)

正常終了戻り値: ONIG_NORMAL


# int onig_end(void)

ライブラリの使用を終了する。
Expand Down
5 changes: 5 additions & 0 deletions onigmo.h
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,7 @@ ONIG_EXTERN const OnigSyntaxType* OnigDefaultSyntax;
#define ONIGERR_UNDEFINED_BYTECODE -13
#define ONIGERR_UNEXPECTED_BYTECODE -14
#define ONIGERR_MATCH_STACK_LIMIT_OVER -15
#define ONIGERR_PARSE_DEPTH_LIMIT_OVER -16
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SET -21
#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
/* general error */
Expand Down Expand Up @@ -912,6 +913,10 @@ unsigned int onig_get_match_stack_limit_size(void);
ONIG_EXTERN
int onig_set_match_stack_limit_size(unsigned int size);
/* Return the maximum depth of parser recursion currently in effect. */
ONIG_EXTERN
unsigned int onig_get_parse_depth_limit(void);
/* Set the maximum depth of parser recursion; depth == 0 selects the default value. */
ONIG_EXTERN
int onig_set_parse_depth_limit(unsigned int depth);
ONIG_EXTERN
int onig_end(void);
ONIG_EXTERN
const char* onig_version(void);
Expand Down
11 changes: 11 additions & 0 deletions onigmo.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,7 @@ def ONIG_IS_PATTERN_ERROR(ecode):
ONIGERR_UNDEFINED_BYTECODE = -13
ONIGERR_UNEXPECTED_BYTECODE = -14
ONIGERR_MATCH_STACK_LIMIT_OVER = -15
ONIGERR_PARSE_DEPTH_LIMIT_OVER = -16
ONIGERR_DEFAULT_ENCODING_IS_NOT_SET = -21
ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR = -22
# general error
Expand Down Expand Up @@ -596,6 +597,16 @@ def onig_error_code_to_str(err_buf, err_code, err_info=None):
libonig.onig_set_match_stack_limit_size.restype = ctypes.c_int
onig_set_match_stack_limit_size = libonig.onig_set_match_stack_limit_size

# onig_get_parse_depth_limit
# C prototype: unsigned int onig_get_parse_depth_limit(void)
# The limit is unsigned on the C side, so the result type must be c_uint;
# with c_int a limit above INT_MAX would be read back as a negative number.
libonig.onig_get_parse_depth_limit.argtypes = []
libonig.onig_get_parse_depth_limit.restype = ctypes.c_uint
onig_get_parse_depth_limit = libonig.onig_get_parse_depth_limit

# onig_set_parse_depth_limit
# C prototype: int onig_set_parse_depth_limit(unsigned int depth)
# depth is unsigned on the C side; the return value is a plain int status.
libonig.onig_set_parse_depth_limit.argtypes = [ctypes.c_uint]
libonig.onig_set_parse_depth_limit.restype = ctypes.c_int
onig_set_parse_depth_limit = libonig.onig_set_parse_depth_limit

# onig_end
libonig.onig_end.argtypes = []
onig_end = libonig.onig_end
Expand Down
2 changes: 2 additions & 0 deletions regerror.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ onig_error_code_to_format(OnigPosition code)
p = "unexpected bytecode (bug)"; break;
case ONIGERR_MATCH_STACK_LIMIT_OVER:
p = "match-stack limit over"; break;
case ONIGERR_PARSE_DEPTH_LIMIT_OVER:
p = "parse depth limit over"; break;
case ONIGERR_DEFAULT_ENCODING_IS_NOT_SET:
p = "default multibyte-encoding is not set"; break;
case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
Expand Down
1 change: 1 addition & 0 deletions regint.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@

#define INIT_MATCH_STACK_SIZE 160
#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
#define DEFAULT_PARSE_DEPTH_LIMIT 4096

#define OPT_EXACT_MAXLEN 24

Expand Down
29 changes: 29 additions & 0 deletions regparse.c
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,26 @@ extern void onig_set_verb_warn_func(OnigWarnFunc f)

static void CC_DUP_WARN(ScanEnv *env);


/* Upper bound on parser recursion depth; starts at the built-in default. */
static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;

/*
 * Report the parser recursion depth limit currently in effect.
 */
extern unsigned int
onig_get_parse_depth_limit(void)
{
  return ParseDepthLimit;
}

/*
 * Change the parser recursion depth limit.
 *
 * depth: the new limit; a value of 0 selects DEFAULT_PARSE_DEPTH_LIMIT.
 * Always returns 0.
 */
extern int
onig_set_parse_depth_limit(unsigned int depth)
{
  ParseDepthLimit = (depth != 0) ? depth : DEFAULT_PARSE_DEPTH_LIMIT;
  return 0;
}


static void
bbuf_free(BBuf* bbuf)
{
Expand Down Expand Up @@ -986,6 +1006,7 @@ scan_env_clear(ScanEnv* env)
env->curr_max_regnum = 0;
env->has_recursion = 0;
#endif
env->parse_depth = 0;
env->warnings_flag = 0;
}

Expand Down Expand Up @@ -4546,6 +4567,9 @@ parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* e
enum CCVALTYPE val_type, in_type;
int val_israw, in_israw;

env->parse_depth++;
if (env->parse_depth > ParseDepthLimit)
return ONIGERR_PARSE_DEPTH_LIMIT_OVER;
prev_cc = asc_prev_cc = (CClassNode* )NULL;
*np = *asc_np = NULL_NODE;
r = fetch_token_in_cc(tok, src, end, env);
Expand Down Expand Up @@ -4901,6 +4925,7 @@ parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* e
}
}
*src = p;
env->parse_depth--;
return 0;

err:
Expand Down Expand Up @@ -6212,6 +6237,9 @@ parse_subexp(Node** top, OnigToken* tok, int term,
Node *node, **headp;

*top = NULL;
env->parse_depth++;
if (env->parse_depth > ParseDepthLimit)
return ONIGERR_PARSE_DEPTH_LIMIT_OVER;
r = parse_branch(&node, tok, term, src, end, env);
if (r < 0) {
onig_node_free(node);
Expand Down Expand Up @@ -6249,6 +6277,7 @@ parse_subexp(Node** top, OnigToken* tok, int term,
return ONIGERR_PARSER_BUG;
}

env->parse_depth--;
return r;
}

Expand Down
1 change: 1 addition & 0 deletions regparse.h
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,7 @@ typedef struct {
int curr_max_regnum;
int has_recursion;
#endif
unsigned int parse_depth;
int warnings_flag;
#ifdef RUBY
const char* sourcefile;
Expand Down
10 changes: 10 additions & 0 deletions testpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -1617,6 +1617,16 @@ def main():
n("^a*$", "a" * 2000 + "b", execerr=onigmo.ONIGERR_MATCH_STACK_LIMIT_OVER)
onigmo.onig_set_match_stack_limit_size(0)

# parse depth
parse_depth = onigmo.onig_get_parse_depth_limit()
print("Default parse depth:", parse_depth)
onigmo.onig_set_parse_depth_limit(1000)
print("New parse depth:", onigmo.onig_get_parse_depth_limit())
# These patterns need deep parse stack.
# 200 nested groups stay under the limit of 1000 and must match;
# 2000 nested groups exceed it and must fail at parse time.
x2("(" * 200 + "a" + ")" * 200, "a", 0, 1)
n("(" * 2000 + "a" + ")" * 2000, "a", err=onigmo.ONIGERR_PARSE_DEPTH_LIMIT_OVER)
# Restore the default parse depth limit (0 selects the default) so the
# lowered limit does not leak into the tests that follow.
onigmo.onig_set_parse_depth_limit(0)

# syntax functions
onigmo.onig_set_syntax_op(syntax_default,
onigmo.onig_get_syntax_op(onigmo.ONIG_SYNTAX_DEFAULT))
Expand Down

0 comments on commit e7a614b

Please sign in to comment.