| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| Unicode Properties (from Unicode Version: 12.1.0) | ||
|
|
||
| 15: ASCII_Hex_Digit | ||
| 16: Adlam | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,111 @@ | ||
| # First-pass fuzzing dictionary for Oniguruma by Mark Griffin | ||
| "\\o{17777777777}" | ||
| "\\777" | ||
| "\\u" | ||
| "\\uFFFF" | ||
| "\\xFF" | ||
| "\\x{70000000}" | ||
| "\\C-" | ||
| "\\M-\\C-" | ||
| "\\X" | ||
| "\\p{" | ||
| "\\p{^" | ||
| "}" | ||
| "]" | ||
| ")" | ||
| "\\n" | ||
| "\\r" | ||
| "\\R" | ||
| "\\W" | ||
| "\\w" | ||
| "\\s" | ||
| "\\S" | ||
| "\\d" | ||
| "\\O" | ||
| "\\X" | ||
| "\\b" | ||
| "\\y" | ||
| "\\Y" | ||
| "\\A" | ||
| "\\z" | ||
| "\\K" | ||
| "\\G" | ||
| "\\p{Print}" | ||
| "\\p{ASCII}" | ||
| "\\p{Alnum}" | ||
| "{0,2}" | ||
| "{3,}" | ||
| "{,3}" | ||
| "{5}" | ||
| "{4,2}" | ||
| "??" | ||
| "*?" | ||
| "+?" | ||
| "*+" | ||
| "{1,3}+" | ||
| "(?>" | ||
| "\\B" | ||
| "(?y{" | ||
| "[abcd1-9]" | ||
| "[\\w\\d" | ||
| "[\\p{Alphabetic}" | ||
| "[\\P{Arabic}" | ||
| "[\\x{ffff}" | ||
| "[a-w&&" | ||
| "[^" | ||
| "[:graph:]" | ||
| "[^:cntrl:]" | ||
| "(?i:" | ||
| "(?i)" | ||
| "(?m:" | ||
| "(?x:" | ||
| "(?W:" | ||
| "(?y-:" | ||
| "(?y{w}:" | ||
| "(?P:" | ||
| "(?#" | ||
| "(?:" | ||
| "(?=" | ||
| "(?!" | ||
| "(?<=" | ||
| "(?<!" | ||
| "(?>" | ||
| "(?<name>" | ||
| "(?{" | ||
| "(?{....}[x])" | ||
| "(?{.}[x]>)" | ||
| "(?{{{.}}})" | ||
| "(?~" | ||
| "(?~a)" | ||
| "(?~|a|.*)" | ||
| "(?~|(?:a|b))" | ||
| "(?~|)" | ||
| "(?(.) |.)" | ||
| "(?('-n'))" | ||
| "(?(n+0))" | ||
| "(?(n+1))" | ||
| "(?(n-1))" | ||
| "(?(<name+0>))" | ||
| "(?(<name+1>))" | ||
| "(?(<name-1>))" | ||
| "(*ERROR{-2000})" | ||
| "(*COUNT[tag]{X})" | ||
| "\\1" | ||
| "\\2" | ||
| "\\k<name>" | ||
| "\\k<1>" | ||
| "\\k<2>" | ||
| "\\k<-1>" | ||
| "\\k<-2>" | ||
| "\\k<name+0>" | ||
| "\\k<name+1>" | ||
| "\\k<name-1>" | ||
| "\\g<-1>" | ||
| "\\g<name>" | ||
| "name" | ||
| "(?<name>a|b\\g<name>c)" | ||
| "(?-i:\\g<name>)" | ||
| "\\N{name}" | ||
| "\\p{Hiragana}" | ||
| "\\p{Katakana}" | ||
| "\\p{Emoji}" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,239 @@ | ||
| /* | ||
| * deluxe-encode-harness.c | ||
| * contributed by Mark Griffin | ||
| */ | ||
| #include <stdio.h> | ||
| #include "oniguruma.h" | ||
|
|
||
| #include <stdlib.h> | ||
| #include <string.h> | ||
|
|
||
| #define DEFAULT_LIMIT 120 | ||
| typedef unsigned char uint8_t; | ||
|
|
||
| static int | ||
| search(regex_t* reg, unsigned char* str, unsigned char* end) | ||
| { | ||
| int r; | ||
| unsigned char *start, *range; | ||
| OnigRegion *region; | ||
|
|
||
| region = onig_region_new(); | ||
|
|
||
| start = str; | ||
| range = end; | ||
| r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); | ||
| if (r >= 0) { | ||
| int i; | ||
|
|
||
| fprintf(stdout, "match at %d (%s)\n", r, | ||
| ONIGENC_NAME(onig_get_encoding(reg))); | ||
| for (i = 0; i < region->num_regs; i++) { | ||
| fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); | ||
| } | ||
| } | ||
| else if (r == ONIG_MISMATCH) { | ||
| fprintf(stdout, "search fail (%s)\n", | ||
| ONIGENC_NAME(onig_get_encoding(reg))); | ||
| } | ||
| else { /* error */ | ||
| char s[ONIG_MAX_ERROR_MESSAGE_LEN]; | ||
| onig_error_code_to_str((UChar* )s, r); | ||
| fprintf(stdout, "ERROR: %s\n", s); | ||
| fprintf(stdout, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); | ||
| onig_region_free(region, 1 /* 1:free self, 0:free contents only */); | ||
| return -1; | ||
| } | ||
|
|
||
| onig_region_free(region, 1 /* 1:free self, 0:free contents only */); | ||
| return 0; | ||
| } | ||
|
|
||
| static int | ||
| exec(OnigEncoding enc, OnigOptionType options, | ||
| char* apattern, char* apattern_end, char* astr, char* astr_end) | ||
| { | ||
| int r; | ||
| regex_t* reg; | ||
| OnigErrorInfo einfo; | ||
| UChar* pattern = (UChar* )apattern; | ||
| UChar* str = (UChar* )astr; | ||
| UChar* pattern_end = (UChar* )apattern_end; | ||
| unsigned char *end = (unsigned char* )astr_end; | ||
|
|
||
| onig_initialize(&enc, 1); | ||
| onig_set_retry_limit_in_match(DEFAULT_LIMIT); | ||
| onig_set_parse_depth_limit(DEFAULT_LIMIT); | ||
|
|
||
| r = onig_new(®, pattern, pattern_end, | ||
| options, enc, ONIG_SYNTAX_DEFAULT, &einfo); | ||
| if (r != ONIG_NORMAL) { | ||
| char s[ONIG_MAX_ERROR_MESSAGE_LEN]; | ||
| onig_error_code_to_str((UChar* )s, r, &einfo); | ||
| fprintf(stdout, "ERROR: %s\n", s); | ||
| onig_end(); | ||
| return -1; | ||
| } | ||
|
|
||
| r = search(reg, str, end); | ||
|
|
||
| onig_free(reg); | ||
| onig_end(); | ||
| return 0; | ||
| } | ||
|
|
||
| static OnigCaseFoldType CF = ONIGENC_CASE_FOLD_MIN; | ||
|
|
||
| static int | ||
| exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc, | ||
| OnigOptionType options, char* apattern, char* apattern_end, | ||
| char* astr, char* astr_end) | ||
| { | ||
| int r; | ||
| regex_t* reg; | ||
| OnigCompileInfo ci; | ||
| OnigErrorInfo einfo; | ||
| UChar* pattern = (UChar* )apattern; | ||
| UChar* str = (UChar* )astr; | ||
| UChar* pattern_end = (UChar* )apattern_end; | ||
| unsigned char* end = (unsigned char* )astr_end; | ||
|
|
||
| onig_initialize(&str_enc, 1); | ||
| onig_set_retry_limit_in_match(DEFAULT_LIMIT); | ||
| onig_set_parse_depth_limit(DEFAULT_LIMIT); | ||
|
|
||
| ci.num_of_elements = 5; | ||
| ci.pattern_enc = pattern_enc; | ||
| ci.target_enc = str_enc; | ||
| ci.syntax = ONIG_SYNTAX_DEFAULT; | ||
| ci.option = options; | ||
| ci.case_fold_flag = CF; | ||
|
|
||
| r = onig_new_deluxe(®, pattern, pattern_end, &ci, &einfo); | ||
| if (r != ONIG_NORMAL) { | ||
| char s[ONIG_MAX_ERROR_MESSAGE_LEN]; | ||
| onig_error_code_to_str((UChar* )s, r, &einfo); | ||
| fprintf(stdout, "ERROR: %s\n", s); | ||
| onig_end(); | ||
| return -1; | ||
| } | ||
|
|
||
| if (onigenc_is_valid_mbc_string(str_enc, str, end) != 0) { | ||
| r = search(reg, str, end); | ||
| } | ||
|
|
||
| onig_free(reg); | ||
| onig_end(); | ||
| return 0; | ||
| } | ||
|
|
||
| #define PATTERN_SIZE 48 | ||
| #define NUM_CONTROL_BYTES 1 | ||
| #define MIN_STR_SIZE 2 | ||
| int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) | ||
| { | ||
| int r; | ||
| size_t remaining_size; | ||
| unsigned char *data; | ||
| unsigned char pat_encoding_choice; | ||
| unsigned char str_encoding_choice; | ||
| unsigned char *pattern; | ||
| unsigned char *str; | ||
| unsigned char *pattern_end; | ||
| unsigned char *str_end; | ||
| unsigned int num_encodings; | ||
| OnigEncodingType *pattern_enc; | ||
| OnigEncodingType *str_enc; | ||
|
|
||
| OnigEncodingType *encodings[] = { | ||
| ONIG_ENCODING_ASCII, | ||
| ONIG_ENCODING_ISO_8859_1, | ||
| ONIG_ENCODING_ISO_8859_2, | ||
| ONIG_ENCODING_ISO_8859_3, | ||
| ONIG_ENCODING_ISO_8859_4, | ||
| ONIG_ENCODING_ISO_8859_5, | ||
| ONIG_ENCODING_ISO_8859_6, | ||
| ONIG_ENCODING_ISO_8859_7, | ||
| ONIG_ENCODING_ISO_8859_8, | ||
| ONIG_ENCODING_ISO_8859_9, | ||
| ONIG_ENCODING_ISO_8859_10, | ||
| ONIG_ENCODING_ISO_8859_11, | ||
| ONIG_ENCODING_ISO_8859_13, | ||
| ONIG_ENCODING_ISO_8859_14, | ||
| ONIG_ENCODING_ISO_8859_15, | ||
| ONIG_ENCODING_ISO_8859_16, | ||
| ONIG_ENCODING_UTF8, | ||
| ONIG_ENCODING_UTF16_BE, | ||
| ONIG_ENCODING_UTF16_LE, | ||
| ONIG_ENCODING_UTF32_BE, | ||
| ONIG_ENCODING_UTF32_LE, | ||
| ONIG_ENCODING_EUC_JP, | ||
| ONIG_ENCODING_EUC_TW, | ||
| ONIG_ENCODING_EUC_KR, | ||
| ONIG_ENCODING_EUC_CN, | ||
| ONIG_ENCODING_SJIS, | ||
| //ONIG_ENCODING_KOI8, | ||
| ONIG_ENCODING_KOI8_R, | ||
| ONIG_ENCODING_CP1251, | ||
| ONIG_ENCODING_BIG5, | ||
| ONIG_ENCODING_GB18030, | ||
| }; | ||
|
|
||
| if (Size <= (NUM_CONTROL_BYTES + PATTERN_SIZE + MIN_STR_SIZE)) | ||
| return 0; | ||
| if (Size > 0x1000) | ||
| return 0; | ||
|
|
||
| remaining_size = Size; | ||
| data = (unsigned char *)(Data); | ||
|
|
||
| // pull off bytes to switch off | ||
| pat_encoding_choice = data[0]; | ||
| data++; | ||
| remaining_size--; | ||
| str_encoding_choice = data[0]; | ||
| data++; | ||
| remaining_size--; | ||
|
|
||
| // copy first PATTERN_SIZE bytes off to be the pattern | ||
| pattern = (unsigned char *)malloc(PATTERN_SIZE+4); | ||
| memset(pattern, 0, PATTERN_SIZE+4); | ||
| memcpy(pattern, data, PATTERN_SIZE); | ||
| pattern_end = pattern + PATTERN_SIZE; | ||
| data += PATTERN_SIZE; | ||
| remaining_size -= PATTERN_SIZE; | ||
|
|
||
| str = (unsigned char*)malloc(remaining_size+4); | ||
| memset(str, 0, remaining_size+4); | ||
| memcpy(str, data, remaining_size); | ||
| str_end = str + remaining_size; | ||
|
|
||
| num_encodings = sizeof(encodings) / sizeof(encodings[0]); | ||
| pattern_enc = encodings[pat_encoding_choice % num_encodings]; | ||
| str_enc = encodings[str_encoding_choice % num_encodings]; | ||
|
|
||
| r = exec_deluxe(pattern_enc, str_enc, ONIG_OPTION_NONE, (char *)pattern, (char *)pattern_end, (char *)str, (char *)str_end); | ||
|
|
||
| free(pattern); | ||
| free(str); | ||
|
|
||
| return r; | ||
| } | ||
|
|
||
|
|
||
#ifdef WITH_READ_MAIN

#include <unistd.h>

/*
 * Stand-alone driver, built only when WITH_READ_MAIN is defined: performs
 * one read() from standard input and passes the bytes obtained to
 * LLVMFuzzerTestOneInput().
 *
 * Returns 0 after running the test case, 1 when read() fails.
 */
extern int main(int argc, char* argv[])
{
  ssize_t n;
  uint8_t Data[10000];

  (void )argc;
  (void )argv;

  /* read() reports failure as -1, so its result has to stay signed. */
  n = read(0, Data, sizeof(Data));
  if (n < 0) {
    perror("read");
    return 1;
  }

  fprintf(stdout, "n: %ld\n", (long )n);
  LLVMFuzzerTestOneInput(Data, (size_t )n);

  return 0;
}
#endif /* WITH_READ_MAIN */
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,72 @@ | ||
| # -*- coding: utf-8 -*- | ||
| # dict_conv.py (Python3 script) | ||
|
|
||
| import sys | ||
|
|
||
ENC_UTF16_BE = 1
ENC_UTF16_LE = 2


def add_char(enc, s, c):
    """Append one dictionary character to ``s`` as a UTF-16 code unit.

    ``c`` is the text for the low byte (a single character or a two-character
    backslash escape).  The textual escape ``\\x00`` is written before it for
    ENC_UTF16_BE and after it for ENC_UTF16_LE.  Returns the extended string.
    """
    if enc == ENC_UTF16_BE:
        s += "\\x00"

    s += c
    if enc == ENC_UTF16_LE:
        s += "\\x00"

    return s


def conv(enc, s):
    """Convert the body of one dictionary entry to its UTF-16 form.

    ``s`` is the text between the surrounding double quotes.  The escapes
    ``\\\\`` and ``\\"`` are kept together as a single character; every other
    character is passed to add_char() on its own.

    Raises ValueError for any other backslash escape, including a backslash
    that is the last character of ``s``.
    """
    n = len(s)
    r = ""
    i = 0
    while i < n:
        c = s[i]
        if c == '\\':
            # A trailing backslash has no escaped character to look at.
            nxt = s[i + 1] if i + 1 < n else ''
            if nxt != '\\' and nxt != '"':
                raise ValueError("Unknown escape {0}".format(s))
            r = add_char(enc, r, "\\" + nxt)
            i += 2
            continue

        r = add_char(enc, r, c)
        i += 1

    return r
|
|
||
def main(enc):
    """Convert a fuzzing dictionary read from stdin and print it to stdout.

    A header comment is printed first.  Lines starting with ``#`` are copied
    unchanged, blank lines are skipped, and every ``"..."`` entry is passed
    through conv() and printed re-quoted.

    Raises ValueError for a line that is neither a comment nor a quoted entry.
    """
    print("# This file was generated by dict_conv.py.")
    for line in sys.stdin:
        s = line.strip()
        if not s:
            # Nothing to convert; indexing s[0] would fail on an empty line.
            continue

        if s[0] == '#':
            print(s)
            continue

        # len(s) >= 2 rejects a lone '"', which is both first and last char.
        if len(s) >= 2 and s[0] == '"' and s[-1] == '"':
            s = conv(enc, s[1:-1])
            print("\"{0}\"".format(s))
        else:
            raise ValueError("Invalid format {0}".format(s))
|
|
||
def usage(argv):
    """Raise RuntimeError carrying the command-line usage text.

    ``argv[0]`` is inserted as the script name.
    """
    message = "Usage: python {0} utf16_be/utf16_le".format(argv[0])
    raise RuntimeError(message)
|
|
||
|
|
||
if __name__ == "__main__":
    # The first command-line argument names the target encoding; anything
    # else (or no argument at all) ends in usage(), which raises.
    args = sys.argv
    known = {'utf16_be': ENC_UTF16_BE, 'utf16_le': ENC_UTF16_LE}

    if len(args) < 2 or args[1] not in known:
        usage(args)

    main(known[args[1]])
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,170 @@ | ||
| /* | ||
| * encode-harness.c | ||
| * contributed by Mark Griffin | ||
| */ | ||
| #include <stdio.h> | ||
| #include "oniguruma.h" | ||
|
|
||
| #include <stdlib.h> | ||
| #include <string.h> | ||
|
|
||
| #define PARSE_DEPTH_LIMIT 120 | ||
| #define RETRY_LIMIT 4000 | ||
|
|
||
| typedef unsigned char uint8_t; | ||
|
|
||
| static int | ||
| search(regex_t* reg, unsigned char* str, unsigned char* end) | ||
| { | ||
| int r; | ||
| unsigned char *start, *range; | ||
| OnigRegion *region; | ||
|
|
||
| region = onig_region_new(); | ||
|
|
||
| start = str; | ||
| range = end; | ||
| r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); | ||
| if (r >= 0) { | ||
| int i; | ||
|
|
||
| fprintf(stdout, "match at %d (%s)\n", r, | ||
| ONIGENC_NAME(onig_get_encoding(reg))); | ||
| for (i = 0; i < region->num_regs; i++) { | ||
| fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); | ||
| } | ||
| } | ||
| else if (r == ONIG_MISMATCH) { | ||
| fprintf(stdout, "search fail (%s)\n", | ||
| ONIGENC_NAME(onig_get_encoding(reg))); | ||
| } | ||
| else { /* error */ | ||
| char s[ONIG_MAX_ERROR_MESSAGE_LEN]; | ||
| onig_error_code_to_str((UChar* )s, r); | ||
| fprintf(stdout, "ERROR: %s\n", s); | ||
| fprintf(stdout, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); | ||
| onig_region_free(region, 1 /* 1:free self, 0:free contents only */); | ||
| return -1; | ||
| } | ||
|
|
||
| onig_region_free(region, 1 /* 1:free self, 0:free contents only */); | ||
| return 0; | ||
| } | ||
|
|
||
| static int | ||
| exec(OnigEncoding enc, OnigOptionType options, | ||
| char* apattern, char* apattern_end, char* astr, UChar* end) | ||
| { | ||
| int r; | ||
| regex_t* reg; | ||
| OnigErrorInfo einfo; | ||
| UChar* pattern = (UChar* )apattern; | ||
| UChar* str = (UChar* )astr; | ||
| UChar* pattern_end = (UChar* )apattern_end; | ||
|
|
||
| onig_initialize(&enc, 1); | ||
| onig_set_retry_limit_in_match(RETRY_LIMIT); | ||
| onig_set_parse_depth_limit(PARSE_DEPTH_LIMIT); | ||
|
|
||
| r = onig_new(®, pattern, pattern_end, | ||
| options, enc, ONIG_SYNTAX_DEFAULT, &einfo); | ||
| if (r != ONIG_NORMAL) { | ||
| char s[ONIG_MAX_ERROR_MESSAGE_LEN]; | ||
| onig_error_code_to_str((UChar* )s, r, &einfo); | ||
| fprintf(stdout, "ERROR: %s\n", s); | ||
| onig_end(); | ||
| return -1; | ||
| } | ||
|
|
||
| if (onigenc_is_valid_mbc_string(enc, str, end) != 0) { | ||
| r = search(reg, str, end); | ||
| } | ||
|
|
||
| onig_free(reg); | ||
| onig_end(); | ||
| return 0; | ||
| } | ||
|
|
||
| #define PATTERN_SIZE 32 | ||
| #define NUM_CONTROL_BYTES 1 | ||
| #define MIN_STR_SIZE 1 | ||
| int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) | ||
| { | ||
| if (Size <= (NUM_CONTROL_BYTES + PATTERN_SIZE + MIN_STR_SIZE)) | ||
| return 0; | ||
| if (Size > 0x1000) | ||
| return 0; | ||
|
|
||
| unsigned char *pattern_end; | ||
| unsigned char *str_null_end; | ||
|
|
||
| size_t remaining_size = Size; | ||
| unsigned char *data = (unsigned char *)(Data); | ||
|
|
||
| // pull off one byte to switch off | ||
| unsigned char encoding_choice = data[0]; | ||
| data++; | ||
| remaining_size--; | ||
|
|
||
| // copy first PATTERN_SIZE bytes off to be the pattern | ||
| unsigned char *pattern = (unsigned char *)malloc(PATTERN_SIZE+4); | ||
| memset(pattern, 0, PATTERN_SIZE+4); | ||
| memcpy(pattern, data, PATTERN_SIZE); | ||
| pattern_end = pattern + PATTERN_SIZE; | ||
| data += PATTERN_SIZE; | ||
| remaining_size -= PATTERN_SIZE; | ||
|
|
||
| unsigned char *str = (unsigned char*)malloc(remaining_size+4); | ||
| memset(str, 0, remaining_size+4); | ||
| memcpy(str, data, remaining_size); | ||
| str_null_end = str + remaining_size; | ||
|
|
||
| int r; | ||
| OnigEncodingType *encodings[] = { | ||
| ONIG_ENCODING_SJIS, | ||
| ONIG_ENCODING_EUC_JP, | ||
| ONIG_ENCODING_CP1251, | ||
| ONIG_ENCODING_ISO_8859_1, | ||
| ONIG_ENCODING_UTF8, | ||
| ONIG_ENCODING_KOI8_R, | ||
| ONIG_ENCODING_BIG5 | ||
| }; | ||
|
|
||
| OnigEncodingType *enc; | ||
|
|
||
| #ifdef UTF16_BE | ||
| enc = ONIG_ENCODING_UTF16_BE; | ||
| #else | ||
| #ifdef UTF16_LE | ||
| enc = ONIG_ENCODING_UTF16_LE; | ||
| #else | ||
| int num_encodings = sizeof(encodings)/sizeof(encodings[0]); | ||
| enc = encodings[encoding_choice % num_encodings]; | ||
| #endif | ||
| #endif | ||
|
|
||
| r = exec(enc, ONIG_OPTION_NONE, (char *)pattern, (char *)pattern_end, | ||
| (char *)str, str_null_end); | ||
|
|
||
| free(pattern); | ||
| free(str); | ||
|
|
||
| return r; | ||
| } | ||
|
|
||
#ifdef WITH_READ_MAIN

#include <unistd.h>

/*
 * Stand-alone driver, built only when WITH_READ_MAIN is defined: performs
 * one read() from standard input and passes the bytes obtained to
 * LLVMFuzzerTestOneInput().
 *
 * Returns 0 after running the test case, 1 when read() fails.
 */
extern int main(int argc, char* argv[])
{
  ssize_t n;
  uint8_t Data[10000];

  (void )argc;
  (void )argv;

  /* read() reports failure as -1, so its result has to stay signed. */
  n = read(0, Data, sizeof(Data));
  if (n < 0) {
    perror("read");
    return 1;
  }

  fprintf(stdout, "n: %ld\n", (long )n);
  LLVMFuzzerTestOneInput(Data, (size_t )n);

  return 0;
}
#endif /* WITH_READ_MAIN */
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,120 @@ | ||
| /* | ||
| * syntax-harness.c | ||
| * contributed by Mark Griffin | ||
| */ | ||
| #include <stdio.h> | ||
| #include <string.h> | ||
| #include "oniguruma.h" | ||
|
|
||
| #include <stdlib.h> | ||
|
|
||
| #define DEFAULT_LIMIT 120 | ||
| typedef unsigned char uint8_t; | ||
|
|
||
| extern int exec(OnigSyntaxType* syntax, char* apattern, char* astr) | ||
| { | ||
| int r; | ||
| unsigned char *start, *range, *end; | ||
| regex_t* reg; | ||
| OnigErrorInfo einfo; | ||
| OnigRegion *region; | ||
| UChar* pattern = (UChar* )apattern; | ||
| UChar* str = (UChar* )astr; | ||
|
|
||
| r = onig_new(®, pattern, pattern + strlen((char* )pattern), | ||
| ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo); | ||
| if (r != ONIG_NORMAL) { | ||
| char s[ONIG_MAX_ERROR_MESSAGE_LEN]; | ||
| onig_error_code_to_str((UChar* )s, r, &einfo); | ||
| fprintf(stdout, "ERROR: %s\n", s); | ||
| return -1; | ||
| } | ||
|
|
||
| region = onig_region_new(); | ||
|
|
||
| end = str + strlen((char* )str); | ||
| start = str; | ||
| range = end; | ||
| r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); | ||
| if (r >= 0) { | ||
| int i; | ||
|
|
||
| fprintf(stdout, "match at %d\n", r); | ||
| for (i = 0; i < region->num_regs; i++) { | ||
| fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); | ||
| } | ||
| } | ||
| else if (r == ONIG_MISMATCH) { | ||
| fprintf(stdout, "search fail\n"); | ||
| } | ||
| else { /* error */ | ||
| char s[ONIG_MAX_ERROR_MESSAGE_LEN]; | ||
| onig_error_code_to_str((UChar* )s, r); | ||
| fprintf(stdout, "ERROR: %s\n", s); | ||
| onig_region_free(region, 1 /* 1:free self, 0:free contents only */); | ||
| onig_free(reg); | ||
| return -1; | ||
| } | ||
|
|
||
| onig_region_free(region, 1 /* 1:free self, 0:free contents only */); | ||
| onig_free(reg); | ||
| return 0; | ||
| } | ||
|
|
||
| #define PATTERN_SIZE 64 | ||
| #define NUM_CONTROL_BYTES 1 | ||
| #define MIN_STR_SIZE 1 | ||
| int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) | ||
| { | ||
| if (Size <= (NUM_CONTROL_BYTES + PATTERN_SIZE + MIN_STR_SIZE)) | ||
| return 0; | ||
| if (Size > 0x1000) | ||
| return 0; | ||
| size_t remaining_size = Size; | ||
| unsigned char *data = (unsigned char *)(Data); | ||
|
|
||
| // pull off one byte to switch syntax choice | ||
| unsigned char syntax_choice = data[0]; | ||
| data++; | ||
| remaining_size--; | ||
|
|
||
| // copy first PATTERN_SIZE bytes off to be the pattern | ||
| unsigned char *pattern = (unsigned char *)malloc(PATTERN_SIZE+1); | ||
| memset(pattern, 0, PATTERN_SIZE+1); | ||
| memcpy(pattern, data, PATTERN_SIZE); | ||
| data += PATTERN_SIZE; | ||
| remaining_size -= PATTERN_SIZE; | ||
|
|
||
| unsigned char *str = (unsigned char*)malloc(remaining_size+1); | ||
| memset(str, 0, remaining_size+1); | ||
| memcpy(str, data, remaining_size); | ||
|
|
||
| OnigEncoding use_encs[] = { ONIG_ENCODING_ASCII }; | ||
| onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); | ||
|
|
||
| onig_set_retry_limit_in_match(DEFAULT_LIMIT); | ||
| onig_set_parse_depth_limit(DEFAULT_LIMIT); | ||
|
|
||
| OnigSyntaxType *syntaxes[] = { | ||
| ONIG_SYNTAX_POSIX_EXTENDED, | ||
| ONIG_SYNTAX_EMACS, | ||
| ONIG_SYNTAX_GREP, | ||
| ONIG_SYNTAX_GNU_REGEX, | ||
| ONIG_SYNTAX_JAVA, | ||
| ONIG_SYNTAX_PERL_NG, | ||
| ONIG_SYNTAX_RUBY, | ||
| ONIG_SYNTAX_ONIGURUMA, | ||
| }; | ||
| OnigSyntaxType *syntax = syntaxes[syntax_choice % 8]; | ||
|
|
||
| int r; | ||
| r = exec(syntax, (char *)pattern, (char *)str); | ||
| // r = exec(ONIG_SYNTAX_JAVA, "\\p{XDigit}\\P{XDigit}[a-c&&b-g]", "bgc"); | ||
|
|
||
| onig_end(); | ||
|
|
||
| free(pattern); | ||
| free(str); | ||
|
|
||
| return 0; | ||
| } |