diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h index 40cbedd4df6a68..703f38f5907153 100644 --- a/include/ruby/onigmo.h +++ b/include/ruby/onigmo.h @@ -744,9 +744,8 @@ typedef struct { typedef struct { int lower; int upper; - /* These fields are for cache optimization. */ - int base_point; - int inner_point; + int base_num; + int inner_num; } OnigRepeatRange; typedef void (*OnigWarnFunc)(const char* s); diff --git a/regexec.c b/regexec.c index b9f8411faf13ae..0bd4c8a96c9756 100644 --- a/regexec.c +++ b/regexec.c @@ -233,17 +233,19 @@ onig_get_capture_tree(OnigRegion* region) #ifdef USE_CACHE_MATCH_OPT -static int count_num_cache_index(regex_t* reg) +/* count number of jump-like opcodes for allocation of cache memory. */ +/* return -1 if we cannot optimize the regex matching by using cache. */ +static int count_num_cache_opcode(regex_t* reg, int* table_size) { - UChar* p = reg->p; - UChar* pend = p + reg->used; - LengthType len; - MemNumType mem; + int num = 0; + UChar* p = reg->p; + UChar* pend = p + reg->used; + LengthType len; + MemNumType mem; + MemNumType current_mem = -1; + int current_mem_num = 0; OnigEncoding enc = reg->enc; - int num_cache_index = 0; - MemNumType current_repeat = NO_OUTER_REPEAT; - while (p < pend) { switch (*p++) { case OP_FINISH: @@ -296,10 +298,10 @@ static int count_num_cache_index(regex_t* reg) break; case OP_ANYCHAR_STAR: case OP_ANYCHAR_ML_STAR: - num_cache_index++; break; + num++; *table_size += 1; break; case OP_ANYCHAR_STAR_PEEK_NEXT: case OP_ANYCHAR_ML_STAR_PEEK_NEXT: - p++; num_cache_index++; break; + p++; num++; *table_size += 1; break; case OP_WORD: case OP_NOT_WORD: @@ -332,7 +334,7 @@ static int count_num_cache_index(regex_t* reg) case OP_BACKREF_MULTI: case OP_BACKREF_MULTI_IC: case OP_BACKREF_WITH_LEVEL: - return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; case OP_MEMORY_START: case OP_MEMORY_START_PUSH: @@ -352,45 +354,56 @@ static int count_num_cache_index(regex_t* reg) break; case OP_PUSH: p += SIZE_RELADDR; - num_cache_index++; + num++; + *table_size += 1; break; case OP_POP: break; case OP_PUSH_OR_JUMP_EXACT1: case OP_PUSH_IF_PEEK_NEXT: - p += SIZE_RELADDR + 1; num_cache_index++; break; + p += SIZE_RELADDR + 1; num++; *table_size += 1; break; case OP_REPEAT: case OP_REPEAT_NG: - if (current_repeat != NO_OUTER_REPEAT) { + if (current_mem != -1) { // A nested OP_REPEAT is not yet supported. - return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; } GET_MEMNUM_INC(mem, p); p += SIZE_RELADDR; if (reg->repeat_range[mem].lower == 0) { - num_cache_index++; + num++; + *table_size += 1; } - current_repeat = mem; + reg->repeat_range[mem].base_num = num; + current_mem = mem; + current_mem_num = num; break; case OP_REPEAT_INC: case OP_REPEAT_INC_NG: GET_MEMNUM_INC(mem, p); - if (mem != current_repeat) { + //fprintf(stderr, "OP_REPEAT %d\n", mem); + if (mem != current_mem) { // A lone or invalid OP_REPEAT_INC is found. - return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; } { + int inner_num = num - current_mem_num; OnigRepeatRange *repeat_range = ®->repeat_range[mem]; + repeat_range->inner_num = inner_num; + num -= inner_num; + num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower); + //fprintf(stderr, "lower %d < upper %d\n", repeat_range->lower, repeat_range->upper); if (repeat_range->lower < repeat_range->upper) { - num_cache_index++; + *table_size += 1; } - current_repeat = NO_OUTER_REPEAT; + current_mem = -1; + current_mem_num = 0; } break; case OP_REPEAT_INC_SG: case OP_REPEAT_INC_NG_SG: // TODO: Support nested OP_REPEAT. - return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; case OP_NULL_CHECK_START: case OP_NULL_CHECK_END: case OP_NULL_CHECK_END_MEMST: @@ -409,21 +422,21 @@ static int count_num_cache_index(regex_t* reg) case OP_PUSH_ABSENT_POS: case OP_ABSENT_END: case OP_ABSENT: - return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; case OP_CALL: case OP_RETURN: - return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; case OP_CONDITION: - return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; case OP_STATE_CHECK_PUSH: case OP_STATE_CHECK_PUSH_OR_JUMP: case OP_STATE_CHECK: case OP_STATE_CHECK_ANYCHAR_STAR: case OP_STATE_CHECK_ANYCHAR_ML_STAR: - return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; case OP_SET_OPTION_PUSH: case OP_SET_OPTION: @@ -432,22 +445,21 @@ static int count_num_cache_index(regex_t* reg) } } - return num_cache_index; + return num; } -static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index) +static void init_cache_index_table(regex_t* reg, OnigCacheIndex *table) { - UChar* pbegin; - UChar* p = reg->p; - UChar* pend = p + reg->used; - LengthType len; - MemNumType mem; + UChar* pbegin; + UChar* p = reg->p; + UChar* pend = p + reg->used; + LengthType len; + MemNumType mem; + MemNumType current_mem = -1; + int num = 0; + int current_mem_num = 0; OnigEncoding enc = reg->enc; - int num_cache_point = 0; - MemNumType current_repeat = -1; - int current_repeat_base_point = 0; - while (p < pend) { pbegin = p; switch (*p++) { @@ -500,20 +512,20 @@ static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index) break; case OP_ANYCHAR_STAR: case OP_ANYCHAR_ML_STAR: - cache_index->addr = pbegin; - cache_index->point = num_cache_point - current_repeat_base_point; - cache_index->outer_repeat = current_repeat; - num_cache_point++; - cache_index++; + table->addr = pbegin; + table->num = num - current_mem_num; + table->outer_repeat = current_mem; + num++; + table++; break; case OP_ANYCHAR_STAR_PEEK_NEXT: case OP_ANYCHAR_ML_STAR_PEEK_NEXT: p++; - cache_index->addr = pbegin; - cache_index->point = num_cache_point - current_repeat_base_point; - cache_index->outer_repeat = current_repeat; - num_cache_point++; - cache_index++; + table->addr = pbegin; + table->num = num - current_mem_num; + table->outer_repeat = current_mem; + num++; + table++; break; case OP_WORD: @@ -547,7 +559,7 @@ static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index) case OP_BACKREF_MULTI: case OP_BACKREF_MULTI_IC: case OP_BACKREF_WITH_LEVEL: - return NUM_CACHE_INDEX_FAIL; + return; case OP_MEMORY_START: case OP_MEMORY_START_PUSH: @@ -567,61 +579,59 @@ static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index) break; case OP_PUSH: p += SIZE_RELADDR; - cache_index->addr = pbegin; - cache_index->point = num_cache_point - current_repeat_base_point; - cache_index->outer_repeat = current_repeat; - num_cache_point++; - cache_index++; + table->addr = pbegin; + table->num = num - current_mem_num; + table->outer_repeat = current_mem; + num++; + table++; break; case OP_POP: break; case OP_PUSH_OR_JUMP_EXACT1: case OP_PUSH_IF_PEEK_NEXT: p += SIZE_RELADDR + 1; - cache_index->addr = pbegin; - cache_index->point = num_cache_point - current_repeat_base_point; - cache_index->outer_repeat = current_repeat; - num_cache_point++; - cache_index++; + table->addr = pbegin; + table->num = num - current_mem_num; + table->outer_repeat = current_mem; + num++; + table++; break; case OP_REPEAT: case OP_REPEAT_NG: GET_MEMNUM_INC(mem, p); p += SIZE_RELADDR; if (reg->repeat_range[mem].lower == 0) { - cache_index->addr = pbegin; - cache_index->point = num_cache_point - current_repeat_base_point; - cache_index->outer_repeat = current_repeat; - num_cache_point++; - cache_index++; + table->addr = pbegin; + table->num = num - current_mem_num; + table->outer_repeat = mem; + num++; + table++; } - reg->repeat_range[mem].base_point = num_cache_point; - current_repeat = mem; - current_repeat_base_point = num_cache_point; + current_mem = mem; + current_mem_num = num; break; case OP_REPEAT_INC: case OP_REPEAT_INC_NG: - GET_MEMNUM_INC(mem, p); + GET_MEMNUM_INC(mem, p); { - int inner_point = num_cache_point - current_repeat_base_point; + int inner_num = num - current_mem_num; OnigRepeatRange *repeat_range = ®->repeat_range[mem]; if (repeat_range->lower < repeat_range->upper) { - cache_index->addr = pbegin; - cache_index->point = num_cache_point - current_repeat_base_point; - cache_index->outer_repeat = mem; - cache_index++; + table->addr = pbegin; + table->num = num - current_mem_num; + table->outer_repeat = mem; + table++; } - repeat_range->inner_point = inner_point; - num_cache_point -= inner_point; - num_cache_point += inner_point * repeat_range->lower + (inner_point + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower); - current_repeat = NO_OUTER_REPEAT; - current_repeat_base_point = 0; + num -= inner_num; + num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower); + current_mem = -1; + current_mem_num = 0; } break; case OP_REPEAT_INC_SG: case OP_REPEAT_INC_NG_SG: // TODO: support OP_REPEAT opcodes. - return NUM_CACHE_INDEX_FAIL; + return; case OP_NULL_CHECK_START: case OP_NULL_CHECK_END: case OP_NULL_CHECK_END_MEMST: @@ -640,21 +650,21 @@ static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index) case OP_PUSH_ABSENT_POS: case OP_ABSENT_END: case OP_ABSENT: - return NUM_CACHE_INDEX_FAIL; + return; case OP_CALL: case OP_RETURN: - return NUM_CACHE_INDEX_FAIL; + return; case OP_CONDITION: - return NUM_CACHE_INDEX_FAIL; + return; case OP_STATE_CHECK_PUSH: case OP_STATE_CHECK_PUSH_OR_JUMP: case OP_STATE_CHECK: case OP_STATE_CHECK_ANYCHAR_STAR: case OP_STATE_CHECK_ANYCHAR_ML_STAR: - return NUM_CACHE_INDEX_FAIL; + return; case OP_SET_OPTION_PUSH: case OP_SET_OPTION: @@ -662,8 +672,6 @@ static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index) break; } } - - return num_cache_point; } #endif /* USE_MATCH_CACHE */ @@ -851,16 +859,15 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from) #ifdef USE_CACHE_MATCH_OPT #define MATCH_ARG_INIT_CACHE_MATCH_OPT(msa) do {\ - (msa).enable_cache_opt = 0;\ + (msa).enable_cache_match_opt = 0;\ (msa).num_fail = 0;\ - (msa).num_cache_index = NUM_CACHE_INDEX_UNINIT;\ - (msa).num_cache_point = 0;\ - (msa).cache_index = (OnigCacheIndex *)0;\ - (msa).cache = (uint8_t *)0;\ + (msa).num_cache_opcode = NUM_CACHE_OPCODE_UNINIT;\ + (msa).cache_index_table = (OnigCacheIndex *)0;\ + (msa).match_cache = (uint8_t *)0;\ } while(0) #define MATCH_ARG_FREE_CACHE_MATCH_OPT(msa) do {\ - if ((msa).cache_index) xfree((msa).cache_index);\ - if ((msa).cache) xfree((msa).cache);\ + if ((msa).cache_index_table) xfree((msa).cache_index_table);\ + if ((msa).match_cache) xfree((msa).match_cache);\ } while(0) #else #define MATCH_ARG_INIT_CACHE_MATCH_OPT(msa) @@ -1153,9 +1160,9 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define DO_CACHE_MATCH_OPT(reg,stk,repeat_stk,enable,p,num_cache_table,num_cache_size,table,pos,match_cache) do {\ if (enable) {\ - int point = find_cache_index((reg), (stk), (repeat_stk), (table), (num_cache_table), (p));\ - if (point >= 0) {\ - int key = (num_cache_size) * (int)(pos) + point;\ + int cache_index = find_cache_index_table((reg), (stk), (repeat_stk), (table), (num_cache_table), (p));\ + if (cache_index >= 0) {\ + int key = (num_cache_size) * (int)(pos) + cache_index;\ int index = key >> 3;\ int mask = 1 << (key & 7);\ if ((match_cache)[index] & mask) {\ @@ -1166,7 +1173,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, }\ } while (0) -static int find_cache_index(regex_t* reg, OnigStackType *stk, OnigStackIndex *repeat_stk, OnigCacheIndex* table, int num_cache_table, UChar* p) +static int find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackIndex *repeat_stk, OnigCacheIndex* table, int num_cache_table, UChar* p) { int l = 0, r = num_cache_table - 1, m; OnigCacheIndex* item; @@ -1188,7 +1195,7 @@ static int find_cache_index(regex_t* reg, OnigStackType *stk, OnigStackIndex *re item = &table[m]; if (item->outer_repeat == -1) { - return item->point; + return item->num; } range = ®->repeat_range[item->outer_repeat]; @@ -1197,21 +1204,21 @@ static int find_cache_index(regex_t* reg, OnigStackType *stk, OnigStackIndex *re count = is_inc ? stkp->u.repeat.count - 1 : stkp->u.repeat.count; if (count < range->lower) { - return range->base_point + range->inner_point * count + item->point; + return range->base_num + range->inner_num * count + item->num; } if (range->upper == 0x7fffffff) { - return range->base_point + range->inner_point * range->lower + (is_inc ? 0 : 1) + item->point; + return range->base_num + range->inner_num * range->lower + (is_inc ? 0 : 1) + item->num; } - return range->base_point + range->inner_point * range->lower + (range->inner_point + 1) * (count - range->lower) + item->point; + return range->base_num + range->inner_num * range->lower + (range->inner_num + 1) * (count - range->lower) + item->num; } static void reset_match_cache(regex_t* reg, UChar* pbegin, UChar* pend, int pos, uint8_t* match_cache, OnigCacheIndex *table, int num_cache_size, int num_cache_table) { int l = 0, r = num_cache_table - 1, m1, m2; int is_inc = *pend == OP_REPEAT_INC || *pend == OP_REPEAT_INC_NG; OnigCacheIndex *item1, *item2; - int p1, p2; + int k1, k2; while (l <= r) { m1 = (l + r) / 2; @@ -1234,32 +1241,32 @@ static void reset_match_cache(regex_t* reg, UChar* pbegin, UChar* pend, int pos, item1 = &table[m1]; item2 = &table[m2]; - if (item1->outer_repeat < 0) p1 = item1->point; - else p1 = reg->repeat_range[item1->outer_repeat].base_point + item1->point; + if (item1->outer_repeat < 0) k1 = item1->num; + else k1 = reg->repeat_range[item1->outer_repeat].base_num + item1->num; - if (item2->outer_repeat < 0) p2 = item2->point; + if (item2->outer_repeat < 0) k2 = item2->num; else { OnigRepeatRange *range = ®->repeat_range[item2->outer_repeat]; - if (range->upper == 0x7fffffff) p2 = range->base_point + range->inner_point * range->lower + (is_inc ? 0 : 1) + item2->point; - else p2 = range->base_point + range->inner_point * range->lower + (range->inner_point + 1) * (range->upper - range->lower - (is_inc ? 1 : 0)) + item2->point; + if (range->upper == 0x7fffffff) k2 = range->base_num + range->inner_num * range->lower + (is_inc ? 0 : 1) + item2->num; + else k2 = range->base_num + range->inner_num * range->lower + (range->inner_num + 1) * (range->upper - range->lower - (is_inc ? 1 : 0)) + item2->num; } int base = pos * num_cache_size; - p1 += base; - p2 += base; + k1 += base; + k2 += base; - if ((p1 >> 3) == (p2 >> 3)) { - match_cache[p1 >> 3] &= ((1 << (8 - (p2 & 7) - 1)) - 1 << ((p2 & 7) + 1)) | ((1 << (p1 & 7)) - 1); + if ((k1 >> 3) == (k2 >> 3)) { + match_cache[k1 >> 3] &= ((1 << (8 - (k2 & 7) - 1)) - 1 << ((k2 & 7) + 1)) | ((1 << (k1 & 7)) - 1); } else { - int i = p1 >> 3; - if (p1 & 7) { - match_cache[p1 >> 3] &= (1 << ((p1 & 7) - 1)) - 1; + int i = k1 >> 3; + if (k1 & 7) { + match_cache[k1 >> 3] &= (1 << ((k1 & 7) - 1)) - 1; i++; } - if (i < (p2 >> 3)) { - xmemset(&match_cache[i], 0, (p2 >> 3) - i); - if (p2 & 7) { - match_cache[p2 >> 3] &= ((1 << (8 - (p2 & 7) - 1)) - 1 << ((p2 & 7) + 1)); + if (i < (k2 >> 3)) { + xmemset(&match_cache[i], 0, (k2 >> 3) - i); + if (k2 & 7) { + match_cache[k2 >> 3] &= ((1 << (8 - (k2 & 7) - 1)) - 1 << ((k2 & 7) + 1)); } } } @@ -2754,7 +2761,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR); while (DATA_ENSURE_CHECK1) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); STACK_PUSH_ALT(p, s, sprev, pkeep); n = enclen(encode, s, end); DATA_ENSURE(n); @@ -2767,7 +2774,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR); while (DATA_ENSURE_CHECK1) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); STACK_PUSH_ALT(p, s, sprev, pkeep); n = enclen(encode, s, end); if (n > 1) { @@ -2786,7 +2793,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { if (*p == *s) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); STACK_PUSH_ALT(p + 1, s, sprev, pkeep); } n = enclen(encode, s, end); @@ -2802,7 +2809,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { if (*p == *s) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); STACK_PUSH_ALT(p + 1, s, sprev, pkeep); } n = enclen(encode, s, end); @@ -3389,7 +3396,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST); { int isnull; - int ischanged = 0; // set 1 when a loop is null but memory status is changed. + int ischanged = 0; // set 1 when a loop is empty but memory status is changed. GET_MEMNUM_INC(mem, p); /* mem: null check id */ STACK_NULL_CHECK_MEMST(isnull, ischanged, mem, s, reg); @@ -3402,7 +3409,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto null_check_found; } # ifdef USE_CACHE_MATCH_OPT - if (ischanged && msa->enable_cache_opt) { + if (ischanged && msa->enable_cache_match_opt) { RelAddrType rel; OnigUChar *addr; int mem; @@ -3421,7 +3428,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, default: goto unexpected_bytecode_error; } - reset_match_cache(reg, addr, pbegin, (int)(s - str), msa->cache, msa->cache_index, msa->num_cache_index ,msa->num_cache_point); + reset_match_cache(reg, addr, pbegin, (int)(s - str), msa->match_cache, msa->cache_index_table, msa->num_cache_table ,msa->num_cache_opcode); } # endif } @@ -3466,7 +3473,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_PUSH) MOP_IN(OP_PUSH); GET_RELADDR_INC(addr, p); - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); STACK_PUSH_ALT(p + addr, s, sprev, pkeep); MOP_OUT; JUMP; @@ -3520,7 +3527,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_RELADDR_INC(addr, p); if (*p == *s && DATA_ENSURE_CHECK1) { p++; - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); STACK_PUSH_ALT(p + addr, s, sprev, pkeep); MOP_OUT; JUMP; @@ -3534,7 +3541,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_RELADDR_INC(addr, p); if (*p == *s) { p++; - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); STACK_PUSH_ALT(p + addr, s, sprev, pkeep); MOP_OUT; JUMP; @@ -3553,7 +3560,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_REPEAT(mem, p); if (reg->repeat_range[mem].lower == 0) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); STACK_PUSH_ALT(p + addr, s, sprev, pkeep); } } @@ -3570,7 +3577,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_REPEAT(mem, p); if (reg->repeat_range[mem].lower == 0) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); STACK_PUSH_ALT(p, s, sprev, pkeep); p += addr; } @@ -3590,7 +3597,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { if (*pbegin == OP_REPEAT_INC) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); } STACK_PUSH_ALT(p, s, sprev, pkeep); p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */ @@ -3623,7 +3630,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_REPEAT_INC(si); if (*pbegin == OP_REPEAT_INC_NG) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); } STACK_PUSH_ALT(pcode, s, sprev, pkeep); } @@ -3813,33 +3820,35 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, pkeep = stk->u.state.pkeep; #ifdef USE_CACHE_MATCH_OPT - if (++msa->num_fail >= (int)(end - str) + 1 && msa->num_cache_index == NUM_CACHE_INDEX_UNINIT) { - msa->enable_cache_opt = 1; - if (msa->num_cache_index == NUM_CACHE_INDEX_UNINIT) { - msa->num_cache_index = count_num_cache_index(reg); + if (++msa->num_fail >= (int)(end - str) + 1 && msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { + int table_size = 0; + msa->enable_cache_match_opt = 1; + if (msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { + msa->num_cache_opcode = count_num_cache_opcode(reg, &table_size); } - if (msa->num_cache_index == NUM_CACHE_INDEX_FAIL || msa->num_cache_index == 0) { - msa->enable_cache_opt = 0; + if (msa->num_cache_opcode == NUM_CACHE_OPCODE_FAIL || msa->num_cache_opcode == 0) { + msa->enable_cache_match_opt = 0; goto fail_match_cache_opt; } - if (msa->cache_index == NULL) { - OnigCacheIndex *table = (OnigCacheIndex *)xmalloc(msa->num_cache_index * sizeof(OnigCacheIndex)); + if (msa->cache_index_table == NULL) { + OnigCacheIndex *table = (OnigCacheIndex *)xmalloc(table_size * sizeof(OnigCacheIndex)); if (table == NULL) { - msa->enable_cache_opt = 0; + msa->enable_cache_match_opt = 0; goto fail_match_cache_opt; } - msa->num_cache_point = init_cache_index(reg, table); - msa->cache_index = table; + init_cache_index_table(reg, table); + msa->cache_index_table = table; + msa->num_cache_table = table_size; } // TODO: check arithemetic overflow. - int cache_size8 = msa->num_cache_point * ((int)(end - str) + 1); - int cache_size = (cache_size8 >> 3) + (cache_size8 & 7 ? 1 : 0); - msa->cache = (uint8_t*)xmalloc(cache_size * sizeof(uint8_t)); - if (msa->cache == NULL) { - msa->enable_cache_opt = 0; + int match_cache_size8 = msa->num_cache_opcode * ((int)(end - str) + 1); + int match_cache_size = (match_cache_size8 >> 3) + (match_cache_size8 & 7 ? 1 : 0); + msa->match_cache = (uint8_t*)xmalloc(match_cache_size * sizeof(uint8_t)); + if (msa->match_cache == NULL) { + msa->enable_cache_match_opt = 0; goto fail_match_cache_opt; } - xmemset(msa->cache, 0, cache_size * sizeof(uint8_t)); + xmemset(msa->match_cache, 0, match_cache_size * sizeof(uint8_t)); } fail_match_cache_opt: #endif diff --git a/regint.h b/regint.h index e1e48c09513a1d..12b5d5c70a5544 100644 --- a/regint.h +++ b/regint.h @@ -45,8 +45,8 @@ #define USE_CACHE_MATCH_OPT #ifdef USE_CACHE_MATCH_OPT -# define NUM_CACHE_INDEX_FAIL -1 -# define NUM_CACHE_INDEX_UNINIT -2 +# define NUM_CACHE_OPCODE_FAIL -1 +# define NUM_CACHE_OPCODE_UNINIT -2 #endif #if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \ @@ -874,11 +874,10 @@ typedef struct _OnigStackType { #ifdef USE_CACHE_MATCH_OPT typedef struct { - UChar* addr; /* pointer to corresponding opcode. */ - int point; /* cache point number (in outer repeat if `outer_repeat != -1`) */ - int outer_repeat; /* outer repeat index number */ + UChar *addr; + int num; + int outer_repeat; } OnigCacheIndex; -#define NO_OUTER_REPEAT -1 #endif typedef struct { @@ -904,12 +903,12 @@ typedef struct { uint64_t end_time; #endif #ifdef USE_CACHE_MATCH_OPT - int num_fail; /* counter of failure (backtrack) number for switching cache optimization. */ - int num_cache_point; /* number of cache point in program */ - int num_cache_index; /* size of cache index array */ - int enable_cache_opt; /* whether cache optimization is enabled */ - OnigCacheIndex* cache_index; /* cache index array for computing cache point number */ - uint8_t* cache; /* bit array for cache optimization */ + int num_fail; + int enable_cache_match_opt; + int num_cache_opcode; + int num_cache_table; + OnigCacheIndex *cache_index_table; + uint8_t *match_cache; #endif } OnigMatchArg;