Skip to content

Commit

Permalink
Fix look-around like operators and cclass
Browse files: browse the repository at this point in the history
  • Loading branch information
makenowjust authored and mame committed Nov 9, 2022
1 parent f07dea1 commit 8c9e4d3
Showing 1 changed file with 25 additions and 43 deletions.
68 changes: 25 additions & 43 deletions regexec.c
Expand Up @@ -285,9 +285,13 @@ int count_num_cache_opcode(regex_t* reg)
p += SIZE_BITSET; break;
case OP_CCLASS_MB:
case OP_CCLASS_MB_NOT:
GET_LENGTH_INC(len, p); p += len; break;
case OP_CCLASS_MIX:
case OP_CCLASS_MIX_NOT:
GET_LENGTH_INC(len, p); p += len; break;
p += SIZE_BITSET;
GET_LENGTH_INC(len, p);
p += len;
break;

case OP_ANYCHAR:
case OP_ANYCHAR_ML:
Expand Down Expand Up @@ -373,33 +377,17 @@ int count_num_cache_opcode(regex_t* reg)

case OP_PUSH_POS:
case OP_POP_POS:
break;
case OP_PUSH_POS_NOT:
p += SIZE_RELADDR; break;
case OP_FAIL_POS:
break;
case OP_PUSH_STOP_BT:
case OP_POP_STOP_BT:
return NUM_CACHE_OPCODE_FAIL;
case OP_LOOK_BEHIND:
/* GET_LENGTH_INC(len, p); break; */
return NUM_CACHE_OPCODE_FAIL;
case OP_PUSH_LOOK_BEHIND_NOT:
// Since the optimization assumes the string offset never moves backward,
// we cannot optimize look-behind opcodes.
/*
GET_RELADDR_INC(addr, p);
GET_LENGTH_INC(len, p);
break;
*/
return NUM_CACHE_OPCODE_FAIL;
case OP_FAIL_LOOK_BEHIND_NOT:
return NUM_CACHE_OPCODE_FAIL;
case OP_PUSH_ABSENT_POS:
case OP_ABSENT_END:
break;
case OP_ABSENT:
p += SIZE_RELADDR; break;
return NUM_CACHE_OPCODE_FAIL;

case OP_CALL:
case OP_RETURN:
Expand Down Expand Up @@ -427,6 +415,7 @@ int count_num_cache_opcode(regex_t* reg)

void init_cache_index_table(regex_t* reg, UChar **table)
{
UChar** tstart = table;
UChar* pbegin;
UChar* p = reg->p;
UChar* pend = p + reg->used;
Expand Down Expand Up @@ -472,11 +461,14 @@ void init_cache_index_table(regex_t* reg, UChar **table)
case OP_CCLASS:
case OP_CCLASS_NOT:
p += SIZE_BITSET; break;
case OP_CCLASS_MB:
case OP_CCLASS_MB_NOT:
GET_LENGTH_INC(len, p); p += len; break;
case OP_CCLASS_MIX:
case OP_CCLASS_MIX_NOT:
GET_LENGTH_INC(len, p); p += len; break;
p += SIZE_BITSET;
GET_LENGTH_INC(len, p);
p += len;
break;

case OP_ANYCHAR:
case OP_ANYCHAR_ML:
Expand Down Expand Up @@ -564,33 +556,17 @@ void init_cache_index_table(regex_t* reg, UChar **table)

case OP_PUSH_POS:
case OP_POP_POS:
break;
case OP_PUSH_POS_NOT:
p += SIZE_RELADDR; break;
case OP_FAIL_POS:
break;
case OP_PUSH_STOP_BT:
case OP_POP_STOP_BT:
return;
case OP_LOOK_BEHIND:
/* GET_LENGTH_INC(len, p); break; */
return;
case OP_PUSH_LOOK_BEHIND_NOT:
// Since the optimization assumes the string offset never moves backward,
// we cannot optimize look-behind opcodes.
/*
GET_RELADDR_INC(addr, p);
GET_LENGTH_INC(len, p);
break;
*/
return;
case OP_FAIL_LOOK_BEHIND_NOT:
return;
case OP_PUSH_ABSENT_POS:
case OP_ABSENT_END:
break;
case OP_ABSENT:
p += SIZE_RELADDR; break;
return;

case OP_CALL:
case OP_RETURN:
Expand Down Expand Up @@ -1096,13 +1072,19 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
#define DO_CACHE_MATCH_OPT(enable,p,num_cache_table,table,pos,match_cache) do {\
if (enable) {\
int cache_index = find_cache_index_table((table), (num_cache_table), (p));\
int key = (num_cache_table) * (pos) + cache_index;\
int index = key >> 3;\
int mask = 1 << (key & 7);\
if ((match_cache)[index] & mask) {\
goto fail;\
if (cache_index >= 0) {\
int key = (num_cache_table) * (pos) + cache_index;\
int index = key >> 3;\
int mask = 1 << (key & 7);\
if ((match_cache)[index] & mask) {\
/*fprintf(stderr, "Use cache (pos: %d, p: %p, pc: %d, cache index: %d, key: %d, index: %d, mask: %d)\n", pos, p, (int)(p - pstart), cache_index, key, index, mask);*/\
goto fail;\
}\
/*fprintf(stderr, "Add cache (pos: %d, p: %p, pc: %d, cache index: %d, key: %d, index: %d, mask: %d)\n", pos, p, (int)(p - pstart), cache_index, key, index, mask);*/\
(match_cache)[index] |= mask;\
} else {\
/*fprintf(stderr, "Miss cache (pos: %d, p: %p, pc: %d, cache index: %d)\n", pos, p, (int)(p - pstart), cache_index);*/\
}\
(match_cache)[index] |= mask;\
}\
} while (0)

Expand Down

0 comments on commit 8c9e4d3

Please sign in to comment.