Skip to content

Commit

Permalink
/(?i)\u0149\u0149/ =~ "\u0149\u0149" doesn't match (Issue #40)
Browse files Browse the repository at this point in the history
'\u0149' is case-folded into the two-character sequence '\u02bc' + 'n', which
was compiled into two opcodes: 'exactn-ic:\xca\xbc' + 'exact1-ic:n'.  Because
the folded form of one character was divided between two opcodes, it could not
match the single character '\u0149'.
Fix: merge a series of 'exactn-ic' and 'exact1-ic' opcodes into a single 'exactn-ic' opcode.
(cherry picked from commit 7b61f4b)

Conflicts:

	regcomp.c
  • Loading branch information
k-takata committed Jul 31, 2014
1 parent 4c040b1 commit 5da0f45
Showing 1 changed file with 24 additions and 23 deletions.
47 changes: 24 additions & 23 deletions regcomp.c
Original file line number Diff line number Diff line change
Expand Up @@ -312,9 +312,10 @@ static int compile_tree(Node* node, regex_t* reg);
(op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)

static int
select_str_opcode(int mb_len, int str_len, int ignore_case)
select_str_opcode(int mb_len, int byte_len, int ignore_case)
{
int op;
OnigDistance str_len = (byte_len + mb_len - 1) / mb_len;

if (ignore_case) {
switch (str_len) {
Expand Down Expand Up @@ -416,48 +417,48 @@ compile_tree_n_times(Node* node, int n, regex_t* reg)
}

static int
add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int byte_len,
regex_t* reg ARG_UNUSED, int ignore_case)
{
int len;
int op = select_str_opcode(mb_len, str_len, ignore_case);
int op = select_str_opcode(mb_len, byte_len, ignore_case);

len = SIZE_OPCODE;

if (op == OP_EXACTMBN) len += SIZE_LENGTH;
if (IS_NEED_STR_LEN_OP_EXACT(op))
len += SIZE_LENGTH;

len += mb_len * str_len;
len += byte_len;
return len;
}

static int
add_compile_string(UChar* s, int mb_len, int str_len,
add_compile_string(UChar* s, int mb_len, int byte_len,
regex_t* reg, int ignore_case)
{
int op = select_str_opcode(mb_len, str_len, ignore_case);
int op = select_str_opcode(mb_len, byte_len, ignore_case);
add_opcode(reg, op);

if (op == OP_EXACTMBN)
add_length(reg, mb_len);

if (IS_NEED_STR_LEN_OP_EXACT(op)) {
if (op == OP_EXACTN_IC)
add_length(reg, mb_len * str_len);
add_length(reg, byte_len);
else
add_length(reg, str_len);
add_length(reg, byte_len / mb_len);
}

add_bytes(reg, s, mb_len * str_len);
add_bytes(reg, s, byte_len);
return 0;
}


static int
compile_length_string_node(Node* node, regex_t* reg)
{
int rlen, r, len, prev_len, slen, ambig;
int rlen, r, len, prev_len, blen, ambig;
OnigEncoding enc = reg->enc;
UChar *p, *prev;
StrNode* sn;
Expand All @@ -471,24 +472,24 @@ compile_length_string_node(Node* node, regex_t* reg)
p = prev = sn->s;
prev_len = enclen(enc, p);
p += prev_len;
slen = 1;
blen = prev_len;
rlen = 0;

for (; p < sn->end; ) {
len = enclen(enc, p);
if (len == prev_len) {
slen++;
if (len == prev_len || ambig) {
blen += len;
}
else {
r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
rlen += r;
prev = p;
slen = 1;
blen = len;
prev_len = len;
}
p += len;
}
r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
rlen += r;
return rlen;
}
Expand All @@ -505,7 +506,7 @@ compile_length_string_raw_node(StrNode* sn, regex_t* reg)
static int
compile_string_node(Node* node, regex_t* reg)
{
int r, len, prev_len, slen, ambig;
int r, len, prev_len, blen, ambig;
OnigEncoding enc = reg->enc;
UChar *p, *prev, *end;
StrNode* sn;
Expand All @@ -520,25 +521,25 @@ compile_string_node(Node* node, regex_t* reg)
p = prev = sn->s;
prev_len = enclen(enc, p);
p += prev_len;
slen = 1;
blen = prev_len;

for (; p < end; ) {
len = enclen(enc, p);
if (len == prev_len) {
slen++;
if (len == prev_len || ambig) {
blen += len;
}
else {
r = add_compile_string(prev, prev_len, slen, reg, ambig);
r = add_compile_string(prev, prev_len, blen, reg, ambig);
if (r) return r;

prev = p;
slen = 1;
blen = len;
prev_len = len;
}

p += len;
}
return add_compile_string(prev, prev_len, slen, reg, ambig);
return add_compile_string(prev, prev_len, blen, reg, ambig);
}

static int
Expand Down

0 comments on commit 5da0f45

Please sign in to comment.