Skip to content

Commit

Permalink
[fix #68] fix the Regexp to recognize the octal literals
Browse files Browse the repository at this point in the history
  • Loading branch information
Watson1978 committed May 16, 2012
1 parent 1284e1d commit 8bfb5a0
Showing 1 changed file with 40 additions and 1 deletion.
41 changes: 40 additions & 1 deletion re.c
Expand Up @@ -90,6 +90,25 @@ regexp_finalize_imp(void *rcv, SEL sel)
}
}

// Report whether `chars` starts with an octal literal.
//
// Scans at most `length` code units from the start of `chars` and counts the
// leading run of digits (as classified by rb_isdigit). The scan stops at the
// first non-digit or once `length` units have been examined.
//
// Returns true only if that leading run is at least two digits long and
// every digit in it is in the range '0'..'7'. Returns false as soon as an
// '8' or '9' is found in the run, and false for an empty or one-digit run
// (including when `length` is zero or negative).
static bool
is_octal_literal(UChar *chars, long length)
{
    // Use the same type as `length` so the counter cannot overflow before
    // reaching the bound.
    long ndigits = 0;

    while (ndigits < length) {
        UChar c = chars[ndigits];
        if (!rb_isdigit(c)) {
            // End of the leading digit run.
            break;
        }
        if (c >= '8') {
            // '8' and '9' are not octal digits.
            return false;
        }
        ndigits++;
    }

    // A single digit is not treated as an octal literal.
    return ndigits >= 2;
}

// Work around ICU limitations.
static void
sanitize_regexp_string(UChar **chars_p, long *chars_len_p)
Expand All @@ -105,6 +124,12 @@ sanitize_regexp_string(UChar **chars_p, long *chars_len_p)
} \
while (0)

// Grow `buffer` so that it can hold (chars_len + expand_size) UChar elements.
//
// The macro reads a variable named `chars_len` that must be in scope at the
// expansion site, and assigns the pointer returned by xrealloc back to
// `buffer`. It does not update `chars_len`; the caller does that after
// filling in the new elements.
// NOTE(review): the result of xrealloc is used unchecked — presumably xrealloc
// never returns NULL; confirm against its definition.
#define expand_buffer(buffer, expand_size) \
    do { \
        (buffer) = (UChar *)xrealloc((buffer), \
                sizeof(UChar) * (chars_len + (expand_size))); \
    } \
    while (0)

// Replace all occurrences of [[:word:]] with \w.
UChar word_chars[10] = {'[', '[', ':', 'w', 'o', 'r', 'd', ':', ']', ']'};
size_t pos = 0;
Expand Down Expand Up @@ -143,7 +168,20 @@ sanitize_regexp_string(UChar **chars_p, long *chars_len_p)
break;
}
UChar c = chars[i];
if (c >= '0' && c <= '9') {
if (rb_isdigit(c)) {
if (is_octal_literal(&chars[i], chars_len)) {
// Handling for octal literals.
if (c > '0') {
// ICU needs octal literals to be written in the \0ooo format.
expand_buffer(chars, 1);
memmove(&chars[i + 1], &chars[i],
sizeof(UChar) * (chars_len - i));
chars[i] = '0';
chars_len++;
}
break;
}

assert(n < 10);
buf[n++] = (char)c;
}
Expand All @@ -169,6 +207,7 @@ sanitize_regexp_string(UChar **chars_p, long *chars_len_p)
}

#undef copy_if_needed
#undef expand_buffer

#if 0
printf("out:\n");
Expand Down

0 comments on commit 8bfb5a0

Please sign in to comment.