Skip to content

Commit

Permalink
mb_split() can now handle empty matches like preg_split() does.
Browse files Browse the repository at this point in the history
  • Loading branch information
moriyoshi committed Feb 10, 2013
1 parent 92a7924 commit 0ea83ff
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 40 deletions.
3 changes: 3 additions & 0 deletions NEWS
Expand Up @@ -5,6 +5,9 @@ PHP NEWS
- CLI server:
. Fixed bug #64128 (built-in web server is broken on ppc64). (Remi)

- Mbstring:
. mb_split() can now handle empty matches like preg_split() does. (Moriyoshi)

?? ??? 2012, PHP 5.4.12

- Core:
Expand Down
45 changes: 22 additions & 23 deletions ext/mbstring/php_mbregex.c
Expand Up @@ -1055,7 +1055,7 @@ PHP_FUNCTION(mb_split)
php_mb_regex_t *re;
OnigRegion *regs = NULL;
char *string;
OnigUChar *pos;
OnigUChar *pos, *chunk_pos;
int string_len;

int n, err;
Expand All @@ -1065,8 +1065,8 @@ PHP_FUNCTION(mb_split)
RETURN_FALSE;
}

if (count == 0) {
count = 1;
if (count > 0) {
count--;
}

/* create regex pattern buffer */
Expand All @@ -1076,31 +1076,30 @@ PHP_FUNCTION(mb_split)

array_init(return_value);

pos = (OnigUChar *)string;
chunk_pos = pos = (OnigUChar *)string;
err = 0;
regs = onig_region_new();
/* churn through str, generating array entries as we go */
while ((--count != 0) &&
(err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0)) >= 0) {
if (regs->beg[0] == regs->end[0]) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
while (count != 0 && (pos - (OnigUChar *)string) < string_len) {
int beg, end;
err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0);
if (err < 0) {
break;
}

beg = regs->beg[0], end = regs->end[0];
/* add it to the array */
if (regs->beg[0] < string_len && regs->beg[0] >= (pos - (OnigUChar *)string)) {
add_next_index_stringl(return_value, (char *)pos, ((OnigUChar *)(string + regs->beg[0]) - pos), 1);
if ((pos - (OnigUChar *)string) < end) {
if (beg < string_len && beg >= (chunk_pos - (OnigUChar *)string)) {
add_next_index_stringl(return_value, (char *)chunk_pos, ((OnigUChar *)(string + beg) - chunk_pos), 1);
--count;
} else {
err = -2;
break;
}
/* point at our new starting point */
chunk_pos = pos = (OnigUChar *)string + end;
} else {
err = -2;
break;
}
/* point at our new starting point */
n = regs->end[0];
if ((pos - (OnigUChar *)string) < n) {
pos = (OnigUChar *)string + n;
}
if (count < 0) {
count = 0;
pos++;
}
onig_region_free(regs, 0);
}
Expand All @@ -1117,9 +1116,9 @@ PHP_FUNCTION(mb_split)
}

/* otherwise we just have one last element to add to the array */
n = ((OnigUChar *)(string + string_len) - pos);
n = ((OnigUChar *)(string + string_len) - chunk_pos);
if (n > 0) {
add_next_index_stringl(return_value, (char *)pos, n, 1);
add_next_index_stringl(return_value, (char *)chunk_pos, n, 1);
} else {
add_next_index_stringl(return_value, "", 0, 1);
}
Expand Down
23 changes: 23 additions & 0 deletions ext/mbstring/tests/mb_split_empty_match.phpt
@@ -0,0 +1,23 @@
--TEST--
mb_split() empty match
--
--SKIPIF--
<?php
extension_loaded('mbstring') or die('skip');
function_exists('mb_split') or die("skip mb_split() is not available in this build");
?>
--FILE--
<?php
mb_regex_set_options('m');
var_dump(mb_split('^', "a\nb\nc"));
--EXPECT--
array(3) {
[0]=>
string(2) "a
"
[1]=>
string(2) "b
"
[2]=>
string(1) "c"
}
18 changes: 1 addition & 17 deletions ext/mbstring/tests/mb_split_variation1.phpt
Expand Up @@ -156,16 +156,12 @@ array(1) {
}

-- Iteration 10 --

Warning: mb_split(): Empty regular expression in %s on line %d
array(1) {
[0]=>
string(13) "a b c d e f g"
}

-- Iteration 11 --

Warning: mb_split(): Empty regular expression in %s on line %d
array(1) {
[0]=>
string(13) "a b c d e f g"
Expand All @@ -178,8 +174,6 @@ array(1) {
}

-- Iteration 13 --

Warning: mb_split(): Empty regular expression in %s on line %d
array(1) {
[0]=>
string(13) "a b c d e f g"
Expand All @@ -192,24 +186,18 @@ array(1) {
}

-- Iteration 15 --

Warning: mb_split(): Empty regular expression in %s on line %d
array(1) {
[0]=>
string(13) "a b c d e f g"
}

-- Iteration 16 --

Warning: mb_split(): Empty regular expression in %s on line %d
array(1) {
[0]=>
string(13) "a b c d e f g"
}

-- Iteration 17 --

Warning: mb_split(): Empty regular expression in %s on line %d
array(1) {
[0]=>
string(13) "a b c d e f g"
Expand Down Expand Up @@ -240,16 +228,12 @@ array(1) {
}

-- Iteration 22 --

Warning: mb_split(): Empty regular expression in %s on line %d
array(1) {
[0]=>
string(13) "a b c d e f g"
}

-- Iteration 23 --

Warning: mb_split(): Empty regular expression in %s on line %d
array(1) {
[0]=>
string(13) "a b c d e f g"
Expand All @@ -259,4 +243,4 @@ array(1) {

Warning: mb_split() expects parameter 1 to be string, resource given in %s on line %d
bool(false)
Done
Done

0 comments on commit 0ea83ff

Please sign in to comment.