Skip to content

Commit f626a78

Browse files
committed
Avoid string duplication in preg_split()
1 parent e08c0ed commit f626a78

File tree

3 files changed: 25 additions and 21 deletions.

ext/pcre/php_pcre.c

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2177,14 +2177,14 @@ static PHP_FUNCTION(preg_split)
21772177
}
21782178

21792179
pce->refcount++;
2180-
php_pcre_split_impl(pce, ZSTR_VAL(subject), (int)ZSTR_LEN(subject), return_value, (int)limit_val, flags);
2180+
php_pcre_split_impl(pce, subject, return_value, (int)limit_val, flags);
21812181
pce->refcount--;
21822182
}
21832183
/* }}} */
21842184

21852185
/* {{{ php_pcre_split
21862186
*/
2187-
PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
2187+
PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
21882188
zend_long limit_val, zend_long flags)
21892189
{
21902190
pcre_extra *extra = pce->extra;/* Holds results of studying */
@@ -2235,7 +2235,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
22352235
/* Start at the beginning of the string */
22362236
start_offset = 0;
22372237
next_offset = 0;
2238-
last_match = subject;
2238+
last_match = ZSTR_VAL(subject_str);
22392239
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
22402240

22412241
#ifdef HAVE_PCRE_JIT_SUPPORT
@@ -2249,13 +2249,13 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
22492249
#ifdef HAVE_PCRE_JIT_SUPPORT
22502250
if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)
22512251
&& no_utf_check && !g_notempty) {
2252-
count = pcre_jit_exec(pce->re, extra, subject,
2253-
subject_len, start_offset,
2252+
count = pcre_jit_exec(pce->re, extra, ZSTR_VAL(subject_str),
2253+
ZSTR_LEN(subject_str), start_offset,
22542254
no_utf_check|g_notempty, offsets, size_offsets, jit_stack);
22552255
} else
22562256
#endif
2257-
count = pcre_exec(pce->re, extra, subject,
2258-
subject_len, start_offset,
2257+
count = pcre_exec(pce->re, extra, ZSTR_VAL(subject_str),
2258+
ZSTR_LEN(subject_str), start_offset,
22592259
no_utf_check|g_notempty, offsets, size_offsets);
22602260

22612261
/* the string was already proved to be valid UTF-8 */
@@ -2269,14 +2269,14 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
22692269

22702270
/* If something matched */
22712271
if (count > 0 && (offsets[1] - offsets[0] >= 0)) {
2272-
if (!no_empty || &subject[offsets[0]] != last_match) {
2272+
if (!no_empty || &ZSTR_VAL(subject_str)[offsets[0]] != last_match) {
22732273

22742274
if (offset_capture) {
22752275
/* Add (match, offset) pair to the return value */
2276-
add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL, 0);
2276+
add_offset_pair(return_value, last_match, (int)(&ZSTR_VAL(subject_str)[offsets[0]]-last_match), next_offset, NULL, 0);
22772277
} else {
22782278
/* Add the piece to the return value */
2279-
ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
2279+
ZVAL_STRINGL(&tmp, last_match, &ZSTR_VAL(subject_str)[offsets[0]]-last_match);
22802280
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
22812281
}
22822282

@@ -2285,7 +2285,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
22852285
limit_val--;
22862286
}
22872287

2288-
last_match = &subject[offsets[1]];
2288+
last_match = &ZSTR_VAL(subject_str)[offsets[1]];
22892289
next_offset = offsets[1];
22902290

22912291
if (delim_capture) {
@@ -2295,9 +2295,9 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
22952295
/* If we have matched a delimiter */
22962296
if (!no_empty || match_len > 0) {
22972297
if (offset_capture) {
2298-
add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL, 0);
2298+
add_offset_pair(return_value, &ZSTR_VAL(subject_str)[offsets[i<<1]], match_len, offsets[i<<1], NULL, 0);
22992299
} else {
2300-
ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len);
2300+
ZVAL_STRINGL(&tmp, &ZSTR_VAL(subject_str)[offsets[i<<1]], match_len);
23012301
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
23022302
}
23032303
}
@@ -2318,8 +2318,8 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
23182318
this is not necessarily the end. We need to advance
23192319
the start offset, and continue. Fudge the offset values
23202320
to achieve this, unless we're already at the end of the string. */
2321-
if (g_notempty != 0 && start_offset < subject_len) {
2322-
start_offset += calculate_unit_length(pce, subject + start_offset);
2321+
if (g_notempty != 0 && start_offset < ZSTR_LEN(subject_str)) {
2322+
start_offset += calculate_unit_length(pce, ZSTR_VAL(subject_str) + start_offset);
23232323
g_notempty = 0;
23242324
} else {
23252325
break;
@@ -2331,15 +2331,19 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
23312331
}
23322332

23332333

2334-
start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */
2334+
start_offset = (int)(last_match - ZSTR_VAL(subject_str)); /* the offset might have been incremented, but without further successful matches */
23352335

2336-
if (!no_empty || start_offset < subject_len) {
2336+
if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
23372337
if (offset_capture) {
23382338
/* Add the last (match, offset) pair to the return value */
2339-
add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL, 0);
2339+
add_offset_pair(return_value, &ZSTR_VAL(subject_str)[start_offset], ZSTR_LEN(subject_str) - start_offset, start_offset, NULL, 0);
23402340
} else {
23412341
/* Add the last piece to the return value */
2342-
ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match);
2342+
if (last_match == ZSTR_VAL(subject_str)) {
2343+
ZVAL_STR_COPY(&tmp, subject_str);
2344+
} else {
2345+
ZVAL_STRINGL(&tmp, last_match, ZSTR_VAL(subject_str) + ZSTR_LEN(subject_str) - last_match);
2346+
}
23432347
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
23442348
}
23452349
}

ext/pcre/php_pcre.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ PHPAPI void php_pcre_match_impl( pcre_cache_entry *pce, char *subject, int sub
6464
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zend_string *replace_str,
6565
int limit, int *replace_count);
6666

67-
PHPAPI void php_pcre_split_impl( pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
67+
PHPAPI void php_pcre_split_impl( pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
6868
zend_long limit_val, zend_long flags);
6969

7070
PHPAPI void php_pcre_grep_impl( pcre_cache_entry *pce, zval *input, zval *return_value,

ext/spl/spl_iterators.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2079,7 +2079,7 @@ SPL_METHOD(RegexIterator, accept)
20792079
case REGIT_MODE_SPLIT:
20802080
zval_ptr_dtor(&intern->current.data);
20812081
ZVAL_UNDEF(&intern->current.data);
2082-
php_pcre_split_impl(intern->u.regex.pce, ZSTR_VAL(subject), ZSTR_LEN(subject), &intern->current.data, -1, intern->u.regex.preg_flags);
2082+
php_pcre_split_impl(intern->u.regex.pce, subject, &intern->current.data, -1, intern->u.regex.preg_flags);
20832083
count = zend_hash_num_elements(Z_ARRVAL(intern->current.data));
20842084
RETVAL_BOOL(count > 1);
20852085
break;

0 commit comments

Comments
 (0)