Skip to content

Commit

Permalink
Fix segfault caused by use of 'pass' encoding when mbstring converts …
Browse files Browse the repository at this point in the history
…multipart form POST data

When mbstring.encoding_translation=1, and PHP receives an (RFC1867)
form-based file upload, and the Content-Disposition HTTP header contains
a filename for the uploaded file, PHP will internally invoke mbstring
code to 1) try to auto-detect the text encoding of the filename, and if
that succeeds, 2) convert the filename to internal text encoding.

In such cases, the candidate text encodings which are considered during
"auto-detection" are those listed in the INI parameter
mbstring.http_input. Further, mbstring.http_input is one of the few
contexts where mbstring allows the magic string "pass" to appear in
place of an actual text encoding name.

Before mbstring's encoding auto-detection function was reimplemented,
the old implementation would never return "pass", even if "pass" was the
only candidate it was given to choose from. It is not clear if this was
intended by the original developers or not. This behavior was the result
of some rather subtle details of the implementation.

After mbstring's auto-detection function was reimplemented, if the new
implementation was given only one candidate to choose, and it was not
running in 'strict' mode, it would always return that candidate, even
if the candidate was the non-encoding "pass".

The upshot of all of this: Previously, if
mbstring.encoding_translation=1 and mbstring.http_input=pass, encoding
conversion of RFC1867 filenames would never be attempted. But after
the reimplementation, encoding 'conversion' would occur (uselessly).

Further, in December 2022, I reimplemented the relevant bit of
encoding conversion code. When doing this, I never bothered to
implement encoding/decoding routines for the non-encoding "pass",
because I thought that they would never be used. Well, in the one case
described above, those routines *would* have been used, had they
actually existed. Because they didn't exist, we get a nice NULL pointer
dereference and ensuing segfault instead.

Instead of 'fixing' this by adding encoding/decoding routines for the
non-encoding "pass", I have modified the function which the RFC1867
form-handling code invokes to auto-detect input encoding. This function
will never return "pass" now, just like the previous implementation.

Thanks to the GitHub user 'tstangner' for reporting this bug.
  • Loading branch information
alexdowad committed Jan 24, 2024
1 parent ea8d143 commit 67051eb
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 13 deletions.
32 changes: 19 additions & 13 deletions ext/mbstring/mbstring.c
Original file line number Diff line number Diff line change
Expand Up @@ -291,8 +291,7 @@ static size_t count_commas(const char *p, const char *end) {
* Emits a ValueError in function context and a warning in INI context, in INI context arg_num must be 0.
*/
static zend_result php_mb_parse_encoding_list(const char *value, size_t value_length,
const mbfl_encoding ***return_list, size_t *return_size, bool persistent, uint32_t arg_num,
bool allow_pass_encoding)
const mbfl_encoding ***return_list, size_t *return_size, bool persistent, uint32_t arg_num)
{
if (value == NULL || value_length == 0) {
*return_list = NULL;
Expand Down Expand Up @@ -345,8 +344,7 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
}
}
} else {
const mbfl_encoding *encoding =
allow_pass_encoding ? php_mb_get_encoding_or_pass(p1) : mbfl_name2encoding(p1);
const mbfl_encoding *encoding = mbfl_name2encoding(p1);
if (!encoding) {
/* Called from an INI setting modification */
if (arg_num == 0) {
Expand Down Expand Up @@ -452,8 +450,12 @@ static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *a
list = (const zend_encoding**)MBSTRG(current_detect_order_list);
list_size = MBSTRG(current_detect_order_list_size);
}

return (const zend_encoding*)mb_guess_encoding((unsigned char*)arg_string, arg_length, (const mbfl_encoding **)list, list_size, false, false);
if (list_size == 1 && ((mbfl_encoding*)*list) == &mbfl_encoding_pass) {
/* Emulate behavior of previous implementation; it would never return "pass"
* from an encoding auto-detection operation */
return NULL;
}
return (const zend_encoding*)mb_guess_encoding((unsigned char*)arg_string, arg_length, (const mbfl_encoding**)list, list_size, false, false);
}

static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
Expand All @@ -474,7 +476,7 @@ static zend_result php_mb_zend_encoding_list_parser(const char *encoding_list, s
return php_mb_parse_encoding_list(
encoding_list, encoding_list_len,
(const mbfl_encoding ***)return_list, return_size,
persistent, /* arg_num */ 0, /* allow_pass_encoding */ 0);
persistent, /* arg_num */ 0);
}

static const zend_encoding *php_mb_zend_internal_encoding_getter(void)
Expand Down Expand Up @@ -712,7 +714,7 @@ static PHP_INI_MH(OnUpdate_mbstring_detect_order)
return SUCCESS;
}

if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, /* persistent */ 1, /* arg_num */ 0, /* allow_pass_encoding */ 0) || size == 0) {
if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, /* persistent */ 1, /* arg_num */ 0) || size == 0) {
return FAILURE;
}

Expand All @@ -728,7 +730,11 @@ static PHP_INI_MH(OnUpdate_mbstring_detect_order)
static int _php_mb_ini_mbstring_http_input_set(const char *new_value, size_t new_value_length) {
const mbfl_encoding **list;
size_t size;
if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, /* persistent */ 1, /* arg_num */ 0, /* allow_pass_encoding */ 1) || size == 0) {
if (new_value_length == 4 && strncmp(new_value, "pass", 4) == 0) {
list = (const mbfl_encoding**)pecalloc(1, sizeof(mbfl_encoding*), 1);
*list = &mbfl_encoding_pass;
size = 1;
} else if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, /* persistent */ 1, /* arg_num */ 0) || size == 0) {
return FAILURE;
}
if (MBSTRG(http_input_list)) {
Expand Down Expand Up @@ -1383,7 +1389,7 @@ PHP_FUNCTION(mb_detect_order)
RETURN_THROWS();
}
} else {
if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(order_str), ZSTR_LEN(order_str), &list, &size, /* persistent */ 0, /* arg_num */ 1, /* allow_pass_encoding */ 0)) {
if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(order_str), ZSTR_LEN(order_str), &list, &size, /* persistent */ 0, /* arg_num */ 1)) {
RETURN_THROWS();
}
}
Expand Down Expand Up @@ -2799,7 +2805,7 @@ PHP_FUNCTION(mb_convert_encoding)
} else if (from_encodings_str) {
if (php_mb_parse_encoding_list(ZSTR_VAL(from_encodings_str), ZSTR_LEN(from_encodings_str),
&from_encodings, &num_from_encodings,
/* persistent */ 0, /* arg_num */ 3, /* allow_pass_encoding */ 0) == FAILURE) {
/* persistent */ 0, /* arg_num */ 3) == FAILURE) {
RETURN_THROWS();
}
free_from_encodings = true;
Expand Down Expand Up @@ -3163,7 +3169,7 @@ PHP_FUNCTION(mb_detect_encoding)
RETURN_THROWS();
}
} else if (encoding_str) {
if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(encoding_str), ZSTR_LEN(encoding_str), &elist, &size, /* persistent */ 0, /* arg_num */ 2, /* allow_pass_encoding */ 0)) {
if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(encoding_str), ZSTR_LEN(encoding_str), &elist, &size, /* persistent */ 0, /* arg_num */ 2)) {
RETURN_THROWS();
}
} else {
Expand Down Expand Up @@ -3564,7 +3570,7 @@ PHP_FUNCTION(mb_convert_variables)
RETURN_THROWS();
}
} else {
if (php_mb_parse_encoding_list(ZSTR_VAL(from_enc_str), ZSTR_LEN(from_enc_str), &elist, &elistsz, /* persistent */ 0, /* arg_num */ 2, /* allow_pass_encoding */ 0) == FAILURE) {
if (php_mb_parse_encoding_list(ZSTR_VAL(from_enc_str), ZSTR_LEN(from_enc_str), &elist, &elistsz, /* persistent */ 0, /* arg_num */ 2) == FAILURE) {
RETURN_THROWS();
}
}
Expand Down
24 changes: 24 additions & 0 deletions ext/mbstring/tests/gh13123.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
--TEST--
Segfault in mb_fast_convert() when mbstring.encoding_translation is enabled (GH-13123)
--EXTENSIONS--
mbstring
--POST_RAW--
Content-Type: multipart/form-data, boundary=Blah

--Blah
Content-Disposition: form-data; name="file"; filename="file.txt"
Content-Type: text/plain

foo
--Blah

--INI--
error_reporting=E_ALL&~E_DEPRECATED
mbstring.encoding_translation=On
mbstring.http_input=pass
--FILE--
<?php
print "Done!\n";
?>
--EXPECT--
Done!

0 comments on commit 67051eb

Please sign in to comment.