Skip to content

Commit

Permalink
Fix legacy text conversion filter for 'HTML-ENTITIES'
Browse files Browse the repository at this point in the history
Because this routine used a signed char buffer to hold the bytes
in a (possible) HTML entity, any bytes with the MSB set would
be sign-extended when converting to int; for example, 0x86 would
become 0xFFFFFF86 (or -122).

Codepoints with huge values, like 0xFFFFFF86, are not valid and
if any were passed to the output filter, it would treat them
as errors and emit error markers.
  • Loading branch information
alexdowad committed Aug 16, 2022
1 parent d9269be commit d617fca
Showing 1 changed file with 3 additions and 4 deletions.
7 changes: 3 additions & 4 deletions ext/mbstring/libmbfl/filters/mbfilter_htmlent.c
Expand Up @@ -180,7 +180,7 @@ int mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter)
int pos;
unsigned int ent = 0;
mbfl_html_entity_entry *entity;
char *buffer = (char*)filter->opaque;
unsigned char *buffer = (unsigned char*)filter->opaque;

if (!filter->status) {
if (c == '&' ) {
Expand All @@ -196,7 +196,7 @@ int mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter)
if (filter->status > 3) {
/* numeric entity */
for (pos=3; pos<filter->status; pos++) {
int v = buffer[pos];
int v = buffer[pos];
if (v >= '0' && v <= '9') {
v = v - '0';
} else if (v >= 'A' && v <= 'F') {
Expand Down Expand Up @@ -242,13 +242,12 @@ int mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter)
CK((*filter->output_function)(c, filter->data));
}
filter->status = 0;
/*php_error_docref("ref.mbstring", E_NOTICE, "mbstring decoded '%s'=%d", buffer, ent);*/
} else {
/* named entity */
buffer[filter->status] = 0;
entity = (mbfl_html_entity_entry *)mbfl_html_entity_list;
while (entity->name) {
if (!strcmp(buffer+1, entity->name)) {
if (!strcmp((const char*)buffer+1, entity->name)) {
ent = entity->code;
break;
}
Expand Down

0 comments on commit d617fca

Please sign in to comment.