Skip to content

Commit

Permalink
Fixed multibyte related issues
Browse files Browse the repository at this point in the history
  • Loading branch information
dstogov committed Mar 14, 2011
1 parent 0be5ca5 commit bbc879b
Show file tree
Hide file tree
Showing 11 changed files with 42 additions and 41 deletions.
2 changes: 1 addition & 1 deletion Zend/tests/multibyte/multibyte_encoding_002.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ if (!extension_loaded("mbstring")) {
?>
--INI--
zend.multibyte=1
-zend.internal_encoding=iso-8859-1
+mbstring.internal_encoding=iso-8859-1
--FILE--
<?php
print "Hello World\n";
Expand Down
4 changes: 1 addition & 3 deletions Zend/zend_language_scanner.l
Original file line number Diff line number Diff line change
Expand Up @@ -346,9 +346,7 @@ static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D)

if (script_encoding) {
/* remove BOM */
-script = (unsigned char*)emalloc(LANG_SCNG(script_org_size)+1-bom_size);
-memcpy(script, LANG_SCNG(script_org)+bom_size, LANG_SCNG(script_org_size)+1-bom_size);
-LANG_SCNG(script_org) = script;
+LANG_SCNG(script_org) += bom_size;
LANG_SCNG(script_org_size) -= bom_size;

return script_encoding;
Expand Down
30 changes: 15 additions & 15 deletions ext/exif/exif.c
Original file line number Diff line number Diff line change
Expand Up @@ -2664,13 +2664,13 @@ static int exif_process_user_comment(image_info_type *ImageInfo, char **pszInfoP
decode = ImageInfo->decode_unicode_le;
}
if (zend_multibyte_encoding_converter(
-pszInfoPtr,
+(unsigned char**)pszInfoPtr,
&len,
-szValuePtr,
+(unsigned char*)szValuePtr,
ByteCount,
-ImageInfo->encode_unicode,
-decode
-TSRMLS_CC) != 0) {
+zend_multibyte_fetch_encoding(ImageInfo->encode_unicode TSRMLS_CC),
+zend_multibyte_fetch_encoding(decode TSRMLS_CC)
+TSRMLS_CC) < 0) {
len = exif_process_string_raw(pszInfoPtr, szValuePtr, ByteCount);
}
return len;
Expand All @@ -2684,13 +2684,13 @@ static int exif_process_user_comment(image_info_type *ImageInfo, char **pszInfoP
szValuePtr = szValuePtr+8;
ByteCount -= 8;
if (zend_multibyte_encoding_converter(
-pszInfoPtr,
+(unsigned char**)pszInfoPtr,
&len,
-szValuePtr,
+(unsigned char*)szValuePtr,
ByteCount,
-ImageInfo->encode_jis,
-ImageInfo->motorola_intel ? ImageInfo->decode_jis_be : ImageInfo->decode_jis_le
-TSRMLS_CC) != 0) {
+zend_multibyte_fetch_encoding(ImageInfo->encode_jis TSRMLS_CC),
+zend_multibyte_fetch_encoding(ImageInfo->motorola_intel ? ImageInfo->decode_jis_be : ImageInfo->decode_jis_le TSRMLS_CC)
+TSRMLS_CC) < 0) {
len = exif_process_string_raw(pszInfoPtr, szValuePtr, ByteCount);
}
return len;
Expand Down Expand Up @@ -2723,13 +2723,13 @@ static int exif_process_unicode(image_info_type *ImageInfo, xp_field_type *xp_fi

/* Copy the comment */
if (zend_multibyte_encoding_converter(
-&xp_field->value,
+(unsigned char**)&xp_field->value,
&xp_field->size,
-szValuePtr,
+(unsigned char*)szValuePtr,
ByteCount,
-ImageInfo->encode_unicode,
-ImageInfo->motorola_intel ? ImageInfo->decode_unicode_be : ImageInfo->decode_unicode_le
-TSRMLS_CC) != 0) {
+zend_multibyte_fetch_encoding(ImageInfo->encode_unicode TSRMLS_CC),
+zend_multibyte_fetch_encoding(ImageInfo->motorola_intel ? ImageInfo->decode_unicode_be : ImageInfo->decode_unicode_le TSRMLS_CC)
+TSRMLS_CC) < 0) {
xp_field->size = exif_process_string_raw(&xp_field->value, szValuePtr, ByteCount);
}
return xp_field->size;
Expand Down
2 changes: 1 addition & 1 deletion ext/mbstring/libmbfl/filters/mbfilter_ascii.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

static int mbfl_filt_ident_ascii(int c, mbfl_identify_filter *filter);

-static const char *mbfl_encoding_ascii_aliases[] = {"ANSI_X3.4-1968", "iso-ir-6", "ANSI_X3.4-1986", "ISO_646.irv:1991", "US-ASCII", "ISO646-US", "us", "IBM367", "cp367", "csASCII", NULL};
+static const char *mbfl_encoding_ascii_aliases[] = {"ANSI_X3.4-1968", "iso-ir-6", "ANSI_X3.4-1986", "ISO_646.irv:1991", "US-ASCII", "ISO646-US", "us", "IBM367", "IBM-367", "cp367", "csASCII", NULL};

const mbfl_encoding mbfl_encoding_ascii = {
mbfl_no_encoding_ascii,
Expand Down
6 changes: 3 additions & 3 deletions ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ mbfl_filt_conv_wchar_jis_ms(int c, mbfl_convert_filter *filter)
s = 0x224c;
}
}
-if (s <= 0 || s >= 0x8080 && s < 0x10000) {
+if (s <= 0 || (s >= 0x8080 && s < 0x10000)) {
int i;
s = -1;

Expand Down Expand Up @@ -693,7 +693,7 @@ mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter)
s = 0x224c;
}
}
-if (s <= 0 || s >= 0x8080 && s < 0x10000) {
+if (s <= 0 || (s >= 0x8080 && s < 0x10000)) {
int i;
s = -1;

Expand Down Expand Up @@ -841,7 +841,7 @@ mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter)
s = 0x224c;
}
}
-if (s <= 0 || s >= 0x8080 && s < 0x10000) {
+if (s <= 0 || (s >= 0x8080 && s < 0x10000)) {
int i;
s = -1;

Expand Down
2 changes: 1 addition & 1 deletion ext/mbstring/libmbfl/filters/mbfilter_cp850.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

static int mbfl_filt_ident_cp850(int c, mbfl_identify_filter *filter);

-static const char *mbfl_encoding_cp850_aliases[] = {"CP850", "CP-850", "IBM-850", NULL};
+static const char *mbfl_encoding_cp850_aliases[] = {"CP850", "CP-850", "IBM850", "IBM-850", NULL};

const mbfl_encoding mbfl_encoding_cp850 = {
mbfl_no_encoding_cp850,
Expand Down
2 changes: 1 addition & 1 deletion ext/mbstring/libmbfl/filters/mbfilter_cp866.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

static int mbfl_filt_ident_cp866(int c, mbfl_identify_filter *filter);

-static const char *mbfl_encoding_cp866_aliases[] = {"CP866", "CP-866", "IBM-866", NULL};
+static const char *mbfl_encoding_cp866_aliases[] = {"CP866", "CP-866", "IBM866", "IBM-866", NULL};

const mbfl_encoding mbfl_encoding_cp866 = {
mbfl_no_encoding_cp866,
Expand Down
4 changes: 2 additions & 2 deletions ext/mbstring/libmbfl/mbfl/mbfl_memory_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ mbfl_memory_device_strcat(mbfl_memory_device *device, const char *psrc)
const unsigned char *p;

len = 0;
-p = psrc;
+p = (const unsigned char*)psrc;
while (*p) {
p++;
len++;
Expand All @@ -235,7 +235,7 @@ mbfl_memory_device_strcat(mbfl_memory_device *device, const char *psrc)
device->buffer = tmp;
}

-p = psrc;
+p = (const unsigned char*)psrc;
w = &device->buffer[device->pos];
device->pos += len;
while (len > 0) {
Expand Down
2 changes: 1 addition & 1 deletion ext/mbstring/mb_gpc.c
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_i
if (info->report_errors) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
}
-from_encoding = mbfl_no_encoding_pass;
+from_encoding = &mbfl_encoding_pass;
}
}

Expand Down
11 changes: 6 additions & 5 deletions ext/mbstring/mbstring.c
Original file line number Diff line number Diff line change
Expand Up @@ -2958,7 +2958,7 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co
string.no_encoding = from_encoding->no_encoding;
} else {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding");
-from_encoding = mbfl_no_encoding_pass;
+from_encoding = &mbfl_encoding_pass;
to_encoding = from_encoding;
string.no_encoding = from_encoding->no_encoding;
}
Expand Down Expand Up @@ -3496,7 +3496,7 @@ PHP_FUNCTION(mb_convert_variables)
break;
}
if (elistsz <= 0) {
-from_encoding = mbfl_no_encoding_pass;
+from_encoding = &mbfl_encoding_pass;
} else if (elistsz == 1) {
from_encoding = *elist;
} else {
Expand Down Expand Up @@ -3565,15 +3565,15 @@ PHP_FUNCTION(mb_convert_variables)

if (!from_encoding) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
-from_encoding = mbfl_no_encoding_pass;
+from_encoding = &mbfl_encoding_pass;
}
}
if (elist != NULL) {
efree((void *)elist);
}
/* create converter */
convd = NULL;
-if (from_encoding != mbfl_no_encoding_pass) {
+if (from_encoding != &mbfl_encoding_pass) {
convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, 0);
if (convd == NULL) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
Expand Down Expand Up @@ -4418,6 +4418,7 @@ PHP_FUNCTION(mb_get_info)
array_init(return_value);
for (i = 0; i < n; i++) {
add_next_index_string(return_value, (*entry)->name, 1);
+entry++;
}
}
} else if (!strcasecmp("substitute_character", typ)) {
Expand Down Expand Up @@ -4693,7 +4694,7 @@ static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *lis

static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC) /* {{{ */
{
-MBSTRG(http_input_identify) = encoding;
+MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
}
/* }}} */

Expand Down
18 changes: 10 additions & 8 deletions ext/mbstring/tests/mb_encoding_aliases.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -13,26 +13,28 @@ var_dump(mb_encoding_aliases("8bit"));
?>
--EXPECTF--
Warning: mb_encoding_aliases() expects exactly 1 parameter, 0 given in %s on line 2
-array(10) {
+array(11) {
[0]=>
string(14) "ANSI_X3.4-1968"
[1]=>
string(14) "ANSI_X3.4-1986"
[2]=>
-string(6) "IBM367"
+string(7) "IBM-367"
[3]=>
-string(9) "ISO646-US"
+string(6) "IBM367"
[4]=>
-string(16) "ISO_646.irv:1991"
+string(9) "ISO646-US"
[5]=>
-string(8) "US-ASCII"
+string(16) "ISO_646.irv:1991"
[6]=>
-string(5) "cp367"
+string(8) "US-ASCII"
[7]=>
-string(7) "csASCII"
+string(5) "cp367"
[8]=>
-string(8) "iso-ir-6"
+string(7) "csASCII"
[9]=>
+string(8) "iso-ir-6"
+[10]=>
string(2) "us"
}
}
array(0) {
Expand Down

0 comments on commit bbc879b

Please sign in to comment.