Permalink
Browse files

Switch default_charset, if not specified, from ISO-8859-1 to UTF-8

I have been wanting to make this change for years, but there is a small
chance of backward-compatibility (BC) issues, so it shouldn't go into a minor release.
  • Loading branch information...
1 parent b7f2658 commit 906dd4eac5d98daa172ddf40c04f684474a05338 @rlerdorf rlerdorf committed Mar 23, 2010
View
1 NEWS
@@ -1,6 +1,7 @@
PHP NEWS
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
?? ??? 201?, PHP 5.3.99
+- default_charset, if not specified, is now UTF-8 instead of ISO-8859-1. (Rasmus)
?? ??? 20??, PHP 5.3.3
- Upgraded bundled PCRE to version 8.01. (Ilia)
View
@@ -711,17 +711,17 @@ inline static unsigned int get_next_char(enum entity_charset charset,
/* {{{ entity_charset determine_charset
* returns the charset identifier based on current locale or a hint.
- * defaults to iso-8859-1 */
+ * defaults to UTF-8 */
static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC)
{
int i;
- enum entity_charset charset = cs_8859_1;
+ enum entity_charset charset = cs_utf_8;
int len = 0;
zval *uf_result = NULL;
- /* Guarantee default behaviour for backwards compatibility */
+ /* Default is now UTF-8 */
if (charset_hint == NULL)
- return cs_8859_1;
+ return cs_utf_8;
if ((len = strlen(charset_hint)) != 0) {
goto det_charset;
@@ -855,7 +855,7 @@ static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC)
}
}
if (!found) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "charset `%s' not supported, assuming iso-8859-1",
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "charset `%s' not supported, assuming utf-8",
charset_hint);
}
}
@@ -22,25 +22,25 @@ var_dump(htmlspecialchars(b"<>", ENT_COMPAT, str_repeat('a', 100)));
?>
--EXPECTF--
-Warning: htmlspecialchars(): charset `1' not supported, assuming iso-8859-1 in %s on line %d
+Warning: htmlspecialchars(): charset `1' not supported, assuming utf-8 in %s on line %d
string(35) "&lt;a href='test'&gt;Test&lt;/a&gt;"
-Warning: htmlspecialchars(): charset `12' not supported, assuming iso-8859-1 in %s on line %d
+Warning: htmlspecialchars(): charset `12' not supported, assuming utf-8 in %s on line %d
string(35) "&lt;a href='test'&gt;Test&lt;/a&gt;"
-Warning: htmlspecialchars(): charset `125' not supported, assuming iso-8859-1 in %s on line %d
+Warning: htmlspecialchars(): charset `125' not supported, assuming utf-8 in %s on line %d
string(35) "&lt;a href='test'&gt;Test&lt;/a&gt;"
string(35) "&lt;a href='test'&gt;Test&lt;/a&gt;"
-Warning: htmlspecialchars(): charset `12526' not supported, assuming iso-8859-1 in %s on line %d
+Warning: htmlspecialchars(): charset `12526' not supported, assuming utf-8 in %s on line %d
string(35) "&lt;a href='test'&gt;Test&lt;/a&gt;"
string(8) "&lt;&gt;"
-Warning: htmlspecialchars(): charset `8666' not supported, assuming iso-8859-1 in %s on line %d
+Warning: htmlspecialchars(): charset `8666' not supported, assuming utf-8 in %s on line %d
string(8) "&lt;&gt;"
string(8) "&lt;&gt;"
string(8) "&lt;&gt;"
string(8) "&lt;&gt;"
-Warning: htmlspecialchars(): charset `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' not supported, assuming iso-8859-1 in %s on line %d
+Warning: htmlspecialchars(): charset `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' not supported, assuming utf-8 in %s on line %d
string(8) "&lt;&gt;"
@@ -53,33 +53,33 @@ array(4) {
string(5) "&amp;"
}
-- with table = HTML_ENTITIES --
-array(100) {
+array(171) {
[" "]=>
- string(6) "&nbsp;"
+ string(4) "&Pi;"
["¡"]=>
- string(7) "&iexcl;"
+ string(5) "&Rho;"
["¢"]=>
string(6) "&cent;"
["£"]=>
- string(7) "&pound;"
+ string(7) "&Sigma;"
["¤"]=>
- string(8) "&curren;"
+ string(5) "&Tau;"
["¥"]=>
- string(5) "&yen;"
+ string(6) "&perp;"
["¦"]=>
- string(8) "&brvbar;"
+ string(5) "&Phi;"
["§"]=>
- string(6) "&sect;"
+ string(5) "&Chi;"
["¨"]=>
- string(5) "&uml;"
+ string(5) "&Psi;"
["©"]=>
- string(6) "&copy;"
+ string(7) "&Omega;"
["ª"]=>
string(6) "&ordf;"
["«"]=>
string(7) "&laquo;"
["¬"]=>
- string(5) "&not;"
+ string(6) "&euro;"
["­"]=>
string(5) "&shy;"
["®"]=>
@@ -89,57 +89,57 @@ array(100) {
["°"]=>
string(5) "&deg;"
["±"]=>
- string(8) "&plusmn;"
+ string(7) "&alpha;"
["²"]=>
- string(6) "&sup2;"
+ string(6) "&beta;"
["³"]=>
- string(6) "&sup3;"
+ string(7) "&gamma;"
["´"]=>
- string(7) "&acute;"
+ string(7) "&delta;"
["µ"]=>
- string(7) "&micro;"
+ string(7) "&crarr;"
["¶"]=>
- string(6) "&para;"
+ string(6) "&zeta;"
["·"]=>
- string(8) "&middot;"
+ string(5) "&eta;"
["¸"]=>
- string(7) "&cedil;"
+ string(7) "&theta;"
["¹"]=>
- string(6) "&sup1;"
+ string(6) "&iota;"
["º"]=>
- string(6) "&ordm;"
+ string(7) "&kappa;"
["»"]=>
- string(7) "&raquo;"
+ string(8) "&lambda;"
["¼"]=>
- string(8) "&frac14;"
+ string(4) "&mu;"
["½"]=>
- string(8) "&frac12;"
+ string(4) "&nu;"
["¾"]=>
- string(8) "&frac34;"
+ string(4) "&xi;"
["¿"]=>
- string(8) "&iquest;"
+ string(9) "&omicron;"
["À"]=>
- string(8) "&Agrave;"
+ string(4) "&pi;"
["Á"]=>
- string(8) "&Aacute;"
+ string(5) "&rho;"
["Â"]=>
- string(7) "&Acirc;"
+ string(8) "&sigmaf;"
["Ã"]=>
- string(8) "&Atilde;"
+ string(7) "&sigma;"
["Ä"]=>
- string(6) "&Auml;"
+ string(5) "&tau;"
["Å"]=>
- string(7) "&Aring;"
+ string(6) "&sdot;"
["Æ"]=>
- string(7) "&AElig;"
+ string(5) "&phi;"
["Ç"]=>
- string(8) "&Ccedil;"
+ string(5) "&chi;"
["È"]=>
- string(8) "&Egrave;"
+ string(5) "&psi;"
["É"]=>
- string(8) "&Eacute;"
+ string(7) "&omega;"
["Ê"]=>
- string(7) "&Ecirc;"
+ string(5) "&loz;"
["Ë"]=>
string(6) "&Euml;"
["Ì"]=>
@@ -151,19 +151,19 @@ array(100) {
["Ï"]=>
string(6) "&Iuml;"
["Ð"]=>
- string(5) "&ETH;"
+ string(6) "&lArr;"
["Ñ"]=>
- string(8) "&Ntilde;"
+ string(6) "&uArr;"
["Ò"]=>
- string(8) "&Ograve;"
+ string(6) "&rArr;"
["Ó"]=>
- string(8) "&Oacute;"
+ string(6) "&dArr;"
["Ô"]=>
- string(7) "&Ocirc;"
+ string(6) "&hArr;"
["Õ"]=>
string(8) "&Otilde;"
["Ö"]=>
- string(6) "&Ouml;"
+ string(5) "&piv;"
["×"]=>
string(7) "&times;"
["Ø"]=>
@@ -175,7 +175,7 @@ array(100) {
["Û"]=>
string(7) "&Ucirc;"
["Ü"]=>
- string(6) "&Uuml;"
+ string(7) "&tilde;"
["Ý"]=>
string(8) "&Yacute;"
["Þ"]=>
@@ -246,14 +246,157 @@ array(100) {
string(7) "&thorn;"
["ÿ"]=>
string(6) "&yuml;"
+ ["R"]=>
+ string(7) "&OElig;"
+ ["S"]=>
+ string(7) "&oelig;"
+ ["`"]=>
+ string(8) "&spades;"
+ ["a"]=>
+ string(7) "&equiv;"
+ ["x"]=>
+ string(6) "&Yuml;"
+ ["’"]=>
+ string(6) "&rarr;"
+ ["‘"]=>
+ string(6) "&uarr;"
+ ["“"]=>
+ string(6) "&darr;"
+ ["”"]=>
+ string(6) "&harr;"
+ ["•"]=>
+ string(7) "&oplus;"
+ ["–"]=>
+ string(6) "&Zeta;"
+ ["—"]=>
+ string(8) "&otimes;"
+ ["˜"]=>
+ string(7) "&Theta;"
+ ["™"]=>
+ string(6) "&Iota;"
+ ["š"]=>
+ string(7) "&Kappa;"
+ ["›"]=>
+ string(8) "&Lambda;"
+ ["œ"]=>
+ string(4) "&Mu;"
+ [""]=>
+ string(4) "&Nu;"
+ ["ž"]=>
+ string(4) "&Xi;"
+ ["Ÿ"]=>
+ string(9) "&Omicron;"
+ [""]=>
+ string(6) "&part;"
+ [""]=>
+ string(7) "&exist;"
+ [" "]=>
+ string(7) "&rceil;"
+ [" "]=>
+ string(6) "&zwnj;"
+ [""]=>
+ string(5) "&zwj;"
+ [""]=>
+ string(5) "&lrm;"
+ [""]=>
+ string(6) "&prod;"
+ [""]=>
+ string(7) "&ndash;"
+ [""]=>
+ string(7) "&mdash;"
+ [""]=>
+ string(8) "&weierp;"
+ [""]=>
+ string(7) "&rsquo;"
+ [""]=>
+ string(7) "&radic;"
+ [""]=>
+ string(6) "&real;"
+ [""]=>
+ string(6) "&prop;"
+ [""]=>
+ string(7) "&infin;"
+ [" "]=>
+ string(5) "&ang;"
+ ["!"]=>
+ string(8) "&Dagger;"
["""]=>
string(6) "&quot;"
- ["<"]=>
- string(4) "&lt;"
- [">"]=>
- string(4) "&gt;"
["&"]=>
string(5) "&amp;"
+ [0]=>
+ string(8) "&permil;"
+ [2]=>
+ string(7) "&prime;"
+ [3]=>
+ string(7) "&Prime;"
+ [9]=>
+ string(8) "&lsaquo;"
+ [":"]=>
+ string(8) "&rsaquo;"
+ [">"]=>
+ string(4) "&gt;"
+ ["D"]=>
+ string(7) "&frasl;"
+ [""]=>
+ string(5) "&sum;"
+ [5]=>
+ string(9) "&alefsym;"
+ [""]=>
+ string(6) "&larr;"
+ [""]=>
+ string(8) "&forall;"
+ [""]=>
+ string(7) "&empty;"
+ [""]=>
+ string(7) "&nabla;"
+ [""]=>
+ string(7) "&lceil;"
+ [" "]=>
+ string(8) "&rfloor;"
+ [""]=>
+ string(7) "&minus;"
+ [""]=>
+ string(8) "&lowast;"
+ ["'"]=>
+ string(5) "&and;"
+ ["("]=>
+ string(4) "&or;"
+ [")"]=>
+ string(6) "&lang;"
+ ["*"]=>
+ string(6) "&rang;"
+ ["+"]=>
+ string(5) "&int;"
+ [4]=>
+ string(8) "&there4;"
+ ["<"]=>
+ string(4) "&lt;"
+ ["E"]=>
+ string(6) "&cong;"
+ ["H"]=>
+ string(7) "&asymp;"
+ ["d"]=>
+ string(4) "&le;"
+ ["e"]=>
+ string(8) "&hearts;"
+ ["‚"]=>
+ string(5) "&sub;"
+ ["ƒ"]=>
+ string(5) "&sup;"
+ ["„"]=>
+ string(6) "&nsub;"
+ ["†"]=>
+ string(6) "&sube;"
+ ["‡"]=>
+ string(6) "&supe;"
+ ["
+"]=>
+ string(8) "&lfloor;"
+ ["c"]=>
+ string(7) "&clubs;"
+ ["f"]=>
+ string(7) "&diams;"
}
-- with table = HTML_SPECIALCHARS --
array(4) {
Oops, something went wrong.

0 comments on commit 906dd4e

Please sign in to comment.